# Basic Logic Mill API usage

In [1]:
import json
import os

import pandas as pd
import requests
# Prefer a key supplied through the LOGIC_MILL_API_KEY environment variable so
# the secret does not have to be saved inside the notebook. If the variable is
# not set, fall back to the placeholder, which must then be replaced by hand.
API_KEY = os.environ.get('LOGIC_MILL_API_KEY', '<YOUR API KEY>')
# API documentation: https://api.logic-mill.net/api/v1/graph

# Get Information about Logic Mill API

## Version

In [2]:
# Minimal GraphQL query: ask the server only for its API version.
query = """{
  Version
}"""

url = 'https://api.logic-mill.net/api/v1/graphql/'
r = requests.post(url=url, json=dict(query=query))

# Should be 200; anything else means an error has occurred (e.g. server not available).
print(r.status_code)
# Response body in text format.
print(r.text)

200
{
    "data": {
        "Version": "0.1"
    }
}


In [3]:
# Make sure the API endpoint is of a specific version: this reads the
# `Version` field from the response `r` of the previous cell and stops with an
# AssertionError if it is anything other than '0.1'.
assert r.json()["data"]["Version"] == '0.1'

## Get the names of document sets (indices)
https://api.logic-mill.net/api/v1/graph/?query=%7B%0A%20%20IndicesNames%20%7B%0A%20%20%20%20amountOfDocuments%0A%20%20%20%20name%0A%20%20%7D%0A%7D

In [4]:
# Ask for every document set (index): its name and its number of documents.
query = """{
  IndicesNames {
    amountOfDocuments
    name
  }
}"""

url = 'https://api.logic-mill.net/api/v1/graphql/'
r = requests.post(url=url, json=dict(query=query))

# Response body in text format.
print(r.text)


{
    "data": {
        "IndicesNames": [
            {
                "amountOfDocuments": 3831564,
                "name": "wipo_cos"
            },
            {
                "amountOfDocuments": 205011643,
                "name": "semanticscholar_cos"
            },
            {
                "amountOfDocuments": 7380143,
                "name": "epo_cos"
            },
            {
                "amountOfDocuments": 13352873,
                "name": "uspto_cos"
            }
        ]
    }
}


In [5]:
# Collect the names of the document sets in a list (we'll use them later).
# A list comprehension reads the `name` of each entry directly, so no
# intermediate DataFrame is needed and an empty `IndicesNames` list simply
# yields an empty list.
indices = [entry["name"] for entry in r.json()['data']['IndicesNames']]
indices

['wipo_cos', 'semanticscholar_cos', 'epo_cos', 'uspto_cos']

## Keyword search

In [6]:
## TODO

## Getting basic information from a document by using the document ID

By changing the GraphQL query you can limit or extend which fields the server will retrieve for you.
The full list can be found online: https://api.logic-mill.net/api/v1/graph

In [7]:
# simple version: the index name and the document id are written directly
# into the query text

# Request headers; the API key is sent as a bearer token.
headers = {}
headers['content-type'] = 'application/json'
headers['Authorization'] = 'Bearer ' + API_KEY

query="""{ 
	Document(index:"semanticscholar_cos", id:"123fa2d18fab2ae1752451405f0eac5e273f2695") {
	  id
    documentParts {
      title
    }
    metadata {
      country
      datePubl
      docNumber
      doi
      journalName
      magId
      status
      venue
      year
    }
	}
}"""

url = 'https://api.logic-mill.net/api/v1/graphql/'

r = requests.post(url, json={'query': query}, headers=headers)

# The response body is printed in every case; a status other than 200
# additionally reports which query failed on which endpoint.
print(r.text)
if r.status_code != 200:
    print(f"Error executing\n{query}\non {url}")


{
    "data": {
        "Document": {
            "documentParts": {
                "title": "Airbag system for occupants of a vehicle and vehicle provided with such an air bag system."
            },
            "id": "123fa2d18fab2ae1752451405f0eac5e273f2695",
            "metadata": {
                "country": "",
                "datePubl": null,
                "docNumber": "",
                "doi": "",
                "journalName": "",
                "magId": "2940882518",
                "status": "",
                "venue": "1999",
                "year": 1999
            }
        }
    }
}


In [8]:
# Parameterized version
#
# Turning the fixed query into a parameterized one takes two steps:
# 1. start with `query` and declare the variables in parentheses
#    (copy the types from the documentation)
# 2. use the variables inside the query (eg. `$index`)
query="""query ($id:String!, $index:String!) { 
	Document(index:$index, id:$id) {
	  id
    documentParts {
      title
    }
    metadata {
      country
      datePubl
      docNumber
      doi
      journalName
      magId
      status
      venue
      year
    }
	}
}"""

# Values for the variables declared in the query header.
variables = dict(
    id="123fa2d18fab2ae1752451405f0eac5e273f2695",
    index="semanticscholar_cos",
)

# `url` and `headers` are reused from the earlier cells.
r = requests.post(url, json={'query': query, 'variables': variables}, headers=headers)
r.json()

{'data': {'Document': {'documentParts': {'title': 'Airbag system for occupants of a vehicle and vehicle provided with such an air bag system.'},
   'id': '123fa2d18fab2ae1752451405f0eac5e273f2695',
   'metadata': {'country': '',
    'datePubl': None,
    'docNumber': '',
    'doi': '',
    'journalName': '',
    'magId': '2940882518',
    'status': '',
    'venue': '1999',
    'year': 1999}}}}

## Info from the document

To select specific items you have to use the keys in the dictionary


In [9]:
# Walk down the nested dictionaries key by key to reach the title.
data = r.json()['data']
title = data['Document']['documentParts']['title']
print("Title:\n{}\n".format(title))




Title:
Airbag system for occupants of a vehicle and vehicle provided with such an air bag system.



## Get the numerical representation/embedding

The query is the same as above, only different fields are requested

In [10]:
# Same parameter setup as above; only the requested fields differ.
query="""query ($id:String!, $index:String!) { 
	Document(index:$index, id:$id) {
    id
    vector
	}
}"""

variables = dict(
    index="semanticscholar_cos",
    id="123fa2d18fab2ae1752451405f0eac5e273f2695",
)

url = 'https://api.logic-mill.net/api/v1/graphql/'

r = requests.post(url, json={'query': query, 'variables': variables}, headers=headers)

# Report a failed request first; otherwise show the start of the vector.
if r.status_code != 200:
    print(f"Error executing\n{query}\non {url}")
else:
    embedding = r.json()['data']['Document']['vector']
    print("First 10 numbers of the numerical representation")
    print(embedding[:10])

First 10 numbers of the numerical representation
[1.0929432, -1.4752408, 1.9569354, -1.1937265, -2.4067235, -0.26992768, 1.2921627, -1.0821707, 0.68318826, 0.26330143]


## Search _n_ most similar documents compared to a document in the database

In [11]:
# Number of similar documents we want to see.
n = 5

query="""query ($id:String!, $index:String!, $amount:Int) {
  SimilaritySearch(index:$index, id:$id, amount:$amount) {
 	  id
    score
    document {
      documentParts {
        title  
      }
    }
  }
}"""

# Ask for one result more than needed: the first one is always the focal document.
variables = dict(
    index="semanticscholar_cos",
    id="123fa2d18fab2ae1752451405f0eac5e273f2695",
    amount=n + 1,
)

url = 'https://api.logic-mill.net/api/v1/graphql/'
r = requests.post(url, json={'query': query, 'variables': variables}, headers=headers)

# Report a failed request first; otherwise print one result per line.
if r.status_code != 200:
    print(f"Error executing\n{query}\non {url}")
else:
    results = r.json()['data']['SimilaritySearch']
    for hit in results:
        print(hit)

{'document': {'documentParts': {'title': 'Airbag system for occupants of a vehicle and vehicle provided with such an air bag system.'}}, 'id': '123fa2d18fab2ae1752451405f0eac5e273f2695', 'score': 1}
{'document': {'documentParts': {'title': 'WIRING HARNESS FOR AN AIRBAG MODULE OF A VEHICLE OCCUPANT SAFETY SYSTEM, AIRBAG MODULE, VEHICLE CABLING AND VEHICLE OCCUPANT SAFETY SYSTEM HAVING A WIRING HARNESS OF THIS KIND AND A METHOD OF PRODUCTION'}}, 'id': 'e88a357b4d2885e43038461cbe9d97c6dbed3dc7', 'score': 0.92575514}
{'document': {'documentParts': {'title': 'Center curtain airbag apparatus for a vehicle'}}, 'id': 'ddcb07dbdaecf374cf11af530c1829de8c6775a4', 'score': 0.916877}
{'document': {'documentParts': {'title': 'Airbag Bestückter seat for a vehicle occupant'}}, 'id': '4cfc283192a7b76d155349ab0f7c650b12f0bc20', 'score': 0.91184974}
{'document': {'documentParts': {'title': 'An arrangement for automatic support a vehicle occupant'}}, 'id': 'c1468aa0fbddcc9ae33b879530b540db9edc11a2', 'scor

### Create a Dataframe of the result

Since we get a nested result from the API, we need to gather the results from the different hierarchical layers


Level 1: 
```
id
score
document
```

document is a nested value with 

```
documentParts
    abstract (available for patents not for Semantic Scholar)
    title
    ...
```

In [12]:
# Tabulate the results and leave out the first row.
pd.DataFrame(results).iloc[1:]

Unnamed: 0,document,id,score
1,{'documentParts': {'title': 'WIRING HARNESS FO...,e88a357b4d2885e43038461cbe9d97c6dbed3dc7,0.925755
2,{'documentParts': {'title': 'Center curtain ai...,ddcb07dbdaecf374cf11af530c1829de8c6775a4,0.916877
3,{'documentParts': {'title': 'Airbag Bestückter...,4cfc283192a7b76d155349ab0f7c650b12f0bc20,0.91185
4,{'documentParts': {'title': 'An arrangement fo...,c1468aa0fbddcc9ae33b879530b540db9edc11a2,0.909902
5,{'documentParts': {'title': 'Knee air bag arra...,c0899a20cf1aafbd2fe570ee543dfdb61dceed33,0.908046


**Observation 1:**

`id` and `score` are at the top of the hierarchy and can be used directly. Within `document`, `documentParts` contains the other relevant information. 

**Observation 2:**

Because of the nested structure, a loop or an apply is needed. 

In [13]:
# Flatten the nested `document` column into one column per document part.
similar_docs = pd.DataFrame(results)
# The first row tells us which parts (e.g. title) were returned.
keys = similar_docs["document"][0]["documentParts"].keys()

for k in keys:
    similar_docs[k] = [doc["documentParts"][k] for doc in similar_docs["document"]]

# The nested column is no longer needed once its parts are columns.
similar_docs.drop(columns=["document"], inplace=True)

similar_docs

Unnamed: 0,id,score,title
0,123fa2d18fab2ae1752451405f0eac5e273f2695,1.0,Airbag system for occupants of a vehicle and v...
1,e88a357b4d2885e43038461cbe9d97c6dbed3dc7,0.925755,WIRING HARNESS FOR AN AIRBAG MODULE OF A VEHIC...
2,ddcb07dbdaecf374cf11af530c1829de8c6775a4,0.916877,Center curtain airbag apparatus for a vehicle
3,4cfc283192a7b76d155349ab0f7c650b12f0bc20,0.91185,Airbag Bestückter seat for a vehicle occupant
4,c1468aa0fbddcc9ae33b879530b540db9edc11a2,0.909902,An arrangement for automatic support a vehicle...
5,c0899a20cf1aafbd2fe570ee543dfdb61dceed33,0.908046,Knee air bag arrangement for a vehicle


## Create a new embedding for a user document

We use the `embedDocument` endpoint. It expects an `LmDocumentMutationObject`. This object looks like this:

```
id: String!
parts: [LmDocumentPartsMutationObject]
```

`LmDocumentPartsMutationObject` is a key/value pair for every field (title and abstract)

The `id` is for identification purposes only and can be any string (for example `"ML001"`).

Often the data is already available in some data structure (CSV, Excel file, dictionary). In the following example we will use a dictionary.

In [14]:
# our data
# Three example records; each one is a flat dictionary with the keys
# `id`, `title` and `abstract`.

biblios = [{
    "id": "ML001",
    "title" : "Towards A Rigorous Science of Interpretable Machine Learning",
    "abstract" : "As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning."
} , {
    "id": "ML002",
    "title": "Machine Learning Interpretability: A Science rather than a tool",
    "abstract": """The term "interpretability" is oftenly used by machine learning researchers each with their own intuitive understanding of it. There is no universal well agreed upon definition of interpretability in machine learning. As any type of science discipline is mainly driven by the set of formulated questions rather than by different tools in that discipline, e.g. astrophysics is the discipline that learns the composition of stars, not as the discipline that use the spectroscopes. Similarly, we propose that machine learning interpretability should be a discipline that answers specific questions related to interpretability. These questions can be of statistical, causal and counterfactual nature. Therefore, there is a need to look into the interpretability problem of machine learning in the context of questions that need to be addressed rather than different tools. We discuss about a hypothetical interpretability framework driven by a question based scientific approach rather than some specific machine learning model. Using a question based notion of interpretability, we can step towards understanding the science of machine learning rather than its engineering. This notion will also help us understanding any specific problem more in depth rather than relying solely on machine learning methods"""
}, {
    "id": "ML003",
    "title": "Opening the black box of neural networks: methods for interpreting neural network models in clinical applications",
    "abstract": """Artificial neural networks (ANNs) are powerful tools for data analysis and are particularly suitable for modeling relationships between variables for best prediction of an outcome. While these models can be used to answer many important research questions, their utility has been critically limited because the interpretation of the "black box" model is difficult. Clinical investigators usually employ ANN models to predict the clinical outcomes or to make a diagnosis; the model however is difficult to interpret for clinicians. To address this important shortcoming of neural network modeling methods, we describe several methods to help subject-matter audiences (e.g., clinicians, medical policy makers) understand neural network models. Garson's algorithm describes the relative magnitude of the importance of a descriptor (predictor) in its connection with outcome variables by dissecting the model weights. The Lek's profile method explores the relationship of the outcome variable and a predictor of interest, while holding other predictors at constant values (e.g., minimum, 20th quartile, maximum). While Lek's profile was developed specifically for neural networks, partial dependence plot is a more generic version that visualize the relationship between an outcome and one or two predictors. Finally, the local interpretable model-agnostic explanations (LIME) method can show the predictions of any classification or regression, by approximating it locally with an interpretable model. R code for the implementations of these methods is shown by using example data fitted with a standard, feed-forward neural network model. We offer codes and step-by-step description on how to use these tools to facilitate better understanding of ANN"""
}]

# Show the records as a table (one row per record).
pd.DataFrame(biblios)

Unnamed: 0,id,title,abstract
0,ML001,Towards A Rigorous Science of Interpretable Ma...,"As machine learning systems become ubiquitous,..."
1,ML002,Machine Learning Interpretability: A Science r...,"The term ""interpretability"" is oftenly used by..."
2,ML003,Opening the black box of neural networks: meth...,Artificial neural networks (ANNs) are powerful...


We wish to encode the first item

In [15]:
# take the first record (the dictionary with id "ML001")
biblio = biblios[0]

In [16]:
# prepare the data

# Build the payload in one step: the id is taken over unchanged and every
# other field of the record becomes a {"key": ..., "value": ...} pair.
data = {
    "id": biblio['id'],
    "parts": [
        {"key": field, "value": text}
        for field, text in biblio.items()
        if field != 'id'
    ],
}
data


{'id': 'ML001',
 'parts': [{'key': 'title',
   'value': 'Towards A Rigorous Science of Interpretable Machine Learning'},
  {'key': 'abstract',
   'value': 'As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.'}]}

In [17]:
# Wrap the payload under the name of the query variable (`$data`).
variables = dict(data=data)
variables

{'data': {'id': 'ML001',
  'parts': [{'key': 'title',
    'value': 'Towards A Rigorous Science of Interpretable Machine Learning'},
   {'key': 'abstract',
    'value': 'As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.'}]}}

In [18]:
# Prepare the mutation: it declares one variable, `$data`, of type
# `LmDocumentMutationObject` and passes it on to `embedDocument`.
query = """mutation ($data:LmDocumentMutationObject) {
  embedDocument(data: $data)
}"""

In [19]:
url = 'https://api.logic-mill.net/api/v1/graphql/'
# Send the same headers (including the Authorization token) as the other
# document requests in this notebook.
# NOTE(review): presumably this endpoint requires the API key - confirm against the API documentation.
r = requests.post(url, headers=headers, json={'query': query, 'variables': variables})
if r.status_code == 200:
    vector = r.json()["data"]["embedDocument"]
    # show only the first 10 numbers of the embedding
    print(vector[:10])
else:
    # Do not fail silently: show the server response and what was attempted.
    print(r.text)
    print(f"Error executing\n{query}\non {url}")



## Create embedding for multiple documents

We use a similar setup as with one document but in a loop with multiple API calls


In [20]:
query = """mutation ($data:LmDocumentMutationObject) {
  embedDocument(data: $data)
}"""
url = 'https://api.logic-mill.net/api/v1/graphql/'

# Collect the ids and vectors of all successful calls and build the DataFrame
# once at the end, instead of growing it with `pd.concat` on every iteration.
embedded_ids = []
embedded_vectors = []
for biblio in biblios:
    data = {"id": biblio['id']}
    data["parts"] = [{"key": k, "value": v} for k, v in biblio.items() if k != 'id']
    variables = {"data": data}

    # Send the same headers (including the Authorization token) as the other
    # document requests in this notebook.
    # NOTE(review): presumably this endpoint requires the API key - confirm against the API documentation.
    r = requests.post(url, headers=headers, json={'query': query, 'variables': variables})
    if r.status_code == 200:
        # store the results
        embedded_ids.append(biblio['id'])
        embedded_vectors.append(r.json()["data"]["embedDocument"])
    else:
        # Do not skip failed documents silently.
        print(r.text)
        print(f"Error embedding document {biblio['id']} on {url}")

# One row per document (indexed by its id), one column per vector component.
df = pd.DataFrame(embedded_vectors, index=embedded_ids)
df
    

## Find similarity scores between user supplied documents

In [21]:
# prepare the data field
data = []

for biblio in biblios:
    # Put the record in the expected structure: the id is kept, every other
    # field becomes a {"key": ..., "value": ...} pair.
    record = {
        "id": biblio['id'],
        "parts": [
            {"key": field, "value": text}
            for field, text in biblio.items()
            if field != 'id'
        ],
    }
    data.append(record)



In [22]:
# Display the list of prepared records built in the previous cell.
data

[{'id': 'ML001',
  'parts': [{'key': 'title',
    'value': 'Towards A Rigorous Science of Interpretable Machine Learning'},
   {'key': 'abstract',
    'value': 'As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.'}]},
 {'id': 'ML002',
  'parts': [{'key': 'title',
    'value': 'Machine Learning Interpretability: A Science rather than a tool'},
   

In [23]:
# Embed the user-supplied documents and calculate their pairwise similarity.
# The similarity metric is fixed in the query text (`similarityMetric: cosine`).
query = """
query encodeDocumentAndSimilarityCalculation($data: [EncodeObject]) {
  encodeDocumentAndSimilarityCalculation(data: $data, similarityMetric: cosine) {
    similarities
    xs {
      id
    }
    ys {
      id
    }
  }
}
"""

# Only `$data` is declared in the query header, so only `data` is sent.
# (A `metric` entry would not be referenced by the query at all.)
variables = {"data": data}

url = 'https://api.logic-mill.net/api/v1/graphql/'
r = requests.post(url, headers=headers, json={'query': query, 'variables': variables})
print(r.text)

{
    "data": {
        "encodeDocumentAndSimilarityCalculation": {
            "similarities": [
                [
                    1,
                    0.8112392,
                    0.55713564
                ],
                [
                    0.8112392,
                    1,
                    0.54929286
                ],
                [
                    0.55713564,
                    0.54929286,
                    1
                ]
            ],
            "xs": [
                {
                    "id": "ML001"
                },
                {
                    "id": "ML002"
                },
                {
                    "id": "ML003"
                }
            ],
            "ys": [
                {
                    "id": "ML001"
                },
                {
                    "id": "ML002"
                },
                {
                    "id": "ML003"
                }
            ]
        }
    }
}


In [24]:
# prettify the similarity data in a dataframe

# parse the response once and reuse the relevant part
similarity_result = r.json()['data']['encodeDocumentAndSimilarityCalculation']

# create a df of just the scores
df = pd.DataFrame(similarity_result['similarities'])

# get column and row names
cols = pd.DataFrame(similarity_result['xs'])
rows = pd.DataFrame(similarity_result['ys'])

# Set the column names from `xs` and the row labels from `ys`; the row labels
# must come from `rows`, otherwise they are wrong whenever `xs` and `ys` differ.
# NOTE(review): assumes `xs` labels the columns and `ys` the rows of
# `similarities` - confirm against the API documentation.
df.columns = cols["id"].to_list()
df.index = rows["id"].to_list()

df

Unnamed: 0,ML001,ML002,ML003
ML001,1.0,0.811239,0.557136
ML002,0.811239,1.0,0.549293
ML003,0.557136,0.549293,1.0


## Find similar documents in the database compared to user uploaded document


In [32]:
# simple version
# The document parts (title and abstract), the number of results (`amount`)
# and the indices to search are all written directly into the query text.

query = """mutation {
  embedDocumentAndSimilaritySearch(
    data: 
    
     [
        {
            key: "title",
            value: "Towards A Rigorous Science of Interpretable Machine Learning"
        },
        {
            key: "abstract",
            value: "As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning."
        }
    ]
      
  	amount: 3
  	indices:["epo_cos"]
  ) {
    document {
      id
      documentParts {
        title
      }
      
    }
    index
    score
  }
}"""

url = 'https://api.logic-mill.net/api/v1/graphql/'
# No variables are sent: everything is contained in the query text.
r = requests.post(url, headers=headers, json={'query': query})
# print(r.status_code)
# Response body in text format.
print(r.text)


{
    "data": {
        "embedDocumentAndSimilaritySearch": [
            {
                "document": {
                    "documentParts": {
                        "title": "SYSTEM AND METHOD FOR GENERATING EXPLAINABLE LATENT FEATURES OF MACHINE LEARNING MODELS"
                    },
                    "id": "EP19175570A1"
                },
                "index": "epo_cos",
                "score": 0.84660786
            },
            {
                "document": {
                    "documentParts": {
                        "title": "AUTOMATED, PROGRESSIVE EXPLANATIONS OF MACHINE LEARNING RESULTS"
                    },
                    "id": "EP20215206A3"
                },
                "index": "epo_cos",
                "score": 0.8455859
            },
            {
                "document": {
                    "documentParts": {
                        "title": "AUTOMATED, PROGRESSIVE EXPLANATIONS OF MACHINE LEARNING RESULTS"
                    },
      

In [33]:
# parameterized version
# The document parts, the indices to search and the number of results are
# declared as variables (`$data`, `$indices`, `$amount`).
# Note that the operation name after `query` (embedDocumentAndSimilaritySearch)
# differs from the field that is actually called (encodeDocumentAndSimilaritySearch).

query = """
query embedDocumentAndSimilaritySearch($data: [EncodeDocumentPart], $indices: [String], $amount: Int) {
  encodeDocumentAndSimilaritySearch(
    data: $data
    indices: $indices
    amount: $amount
  ) {
    document {
      documentParts {
        title
      }
    }
    id
    score
    index
  }
}
"""




In [34]:
# Use the third record and turn every field except the id into a key/value pair.
biblio = biblios[2]
data = [
    {"key": field, "value": text}
    for field, text in biblio.items()
    if field != 'id'
]

# `indices` holds the index names we have received above.
variables = dict(amount=3, data=data, indices=indices)

url = 'https://api.logic-mill.net/api/v1/graphql/'
r = requests.post(url, json={'query': query, 'variables': variables}, headers=headers)
print(r.status_code)


200


In [28]:
# Display the parsed JSON body of the last response.
r.json()

{'data': {'encodeDocumentAndSimilaritySearch': [{'document': {'documentParts': {'title': 'Opening the black box of neural networks: methods for interpreting neural network models in clinical applications.'}},
    'id': 'a4552ac3d7a4510227203b8388c899ff9428a969',
    'index': 'semanticscholar_cos',
    'score': 1.0000001},
   {'document': {'documentParts': {'title': 'Entering the black box of neural networks.'}},
    'id': '7da6505dfae0451f3379be6b534fb6af0575b80a',
    'index': 'semanticscholar_cos',
    'score': 0.90859485},
   {'document': {'documentParts': {'title': 'Combining the Performance Strengths of the Logistic Regression and Neural Network Models: A Medical Outcomes Approach'}},
    'id': 'bb94be56c28f8d16ddaf6abc0f0b021837478717',
    'index': 'semanticscholar_cos',
    'score': 0.9082638},
   {'document': {'documentParts': {'title': 'SYSTEMS AND METHODS TO SUPPORT MEDICAL THERAPY DECISIONS'}},
    'id': 'WO2017027432A1',
    'index': 'wipo_cos',
    'score': 0.87672776},
 

In [35]:
# prettify output
results = r.json()["data"]["encodeDocumentAndSimilaritySearch"]

# Same flattening as for the similarity search above: one column per document part.
similar_docs = pd.DataFrame(results)
keys = similar_docs["document"][0]["documentParts"].keys()

for k in keys:
    similar_docs[k] = [doc["documentParts"][k] for doc in similar_docs["document"]]

# The nested column is no longer needed once its parts are columns.
similar_docs.drop(columns=["document"], inplace=True)

# Order by score (highest first) and leave out the first row.
similar_docs.sort_values("score", ascending=False).iloc[1:]

Unnamed: 0,id,index,score,title
1,7da6505dfae0451f3379be6b534fb6af0575b80a,semanticscholar_cos,0.908595,Entering the black box of neural networks.
2,bb94be56c28f8d16ddaf6abc0f0b021837478717,semanticscholar_cos,0.908264,Combining the Performance Strengths of the Log...
3,WO2017027432A1,wipo_cos,0.876728,SYSTEMS AND METHODS TO SUPPORT MEDICAL THERAPY...
4,20180168516,uspto_cos,0.870999,SYSTEMS AND METHODS TO SUPPORT MEDICAL THERAPY...
5,WO1999045452A2,wipo_cos,0.867212,ARTIFICIAL NEURAL NETWORK FOR PREDICTING RESPI...
6,10133980,uspto_cos,0.866618,Optimizing neural networks for risk assessment
7,08078554,uspto_cos,0.86576,Knowledge-based interpretable predictive model...
8,WO2014152395A1,wipo_cos,0.864901,SELF-EVOLVING PREDICTIVE MODEL
9,EP21158394A1,epo_cos,0.855063,OPTIMIZING NEURAL NETWORKS FOR RISK ASSESSMENT
10,EP21191089A1,epo_cos,0.837178,SYSTEM AND METHOD FOR PREDICTING THE RISK OF A...


# Get embedding vectors for multiple documents already in the database
Provide multiple IDs

In [30]:
# Document ids to look up; the next cell requests them from the
# "semanticscholar_cos" index.
sample_ids = ['7233050d4e325d7ff70693af5e6234c4d3274e02', 'c12615777e76852b0ff2e7495efb19a806a02221']

In [36]:
URL = 'https://api.logic-mill.net/api/v1/graphql/'
# Request the id and the embedding vector of several documents in one call.
QUERY = """
query Document($data: [DatabaseSearchDocument]) {
  Documents(data: $data) {
    id
    vector
  }
}
"""

# One {index, id} entry per requested document.
data = [dict(index="semanticscholar_cos", id=ss_id) for ss_id in sample_ids]
variables = dict(data=data)
# send request
r = requests.post(URL, json={'query': QUERY, 'variables': variables}, headers=headers)
# Show the first 10 numbers of every returned vector.
for doc in r.json()["data"]['Documents']:
    print(doc["vector"][:10])

[-1.784908, -3.624857, -0.9794372, 0.892107, 1.5166028, 0.08386493, 0.606217, -0.9956352, 1.4736364, 0.5631342]
[-2.0602982, -1.2430692, -0.9919605, -0.15281165, 3.601084, 0.6789297, 1.5839859, -2.0787039, 0.9900625, 1.0501337]
