# Basic Logic Mill API usage

In [1]:
import pandas as pd
import requests
from dotenv import dotenv_values
from requests.adapters import HTTPAdapter
from urllib3.util import Retry

# Load environment variables from .env file.
# dotenv_values() is called without arguments, so python-dotenv's default
# .env lookup applies; the values are returned as a mapping rather than
# being exported into os.environ.
conf = dotenv_values()
# The API token is sent as a Bearer token with the requests below.
# Subscript access means a missing "API_KEY" entry fails right here.
API_KEY = conf["API_KEY"]

# API documentation: https://api.logic-mill.net/api/v1/graph

In [2]:
# Establish a requests.Session that retries transient server errors.
s = requests.Session()

# Every call in this notebook is an HTTP POST. By default urllib3's Retry only
# re-sends idempotent verbs when a status_forcelist code comes back, and POST
# is not among them, so POST has to be allowed explicitly or the retry policy
# below would never be applied to our requests.
# NOTE(review): the requests issued here are GraphQL reads/encodings, which
# are assumed to be safe to repeat -- confirm before reusing `s` for writes.
retries = Retry(
    total=5,
    backoff_factor=0.1,
    status_forcelist=[500, 501, 502, 503, 504, 524],
    allowed_methods=Retry.DEFAULT_ALLOWED_METHODS | {"POST"},
)

s.mount('https://', HTTPAdapter(max_retries=retries))

# GraphQL endpoint and the headers (JSON body + Bearer token) sent with it.
URL = 'https://api.logic-mill.net/api/v1/graphql/'
headers = {
    'content-type': 'application/json',
    'Authorization': 'Bearer ' + API_KEY,
}

# Get Information about Logic Mill API

## Get the names of document sets (indices)
https://api.logic-mill.net/api/v1/graph/?query=%7B%0A%20%20IndicesNames%20%7B%0A%20%20%20%20amountOfDocuments%0A%20%20%20%20name%0A%20%20%7D%0A%7D

In [3]:
# Ask the API for the name and size of every available document set (index).
query = """{
  IndicesNames {
    amountOfDocuments
    name
  }
}"""

# Send the same content-type / Authorization headers as every other request
# in this notebook instead of an unauthenticated call.
r = s.post(URL, headers=headers, json={'query': query})

print(r.text)

{
    "data": {
        "IndicesNames": [
            {
                "amountOfDocuments": 64501832,
                "name": "patents_v2"
            },
            {
                "amountOfDocuments": 585730,
                "name": "upc_cases"
            },
            {
                "amountOfDocuments": 126918838,
                "name": "publications"
            }
        ]
    }
}


## Getting basic information from a document by using the document ID

By changing the GraphQL query you can limit or extend which fields the server will retrieve for you.
The full list can be found online: https://api.logic-mill.net/api/v1/graph

In [4]:
# Simple version: fetch only id, title and url for a list of documents.
query = """
query Documents($data: [DatabaseSearchDocument]) {
  Documents(data: $data) {
    id
    title
    url
  }
}
"""
# Each entry names the index to look in and the document id within it.
variables = {
  "data": [
    {
      "id": "W4313483528",
      "index": "publications"
     },
    {
      "index": "patents",
      "id": "US11394112B2"
    },
    {
      "index": "patents",
      "id": "69888419"
    }
  ]
}
# Go through the session `s` so the retry policy configured above applies.
r = s.post(URL, headers=headers, json={'query': query, 'variables': variables})

# The response body is shown in both the success and the failure case.
print(r.text)
if r.status_code != 200:
    # The endpoint constant is `URL`; the lowercase name does not exist.
    print(f"Error executing\n{query}\non {URL}")

{
    "data": {
        "Documents": [
            {
                "id": "W4313483528",
                "title": "Logic Mill -- A Knowledge Navigation System",
                "url": "https://openalex.org/works/W4313483528"
            },
            {
                "id": "69888419",
                "title": "ADVERTISING MEDIUM THAT REPEATS 5G TELEPHONE SIGNAL",
                "url": "https://worldwide.espacenet.com/patent/search?q=US11394112B2"
            },
            {
                "id": "69888419",
                "title": "ADVERTISING MEDIUM THAT REPEATS 5G TELEPHONE SIGNAL",
                "url": "https://worldwide.espacenet.com/patent/search?q=US11394112B2"
            }
        ]
    }
}


## Info from the document

The response is a nested dictionary; to select a specific item, index into it with the corresponding keys.


In [5]:
# Walk the decoded JSON: top-level "data" -> list of documents -> first hit.
data = r.json()['data']
first_document = data['Documents'][0]
title = first_document['title']
print(f"Title:\n{title}\n")

Title:
Logic Mill -- A Knowledge Navigation System



## Get the numerical representation/embedding

Just add the fields you are interested in.

In [6]:
# Request both embedding fields for each document.
query = """
query Documents($data: [DatabaseSearchDocument]) {
  Documents(data: $data) {
    id
    PatspecterEmbedding
    PaecterEmbedding
  }
}
"""
# Same documents as above (`variables` is reused from the previous request).
# The session `s` is used so the retry policy configured above applies.
r = s.post(URL, headers=headers, json={'query': query, 'variables': variables})
data = r.json()['data']['Documents']
for entry in data:
    print(entry["id"])
    # An embedding can be empty (publications do not have PaECTER embeddings),
    # so both fields are checked before slicing instead of only one of them.
    for field in ("PaecterEmbedding", "PatspecterEmbedding"):
        if entry[field]:
            print(entry[field][:10])  # first ten components only

W4313483528
[-0.23040916, 0.09366139, -0.5264982, 0.35253185, -0.32284304, -1.2042444, 1.4183519, 0.85640496, -0.4407626, 0.8229229]
69888419
[-0.11776501, 0.25963417, 0.06177773, -0.84297, -1.0275449, -0.4726793, 0.28333065, 0.02848985, -0.8005307, -0.32174096]
[0.26689774, 0.5151021, -0.23139937, 0.20953831, -0.031730473, -1.0606387, 1.2447457, 0.09842408, 0.19466443, 0.31455356]
69888419
[-0.11776501, 0.25963417, 0.06177773, -0.84297, -1.0275449, -0.4726793, 0.28333065, 0.02848985, -0.8005307, -0.32174096]
[0.26689774, 0.5151021, -0.23139937, 0.20953831, -0.031730473, -1.0606387, 1.2447457, 0.09842408, 0.19466443, 0.31455356]


## Search _n_ most similar documents compared to a document in the database

In [7]:
# Number of similar documents we are interested in.
n = 5

query = """query SimilaritySearch($index: String!, $id: String!, $amount: Int, $indices: [String], $model: String!) {
  SimilaritySearch(
    index: $index
    id: $id
    amount: $amount
    indices: $indices
    model: $model
  ) {
    id
    score
    index
    document {
      title
      doi
    }
  }
}
"""

# `index`/`id` identify the query document; `indices` lists where to search.
# NOTE(review): n+1 hits are requested, presumably to leave room for the
# query document itself appearing in the result -- confirm.
variables = {
  "model": "patspecter",
  "amount": n+1,
  "id": "91081326",
  "index": "patents",
  "indices": [
    "publications"
  ]
}

# Go through the session `s` so the retry policy configured above applies.
r = s.post(URL, headers=headers, json={'query': query, 'variables': variables})

if r.status_code == 200:
    results = r.json()['data']['SimilaritySearch']
    for hit in results:
        print(hit)
else:
    # Show what the server answered; the endpoint constant is `URL`
    # (the lowercase name is undefined and raised a NameError here).
    print(r.text)
    print(f"Error executing\n{query}\non {URL}")

{'document': {'doi': 'https://doi.org/10.17816/0321-4443-66244', 'title': 'SPECIAL CHARACTERISTICS OF GLEANER COMBINE HARVESTERS'}, 'id': 'W3169719195', 'index': 'publications', 'score': 0.97302663}
{'document': {'doi': 'https://doi.org/10.13031/2013.35039', 'title': 'A Wheel Elevator for Sugar Beet Harvesters'}, 'id': 'W2016749744', 'index': 'publications', 'score': 0.97295034}
{'document': {'doi': '', 'title': 'Stepwise Construction of the Dedekind-MacNeille Completion (Research Note)'}, 'id': 'W1501118946', 'index': 'publications', 'score': 0.9728551}
{'document': {'doi': '', 'title': 'Stochastic Models of Control and Economic Dynamics'}, 'id': 'W1588789815', 'index': 'publications', 'score': 0.97222877}
{'document': {'doi': '', 'title': 'Diseño y simulación de un dispositivo vibrador multidireccional para cosechar capulí.'}, 'id': 'W3162661967', 'index': 'publications', 'score': 0.971483}
{'document': {'doi': 'https://doi.org/10.17816/0321-4443-66374', 'title': 'Methods and devices

`id`, `index` and `score` are at the top level of each result and can be used directly. The other relevant information is nested inside `document`.

In [8]:
# One row per hit; the nested `document` dict is expanded into its own
# columns and appended after the top-level fields.
df = pd.DataFrame(results)
document_fields = pd.json_normalize(df.pop("document"))
df = df.join(document_fields)
display(df)

Unnamed: 0,id,index,score,doi,title
0,W3169719195,publications,0.973027,https://doi.org/10.17816/0321-4443-66244,SPECIAL CHARACTERISTICS OF GLEANER COMBINE HAR...
1,W2016749744,publications,0.97295,https://doi.org/10.13031/2013.35039,A Wheel Elevator for Sugar Beet Harvesters
2,W1501118946,publications,0.972855,,Stepwise Construction of the Dedekind-MacNeill...
3,W1588789815,publications,0.972229,,Stochastic Models of Control and Economic Dyna...
4,W3162661967,publications,0.971483,,Diseño y simulación de un dispositivo vibrador...
5,W4289387989,publications,0.971297,https://doi.org/10.17816/0321-4443-66374,Methods and devices for processing the stemsto...


## Create a new embedding for a user document

We use the `encodeDocuments` endpoint. 


The data variable has the following form (`EncodeObject`):

```
  "data": [
    {
      "id": "trade_resolutions_pat",
      "parts": [
        {
          "key": "title",
          "value": "d"
        },
        {
          "key": "abstract",
          "value": "An electronic trading system utilizes...."
        }
      ]
    },
```
The `id` is for identification purposes only and can be any value you choose (the examples here use strings).

Often the data is already available in some data structure (a CSV or Excel file, a dictionary). In the following example we will use a list of dictionaries.

In [9]:
# Our data: three example bibliographic records.
# Each record is a plain dict with an "id" plus the text fields ("title",
# "abstract") that later cells turn into key/value "parts" for the API.

biblios = [{
    "id": "ML001",
    "title" : "Towards A Rigorous Science of Interpretable Machine Learning",
    "abstract" : "As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning."
} , {
    "id": "ML002",
    "title": "Machine Learning Interpretability: A Science rather than a tool",
    "abstract": """The term "interpretability" is oftenly used by machine learning researchers each with their own intuitive understanding of it. There is no universal well agreed upon definition of interpretability in machine learning. As any type of science discipline is mainly driven by the set of formulated questions rather than by different tools in that discipline, e.g. astrophysics is the discipline that learns the composition of stars, not as the discipline that use the spectroscopes. Similarly, we propose that machine learning interpretability should be a discipline that answers specific questions related to interpretability. These questions can be of statistical, causal and counterfactual nature. Therefore, there is a need to look into the interpretability problem of machine learning in the context of questions that need to be addressed rather than different tools. We discuss about a hypothetical interpretability framework driven by a question based scientific approach rather than some specific machine learning model. Using a question based notion of interpretability, we can step towards understanding the science of machine learning rather than its engineering. This notion will also help us understanding any specific problem more in depth rather than relying solely on machine learning methods"""
}, {
    "id": "ML003",
    "title": "Opening the black box of neural networks: methods for interpreting neural network models in clinical applications",
    "abstract": """Artificial neural networks (ANNs) are powerful tools for data analysis and are particularly suitable for modeling relationships between variables for best prediction of an outcome. While these models can be used to answer many important research questions, their utility has been critically limited because the interpretation of the "black box" model is difficult. Clinical investigators usually employ ANN models to predict the clinical outcomes or to make a diagnosis; the model however is difficult to interpret for clinicians. To address this important shortcoming of neural network modeling methods, we describe several methods to help subject-matter audiences (e.g., clinicians, medical policy makers) understand neural network models. Garson's algorithm describes the relative magnitude of the importance of a descriptor (predictor) in its connection with outcome variables by dissecting the model weights. The Lek's profile method explores the relationship of the outcome variable and a predictor of interest, while holding other predictors at constant values (e.g., minimum, 20th quartile, maximum). While Lek's profile was developed specifically for neural networks, partial dependence plot is a more generic version that visualize the relationship between an outcome and one or two predictors. Finally, the local interpretable model-agnostic explanations (LIME) method can show the predictions of any classification or regression, by approximating it locally with an interpretable model. R code for the implementations of these methods is shown by using example data fitted with a standard, feed-forward neural network model. We offer codes and step-by-step description on how to use these tools to facilitate better understanding of ANN"""
}]

# Show the records as a table (last expression of the cell is displayed).
pd.DataFrame(biblios)

Unnamed: 0,id,title,abstract
0,ML001,Towards A Rigorous Science of Interpretable Ma...,"As machine learning systems become ubiquitous,..."
1,ML002,Machine Learning Interpretability: A Science r...,"The term ""interpretability"" is oftenly used by..."
2,ML003,Opening the black box of neural networks: meth...,Artificial neural networks (ANNs) are powerful...


We wish to encode the first item

In [10]:
# Take the first record of `biblios`; the following cells encode this one.
biblio = biblios[0]

In [11]:
# Prepare the data in the shape shown above: the record id plus one
# {"key", "value"} pair for every field of the record other than 'id'.
data = {
    "id": biblio['id'],
    "parts": [
        {"key": field, "value": text}
        for field, text in biblio.items()
        if field != 'id'
    ],
}
data


{'id': 'ML001',
 'parts': [{'key': 'title',
   'value': 'Towards A Rigorous Science of Interpretable Machine Learning'},
  {'key': 'abstract',
   'value': 'As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.'}]}

In [12]:
# Add to the variables dictionary. The single record is wrapped in a list
# because the query below declares `$data` as a list (`[EncodeObject]`).
variables = {"data" : [data]}

In [13]:
# The query also takes the name of the model that should do the encoding.
variables.update(model="patspecter")
variables

{'data': [{'id': 'ML001',
   'parts': [{'key': 'title',
     'value': 'Towards A Rigorous Science of Interpretable Machine Learning'},
    {'key': 'abstract',
     'value': 'As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.'}]}],
 'model': 'patspecter'}

In [14]:
# Prepare the query with the two variables filled in above: `$data` (the
# list of documents to encode) and `$model` (the name of the model to use).
query="""
query encodeDocuments($data: [EncodeObject], $model: String!) {
  encodeDocuments(data: $data, model: $model)
}
"""

In [15]:
# Send the encode request through the retrying session.
r = s.post(URL, headers=headers, json={'query': query , 'variables': variables})

if r.status_code == 200:
    response = r.json()
    print(response)
else:
    # Do not fail silently: report the status and what the server answered.
    print(f"Request failed with status {r.status_code}:\n{r.text}")

{'data': {'encodeDocuments': [[-0.15173213, 0.39466915, -0.26544675, 0.1092064, -0.097115465, -1.2353461, 1.3304781, 0.70219463, -0.06913787, 0.3280643, 0.15932687, -0.23166955, -0.37336734, -0.3493267, -0.5799768, 0.075430065, -0.2885678, 0.50012237, -0.40116823, -0.2272772, 0.4327774, 0.19313827, -0.62526125, 0.3533519, 0.6597146, 0.55982786, -0.20348778, 1.0925517, 0.07548555, 0.95632106, 0.6006828, -0.7990245, 0.36585793, 0.08133721, -0.88682884, 0.035699174, 0.35787007, -0.6586926, -0.96956295, 0.6702604, -0.5663672, 0.15465163, 0.8242074, -1.5030347, -0.3781958, 1.1294978, -0.12525551, 0.7187593, -0.18786238, -0.011435427, 1.1182588, -1.27731, 0.6875963, 1.0919564, 0.45729992, 0.52055216, -0.87896144, -0.81807554, 1.0895197, 0.2811168, -0.91285336, 0.32776853, 0.18254817, -1.3118012, 1.5330341, -0.013770792, 0.10494889, 0.2914862, 0.18329017, 1.1091548, 0.9763152, -0.3220751, 0.31508708, 0.3901258, 1.6321836, 0.54006624, -0.47993678, 0.90476996, -0.5797334, -0.08604169, -0.508257

## Create embedding for multiple documents

We use a similar setup as with one document but in a loop with multiple API calls


In [16]:
# NOTE(review): this cell uses the `embedDocument` mutation, whereas the
# single-document example above uses the `encodeDocuments` query -- confirm
# against the API schema that this operation is still available.
query = """mutation ($data:LmDocumentMutationObject) {
  embedDocument(data: $data)
}"""

# Collect one embedding per document id and build the table once at the end,
# instead of growing a DataFrame with pd.concat on every iteration.
embeddings = {}
for biblio in biblios:
    data = {"id": biblio['id']}
    data["parts"] = [{"key": k, "value": v} for k, v in biblio.items() if k != 'id']
    variables = {"data": data}

    # Pass the auth headers like the other authenticated calls do.
    r = s.post(URL, headers=headers, json={'query': query, 'variables': variables})
    if r.status_code == 200:
        # store the results
        embeddings[biblio['id']] = r.json()["data"]["embedDocument"]
    else:
        # Report the failure instead of silently dropping the document.
        print(f"Request for {biblio['id']} failed with status {r.status_code}:\n{r.text}")

# One row per document id, one column per embedding component.
df = pd.DataFrame.from_dict(embeddings, orient="index")
df

## Find similarity scores between user supplied documents

In [17]:
# Prepare the data field: one {"id", "parts"} record per entry of `biblios`.
data = []

for biblio in biblios:
    # Every field except the id becomes a {"key", "value"} part.
    record = {
        "id": biblio['id'],
        "parts": [
            {"key": field, "value": text}
            for field, text in biblio.items()
            if field != 'id'
        ],
    }
    data.append(record)


In [18]:
# Display the list of records built in the previous cell.
data

[{'id': 'ML001',
  'parts': [{'key': 'title',
    'value': 'Towards A Rigorous Science of Interpretable Machine Learning'},
   {'key': 'abstract',
    'value': 'As machine learning systems become ubiquitous, there has been a surge of interest in interpretable machine learning: systems that provide explanation for their outputs. These explanations are often used to qualitatively assess other criteria such as safety or non-discrimination. However, despite the interest in interpretability, there is very little consensus on what interpretable machine learning is and how it should be measured. In this position paper, we first define interpretability and describe when interpretability is needed (and when it is not). Next, we suggest a taxonomy for rigorous evaluation and expose open questions towards a more rigorous science of interpretable machine learning.'}]},
 {'id': 'ML002',
  'parts': [{'key': 'title',
    'value': 'Machine Learning Interpretability: A Science rather than a tool'},
   

In [19]:
# Encode all user documents and return their pairwise similarities.
# `xs` and `ys` carry the document ids that label the similarity matrix.
query = """
query encodeDocumentAndSimilarityCalculation($data: [EncodeObject], $similarityMetric: similarityMetric, $model: String!) {
  encodeDocumentAndSimilarityCalculation(
    data: $data
    similarityMetric: $similarityMetric
    model: $model
  ) {
    similarities
    xs {
      id
    }
    ys {
      id
    }
  }
}
"""

# The key has to match the variable declared in the query
# (`$similarityMetric`); under the name "metric" the value was never bound to
# it, so the requested metric was not actually sent to the field.
variables = {"data": data, "similarityMetric": 'cosine', "model": "patspecter"}

r = s.post(URL, headers=headers,  json={'query': query, 'variables': variables})
print(r.text)

{
    "data": {
        "encodeDocumentAndSimilarityCalculation": {
            "similarities": [
                [
                    1,
                    0.9697906,
                    0.9458891
                ],
                [
                    0.9697906,
                    1,
                    0.93912196
                ],
                [
                    0.9458891,
                    0.93912196,
                    1
                ]
            ],
            "xs": [
                {
                    "id": "ML001"
                },
                {
                    "id": "ML002"
                },
                {
                    "id": "ML003"
                }
            ],
            "ys": [
                {
                    "id": "ML001"
                },
                {
                    "id": "ML002"
                },
                {
                    "id": "ML003"
                }
            ]
        }
    }
}


In [20]:
# Turn the similarity matrix into a labelled table; the ids listed under
# "xs" are used for both the row and the column labels.
data = r.json()['data']['encodeDocumentAndSimilarityCalculation']
doc_ids = [doc["id"] for doc in data["xs"]]
df = pd.DataFrame(data=data['similarities'], index=doc_ids, columns=doc_ids)
display(df)

Unnamed: 0,ML001,ML002,ML003
ML001,1.0,0.969791,0.945889
ML002,0.969791,1.0,0.939122
ML003,0.945889,0.939122,1.0


## Find similar documents in the database compared to user uploaded document


In [21]:
# Simple version: the query document is written directly into the variables.

query = """query embedDocumentAndSimilaritySearch($data: [EncodeDocumentPart], $indices: [String], $amount: Int, $model: String!) {
  encodeDocumentAndSimilaritySearch(
    data: $data
    indices: $indices
    amount: $amount
    model: $model
  ) {
    id
    score
    index
    document {
      title
      url
      PatspecterEmbedding
    }
  }
}
"""
# `data` holds the key/value parts of the user document, `indices` the
# document sets to search and `amount` the number of hits requested.
variables = {
    "model": "patspecter",
    "data": [
        {"key": "title", "value": "Airbags"},
        {
            "key": "abstract",
            "value": "Airbags are one of the most important safety gears in motor vehicles such as cars and SUVs. These are cushions built into a vehicle that are intended to inflate in case of a car accident in order to protect occupants from injuries by preventing them from striking the interior of vehicle during a crash.",
        },
    ],
    "amount": 25,
    "indices": ["patents", "publications"],
}

r = s.post(URL, headers=headers, json={'query': query, 'variables':variables})
# Decode the body once and reuse the list of hits for both summaries.
hits = r.json()["data"]["encodeDocumentAndSimilaritySearch"]
print("Number of results:", len(hits))
print(hits[0]["document"].keys())

Number of results: 50
dict_keys(['PatspecterEmbedding', 'title', 'url'])


In [22]:
# Parameterized version: only the query text is defined here; the variables
# are filled from a `biblios` record in the next cell.
# Compared with the simple version above, `PatspecterEmbedding` is not
# requested, so each hit's `document` only carries `title` and `url`.

query = """
query embedDocumentAndSimilaritySearch($data: [EncodeDocumentPart], $indices: [String], $amount: Int, $model: String!) {
  encodeDocumentAndSimilaritySearch(
    data: $data
    indices: $indices
    amount: $amount
    model: $model
  ) {
    id
    score
    index
    document {
      title
      url
    }
  }
}
"""

In [23]:
# Use the third bibliographic record as the query document; every field
# except the id becomes one {"key", "value"} part.
biblio = biblios[2]
data = [
    {"key": field, "value": text}
    for field, text in biblio.items()
    if field != 'id'
]

variables = dict(
    model="patspecter",
    amount=3,
    data=data,
    indices=["patents", "publications"],
)

r = s.post(URL, headers=headers, json={'query': query, 'variables': variables})

In [24]:
# Pull the list of hits out of the response, then report how many came back
# and which fields the first hit's document carries.
search_response = r.json()
results = search_response["data"]["encodeDocumentAndSimilaritySearch"]
print(len(results))
first_document = results[0]["document"]
print(first_document.keys())

6
dict_keys(['title', 'url'])


In [25]:
# Prettify output: expand the nested `document` dict into ordinary columns
# placed after the top-level fields of each hit.
df = pd.DataFrame(results)
document_columns = pd.json_normalize(df["document"])
df = df.drop(columns="document").join(document_columns)
display(df)

Unnamed: 0,id,index,score,title,url
0,W2807036076,publications,0.998372,Opening the black box of neural networks: meth...,
1,W2121394390,publications,0.992756,Advantages and disadvantages of using artifici...,
2,W2885307904,publications,0.990867,<b>NeuralNetTools</b>: Visualization and Analy...,
3,30118004,patents,0.98501,Time-dependent outcome prediction using neural...,
4,45807655,patents,0.984483,TIME TO EVENT DATA ANALYSIS METHOD AND SYSTEM,
5,80855462,patents,0.984008,Attribution methodologies for neural networks ...,
