# Question Augmentation (similarity)

A frequent source of difficulty in using Translator Knowledge Graphs is the traversal of similar nodes.  If two non-identical (but very similar) concepts exist, they may be independently associated with other information by individual KPs.  If a user is unaware of this subtlety, it is easy to miss highly relevant answers.

Here is a simple query: (gene)--(chemical)--(disease).  This query might be used to explain gene/disease relationships that are driven by some metabolic process.   For instance, there is a known association between the gene SLC34A1 and Fanconi Syndrome:

In [21]:
class RobokopMessenger:
    """Small client that pushes a message through the ROBOKOP messenger stages.

    Every stage is an HTTP POST to ``{url}/<stage>``. The JSON body returned by
    one stage is wrapped as ``{'message': ...}`` and sent to the next stage.
    """

    def __init__(self, url='http://robokop.renci.org:4868'):
        """Store the base URL of the messenger service.

        Args:
            url: Base URL (without a trailing slash) to which the stage name
                is appended. Defaults to the public ROBOKOP messenger.
        """
        self.url = url

    def _post(self, stage, payload):
        """POST *payload* as JSON to ``{self.url}/{stage}``; return the decoded JSON reply."""
        response = requests.post(f'{self.url}/{stage}', json=payload)
        return response.json()

    def pipeline(self, request, full=True, verbose=True):
        """Run *request* through normalize -> answer [-> yank -> support -> weight_correctness -> score].

        Args:
            request: JSON-serialisable body posted unchanged to ``/normalize``.
            full: When false, stop after ``/answer`` and return that reply.
            verbose: When true (the default, matching the original behaviour),
                print the ``/answer`` reply as indented JSON and the ``/yank``
                reply as-is.

        Returns:
            The decoded JSON reply of the last stage that was executed.
        """
        # normalize question (the request is sent as given, not re-wrapped)
        normalized = self._post('normalize', request)

        # answer question
        answered = self._post('answer', {'message': normalized})
        if verbose:
            print(json.dumps(answered, indent=2))
        if not full:
            return answered

        # yank
        filled = self._post('yank', {'message': answered})
        if verbose:
            print(filled)

        # support, weight, score: all go through self.url so that a
        # non-default base URL applies to the whole pipeline, not just the
        # first three stages.
        message = filled
        for stage in ('support', 'weight_correctness', 'score'):
            message = self._post(stage, {'message': message})
        return message


robokop = RobokopMessenger()

In [17]:
def get_view_url(returnanswer,robokop='robokop.renci.org'):
    """Upload an answer (KGS v0.9 format) to a ROBOKOP host and return a UI link to it.

    The answer is POSTed as JSON to the host's ``/api/simple/view/`` endpoint.
    The decoded JSON reply is then used as the identifier in the returned
    ``/simple/view/<id>`` URL.
    """
    answer_id = requests.post(
        f'https://{robokop}/api/simple/view/',
        json=returnanswer,
    ).json()
    return f'https://{robokop}/simple/view/{answer_id}'

In [3]:
import requests
import json

## The relationship between the gene and disease is a known one.

In [4]:
# One-hop query graph: a gene node (n0) and a disease node (n1), both pinned to
# a specific curie, joined by a single untyped edge whose source is the disease.
# Presumably the curies are SLC34A1 and Fanconi syndrome, the pair named in the
# introduction - TODO confirm.
question = { 'nodes': [{'id':'n0', 'type':'gene', 'curie':'NCBIGene:6569'},
                      {'id': 'n1', 'type':'disease', 'curie': 'MONDO:0001083'}],
             'edges': [ {'id': 'e0', 'source_id': 'n1', 'target_id': 'n0'}]}
# Wrap the query graph in the {'message': ...} envelope that is posted to the pipeline.
message = {'message': {'query_graph': question}}

In [5]:
# Run every pipeline stage (full defaults to True) and pretty-print the final reply.
# pipeline() also prints its intermediate /answer and /yank replies.
result = robokop.pipeline(message)
print( json.dumps(result,indent=2))

{
  "knowledge_graph": {
    "edges": [
      {
        "ctime": [
          1573152746.1343825,
          1572932974.4445496
        ],
        "edge_source": [
          "pharos.disease_get_gene",
          "pharos.gene_get_disease"
        ],
        "id": "d4bd7eab33347c12a5fac135ad7eab05",
        "predicate_id": "NCIT:R176",
        "publications": [],
        "relation": [
          "PHAROS:gene_involved",
          "PHAROS:gene_involved"
        ],
        "relation_label": [
          "gene_involved",
          "gene_involved"
        ],
        "source_database": [
          "pharos",
          "pharos"
        ],
        "source_id": "MONDO:0001083",
        "target_id": "HGNC:11019",
        "type": "disease_to_gene_association",
        "weight": 1
      },
      {
        "ctime": [
          1573152747.0609732,
          1572932973.4711454
        ],
        "edge_source": [
          "biolink.disease_get_gene",
          "biolink.gene_get_disease"
        ],
        "id

## There is no chemical relating the gene to the disease

In [6]:
# Two-hop query graph: gene (n0) -- chemical_substance (n1) -- disease (n2).
# The gene and disease are pinned to the same curies as the one-hop query; the
# chemical node has no curie, so it is the unknown to be filled in.
question = { 'nodes': [{'id':'n0', 'type':'gene', 'curie':'NCBIGene:6569'},
                       {'id':'n1', 'type':'chemical_substance'},
                      {'id': 'n2', 'type':'disease', 'curie':'MONDO:0001083'}],
             'edges': [ {'id': 'e0', 'source_id': 'n0', 'target_id': 'n1'},
                        {'id': 'e1', 'source_id': 'n1', 'target_id': 'n2'}]}
message = {'message': {'query_graph': question}}

# Disabled alternative: post the same message directly to a strider endpoint.
#strider = 'http://robokop.renci.org:5781/query'
#response = requests.post(strider,json=message)
#print(response.status_code)

In [7]:
# Run the two-hop question through the full pipeline and pretty-print the final reply.
result = robokop.pipeline(message)
print( json.dumps(result,indent=2))

{
  "knowledge_graph": {
    "edges": [],
    "nodes": []
  },
  "query_graph": {
    "edges": [
      {
        "id": "e0",
        "source_id": "n0",
        "target_id": "n1"
      },
      {
        "id": "e1",
        "source_id": "n1",
        "target_id": "n2"
      }
    ],
    "nodes": [
      {
        "curie": [
          "HGNC:11019"
        ],
        "id": "n0",
        "type": "gene"
      },
      {
        "id": "n1",
        "type": "chemical_substance"
      },
      {
        "curie": [
          "MONDO:0001083"
        ],
        "id": "n2",
        "type": "disease"
      }
    ]
  },
  "results": []
}


## We can augment the question with similarity

In [39]:
# Post the two-hop message to the question-augmentation service; the JSON reply
# is indexed as a list of candidate questions.
qa_url = 'https://questionaugmentation.renci.org/node_expand'
rq = requests.post(qa_url,json=message)
new_questions = rq.json()
print(len(new_questions))
# Only the first candidate is used.
newq = new_questions[0]

# Hand-edit the candidate in place: rename the third edge to 'e77' and the last
# node to 'n4', re-point edge 1's source and edge 2's target at 'n4', and give
# edge 2 the type 'similar_to'.
# NOTE(review): these positional indices assume edges[2] is the similarity edge
# and nodes[-1] is the node the service added - confirm against the service's
# reply, since nothing here checks it.
newq['query_graph']['edges'][2]['id']='e77'
newq['query_graph']['nodes'][-1]['id']='n4'
newq['query_graph']['edges'][1]['source_id']='n4'
newq['query_graph']['edges'][2]['target_id']='n4'
newq['query_graph']['edges'][2]['type']='similar_to'
print(json.dumps(newq,indent=2))

1
{
  "query_graph": {
    "nodes": [
      {
        "id": "n0",
        "type": "gene",
        "curie": "NCBIGene:6569"
      },
      {
        "id": "n1",
        "type": "chemical_substance"
      },
      {
        "id": "n2",
        "type": "disease",
        "curie": "MONDO:0001083"
      },
      {
        "id": "n4",
        "type": "chemical_substance"
      }
    ],
    "edges": [
      {
        "id": "e0",
        "source_id": "n0",
        "target_id": "n1"
      },
      {
        "id": "e1",
        "source_id": "n4",
        "target_id": "n2"
      },
      {
        "id": "e77",
        "source_id": "n1",
        "target_id": "n4",
        "type": "similar_to"
      }
    ]
  }
}


In [40]:
# Wrap the edited question in the {'message': ...} envelope passed to the pipeline.
newmessage = {'message': newq}

In [41]:
# Run the augmented question through the full pipeline. The final print is
# disabled, so the cell output is only what pipeline() itself prints.
# NOTE(review): the stored output of this cell contains a Neo4j username,
# password and bolt URL inside "knowledge_graph" - clear the output (and
# consider rotating the credentials) before sharing this notebook.
result = robokop.pipeline(newmessage)
#print( json.dumps(result,indent=2))

{
  "knowledge_graph": {
    "credentials": {
      "password": "ncatsgamma",
      "username": "neo4j"
    },
    "url": "bolt://robokopdb2.renci.org:7687"
  },
  "query_graph": {
    "edges": [
      {
        "id": "e0",
        "source_id": "n0",
        "target_id": "n1"
      },
      {
        "id": "e1",
        "source_id": "n4",
        "target_id": "n2"
      },
      {
        "id": "e77",
        "source_id": "n1",
        "target_id": "n4",
        "type": "similar_to"
      }
    ],
    "nodes": [
      {
        "curie": [
          "HGNC:11019"
        ],
        "id": "n0",
        "type": "gene"
      },
      {
        "id": "n1",
        "type": "chemical_substance"
      },
      {
        "curie": [
          "MONDO:0001083"
        ],
        "id": "n2",
        "type": "disease"
      },
      {
        "id": "n4",
        "type": "chemical_substance"
      }
    ]
  },
  "results": [
    {
      "edge_bindings": [
        {
          "kg_id": "5c0ae693fe5f2a2e

In [18]:
# Display the wrapped message.
# NOTE(review): the stored output shows ids 'sim_to_n1_0' / 'sim_edge_0' and no
# 'similar_to' type, and this cell's execution count (18) is lower than that of
# the cells that build newmessage (39-40). The output looks stale - re-run to refresh.
newmessage

{'message': {'query_graph': {'nodes': [{'id': 'n0',
     'type': 'gene',
     'curie': 'NCBIGene:6569'},
    {'id': 'n1', 'type': 'chemical_substance'},
    {'id': 'n2', 'type': 'disease', 'curie': 'MONDO:0001083'},
    {'id': 'sim_to_n1_0', 'type': 'chemical_substance'}],
   'edges': [{'id': 'e0', 'source_id': 'n0', 'target_id': 'n1'},
    {'id': 'e1', 'source_id': 'sim_to_n1_0', 'target_id': 'n2'},
    {'id': 'sim_edge_0', 'source_id': 'n1', 'target_id': 'sim_to_n1_0'}]}}}