# Famotidine Covid

This is a test case to see whether Answer Merging helps with the arbitrary query problem. The problem is: I want to know the most interesting way in which these two entities are related.  What is it?  One option is to do question rewriting.  But another (this one) is to general queries and then work on merging them.

In [26]:
import requests
import json
import time
import pandas as pd

#GammaViewer available from https://github.com/ranking-agent/gamma-viewer
#from GammaViewer import GammaViewer
from IPython.display import display

In [27]:
def getlabel(curie):
    url = 'https://nodenormalization-sri.renci.org/get_normalized_nodes'
    try:
        r = requests.get(url,params={'curie':curie}).json()
        return r[curie]['id']['label']
    except:
        return ''

class RobokopMessenger:
    def __init__(self):
        self.url = 'https://covidkop.renci.org/messenger'
    def pipeline(self,request,full = True):
        #normalize question
        purl = f'{self.url}/normalize'
        print(purl)
        response = requests.post( purl , json=request )
        normalized = response.json()
        
        #answer question
        request = { 'message': normalized, }
        response = requests.post( f'{self.url}/answer', json=request )
        answered = response.json()

        if not full:
            return answered
        
        #Yank
        #request = { 'message': answered, }
        #response = requests.post( f'{self.url}/yank', json=request )
        #filled = response.json()
        
        #support
        request = { 'message': answered, }
        response = requests.post( 'http://robokop.renci.org:4868/support', json=request )
        supported = response.json()
        #weight
        request = { 'message': supported, }
        response = requests.post( 'http://robokop.renci.org:4868/weight_correctness', json=request )
        weighted = response.json()
        #score
        request = { 'message': weighted, }
        response = requests.post( 'http://robokop.renci.org:4868/score', json=request )
        scored = response.json()
        return scored

    
robokop=RobokopMessenger()

In [28]:
def create_message():
    query_graph = {
    "nodes": [
        {
            #Covid-19
            "id": "a",
            "type": "disease",
            "curie": "MONDO:0100096"
        },
        {
            "id": "b",
            "type": "named_thing"
        },
        {
            #Famotidine
            "id": "c",
            "type": "chemical_substance",
            "curie": "CHEBI:4975"
        }
    ],
    "edges": [
        {
            "id": "ab",
            "source_id": "a",
            "target_id": "b"
        },
        {
            "id": "bc",
            "source_id": "c",
            "target_id": "b"
        }
    ]
    }
    message = {"message": {"query_graph": query_graph,
                           'results':[{"node_bindings": [],"edge_bindings": []}]}}
    return message

In [29]:
message = create_message()
#react_component = GammaViewer(props={"data":message['message']})
#display(react_component)

In [30]:
robokop_result = robokop.pipeline(message,full=True)
print( json.dumps(robokop_result,indent=2))

https://covidkop.renci.org/messenger/normalize
{
  "query_graph": {
    "nodes": [
      {
        "id": "a",
        "curie": [
          "MONDO:0100096"
        ],
        "type": "disease"
      },
      {
        "id": "b",
        "curie": null,
        "type": "named_thing"
      },
      {
        "id": "c",
        "curie": [
          "CHEBI:4975"
        ],
        "type": "chemical_substance"
      }
    ],
    "edges": [
      {
        "id": "ab",
        "source_id": "a",
        "target_id": "b",
        "type": null
      },
      {
        "id": "bc",
        "source_id": "c",
        "target_id": "b",
        "type": null
      }
    ]
  },
  "knowledge_graph": {
    "nodes": [
      {
        "id": "MONDO:0100096",
        "type": [
          "named_thing",
          "biological_entity",
          "disease",
          "disease_or_phenotypic_feature"
        ],
        "equivalent_identifiers": [
          "SNOMEDCT:840539006",
          "MONDO:0100096",
          "DO

In [31]:
with open('famcov.json','w') as outf:
    json.dump(robokop_result,outf,indent=2)

## Coalesce Answers by Property

In [33]:
url = 'https://answercoalesce.renci.org/coalesce'
params = {'method':'property'}
p_r = requests.post(url,json=robokop_result,params=params)
print(p_r.status_code)
property_result = p_r.json()

200


In [34]:
property_result['results']

[]

In [35]:
results = property_result['results']
num_grouped=[]
p_values=[]
properties=[]
for result in results:
    chemnode = result['node_bindings'][2]
    for p,prop in zip(chemnode['p_values'],chemnode['properties']):
        num_grouped.append(len(chemnode['kg_id']))
        p_values.append(p)
        properties.append(prop)
df = pd.DataFrame( {'NumberGrouped': num_grouped, 'p_value':p_values, 'PropertyGroup':properties})
df.sort_values(by='p_value',inplace=True)
df

Unnamed: 0,NumberGrouped,p_value,PropertyGroup


## Coalesce Answers by Ontology

In [36]:
url = 'https://answercoalesce.renci.org/coalesce'
params = {'method':'ontology'}
o_r = requests.post(url,json=robokop_result,params=params)
print(o_r.status_code)
ontology_result = o_r.json()
print( len(ontology_result['results']))

200
1


In [37]:
results = ontology_result['results']
for result in results:
    print('\n================')
    chemnode = result['node_bindings'][1]
    sclass = chemnode['superclass']
    label = getlabel(sclass)
    nodes = chemnode['kg_id']
    labels = {x: f'{getlabel(x)}' for x in nodes }
    for x in nodes:
        print(f'{labels[x]} ({x})')
    print('----have superclass----')
    print(f'{label} ({sclass})')


respiratory failure (MONDO:0021113)
acute lung injury (MONDO:0015796)
bronchiolitis obliterans organizing pneumonia (MONDO:0056821)
bacterial pneumonia (MONDO:0004652)
pneumonia (MONDO:0005249)
acute respiratory failure (MONDO:0001208)
----have superclass----
lung disease (MONDO:0005275)


In [38]:
from copy import deepcopy
onrd = deepcopy(ontology_result)
for n in onrd['query_graph']['nodes']:
    if 'curie' in n and n['curie'] is None:
        del[n['curie']]
for n in onrd['knowledge_graph']['nodes']:
    if 'name' not in n:
        label = getlabel(n['id'])
        n['name'] = label
for e in onrd['knowledge_graph']['edges']:
    e['id'] = str(e['id'])
for r in onrd['results']:
    for eb in r['edge_bindings']:
        eb['kg_id'] = [ str(x) for x in eb['kg_id']]
#react_component = GammaViewer(props={"data":onrd})
#display(react_component)

In [39]:
with open('ontology_output.json.txt','w') as outf:
    json.dump(onrd,outf,indent=2)

## Coalescence by Graph

Coalescence by graph is similar to coalescence by ontology, but the new relationships can be more general than `is_a`.  

In [42]:
url = 'https://answercoalesce.renci.org/coalesce'
params = {'method':'graph'}
g_r = requests.post(url,json=robokop_result,params=params)
print(g_r.status_code)
graph_result = g_r.json()
print( len(graph_result['results']))

500


JSONDecodeError: Expecting value: line 2 column 5 (char 5)

In [None]:
from copy import deepcopy
gd = deepcopy(graph_result)
for n in gd['query_graph']['nodes']:
    if 'curie' in n and n['curie'] is None:
        del[n['curie']]
for n in gd['knowledge_graph']['nodes']:
    if 'name' not in n:
        label = getlabel(n['id'])
        n['name'] = label
for e in gd['knowledge_graph']['edges']:
    e['id'] = str(e['id'])
for r in gd['results']:
    for eb in r['edge_bindings']:
        eb['kg_id'] = [ str(x) for x in eb['kg_id']]
#react_component = GammaViewer(props={"data":gd})
#display(react_component)

In [24]:
def draw_one(results,rnum):
    g = deepcopy(results)
    g['results']=[g['results'][rnum]]
    kgs = set()
    for nb in g['results'][0]['node_bindings']:
        for kgid in nb['kg_id']:
            kgs.add(kgid)
    kg = g['knowledge_graph']
    kg['nodes'] = [n for n in kg['nodes'] if n['id'] in kgs ]
    kg['edges'] = [e for e in kg['edges'] if (e['source_id'] in kgs) and (e['target_id'] in kgs)]
#    react_component = GammaViewer(props={"data":g})
#    display(react_component)

In [25]:
draw_one(gd,3)

NameError: name 'gd' is not defined

In [None]:
gd

In [None]:
gd['results'][1]

In [None]:
names = {}
for node in gd['knowledge_graph']['nodes']:
    names[node['id']] = node['name']
r=0
for result in gd['results']:
    for nb in result['node_bindings']:
        if nb['qg_id'].startswith('extra'):
            kgid = nb['kg_id'][0]
            print(r, kgid, names[kgid])
    r += 1


In [32]:
def create_threehop_message():
    query_graph = {
    "nodes": [
        {
            #Covid-19
            "id": "a",
            "type": "disease",
            "curie": "MONDO:0100096"
        },
        {
            "id": "b",
            "type": "gene"
        },
        {
            "id": "c",
            "type": "named_thing"
        },
        {
            #Famotidine
            "id": "d",
            "type": "chemical_substance",
            "curie": "CHEBI:4975"
        }
    ],
    "edges": [
        {
            "id": "ab",
            "source_id": "a",
            "target_id": "b"
        },
        {
            "id": "bc",
            "source_id": "c",
            "target_id": "b"
        },
        {
            "id": "cd",
            "source_id": "c",
            "target_id": "d"
        }
    ]
    }
    message = {"message": {"query_graph": query_graph,
                           'results':[{"node_bindings": [],"edge_bindings": []}]}}
    return message

In [33]:
message3 = create_threehop_message()
robokop_result2 = robokop.pipeline(message3,full=True)

https://covidkop.renci.org/messenger/normalize


In [34]:
print (len(robokop_result2['results']))

866


In [None]:
params = {'method':'graph'}
g_r2 = requests.post(url,json=robokop_result2,params=params)
print(g_r2.status_code)
graph_result2 = g_r2.json()
print( len(graph_result2['results']))

In [35]:
with open('bigger.json','w') as outf:
    json.dump(robokop_result2,outf,indent=2)