# Famotidine Covid

This is a test case to see whether Answer Merging helps with the arbitrary query problem. The problem is: I want to know the most interesting way in which these two entities are related. What is it? One option is to do question rewriting. Another option (the one explored here) is to generate general queries and then work on merging their answers.

In [14]:
import json
import os
import time
from copy import deepcopy

import pandas as pd
import requests
from IPython.display import display

#GammaViewer available from https://github.com/ranking-agent/gamma-viewer
from GammaViewer import GammaViewer

In [19]:
def getlabel(curie):
    """Look up the label of a CURIE with the node-normalization service.

    Parameters
    ----------
    curie : str
        Identifier to look up, e.g. ``'MONDO:0100096'``.

    Returns
    -------
    str
        The ``['id']['label']`` entry of the service's reply for ``curie``,
        or ``''`` when the lookup fails (request error, non-JSON body,
        unknown CURIE, or a reply entry without a label).
    """
    url = 'https://nodenormalization-sri.renci.org/get_normalized_nodes'
    try:
        r = requests.get(url,params={'curie':curie}).json()
        return r[curie]['id']['label']
    except (requests.RequestException, ValueError, LookupError, TypeError):
        # Best-effort lookup: a missing label must not abort the caller.
        # Only lookup-related failures are absorbed, so KeyboardInterrupt
        # and programming errors still surface (a bare ``except`` hid both).
        return ''

class RobokopMessenger:
    """Small HTTP client that chains the messenger and ranker endpoints.

    Parameters
    ----------
    url : str
        Base URL of the messenger service; the ``normalize``, ``answer`` and
        ``yank`` endpoints are addressed relative to it.
    ranker_url : str
        Base URL of the service exposing the ``support``,
        ``weight_correctness`` and ``score`` endpoints.
    """
    def __init__(self, url='https://covidkop.renci.org/messenger',
                 ranker_url='http://robokop.renci.org:4868'):
        self.url = url
        self.ranker_url = ranker_url

    @staticmethod
    def _post(endpoint, payload):
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded JSON reply."""
        response = requests.post( endpoint , json=payload )
        # Stop at the stage that failed instead of handing an error body
        # to the next stage of the pipeline.
        response.raise_for_status()
        return response.json()

    def pipeline(self,request,full = True):
        """Send ``request`` through the service chain and return the final reply.

        Parameters
        ----------
        request : dict
            Posted unchanged to the ``normalize`` endpoint.
        full : bool
            When false, stop after ``answer`` and return its reply. When true
            (default), continue through ``yank``, ``support``,
            ``weight_correctness`` and ``score``.

        Returns
        -------
        The decoded JSON reply of the last endpoint that was called.

        Raises
        ------
        requests.HTTPError
            If any endpoint answers with an HTTP error status.
        """
        #normalize question
        purl = f'{self.url}/normalize'
        print(purl)
        message = self._post(purl, request)

        # Every later stage receives the previous reply wrapped as {'message': ...}.
        stages = [f'{self.url}/answer']
        if full:
            stages += [
                f'{self.url}/yank',
                f'{self.ranker_url}/support',
                f'{self.ranker_url}/weight_correctness',
                f'{self.ranker_url}/score',
            ]
        for endpoint in stages:
            message = self._post(endpoint, { 'message': message, })
        return message


robokop=RobokopMessenger()

In [29]:
def create_message(disease_curie="MONDO:0100096", chemical_curie="CHEBI:4975"):
    """Build the two-hop question ``disease -- ? -- chemical``.

    Parameters
    ----------
    disease_curie : str
        CURIE pinned on node ``a``; defaults to Covid-19.
    chemical_curie : str
        CURIE pinned on node ``c``; defaults to Famotidine.

    Returns
    -------
    dict
        ``{"message": {...}}`` holding the query graph (nodes ``a``, ``b``,
        ``c``; edges ``ab`` and ``bc``) and one result with empty bindings.
        A fresh structure is built on every call.
    """
    query_graph = {
    "nodes": [
        {
            "id": "a",
            "type": "disease",
            "curie": disease_curie
        },
        {
            # Unpinned middle node: any entity linked to both ends.
            "id": "b",
            "type": "named_thing"
        },
        {
            "id": "c",
            "type": "chemical_substance",
            "curie": chemical_curie
        }
    ],
    "edges": [
        {
            "id": "ab",
            "source_id": "a",
            "target_id": "b"
        },
        {
            "id": "bc",
            "source_id": "c",
            "target_id": "b"
        }
    ]
    }
    message = {"message": {"query_graph": query_graph,
                           'results':[{"node_bindings": [],"edge_bindings": []}]}}
    return message

In [30]:
# Build the two-hop question and render its query graph in the viewer.
message = create_message()
react_component = GammaViewer(props=dict(data=message['message']))
display(react_component)

In [32]:
# full=False: return the reply of the `answer` stage without the later stages.
robokop_result = robokop.pipeline(request=message, full=False)
print(json.dumps(robokop_result, indent=2))

https://covidkop.renci.org/messenger/normalize
{
  "query_graph": {
    "nodes": [
      {
        "id": "a",
        "curie": [
          "MONDO:0100096"
        ],
        "type": "disease"
      },
      {
        "id": "b",
        "curie": null,
        "type": "named_thing"
      },
      {
        "id": "c",
        "curie": [
          "CHEBI:4975"
        ],
        "type": "chemical_substance"
      }
    ],
    "edges": [
      {
        "id": "ab",
        "source_id": "a",
        "target_id": "b",
        "type": null
      },
      {
        "id": "bc",
        "source_id": "c",
        "target_id": "b",
        "type": null
      }
    ]
  },
  "knowledge_graph": {
    "nodes": [
      {
        "id": "MONDO:0100096",
        "type": [
          "named_thing",
          "biological_entity",
          "disease",
          "disease_or_phenotypic_feature"
        ],
        "synonyms": [],
        "equivalent_identifiers": [
          "SNOMEDCT:840539006",
          "MONDO

In [33]:
# Keep a copy of the raw answers on disk.
with open('famcov.json', mode='w') as handle:
    json.dump(robokop_result, handle, indent=2)

## Coalesce Answers by Property

In [7]:
# POST the answers to the coalesce endpoint, selecting the 'property' method.
url = 'https://answercoalesce.renci.org/coalesce'
params = dict(method='property')
p_r = requests.post(url, params=params, json=robokop_result)
print(p_r.status_code)
property_result = p_r.json()

200


In [8]:
# Inspect the coalesced results (the recorded run returned an empty list).
property_result['results']

[]

In [9]:
results = property_result['results']
# NOTE(review): the binding is picked by position (index 2), while the ontology
# cell reads index 1 - confirm which binding carries 'p_values'/'properties'.
chem_bindings = [result['node_bindings'][2] for result in results]
# One row per (p-value, property) pair of each selected binding.
rows = [
    (len(chemnode['kg_id']), p, prop)
    for chemnode in chem_bindings
    for p, prop in zip(chemnode['p_values'], chemnode['properties'])
]
num_grouped = [row[0] for row in rows]
p_values = [row[1] for row in rows]
properties = [row[2] for row in rows]
df = pd.DataFrame( {'NumberGrouped': num_grouped, 'p_value':p_values, 'PropertyGroup':properties})
df = df.sort_values(by='p_value')
df

Unnamed: 0,NumberGrouped,p_value,PropertyGroup


## Coalesce Answers by Ontology

In [35]:
# POST the answers to the coalesce endpoint, selecting the 'ontology' method.
url = 'https://answercoalesce.renci.org/coalesce'
params = dict(method='ontology')
o_r = requests.post(url, params=params, json=robokop_result)
print(o_r.status_code)
ontology_result = o_r.json()
# Number of coalesced results that came back.
print(len(ontology_result['results']))

200
3


In [36]:
results = ontology_result['results']
# For each coalesced result, list the grouped nodes and then their shared superclass.
for result in results:
    print('\n================')
    grouped = result['node_bindings'][1]
    sclass = grouped['superclass']
    label = getlabel(sclass)
    member_lines = [f'{getlabel(member)} ({member})' for member in grouped['kg_id']]
    for line in member_lines:
        print(line)
    print('----have superclass----')
    print(f'{label} ({sclass})')


acute respiratory distress syndrome (MONDO:0006502)
respiratory failure (MONDO:0021113)
----have superclass----
respiratory failure (MONDO:0021113)

cortisol (CHEBI:17650)
prednisone (CHEBI:8382)
dexamethasone (CHEBI:41879)
methylprednisolone succinate (CHEBI:135765)
----have superclass----
corticosteroid hormone (CHEBI:36699)

prednisone (CHEBI:8382)
dexamethasone (CHEBI:41879)
cortisol (CHEBI:17650)
----have superclass----
glucocorticoid (CHEBI:24261)


In [12]:
def make_viewer_ready(message):
    """Return a tidied deep copy of a coalesced message; the input is untouched.

    The copy differs from ``message`` in four ways:

    * query-graph nodes whose ``'curie'`` is ``None`` lose that key;
    * knowledge-graph nodes without a ``'name'`` get one from ``getlabel``;
    * knowledge-graph edge ids are converted to strings;
    * the ``kg_id`` entries of every edge binding are converted to strings.

    NOTE(review): presumably these are the shapes GammaViewer expects - confirm.
    """
    cleaned = deepcopy(message)
    for qnode in cleaned['query_graph']['nodes']:
        if 'curie' in qnode and qnode['curie'] is None:
            del qnode['curie']
    for knode in cleaned['knowledge_graph']['nodes']:
        if 'name' not in knode:
            knode['name'] = getlabel(knode['id'])
    for kedge in cleaned['knowledge_graph']['edges']:
        kedge['id'] = str(kedge['id'])
    for result in cleaned['results']:
        for binding in result['edge_bindings']:
            binding['kg_id'] = [str(kg_id) for kg_id in binding['kg_id']]
    return cleaned


onrd = make_viewer_ready(ontology_result)
react_component = GammaViewer(props={"data":onrd})
display(react_component)

In [13]:
# Save the viewer-ready ontology coalescence to disk.
with open('ontology_output.json.txt', mode='w') as handle:
    json.dump(onrd, handle, indent=2)

## Coalescence by Graph

Coalescence by graph is similar to coalescence by ontology, but the new relationships can be more general than `is_a`.  

In [14]:
# POST the answers to the coalesce endpoint, selecting the 'graph' method.
url = 'https://answercoalesce.renci.org/coalesce'
params = dict(method='graph')
g_r = requests.post(url, params=params, json=robokop_result)
print(g_r.status_code)
graph_result = g_r.json()
# Number of coalesced results that came back.
print(len(graph_result['results']))

200
25


In [15]:
from copy import deepcopy
# Work on a copy so graph_result itself keeps the service's original reply.
gd = deepcopy(graph_result)
# Drop 'curie' keys that are present but None.
for qnode in gd['query_graph']['nodes']:
    if 'curie' in qnode and qnode['curie'] is None:
        qnode.pop('curie')
# Give every knowledge-graph node a 'name', looked up by id when absent.
for knode in gd['knowledge_graph']['nodes']:
    if 'name' not in knode:
        label = getlabel(knode['id'])
        knode['name'] = label
# Edge ids, and the ids referenced by the edge bindings, become strings.
for kedge in gd['knowledge_graph']['edges']:
    kedge['id'] = str(kedge['id'])
for binding in (eb for res in gd['results'] for eb in res['edge_bindings']):
    binding['kg_id'] = list(map(str, binding['kg_id']))
react_component = GammaViewer(props=dict(data=gd))
display(react_component)

In [16]:
def _bound_kg_ids(binding):
    """Return the ``kg_id`` of one binding as a list, whether it is a single id or a list."""
    kg_id = binding['kg_id']
    # A bare string must count as ONE id; iterating it would yield characters.
    return [kg_id] if isinstance(kg_id, str) else list(kg_id)


def draw_one(results,rnum):
    """Display one result of a message together with the sub-graph it binds.

    Parameters
    ----------
    results : dict
        Message with ``'results'`` and ``'knowledge_graph'`` entries. It is
        deep-copied first, so the caller's object is not modified.
    rnum : int
        Index into ``results['results']`` of the result to draw.

    The displayed copy keeps only that result, the knowledge-graph nodes named
    in its node bindings, and the edges whose two endpoints are both kept.
    """
    g = deepcopy(results)
    g['results']=[g['results'][rnum]]
    kgs = {
        kgid
        for nb in g['results'][0]['node_bindings']
        for kgid in _bound_kg_ids(nb)
    }
    kg = g['knowledge_graph']
    kg['nodes'] = [n for n in kg['nodes'] if n['id'] in kgs ]
    kg['edges'] = [e for e in kg['edges'] if (e['source_id'] in kgs) and (e['target_id'] in kgs)]
    react_component = GammaViewer(props={"data":g})
    display(react_component)

In [17]:
# Draw the result at index 3 of the graph coalescence.
draw_one(gd, rnum=3)

In [18]:
# Dump the whole viewer-ready graph coalescence for inspection (large output).
gd

{'query_graph': {'edges': [{'id': 'ab', 'source_id': 'a', 'target_id': 'b'},
   {'id': 'bc', 'source_id': 'c', 'target_id': 'b'},
   {'id': 'extra_qe_0', 'source_id': 'b', 'target_id': 'extra_qn_0'}],
  'nodes': [{'curie': ['MONDO:0100096'], 'id': 'a', 'type': 'disease'},
   {'id': 'b', 'type': 'named_thing', 'set': True},
   {'curie': ['CHEBI:4975'], 'id': 'c', 'type': 'chemical_substance'},
   {'id': 'extra_qn_0', 'type': []}]},
 'knowledge_graph': {'edges': [{'ctime': [1592973258.6295419],
    'edge_source': ['cord19_scigraph_v7'],
    'enrichment_p': 7.200162544618264e-26,
    'id': '6495101038d3ebd2aef30675f0bef227',
    'num_publications': 0.8943779167033549,
    'predicate_id': 'owl:ObjectProperty',
    'publications': [],
    'relation': ['SEMMEDDB:ASSOCIATED_WITH'],
    'relation_label': ['related_to'],
    'source_database': ['cord19_scigraph_v7'],
    'source_id': 'CHEBI:29320',
    'target_id': 'CHEBI:4975',
    'type': 'related_to',
    'weight': 1},
   {'count': 3,
    'e

In [19]:
# Look at a single coalesced result (index 1): its node and edge bindings.
gd['results'][1]

{'node_bindings': [{'qg_id': 'a', 'kg_id': ['MONDO:0100096']},
  {'qg_id': 'b',
   'kg_id': ['MONDO:0005009', 'MONDO:0003019'],
   'coalescence_method': 'graph_enrichment',
   'p_value': 0.0,
   'enriched_nodes': ['CARO:0000006']},
  {'qg_id': 'c', 'kg_id': ['CHEBI:4975']},
  {'qg_id': 'extra_qn_0', 'kg_id': ['CARO:0000006']},
  {'qg_id': 's5',
   'kg_id': ['c282432b-a290-4bc8-b517-cade68508e66'],
   'weight': 0.00025229939954596325},
  {'qg_id': 's3',
   'kg_id': ['6d426fbf-56d8-4f22-b252-6f00c5d5457b'],
   'weight': 0.0}],
 'edge_bindings': [{'qg_id': 'ab',
   'kg_id': ['1ab4bb5350a0e315b05708cf61348735',
    '8af37d23e5428c5a1cb8547385f6772c'],
   'weight': 0.0},
  {'qg_id': 'bc',
   'kg_id': ['6ff38fffa4c6f0dcacc578327d518a21',
    '8872e78619d787c6ebede9c092e74db5',
    '97d2d4d898611e086700cdd83d8e97fd',
    '5d1a7a8fad6fa52d4eec8857c540a664'],
   'weight': 0.0},
  {'qg_id': 'extra_qe_0',
   'kg_id': ['2353862735879336843', '-6970308096730686244'],
   'weight': 0.0}],
 'score': 0

In [20]:
# Map every knowledge-graph node id to its display name.
names = {node['id']: node['name'] for node in gd['knowledge_graph']['nodes']}
# For each result, print its index, the first id bound to every query node whose
# qg_id starts with 'extra', and that id's name.
for r, result in enumerate(gd['results']):
    for nb in result['node_bindings']:
        if nb['qg_id'].startswith('extra'):
            kgid = nb['kg_id'][0]
            print(r, kgid, names[kgid])


0 CARO:0000003 
1 CARO:0000006 
2 CARO:0000011 
3 CARO:0000003 
4 CARO:0000003 
5 CARO:0000006 
6 CARO:0000006 
7 CARO:0000003 
8 CARO:0000003 
9 CARO:0000006 
10 CARO:0000006 
11 CARO:0000003 
12 CARO:0000006 
13 CARO:0000011 
14 CHEBI:134674 Yosprala
15 CHEBI:134674 Yosprala
16 CHEBI:134674 Yosprala
17 CHEBI:139588 alpha-hydroxy ketone
18 CHEBI:139590 primary alpha-hydroxy ketone
19 CHEBI:139592 tertiary alpha-hydroxy ketone
20 CHEBI:15734 primary alcohol
21 CHEBI:17087 ketone
22 CHEBI:18059 lipid
23 CHEBI:18059 lipid
24 CHEBI:20156 3-oxo-Delta(1) steroid


In [23]:
def create_threehop_message(disease_curie="MONDO:0100096", chemical_curie="CHEBI:4975",
                            kg_url=None, kg_username=None, kg_password=None):
    """Build the three-hop question ``disease -- gene -- ? -- chemical``.

    Parameters
    ----------
    disease_curie : str
        CURIE pinned on node ``a``; defaults to Covid-19.
    chemical_curie : str
        CURIE pinned on node ``d``; defaults to Famotidine.
    kg_url, kg_username, kg_password : str, optional
        Connection settings placed under ``message['knowledge_graph']``.
        Each one is resolved in this order: the explicit argument, then the
        environment variable ``ROBOKOP_NEO4J_URL`` / ``ROBOKOP_NEO4J_USERNAME``
        / ``ROBOKOP_NEO4J_PASSWORD``, then the value this notebook used before.

    Returns
    -------
    dict
        ``{"message": {...}}`` with the query graph, the knowledge-graph
        connection settings and one result with empty bindings.
    """
    def _setting(explicit, env_name, fallback):
        # An explicit argument wins, then the environment, then the old default.
        if explicit is not None:
            return explicit
        return os.environ.get(env_name, fallback)

    query_graph = {
    "nodes": [
        {
            "id": "a",
            "type": "disease",
            "curie": disease_curie
        },
        {
            "id": "b",
            "type": "gene"
        },
        {
            "id": "c",
            "type": "named_thing"
        },
        {
            "id": "d",
            "type": "chemical_substance",
            "curie": chemical_curie
        }
    ],
    "edges": [
        {
            "id": "ab",
            "source_id": "a",
            "target_id": "b"
        },
        {
            "id": "bc",
            "source_id": "c",
            "target_id": "b"
        },
        {
            "id": "cd",
            "source_id": "c",
            "target_id": "d"
        }
    ]
    }
    # NOTE(review): the fallback credentials are kept only so existing calls keep
    # working; prefer supplying them via the environment and dropping the literals.
    knowledge_graph = {
        'url': _setting(kg_url, 'ROBOKOP_NEO4J_URL', 'bolt://stars-k6.edc.renci.org:31333'),
        'credentials': {
            'username': _setting(kg_username, 'ROBOKOP_NEO4J_USERNAME', 'neo4j'),
            'password': _setting(kg_password, 'ROBOKOP_NEO4J_PASSWORD', 'ncatsgamma'),
        },
    }
    message = {"message": {"query_graph": query_graph,
                           'knowledge_graph': knowledge_graph,
                           'results':[{"node_bindings": [],"edge_bindings": []}]}}
    return message

In [24]:
# Build the three-hop question and send it through every pipeline stage.
message3 = create_threehop_message()
robokop_result2 = robokop.pipeline(request=message3, full=True)

In [26]:
# Number of results returned for the three-hop question.
print(len(robokop_result2["results"]))

2062


In [27]:
# Restated so this cell does not rely on `url` left over from an earlier cell.
url = 'https://answercoalesce.renci.org/coalesce'
params = {'method':'graph'}
# The recorded run of this request was interrupted by hand; without a timeout
# requests waits indefinitely, so bound the wait.
# NOTE(review): 600 s is an arbitrary ceiling - adjust to the expected runtime.
COALESCE_TIMEOUT_SECONDS = 600
g_r2 = requests.post(url,json=robokop_result2,params=params,timeout=COALESCE_TIMEOUT_SECONDS)
print(g_r2.status_code)
graph_result2 = g_r2.json()
print( len(graph_result2['results']))

KeyboardInterrupt: 

In [28]:
# Save the three-hop answers to disk.
with open('bigger.json', mode='w') as handle:
    json.dump(robokop_result2, handle, indent=2)