# Answer Coalescence

Strider finds and ranks all paths that fulfill the query.  Some of these answers can be very similar.  For instance, two answers may differ only by a gene node, but perhaps those genes are from the same family.  Coalescing such similar answers into a single grouped answer permits a higher-level, more parsimonious description of an answer set.

In [2]:
import requests
import json
import time
import pandas as pd

#GammaViewer available from https://github.com/ranking-agent/gamma-viewer
from GammaViewer import GammaViewer
from IPython.display import display

In [16]:
def getlabel(curie):
    """Look up the label of a CURIE via the node normalization service.

    Issues a GET request to the ``get_normalized_nodes`` endpoint and reads
    ``response[curie]['id']['label']`` from the decoded JSON body.

    Parameters
    ----------
    curie : str
        Identifier to look up, e.g. ``'CHEBI:5383'``.

    Returns
    -------
    The label found in the response, or ``''`` when the request fails, the
    body is not valid JSON, or the response holds no label for ``curie``.
    """
    url = 'https://nodenormalization-sri.renci.org/get_normalized_nodes'
    try:
        r = requests.get(url,params={'curie':curie}).json()
        return r[curie]['id']['label']
    except Exception:
        # Best-effort lookup: any ordinary failure degrades to an empty label.
        # `Exception` (rather than a bare `except:`) lets KeyboardInterrupt and
        # SystemExit propagate, so the kernel can still be interrupted while a
        # lookup is in flight.
        return ''

class Strider:
    """Minimal HTTP client for the Strider query service.

    Parameters
    ----------
    url : str, optional
        Base URL of the service; ``/query`` and ``/results`` are appended to
        it. Defaults to the instance used throughout this notebook.
    timeout : float or None, optional
        Timeout in seconds handed to every ``requests`` call so that an
        unresponsive server cannot block the notebook forever. ``None``
        disables the timeout.
    """
    def __init__(self, url='http://robokop.renci.org:5781', timeout=60):
        self.url = url
        self.timeout = timeout

    def call(self, question):
        """Wrap a query graph in a message envelope and submit it.

        Returns whatever :meth:`send_message` returns.
        """
        message = {'message': {'query_graph': question}}
        return self.send_message(message)

    def send_message(self, message):
        """POST ``message`` to the ``/query`` endpoint.

        Returns the decoded JSON body on HTTP 200 (the notebook uses it as the
        query id). On any other status the status code is printed and ``None``
        is returned.
        """
        response = requests.post(f'{self.url}/query', json=message,
                                 timeout=self.timeout)
        if response.status_code == 200:
            return response.json()
        print(response.status_code)
        return None

    def query_result(self, pid):
        """GET the ``/results`` endpoint for query id ``pid``.

        Returns the decoded JSON body of the response.
        """
        r = requests.get(f'{self.url}/results', params={'query_id': pid},
                         timeout=self.timeout)
        return r.json()
    
# Shared client instance used by the cells below to submit the query and fetch results.
strider = Strider()

In [4]:
def create_message():
    """Build the example query message used throughout this notebook.

    The query graph has three nodes -- the disease ``MONDO:0005015`` (``a``),
    an unpinned gene (``b``) and an unpinned chemical substance (``c``) -- and
    two edges: ``a`` -> ``b`` (``gene_associated_with_condition``) and
    ``c`` -> ``b`` (``decreases_activity_of``).

    Returns
    -------
    dict
        ``{'message': {...}}`` holding the query graph, an empty knowledge
        graph, and a single result with empty node and edge bindings.
    """
    disease = {"id": "a", "type": "disease", "curie": "MONDO:0005015"}
    gene = {"id": "b", "type": "gene"}
    chemical = {"id": "c", "type": "chemical_substance"}

    disease_gene_edge = {
        "id": "ab",
        "source_id": "a",
        "target_id": "b",
        "type": "gene_associated_with_condition",
    }
    chemical_gene_edge = {
        "id": "bc",
        "source_id": "c",
        "target_id": "b",
        "type": "decreases_activity_of",
    }

    query_graph = {
        "nodes": [disease, gene, chemical],
        "edges": [disease_gene_edge, chemical_gene_edge],
    }
    empty_knowledge_graph = {"nodes": [], "edges": []}
    placeholder_result = {"node_bindings": [], "edge_bindings": []}

    return {
        "message": {
            "query_graph": query_graph,
            'knowledge_graph': empty_knowledge_graph,
            'results': [placeholder_result],
        }
    }

In [5]:
# Build the example message and render it with GammaViewer; only the inner
# 'message' dict is handed to the viewer.
message = create_message()
react_component = GammaViewer(props={"data":message['message']})
display(react_component)

In [6]:
# Submit only the query graph to Strider (the empty knowledge_graph / results
# placeholders from create_message() are left out).
m2 = {'message': {'query_graph': message['message']['query_graph']}}
# `p` is the value returned by Strider; the polling cells below pass it as the query id.
p = strider.send_message(m2)
print(p)

5ce82a12-3caa-4d58-807d-f84a33538d06


In [7]:
# Poll Strider until the query has produced at least one result.
# The wait is bounded so that a query which never yields results raises a
# clear error instead of spinning forever.
POLL_INTERVAL_SECONDS = 5
MAX_WAIT_SECONDS = 600
poll_deadline = time.monotonic() + MAX_WAIT_SECONDS

strider_result = strider.query_result(p)
while len(strider_result['results']) == 0:
    if time.monotonic() >= poll_deadline:
        raise TimeoutError(
            f'No results for query {p} after {MAX_WAIT_SECONDS} seconds')
    time.sleep(POLL_INTERVAL_SECONDS)
    print('checking')
    strider_result = strider.query_result(p)
print('results found')

checking
results found


In [8]:
# Wait a little longer and fetch again before counting.
# NOTE(review): presumably Strider keeps adding results after the first ones
# appear, so this gives it time to finish -- confirm.
time.sleep(5)
strider_result = strider.query_result(p)
print(f"Found {len(strider_result['results'])} results")

Found 17 results


In [9]:
# Temporary workaround: GammaViewer doesn't like the 'curie': None entries that
# come back from Strider on query-graph nodes (to be fixed soon), so they are
# stripped from a copy of the result before rendering.
from copy import deepcopy

sr = deepcopy(strider_result)
for qnode in sr['query_graph']['nodes']:
    # '' is just a non-None default: only an explicit None value is removed.
    if qnode.get('curie', '') is None:
        qnode.pop('curie')
react_component = GammaViewer(props={"data": sr})
display(react_component)

## Coalesce Answers by Property

In [10]:
# POST the full Strider result to the Answer Coalesce service, selecting the
# 'property' method via the query string.
url = 'https://answercoalesce.renci.org/coalesce'
params = {'method':'property'}
p_r = requests.post(url,json=strider_result,params=params)
# Show the HTTP status before decoding the body.
print(p_r.status_code)
property_result = p_r.json()

200


In [11]:
# Tabulate every (property, p-value) pair reported for the coalesced chemical
# node of each result, together with the number of knowledge-graph ids grouped
# under that node.
results = property_result['results']
num_grouped=[]
p_values=[]
properties=[]
for result in results:
    # NOTE(review): index 2 assumes the chemical node is the third node
    # binding of every result -- confirm against the service response.
    chemnode = result['node_bindings'][2]
    group_size = len(chemnode['kg_id'])
    # The loop variable is deliberately not called `p`: that name holds the
    # Strider query id used by the polling cells, and overwriting it here would
    # break them if they were re-run after this cell.
    for p_value,prop in zip(chemnode['p_values'],chemnode['properties']):
        num_grouped.append(group_size)
        p_values.append(p_value)
        properties.append(prop)
# Sort into a new frame rather than in place so re-running the cell is idempotent.
df = pd.DataFrame( {'NumberGrouped': num_grouped, 'p_value':p_values, 'PropertyGroup':properties}).sort_values(by='p_value')
df

Unnamed: 0,NumberGrouped,p_value,PropertyGroup
0,14,1.1796050000000001e-55,Oral Hypoglycemics
1,14,3.985015e-54,Drugs Used in Diabetes
2,14,1.900595e-51,Blood Glucose Lowering Agents
4,11,2.2681899999999998e-50,Sulfonylureas
3,14,2.981574e-41,Alimentary Tract and Metabolism
7,10,4.974737e-36,Urea
8,12,9.391828999999999e-36,Cytochrome P-450 CYP2C9 Substrates
5,11,1.8044559999999999e-34,Sulfones
9,7,9.077801000000001e-32,Sulfonylurea Compounds
10,9,3.6544440000000004e-31,Hypoglycemia-Associated Agents


## Coalesce Answers by Ontology

In [12]:
# POST the full Strider result to the Answer Coalesce service, selecting the
# 'ontology' method via the query string.
url = 'https://answercoalesce.renci.org/coalesce'
params = {'method':'ontology'}
o_r = requests.post(url,json=strider_result,params=params)
# Show the HTTP status before decoding the body.
print(o_r.status_code)
ontology_result = o_r.json()
# Number of results returned by the ontology coalescence.
print( len(ontology_result['results']))

200
2


In [13]:
# For every ontology-coalesced result, list the grouped chemicals (label and
# CURIE) followed by the superclass they share.
results = ontology_result['results']
for coalesced in results:
    print('\n================')
    chem_binding = coalesced['node_bindings'][2]
    superclass = chem_binding['superclass']
    superclass_label = getlabel(superclass)
    members = chem_binding['kg_id']
    # Resolve all member labels up front, then print them in binding order.
    member_labels = {member: f'{getlabel(member)}' for member in members}
    for member in members:
        print(f'{member_labels[member]} ({member})')
    print('----have superclass----')
    print(f'{superclass_label} ({superclass})')


glisoxepide (CHEBI:135731)
carbutamide (CHEBI:135118)
chlorpropamide (CHEBI:3650)
tolbutamide (CHEBI:27999)
glimepiride (CHEBI:5383)
glipizide (CHEBI:5384)
acetohexamide (CHEBI:28052)
tolazamide (CHEBI:9613)
gliclazide (CHEBI:31654)
glyburide (CHEBI:5441)
----have superclass----
sulfonamide (CHEBI:35358)

tolazamide (CHEBI:9613)
gliclazide (CHEBI:31654)
chlorpropamide (CHEBI:3650)
glipizide (CHEBI:5384)
tolbutamide (CHEBI:27999)
glyburide (CHEBI:5441)
glimepiride (CHEBI:5383)
acetohexamide (CHEBI:28052)
----have superclass----
N-sulfonylurea (CHEBI:76983)


In [18]:
from copy import deepcopy

# Prepare a cleaned copy of the ontology-coalesced message for GammaViewer.
onrd = deepcopy(ontology_result)

# Drop explicit 'curie': None entries from the query-graph nodes.
for qnode in onrd['query_graph']['nodes']:
    if qnode.get('curie', '') is None:
        qnode.pop('curie')

# Fill in a display name for knowledge-graph nodes that lack one.
for kg_node in onrd['knowledge_graph']['nodes']:
    if 'name' not in kg_node:
        kg_node['name'] = getlabel(kg_node['id'])

# Convert edge ids to strings, both on the edges and in the edge bindings.
for kg_edge in onrd['knowledge_graph']['edges']:
    kg_edge['id'] = str(kg_edge['id'])
for answer in onrd['results']:
    for edge_binding in answer['edge_bindings']:
        edge_binding['kg_id'] = list(map(str, edge_binding['kg_id']))

react_component = GammaViewer(props={"data": onrd})
display(react_component)

## Coalesce Answers by Graph

Coalescence by graph is similar to coalescence by ontology, but the new relationships can be more general than `is_a`.  

In [14]:
# POST the full Strider result to the Answer Coalesce service, selecting the
# 'graph' method via the query string.
url = 'https://answercoalesce.renci.org/coalesce'
params = {'method':'graph'}
g_r = requests.post(url,json=strider_result,params=params)
# Show the HTTP status before decoding the body.
print(g_r.status_code)
graph_result = g_r.json()
# Number of results returned by the graph coalescence.
print( len(graph_result['results']))

200
18


In [19]:
from copy import deepcopy

# Prepare a cleaned copy of the graph-coalesced message for GammaViewer.
# `gd` is also the input of the draw_one(...) call further down.
gd = deepcopy(graph_result)

# Drop explicit 'curie': None entries from the query-graph nodes.
for qnode in gd['query_graph']['nodes']:
    if qnode.get('curie', '') is None:
        qnode.pop('curie')

# Fill in a display name for knowledge-graph nodes that lack one.
for kg_node in gd['knowledge_graph']['nodes']:
    if 'name' not in kg_node:
        kg_node['name'] = getlabel(kg_node['id'])

# Convert edge ids to strings, both on the edges and in the edge bindings.
for kg_edge in gd['knowledge_graph']['edges']:
    kg_edge['id'] = str(kg_edge['id'])
for answer in gd['results']:
    for edge_binding in answer['edge_bindings']:
        edge_binding['kg_id'] = list(map(str, edge_binding['kg_id']))

react_component = GammaViewer(props={"data": gd})
display(react_component)

In [24]:
def draw_one(results,rnum):
    """Render a single result of a message with GammaViewer.

    Works on a deep copy of ``results``: the copy keeps only the result at
    index ``rnum``, and its knowledge graph is pruned to the nodes bound by
    that result plus the edges whose source and target are both among those
    nodes. The input message is not modified.

    Parameters
    ----------
    results : dict
        Message with ``'results'`` and ``'knowledge_graph'`` entries.
    rnum : int
        Index of the result to draw.

    Raises
    ------
    IndexError
        If ``rnum`` is not a valid index into ``results['results']``.
    """
    g = deepcopy(results)
    g['results']=[g['results'][rnum]]
    kgs = set()
    for nb in g['results'][0]['node_bindings']:
        bound = nb['kg_id']
        if isinstance(bound, str):
            # A binding may carry one id as a plain string; iterating it would
            # add single characters instead of the id itself.
            kgs.add(bound)
        else:
            kgs.update(bound)
    kg = g['knowledge_graph']
    kg['nodes'] = [n for n in kg['nodes'] if n['id'] in kgs ]
    kg['edges'] = [e for e in kg['edges'] if (e['source_id'] in kgs) and (e['target_id'] in kgs)]
    react_component = GammaViewer(props={"data":g})
    display(react_component)

In [29]:
# Draw only the result at index 3 of the cleaned graph-coalesced message `gd`.
draw_one(gd,3)