# Answer Coalescence

Strider finds and ranks all paths that fulfill the query. Some of these answers can be very similar. For instance, two answers may differ only by a gene node, and those genes may belong to the same family. Coalescing such answers into a single group permits a higher-level, more parsimonious description of the answer set.

In [106]:
import requests
import json
import time
import pandas as pd

from doc_utils import *
#GammaViewer available from https://github.com/ranking-agent/gamma-viewer
from gamma_viewer import GammaViewer
from IPython.display import display

In [64]:
def getlabel(curie):
    """Return the label the SRI node normalizer reports for ``curie``.

    Parameters
    ----------
    curie : str
        Identifier to look up, e.g. ``"MONDO:0005015"``.

    Returns
    -------
    str
        ``response[curie]['id']['label']`` from the service, or ``''`` when
        the request fails, the body is not JSON, or that entry is absent.
    """
    url = 'https://nodenormalization-sri.renci.org/get_normalized_nodes'
    try:
        # The timeout keeps a stalled service from hanging the notebook.
        r = requests.get(url, params={'curie': curie}, timeout=30).json()
        return r[curie]['id']['label']
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # Best-effort lookup: network errors, a non-JSON body, a missing key
        # or a null entry all mean "no label". Unlike a bare ``except`` this
        # does not swallow KeyboardInterrupt or SystemExit.
        return ''

class Strider:
    """Thin wrapper around the HTTP endpoints found under ``self.url``."""

    def __init__(self):
        # Earlier endpoint, kept for reference: http://robokop.renci.org:5781
        self.url = 'https://aragorn.renci.org/robokop'

    def call(self, question):
        """Wrap ``question`` as ``message.query_graph`` and submit it.

        Returns whatever :meth:`send_message` returns.
        """
        return self.send_message({'message': {'query_graph': question}})

    def send_message(self, message):
        """POST ``message`` as JSON to ``<url>/query``.

        Returns the decoded JSON body on HTTP 200. For any other status the
        status code is printed and ``None`` is returned.
        """
        reply = requests.post(f'{self.url}/query', json=message)
        if reply.status_code != 200:
            print(reply.status_code)
            return None
        return reply.json()

    def query_result(self, pid):
        """GET ``<url>/results`` with ``pid`` as ``query_id``; return the decoded JSON."""
        reply = requests.get(f'{self.url}/results', params={'query_id': pid})
        return reply.json()


strider = Strider()

In [95]:
def create_message():
    """Build the message template for the two-hop disease query.

    The query graph pins ``n0`` to ``MONDO:0005015``, asks for ``n1``
    (``biolink:Disease``) that is ``biolink:subclass_of`` ``n0``, and for
    ``n2`` (``biolink:GeneProductMixin``) such that ``n1``
    ``biolink:regulates`` ``n2``. The returned dict also carries an empty
    knowledge graph and a single empty result.
    """
    def _qnode(**selector):
        # Every query node shares the same is_set / constraints settings.
        return {**selector, 'is_set': False, 'constraints': []}

    def _qedge(subject, obj, predicate):
        return {
            'subject': subject,
            'object': obj,
            'predicates': [predicate],
            'attribute_constraints': [],
            'qualifier_constraints': [],
        }

    nodes = {
        "n0": _qnode(ids=["MONDO:0005015"]),
        "n1": _qnode(categories=["biolink:Disease"]),
        "n2": _qnode(categories=["biolink:GeneProductMixin"]),
    }
    edges = {
        'e00': _qedge('n1', 'n0', "biolink:subclass_of"),
        'e01': _qedge('n1', 'n2', "biolink:regulates"),
    }
    return {
        "message": {
            "query_graph": {"nodes": nodes, "edges": edges},
            'knowledge_graph': {"nodes": [], "edges": []},
            'results': [{"node_bindings": [], "edge_bindings": []}],
        }
    }
# Build the template, then submit only its query graph; the empty
# knowledge_graph / results placeholders are not sent.
message = create_message()
m2 = {'message': {'query_graph': message['message']['query_graph']}}
p = strider.send_message(m2)
# send_message() returns None for a non-200 reply; fail with a clear message
# instead of a TypeError on the subscript below.
if p is None:
    raise RuntimeError('Strider query failed; see the HTTP status code printed above')
# auxiliary_graphs is read with .get(), as in the treats-query cell, so a
# response that omits the key reports 0 instead of raising KeyError.
print(f"{len(p['message']['results'])} results, {len(p['message'].get('auxiliary_graphs', {}))} Omnicorp Support graphs returned")

17 results, 17 Omnicorp Support graphs returned


In [96]:
# Remove the support graphs from the response in place, then list the keys
# that remain on the message.
p['message'].pop('auxiliary_graphs')
p['message'].keys()

dict_keys(['query_graph', 'knowledge_graph', 'results'])

In [97]:
# `message` is the template returned by create_message(): a query graph with
# an empty knowledge_graph and a single empty result.
# NOTE(review): if the goal is to browse the Strider answers, `p['message']`
# is probably the object to pass here — confirm.
react_component = GammaViewer(props={"data":message['message']})
display(react_component)

In [91]:
# Poll the /results endpoint every 5 seconds until the reply has at least one
# entry under 'results'.
# NOTE(review): `p` is bound above to the full JSON reply of send_message(),
# not a query id, and that reply keeps its results under p['message'];
# this cell looks like a leftover from an asynchronous API — confirm before
# running it.
# NOTE(review): there is no retry limit, so the loop never ends if 'results'
# stays empty.
strider_result=strider.query_result(p)
while len(strider_result['results']) == 0:
    time.sleep(5)
    print('checking')
    strider_result = strider.query_result(p)
print('results found')

In [92]:
# Wait five more seconds, fetch the results once more and report the count
# (presumably to pick up results that arrive late — TODO confirm).
time.sleep(5)
strider_result = strider.query_result(p)
print(f"Found {len(strider_result['results'])} results")

## Coalesce Answers by Property

In [102]:
def create_message():
    """Build the message template for the one-hop "treats" query.

    The query graph pins ``n0`` to ``MONDO:0005015`` and asks for ``n1``
    (``biolink:ChemicalEntity``) such that ``n1`` ``biolink:treats`` ``n0``.
    The returned dict also carries an empty knowledge graph and a single
    empty result.
    """
    def _qnode(**selector):
        # Every query node shares the same is_set / constraints settings.
        return {**selector, 'is_set': False, 'constraints': []}

    target = _qnode(ids=["MONDO:0005015"])
    chemical = _qnode(categories=["biolink:ChemicalEntity"])
    treats_edge = {
        'subject': 'n1',
        'object': 'n0',
        'predicates': ["biolink:treats"],
        'attribute_constraints': [],
        'qualifier_constraints': [],
    }
    query_graph = {
        "nodes": {"n0": target, "n1": chemical},
        "edges": {'e00': treats_edge},
    }
    return {
        "message": {
            "query_graph": query_graph,
            'knowledge_graph': {"nodes": [], "edges": []},
            'results': [{"node_bindings": [], "edge_bindings": []}],
        }
    }
# Build the "treats" template and submit only its query graph; the reply is
# kept in `pp` and fed to the coalescers in the cells that follow.
message = create_message()
m2 = {'message': {'query_graph': message['message']['query_graph']}}
pp = strider.send_message(m2)
# .get(..., '') makes the count 0 when the reply has no auxiliary_graphs key.
print(f"{len(pp['message']['results'])} results, {len(pp['message'].get('auxiliary_graphs', ''))} Omnicorp Support graphs returned")

3907 results, 3052 Omnicorp Support graphs returned


In [103]:
# Send the complete Strider reply (`pp`) to the property coalescer and report
# the HTTP status, the keys of the returned message, and how many results and
# auxiliary graphs (enrichments) it contains.
url = "https://answercoalesce.renci.org/1.4/coalesce/property"
p_r = requests.post(url,json=pp)
print(p_r.status_code)
property_result = p_r.json()
print(property_result['message'].keys())
print(f"{len(property_result['message']['results'])} results, {len(property_result['message']['auxiliary_graphs'])} enrichments returned")

200
dict_keys(['query_graph', 'knowledge_graph', 'results', 'auxiliary_graphs'])
3907 results, 366 enrichments returned


In [113]:
# Inspect one enrichment by its auxiliary-graph id; '' is shown if the id is absent.
property_result['message']['auxiliary_graphs'].get('_n_ac_1_0','')

{'edges': [],
 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type',
   'value': 'property_enrichment'},
  {'attribute_type_id': 'biolink:p_value', 'value': 1.0465880763988569e-52},
  {'attribute_type_id': 'biolink:chemical_role',
   'value': 'CHEBI_ROLE_pharmaceutical'}]}

In [107]:
# Tabulate one row per enrichment: number of grouped results, p-value,
# enriched property and the auxiliary-graph id, sorted by p-value.
# get_enrichments2results is not defined in this notebook; presumably it comes
# from the `doc_utils` star import and maps each auxiliary-graph id to the
# results that reference it — TODO confirm.
enrichments_to_results  = get_enrichments2results(property_result)
auxg = property_result['message']['auxiliary_graphs']
row = []
for enrichment, chemnodes in enrichments_to_results.items():
    attributes = auxg[enrichment]['attributes']
    # Values are picked by position; the sample auxiliary graph shown earlier
    # lists method type, p_value and chemical_role in that order.
    # NOTE(review): the truthiness filter also drops a value of 0 or '',
    # which would shift the positions used below — confirm that cannot occur.
    values = [attr['value'] for attr in attributes if attr.get('value')]
    row.append([len(chemnodes), values[1], values[2], enrichment])
df = pd.DataFrame(row, columns=['NumberGrouped', 'p_value', 'PropertyGroup', 'PropertyId'])
df.sort_values(by='p_value',inplace=True)
df

Unnamed: 0,NumberGrouped,p_value,PropertyGroup,PropertyId
2,317,8.336618e-100,CHEBI_ROLE_pharmaceutical,_n_ac_84_0
74,311,4.570827e-99,CHEBI_ROLE_drug,_n_ac_85_0
98,259,8.314241e-68,CHEBI_ROLE_drug,_n_ac_194_0
99,260,3.559243e-67,CHEBI_ROLE_pharmaceutical,_n_ac_195_0
15,161,1.046588e-52,CHEBI_ROLE_pharmaceutical,_n_ac_1_0
...,...,...,...,...
79,31,9.296224e-05,CHEBI_ROLE_anti_inflammatory_agent,_n_ac_191_0
80,7,9.601183e-05,CHEBI_ROLE_neuroprotective_agent,_n_ac_192_0
82,5,9.617281e-05,CHEBI_ROLE_hematologic_agent,_n_ac_323_0
297,8,9.663413e-05,CHEBI_ROLE_adrenergic_antagonist,_n_ac_278_0


## Coalescence by Graph

Coalescence by graph is similar to coalescence by ontology, but the new relationships can be more general than `is_a`.  

In [117]:
# Send the complete Strider reply (`pp`) to the graph coalescer.
url = "https://answercoalesce.renci.org/1.4/coalesce/graph"
try:
    g_r = requests.post(url,json=pp)
except requests.exceptions.RequestException:
    # The original handler was a bare string expression, so a failed request
    # fell through to a NameError on `g_r`. Show the manual fallback command
    # and re-raise the real error instead.
    # NOTE(review): the command posts to /coalesce/all while this cell uses
    # /coalesce/graph, and it expects `pp` saved as pp.json — confirm both.
    print('curl -X POST -H "Content-Type: application/json" -d @pp.json https://answercoalesce.renci.org/1.4/coalesce/all > ac-response.json')
    raise
print(g_r.status_code)

200


In [118]:
# Decode the graph coalescer reply and report the keys of its message along
# with the number of results and auxiliary graphs (enrichments).
graph_result = g_r.json()
print(graph_result['message'].keys())
print(f"{len(graph_result['message']['results'])} results, {len(graph_result['message']['auxiliary_graphs'])} enrichments returned")

dict_keys(['query_graph', 'knowledge_graph', 'results', 'auxiliary_graphs'])
3907 results, 3903 enrichments returned


In [167]:
# Tabulate one row per graph enrichment: number of grouped results, p-value,
# enriched node and predicate.
# get_enrichments2results is not defined in this notebook; presumably it comes
# from the `doc_utils` star import — TODO confirm.
enrichments_to_results  = get_enrichments2results(graph_result)
auxg = graph_result['message']['auxiliary_graphs']
row = []
for enrichment, chemnodes in enrichments_to_results.items():
    attributes = auxg[enrichment]['attributes']
    # Values are picked by position (second, third and last truthy value).
    # NOTE(review): the attribute order of graph enrichments is not shown in
    # this notebook, and the truthiness filter drops a value of 0 or '',
    # which would shift these positions — confirm against a sample.
    values = [attr['value'] for attr in attributes if attr.get('value')]
    row.append([len(chemnodes), values[1], values[2], values[-1]])
df = pd.DataFrame(row, columns=['NumberGrouped', 'p_value', 'EnrichNode', 'Predicate'])
# Sorting by p-value is left disabled here, unlike the property table.
# df.sort_values(by='p_value',inplace=True)
df

Unnamed: 0,NumberGrouped,p_value,EnrichNode,Predicate
0,66,0.0,MONDO:0005359,biolink:contributes_to
1,47,0.0,NCBIGene:836,biolink:affects
2,21,0.0,MONDO:0001751,biolink:contributes_to
3,32,0.0,MESH:D009336,biolink:contributes_to
4,24,0.0,HP:0002240,biolink:contributes_to
...,...,...,...,...
3896,4,0.0,MONDO:0005155,biolink:ameliorates
3897,21,0.0,HP:0000790,biolink:contributes_to
3898,1,0.0,MONDO:0016022,biolink:treats
3899,1,0.0,HP:0001883,biolink:has_adverse_event


In [168]:
# deepcopy is also used by draw_one() further down, so this cell must run
# before that function is called.
from copy import deepcopy
# Hand the viewer a copy (presumably so it cannot alter graph_result — TODO confirm).
gd = deepcopy(graph_result)
react_component = GammaViewer(props={"data":gd})
display(react_component)

In [None]:
def draw_one(results,rnum):
    """Display one result with only the part of the knowledge graph it touches.

    Parameters
    ----------
    results : dict
        Response whose ``message`` holds a ``results`` list and a
        ``knowledge_graph`` with ``nodes`` and ``edges`` mappings keyed by id.
    rnum : int
        Index of the result to keep in ``results['message']['results']``.

    The input is deep-copied first, so ``results`` itself is not modified.
    Nothing is returned; the trimmed copy is rendered with ``GammaViewer``.
    """
    g = deepcopy(results)
    message = g['message']
    chosen = message['results'][rnum]
    message['results'] = [chosen]

    # Ids of every knowledge-graph node bound by the chosen result.
    kgs = {
        binding['id']
        for bindings in chosen['node_bindings'].values()
        for binding in bindings
    }

    kg = message['knowledge_graph']
    # Filter as mappings. The previous list comprehensions reduced nodes and
    # edges to bare lists of ids and discarded every node/edge payload.
    kg['nodes'] = {nid: node for nid, node in kg['nodes'].items() if nid in kgs}
    # NOTE(review): an edge is kept when either endpoint is bound, so it may
    # point at a node that was filtered out — confirm `or` (not `and`) is wanted.
    kg['edges'] = {
        eid: edge
        for eid, edge in kg['edges'].items()
        if edge['subject'] in kgs or edge['object'] in kgs
    }

    react_component = GammaViewer(props={"data":g})
    display(react_component)

In [None]:
# Render only the result at index 3 of the graph-coalesced copy.
draw_one(gd,3)