# Answer Coalescence using only Aragorn

In [45]:
import requests
from datetime import datetime
import json
from notebook_functions import get_enriched_results, get_enrichments2results

In [8]:
from datetime import datetime as dt
def query_aragorn(question,plate):
    """POST a question to an Aragorn endpoint and return the decoded reply.

    Parameters
    ----------
    question : dict
        JSON-serialisable request body; sent as the JSON payload of the POST.
    plate : str
        Path segment placed between the host and ``/query`` in the URL
        (this notebook passes ``'robokop'`` and ``'aragorn'``).

    Returns
    -------
    The decoded JSON body when the service answers with HTTP 200; otherwise
    the status code is printed and ``None`` is returned.
    """
    endpoint = f'https://aragorn.renci.org/{plate}/query'
    started = dt.now()
    response = requests.post(endpoint, json=question)
    finished = dt.now()
    # Guard clause: any non-200 answer is reported and treated as "no result".
    if response.status_code != 200:
        print(response.status_code)
        return None
    # Elapsed wall-clock time is only reported for successful calls.
    print(f'It took {finished-started}s to return')
    # The body is returned untouched; no placeholder score is added to results.
    return response.json()

In [9]:
def create_question(disease_curie, relation, answer_type, knowledge_type):
    """Build a one-hop query message of the form ``n1 -[relation]-> n0``.

    Parameters
    ----------
    disease_curie : str
        Identifier pinned on node ``n0`` (the edge's object).
    relation : str
        Predicate placed on edge ``e00``.
    answer_type : str
        Category required of the unpinned node ``n1`` (the edge's subject).
    knowledge_type : str or None
        When truthy, stored on the edge under ``"knowledge_type"``;
        otherwise the key is omitted.

    Returns
    -------
    dict
        ``{"message": {"query_graph": {...}}}`` ready to be posted as JSON.
    """
    answer_node = {"categories": [answer_type], "is_set": False, "constraints": []}
    pinned_node = {"ids": [disease_curie], "is_set": False, "constraints": []}
    edge = {
        "subject": "n1",
        "object": "n0",
        "predicates": [relation],
        "attribute_constraints": [],
        "qualifier_constraints": [],
    }
    # Only tag the edge when a knowledge type was actually supplied.
    if knowledge_type:
        edge["knowledge_type"] = knowledge_type
    query_graph = {
        "nodes": {"n1": answer_node, "n0": pinned_node},
        "edges": {"e00": edge},
    }
    return {"message": {"query_graph": query_graph}}


In [10]:
def getlabel(curie):
    """Return the label the node-normalization service reports for ``curie``.

    Parameters
    ----------
    curie : str
        Identifier sent as the ``curie`` query parameter and used as the key
        into the decoded response.

    Returns
    -------
    The value at ``response[curie]['id']['label']``, or ``''`` when that
    path cannot be followed in the response.

    Notes
    -----
    Network errors and non-JSON bodies still propagate: the request and the
    decoding happen outside the ``try`` block, as they did before.
    """
    url = 'https://nodenormalization-sri.renci.org/get_normalized_nodes'
    r = requests.get(url,params={'curie':curie}).json()
    try:
        return r[curie]['id']['label']
    except (LookupError, TypeError):
        # LookupError: the curie, 'id' or 'label' key is absent.
        # TypeError: an intermediate value is None or not subscriptable by key.
        # A bare `except:` would also have hidden KeyboardInterrupt/SystemExit.
        return ''

#### LOOKUP

In [44]:
# Lookup: build a one-hop "treats" question for the chosen disease and send it
# to the 'robokop' plate. Alternative disease CURIEs are kept below; uncomment
# exactly one assignment to switch.
# disease = "MONDO:0004975"  #Alzheimer
# disease = 'MONDO:0005180'  #Parkinson
# disease = 'MONDO:0004979'  #Asthma
disease = "MONDO:0005148"   #T2D
knowledgetype= "inferred"
edge = "biolink:treats"
answer_type = "biolink:ChemicalEntity"
question = create_question(disease, edge, answer_type, knowledgetype)
# `res` is None when the service did not answer with HTTP 200; the coalescence
# cells below post `res` as-is, so check it before continuing.
res = query_aragorn(question,'robokop')

It took 0:00:00.990898s to return


In [13]:
# res

## All Coalescence

In [48]:
# Post the lookup response to the combined ("all") coalescence endpoint.
# `ac` is the raw requests.Response; its status is checked in the next cell.
url = 'https://answercoalesce.renci.org/1.4/coalesce/all'
ac = requests.post(url,json=res)

In [49]:
# Report the status unconditionally so that a failed call is visible, then
# archive the payload only when the call succeeded.
print(ac.status_code)
if ac.status_code == 200:
    # Use a compact timestamp: str(datetime.now()) contains a space and colons,
    # which are awkward in shells and invalid in Windows file names.
    stamp = datetime.now().strftime('%Y%m%dT%H%M%S_%f')
    with open(f"ac_response{stamp}.json", 'w') as outf:
        json.dump(ac.json(), outf, indent=4)

200


## Property Coalescence

In [14]:
# Property coalescence: post the same lookup response to the property endpoint.
# `pc` holds the raw requests.Response here; it is decoded in the next cell.
url = "https://answercoalesce.renci.org/1.4/coalesce/property"
pc = requests.post(url,json=res)
print(pc.status_code)

200


In [15]:
# Decode the body: from here on `pc` is the message dict rather than the
# requests.Response, so this cell must follow a fresh run of the request cell.
pc = pc.json()
# Filter this response's own results. The earlier code read
# `r['message']['results']`, but no notebook-level `r` exists (it is only a
# local inside the helper functions), so a fresh kernel raised NameError.
# NOTE(review): get_enriched_results is imported from notebook_functions;
# presumably it keeps only results that carry enrichments -- confirm there.
pc['message']['results'] = get_enriched_results(pc['message']['results'])

In [17]:
# Show the top-level sections of the property-coalesced message.
pc['message'].keys()

dict_keys(['query_graph', 'knowledge_graph', 'results', 'auxiliary_graphs'])

In [18]:
# Shorthand for the (already filtered) result list used by the cells below.
pc_results = pc['message']['results']

In [19]:
# Print the first result as a sample of the per-result structure.
print(pc_results[0])

{'node_bindings': {'n1': [{'id': 'PUBCHEM.COMPOUND:2723872'}], 'n0': [{'id': 'MONDO:0005148', 'qnode_id': 'MONDO:0005148'}]}, 'analyses': [{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': 'ccc53379-fce6-4c84-a1dd-f84dd8b470d4'}]}, 'score': 0.9999999891567859}], 'enrichments': ['_n_ac_11_0', '_n_ac_12_0']}


In [20]:
# Inspect one auxiliary graph by its enrichment id.
# NOTE(review): the id is hard-coded from one particular response; it may not
# exist for a different query or service version -- confirm before re-running.
pc['message']['auxiliary_graphs']['_n_ac_14_0']

{'edges': [],
 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type',
   'value': 'property_enrichment'},
  {'attribute_type_id': 'biolink:supporting_study_cohort', 'value': 'n1'},
  {'attribute_type_id': 'biolink:p_value', 'value': 1.9654425543766206e-13},
  {'attribute_type_id': 'biolink:chemical_role',
   'value': 'CHEBI_ROLE_EC_3_hydrolase_inhibitor'}]}

In [21]:
# Collect the attribute values of one auxiliary graph.
# Despite its name, `pvalue` ends up holding every truthy attribute value, not
# only the p-value. The truthiness filter also drops a value that is exactly
# 0 / 0.0 / '' as well as attributes that have no 'value' key.
attributes =pc['message']['auxiliary_graphs']['_n_ac_14_0']['attributes']
pvalue = [attr['value'] for attr in attributes if attr.get('value')]
pvalue

['property_enrichment',
 'n1',
 1.9654425543766206e-13,
 'CHEBI_ROLE_EC_3_hydrolase_inhibitor']

In [22]:
# Mapping consumed by the summary loop below: keys are used as auxiliary-graph
# ids and each value is something with a length.
# NOTE(review): get_enrichments2results comes from notebook_functions; its
# exact grouping logic is not visible here. The same name is rebound to the
# graph-coalescence mapping later in the notebook.
enrichments_to_results  = get_enrichments2results(pc)

In [23]:
# Node bindings of the first property-coalescence result.
print(pc_results[0]['node_bindings'])

{'n1': [{'id': 'PUBCHEM.COMPOUND:2723872'}], 'n0': [{'id': 'MONDO:0005148', 'qnode_id': 'MONDO:0005148'}]}


In [24]:
# Analyses (edge bindings and score) of the first property-coalescence result.
print(pc_results[0]['analyses'])

[{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': 'ccc53379-fce6-4c84-a1dd-f84dd8b470d4'}]}, 'score': 0.9999999891567859}]


In [25]:
# Summarise every property enrichment as: result count, cohort node, p-value, id.
auxg = pc['message']['auxiliary_graphs']
for enrichment, chemnodes in enrichments_to_results.items():
    attributes = auxg[enrichment]['attributes']
    # Look attributes up by type rather than by position. The earlier version
    # filtered on truthiness and then indexed [1] and [2], so a value of 0.0
    # (or any reordering of attributes) silently shifted the printed columns.
    by_type = {attr.get('attribute_type_id'): attr.get('value') for attr in attributes}
    print(len(chemnodes),
          by_type.get('biolink:supporting_study_cohort'),
          by_type.get('biolink:p_value'),
          enrichment)

35 n1 2.5266801155450817e-14 _n_ac_11_0
35 n1 2.5266801155450817e-14 _n_ac_12_0
16 n1 1.5915495481326194e-08 _n_ac_33_0
5 n1 4.5972829883101113e-07 _n_ac_44_0
12 n1 9.072374919205088e-06 _n_ac_55_0
12 n1 1.0737919824210222e-05 _n_ac_56_0
60 n1 1.3421872291871116e-05 _n_ac_63_0
20 n1 1.6064692033080184e-13 _n_ac_13_0
110 n1 1.9982662417591194e-12 _n_ac_16_0
99 n1 7.21133884082788e-11 _n_ac_20_0
38 n1 2.7907139370550145e-08 _n_ac_35_0
6 n1 5.5475027860406724e-05 _n_ac_78_0
3 n1 8.796833888765922e-05 _n_ac_82_0
44 n1 5.163567968217111e-21 _n_ac_3_0
51 n1 1.2188127807622095e-20 _n_ac_4_0
28 n1 6.802521768209122e-15 _n_ac_10_0
34 n1 5.176931832852225e-11 _n_ac_19_0
22 n1 6.638613649087973e-08 _n_ac_36_0
11 n1 1.9733714254372883e-05 _n_ac_65_0
28 n1 2.663630560101017e-05 _n_ac_69_0
22 n1 3.695199807491082e-05 _n_ac_76_0
62 n1 5.11176576375361e-33 _n_ac_1_0
55 n1 1.9654425543766206e-13 _n_ac_14_0
9 n1 2.099762576646993e-05 _n_ac_66_0
7 n1 6.715597913583189e-05 _n_ac_80_0
42 n1 9.4572888436575

## Graph Coalescence

In [26]:
# Graph coalescence: post the same lookup response to the graph endpoint.
# `gc` holds the raw requests.Response here; it is decoded in the next cell.
url = 'https://answercoalesce.renci.org/1.4/coalesce/graph'
gc = requests.post(url,json=res)
print(gc.status_code)

200


In [27]:
# Decode the body: from here on `gc` is the message dict rather than the
# requests.Response, so re-running this cell alone fails on the second pass.
gc = gc.json()
# Replace the result list with the output of get_enriched_results.
# NOTE(review): helper is imported from notebook_functions; presumably it
# keeps only results that carry enrichments -- confirm there.
gc['message']['results'] = get_enriched_results(gc['message']['results'])

In [28]:
# Show the top-level sections of the graph-coalesced message.
gc['message'].keys()

dict_keys(['query_graph', 'knowledge_graph', 'results', 'auxiliary_graphs'])

In [29]:
# Shorthand for the (already filtered) result list used by the cells below.
gc_results = gc['message']['results']

In [30]:
# Print the first result as a sample of the per-result structure.
print(gc_results[0])

{'node_bindings': {'n1': [{'id': 'PUBCHEM.COMPOUND:10954115'}], 'n0': [{'id': 'MONDO:0005148', 'qnode_id': 'MONDO:0005148'}]}, 'analyses': [{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': 'bfc01f78-9d50-4a21-8db0-834c61ba8a9a'}]}, 'score': 0.9999999915683112}], 'enrichments': ['_e_ac_413_0', '_e_ac_483_0']}


In [36]:
# gc['message']['knowledge_graph']['edges'][]#An edge from kg edges???]

In [37]:
# Inspect one auxiliary graph by its enrichment id.
# NOTE(review): the id is hard-coded from one particular response; it may not
# exist for a different query or service version -- confirm before re-running.
gc['message']['auxiliary_graphs']['_e_ac_119_0']

{'edges': ['-1319035598780375590',
  '-3545107625109263755',
  '-5933692865864744765',
  '1083030025430284553',
  '3738905101533478539',
  '7816785463645712138',
  '3007459911658257312',
  '-5706461267590385012',
  '320198773721754335',
  '675805578295402053',
  '-5250087713033121731',
  '-2581624608718783949',
  '6814453734418978212',
  '4596598261712682826',
  '4865160130226322539',
  '-5798857865975601364',
  '-3318762731851440179',
  '6225938971844331740',
  '-8557647135191163238',
  '-4020369933962726474',
  '2676350216773008759',
  '4279918663262217455',
  '-472926228915852818',
  '624892821240093063',
  '6769692112878816756',
  '8625823636189834354',
  '1973715440363489959',
  '-4280616710187600298',
  '-7391593373731076614',
  '-1108196011052013057',
  '6939365348209583597',
  '7961641688578293674',
  '3038647042307501799',
  '1188111351150637548',
  '-667798451293826344',
  '-2232235630211916712',
  '-7398271315137793628',
  '45101176258509756',
  '2958439759745503920',
  '185

In [38]:
# Rebinds `enrichments_to_results` (previously built from the property
# response) to the mapping derived from the graph response.
# NOTE(review): get_enrichments2results comes from notebook_functions; its
# exact grouping logic is not visible here.
enrichments_to_results  = (get_enrichments2results(gc))

In [39]:
# Node bindings of the first graph-coalescence result.
print(gc_results[0]['node_bindings'])

{'n1': [{'id': 'PUBCHEM.COMPOUND:10954115'}], 'n0': [{'id': 'MONDO:0005148', 'qnode_id': 'MONDO:0005148'}]}


In [40]:
# Analyses (edge bindings and score) of the first graph-coalescence result.
print(gc_results[0]['analyses'])

[{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': 'bfc01f78-9d50-4a21-8db0-834c61ba8a9a'}]}, 'score': 0.9999999915683112}]


In [41]:
# Summarise every graph enrichment: result count, two attribute values, id.
auxg = gc['message']['auxiliary_graphs']
for enrichment, chemnodes in enrichments_to_results.items():
    attributes = auxg[enrichment]['attributes']
    # Unlike the property summary, no truthiness filter is applied here, so a
    # value of 0.0 is kept; an attribute without a 'value' key raises KeyError.
    values = [attr['value'] for attr in attributes]
    # Positional lookup: in the recorded output values[1] is a float and
    # values[2] is 'n1', i.e. the reverse column order of the property summary.
    # NOTE(review): this relies on the service's attribute ordering -- consider
    # selecting by 'attribute_type_id' once the graph attribute types are confirmed.
    print(len(chemnodes), values[1], values[2], enrichment)

77 2.1899817060911548e-136 n1 _e_ac_413_0
80 4.633079532614574e-125 n1 _e_ac_483_0
168 0.0 n1 _e_ac_17_0
158 0.0 n1 _e_ac_19_0
153 1.45561085283357e-310 n1 _e_ac_21_0
145 5.395387149595033e-300 n1 _e_ac_25_0
141 5.0156163220744834e-297 n1 _e_ac_30_0
141 3.8066074029778645e-294 n1 _e_ac_35_0
144 1.288031385026528e-291 n1 _e_ac_36_0
136 2.3094913430583003e-275 n1 _e_ac_48_0
133 3.84123872614836e-275 n1 _e_ac_49_0
137 8.138985207324306e-275 n1 _e_ac_51_0
130 1.6342242347039137e-271 n1 _e_ac_53_0
133 9.662754100654202e-270 n1 _e_ac_56_0
133 3.619577622786523e-268 n1 _e_ac_61_0
126 6.721354818037547e-263 n1 _e_ac_67_0
128 1.2792368379294236e-262 n1 _e_ac_68_0
129 4.809686263328177e-262 n1 _e_ac_69_0
129 6.389803580319588e-262 n1 _e_ac_70_0
121 1.778395262789521e-257 n1 _e_ac_76_0
121 6.404285729890484e-255 n1 _e_ac_80_0
111 1.5186847606789772e-241 n1 _e_ac_97_0
118 6.383106743353435e-241 n1 _e_ac_99_0
120 2.2534051012882174e-240 n1 _e_ac_101_0
114 1.7555210854115e-237 n1 _e_ac_103_0
116 2.8

## Another Example

In [43]:
def create_proc_question(disease_curie, relation_type, answer_type):
    """Build a one-hop query message of the form ``n1 -[relation_type]-> n0``.

    Parameters
    ----------
    disease_curie : str
        Identifier pinned on node ``n0`` (the edge's object).
    relation_type : str
        Predicate placed on edge ``e00``.
    answer_type : str
        Category required of the unpinned node ``n1`` (the edge's subject).

    Returns
    -------
    dict
        ``{'message': {'query_graph': {...}}}``; no knowledge type is set on
        the edge.
    """
    pinned_node = {'ids': [disease_curie], 'is_set': False, 'constraints': []}
    answer_node = {'categories': [answer_type], 'is_set': False, 'constraints': []}
    edge = {
        'subject': 'n1',
        'object': 'n0',
        'predicates': [relation_type],
        'attribute_constraints': [],
        'qualifier_constraints': [],
    }
    query_graph = {
        'nodes': {'n0': pinned_node, 'n1': answer_node},
        'edges': {'e00': edge},
    }
    return {'message': {'query_graph': query_graph}}

# Second example: ask for biological processes/activities connected to the
# disease by the "disrupts" predicate.
disease = "MONDO:0005148"   #T2D
edgetype="biolink:disrupts"
answer_type = 'biolink:BiologicalProcessOrActivity'
question = create_proc_question(disease,edgetype, answer_type)
# res = query_automat(question,'uberon')
# Overwrites the earlier lookup `res`; this query goes to the 'aragorn' plate
# and `res` is None if the service does not answer with HTTP 200.
res = query_aragorn(question,'aragorn')

It took 0:00:06.111491s to return
