# Answer Coalescence using only Automat

Strider finds and ranks all paths that fulfill the query.  Some of these answers can be very similar: for instance, two answers may differ only by a gene node, but those genes may belong to the same family.  Answer coalescence groups such answers by what they have in common, which permits a higher-level, more parsimonious description of an answer set.

In [1]:
import requests
import json


In [2]:
from datetime import datetime as dt
def query_automat(question, plate, timeout=None):
    """POST *question* to ``https://aragorn.renci.org/{plate}/query``.

    The elapsed wall-clock time of the request is printed.  When the reply
    status is anything other than 200, the status code is printed and
    ``None`` is returned, so callers must check the result before using it.

    Args:
        question: JSON-serialisable request body (sent with ``json=``).
        plate: Path segment placed before ``/query`` in the URL.
        timeout: Optional timeout forwarded to ``requests.post``.  The
            default ``None`` keeps the previous behaviour of waiting
            indefinitely; pass a number of seconds to bound the wait.

    Returns:
        The decoded JSON body on HTTP 200, otherwise ``None``.
    """
    url = f'https://aragorn.renci.org/{plate}/query'
    t0 = dt.now()
    r = requests.post(url, json=question, timeout=timeout)
    t1 = dt.now()
    print(f'It took {t1-t0} s to return')
    if r.status_code != 200:
        print(r.status_code)
        return None
    # NOTE: results coming from the KP carry no score although AC expects
    # one; the step that filled in a dummy score of 1 is currently disabled,
    # so the body is returned unmodified.
    return r.json()

In [125]:
def create_question(disease_curie, relation, answer_type, knowledge_type):
    """Build a one-hop query message: unknown node ``n1`` --relation--> ``n0``.

    Args:
        disease_curie: Identifier pinned on node ``n0`` (the edge object).
        relation: Predicate placed on edge ``e00``.
        answer_type: Category requested for node ``n1`` (the edge subject).
        knowledge_type: When truthy, stored on the edge as ``knowledge_type``;
            when falsy, the key is omitted.

    Returns:
        ``{"message": {"query_graph": {...}}}`` ready to be sent as JSON.
    """
    edge = {
        "subject": "n1",
        "object": "n0",
        "predicates": [relation],
        "attribute_constraints": [],
        "qualifier_constraints": [],
    }
    if knowledge_type:
        edge["knowledge_type"] = knowledge_type

    nodes = {
        "n1": {"categories": [answer_type], "is_set": False, "constraints": []},
        "n0": {"ids": [disease_curie], "is_set": False, "constraints": []},
    }
    query_graph = {"nodes": nodes, "edges": {"e00": edge}}
    return {"message": {"query_graph": query_graph}}


In [4]:
def getlabel(curie):
    """Return the label reported for *curie* by the node normalizer, or ``''``.

    Sends a GET request to the ``get_normalized_nodes`` endpoint and reads
    ``response[curie]['id']['label']`` from the decoded JSON.

    Args:
        curie: Identifier to look up; also used as the key into the reply.

    Returns:
        The label string, or an empty string when the reply has no such
        entry (missing key, or an entry that is not a mapping).
    """
    url = 'https://nodenormalization-sri.renci.org/get_normalized_nodes'
    r = requests.get(url,params={'curie':curie}).json()
    try:
        return r[curie]['id']['label']
    except (KeyError, TypeError):
        # KeyError: the curie, 'id' or 'label' key is absent.
        # TypeError: an intermediate value is None or otherwise not
        # subscriptable by key.  Anything else (e.g. KeyboardInterrupt) is
        # deliberately no longer swallowed.
        return ''

## Property Coalescence

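Find drugs that treat a disease (e.g. diabetes). Note that the cell below, as written, queries `robokop` rather than mychem and pins Alzheimer's disease; type 2 diabetes is one of the commented-out alternatives.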

In [127]:
# Disease identifier pinned in the query; swap in one of the commented
# alternatives below to query a different disease.
disease = "MONDO:0004975"  #Alzheimer
# disease = 'MONDO:0005180'  #Parkinson
# disease = 'MONDO:0004979'  #Asthma
# disease = "MONDO:0005148"   #T2D
# Passed to create_question, which stores it on the edge as "knowledge_type".
knowledgetype= "inferred"
edge = "biolink:treats"
answer_type = "biolink:ChemicalEntity"
question = create_question(disease, edge, answer_type, knowledgetype)
# 'robokop' becomes the path segment of the URL that query_automat posts to.
# `res` is None if the request did not return HTTP 200.
res = query_automat(question,'robokop')

It took 0:00:00.967604 s to return


In [32]:
# Send the query response to the property-coalescence endpoint and show the
# HTTP status of the reply.
url = "https://answercoalesce.renci.org/1.4/coalesce/property"
r = requests.post(url,json=res)
print(r.status_code)

200


In [59]:
r = r.json()
def get_enriched_results(res):
    """Return the results that carry at least one enrichment.

    Args:
        res: Iterable of result dicts.

    Returns:
        A new list holding, in order, every result whose ``'enrichments'``
        value is truthy.  Results with an empty ``'enrichments'`` value, or
        without the key at all, are left out.
    """
    # .get() so that a result lacking the key is treated as "not enriched"
    # rather than raising KeyError.
    return [result for result in res if result.get('enrichments')]
r['message']['results'] = get_enriched_results(r['message']['results'])

In [60]:
r['message'].keys()

dict_keys(['query_graph', 'knowledge_graph', 'results', 'auxiliary_graphs'])

In [62]:
results = r['message']['results']

In [63]:
print(results[0])

{'node_bindings': {'chemical': [{'id': 'PUBCHEM.COMPOUND:6013'}], 'disease': [{'id': 'MONDO:0004975', 'qnode_id': 'MONDO:0004975'}, {'id': 'MONDO:0100087'}, {'id': 'MONDO:0012344'}]}, 'analyses': [{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': '45351889'}]}, 'score': 0.9810554009250266}], 'enrichments': ['_n_ac_58_0', '_n_ac_59_0', '_n_ac_60_0', '_n_ac_61_0', '_n_ac_62_0', '_n_ac_63_0', '_n_ac_64_0', '_n_ac_65_0', '_n_ac_66_0']}


In [76]:
r['message']['auxiliary_graphs']['_n_ac_58_0']

{'edges': [],
 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type',
   'value': 'property_enrichment'},
  {'attribute_type_id': 'biolink:p_value', 'value': 2.1191190572104969e-07},
  {'attribute_type_id': 'biolink:chemical_role',
   'value': 'CHEBI_ROLE_mouse_metabolite'}]}

In [75]:
attributes = r['message']['auxiliary_graphs']['_n_ac_58_0']['attributes']
# NOTE(review): despite its name, `pvalue` collects the value of every
# attribute whose 'value' is truthy, not only the p-value (see the three
# entries displayed below).
pvalue = [attr['value'] for attr in attributes if attr.get('value')]
pvalue

['property_enrichment', 2.1191190572104969e-07, 'CHEBI_ROLE_mouse_metabolite']

In [71]:
def get_enrichments2results(r):
    """Invert the result -> enrichments relation of a coalesced response.

    For each entry of ``r["message"]["results"]`` the answer node is taken
    to be the first ``node_bindings`` key whose first binding has no
    ``'qnode_id'``; the ``'id'`` of that first binding identifies the result.

    Args:
        r: Response dict with ``r["message"]["results"]``; every result needs
            ``'node_bindings'`` and ``'enrichments'``.

    Returns:
        A dict mapping each enrichment id to the list of answer-node
        identifiers whose result lists that enrichment.  If several results
        share an identifier, the enrichments of the last one are used.
        Results in which no such answer node can be found are skipped.
    """
    result_to_enrichments = {}
    for result in r["message"]["results"]:
        bindings = result["node_bindings"]
        answer_keys = [
            key for key, bound in bindings.items()
            if bound and "qnode_id" not in bound[0]
        ]
        if not answer_keys:
            # Nothing to attribute the enrichments to; skip instead of
            # failing on answer_keys[0].
            continue
        # The identifier itself is the key.  The knowledge-graph name lookup
        # that used to sit here was immediately overwritten, yet could still
        # raise KeyError for a node without a "name".
        identifier = bindings[answer_keys[0]][0]["id"]
        result_to_enrichments[identifier] = result["enrichments"]

    enrichments_to_results = {}
    for identifier, enrichments in result_to_enrichments.items():
        for enrichment in enrichments:
            enrichments_to_results.setdefault(enrichment, []).append(identifier)
    return enrichments_to_results
enrichments_to_results  = (get_enrichments2results(r))

In [72]:
print(results[0]['node_bindings'])

{'chemical': [{'id': 'PUBCHEM.COMPOUND:6013'}], 'disease': [{'id': 'MONDO:0004975', 'qnode_id': 'MONDO:0004975'}, {'id': 'MONDO:0100087'}, {'id': 'MONDO:0012344'}]}


In [73]:
print(results[0]['analyses'])

[{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': '45351889'}]}, 'score': 0.9810554009250266}]


In [78]:
auxg = r['message']['auxiliary_graphs']
# One summary line per enrichment: how many answers share it, followed by two
# of its attribute values and the enrichment id.
for enrichment, chemnodes in enrichments_to_results.items():
    attributes = auxg[enrichment]['attributes']
    # Attributes whose 'value' is falsy are dropped here, which would shift
    # the positions used on the next line.
    values = [attr['value'] for attr in attributes if attr.get('value')]
    # NOTE(review): values[1] / values[2] rely on attribute order (p-value,
    # then the enriched property, in the sample displayed earlier) — TODO
    # confirm the order is guaranteed.
    print(len(chemnodes), values[1], values[2], enrichment)

7 2.1191190572104969e-07 CHEBI_ROLE_mouse_metabolite _n_ac_58_0
7 1.0552299223963925e-05 CHEBI_ROLE_human_metabolite _n_ac_59_0
3 1.2465570099451747e-06 CHEBI_ROLE_Daphnia_magna_metabolite _n_ac_60_0
3 1.0370528168003705e-05 CHEBI_ROLE_Daphnia_metabolite _n_ac_61_0
3 1.0893659043628082e-05 CHEBI_ROLE_crustacean_metabolite _n_ac_62_0
4 2.8309571319172373e-06 CHEBI_ROLE_sex_hormone _n_ac_63_0
4 8.813850115159998e-06 CHEBI_ROLE_hormone _n_ac_64_0
4 3.5546661879763916e-05 CHEBI_ROLE_molecular_messenger _n_ac_65_0
4 6.74952828273942e-05 CHEBI_ROLE_agonist _n_ac_66_0
216 9.75950843422541e-47 CHEBI_ROLE_drug _n_ac_1_0
217 2.9818672983070737e-46 CHEBI_ROLE_pharmaceutical _n_ac_2_0
55 3.703713408107945e-19 CHEBI_ROLE_neurotransmitter_agent _n_ac_7_0
88 1.6888778926994379e-18 CHEBI_ROLE_pharmacological_role _n_ac_8_0
27 8.681185863824432e-09 CHEBI_ROLE_cardiovascular_drug _n_ac_21_0
18 7.698873837175989e-08 CHEBI_ROLE_adrenergic_agent _n_ac_28_0
9 5.524821995947849e-06 CHEBI_ROLE_alpha_adrenergi

## Graph Coalescence

In [93]:
# Send the same query response (`res`) to the graph-coalescence endpoint and
# show the HTTP status of the reply.
url = 'https://answercoalesce.renci.org/1.4/coalesce/graph'
r = requests.post(url,json=res)
print(r.status_code)

200


In [96]:
# `r` is rebound from the HTTP response to its decoded JSON body, so this
# cell cannot be re-run without first re-running the request cell.
r = r.json()
# Keep only the results for which get_enriched_results finds enrichments.
r['message']['results'] = get_enriched_results(r['message']['results'])

In [97]:
r['message'].keys()

dict_keys(['query_graph', 'knowledge_graph', 'results', 'auxiliary_graphs'])

In [98]:
results = r['message']['results']

In [99]:
print(results[0])

{'node_bindings': {'chemical': [{'id': 'MESH:D002800'}], 'disease': [{'id': 'MONDO:0004975', 'qnode_id': 'MONDO:0004975'}]}, 'analyses': [{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': '105682211'}]}, 'score': 0.9992083086664353}], 'enrichments': ['_e_ac_119_0', '_e_ac_258_0', '_e_ac_305_0']}


In [101]:
r['message']['auxiliary_graphs']['_e_ac_119_0']

{'edges': ['261180019156679031',
  '-1178194912137699869',
  '2373973440012019691',
  '-296947377203541552',
  '-7557434463906463919',
  '-7213441619002072108',
  '-4436849232629182109',
  '-1021565412192226330',
  '-6001358890101272806',
  '-5857090605758490335',
  '642225132184811163',
  '-8239036555484639314',
  '-2419163180936882588',
  '-2268327022151627742',
  '-5669979856187961082',
  '7208874078642277595',
  '510396625418501212',
  '-2344661864223075910',
  '-8925720489055150852',
  '-643177312624469975',
  '-645147382608452666',
  '1303562234024250170',
  '868110430189074435',
  '3753110098407304540',
  '-6576569211676167826',
  '4147111277180386074',
  '6003067312642224813',
  '8629642084935352774',
  '5178468072989176532',
  '-5558477613974146344',
  '-4404400423461290992',
  '3194349908320354591',
  '5305154082610386143',
  '4433629993932026726',
  '-2987203011131711162',
  '2240588603850827017',
  '1677642041280187206',
  '1405357775033545356',
  '-8182090656646464083',
  

In [102]:
enrichments_to_results  = (get_enrichments2results(r))

In [103]:
print(results[0]['node_bindings'])

{'chemical': [{'id': 'MESH:D002800'}], 'disease': [{'id': 'MONDO:0004975', 'qnode_id': 'MONDO:0004975'}]}


In [104]:
print(results[0]['analyses'])

[{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': '105682211'}]}, 'score': 0.9992083086664353}]


In [105]:
auxg = r['message']['auxiliary_graphs']
# One summary line per enrichment: how many answers share it, followed by two
# of its attribute values and the enrichment id.
for enrichment, chemnodes in enrichments_to_results.items():
    attributes = auxg[enrichment]['attributes']
    # Every attribute must have a 'value' key here; no filtering is applied.
    values = [attr['value'] for attr in attributes]
    # NOTE(review): values[1] / values[2] rely on attribute order; in the
    # output below they show a p-value and an identifier — TODO confirm the
    # order is guaranteed.
    print(len(chemnodes), values[1], values[2], enrichment)

75 8.359139878997489e-142 HP:0002018 _e_ac_119_0
61 3.828522619594381e-107 MONDO:0001386 _e_ac_258_0
53 2.5557285917223835e-98 HP:0001662 _e_ac_305_0
6 6.2281083518185664e-27 NCBIGene:3972 _e_ac_401_0
5 1.5613719443452162e-18 NCBIGene:866 _e_ac_402_0
4 2.949413878732459e-18 NCBIGene:1081 _e_ac_403_0
4 2.388674650659292e-16 MONDO:0008487 _e_ac_404_0
5 3.184815334384216e-16 NCBIGene:6462 _e_ac_405_0
4 7.548837531496952e-16 NCBIGene:6462 _e_ac_407_0
4 2.8996209669348918e-15 MONDO:0007886 _e_ac_408_0
5 4.3887519650135e-15 UMLS:C0340865 _e_ac_409_0
151 8.913479289444605e-290 HP:0002013 _e_ac_1_0
146 5.064525069064484e-277 HP:0002315 _e_ac_3_0
108 1.2054501057196803e-216 HP:0000975 _e_ac_19_0
107 1.315356998737993e-209 HP:0002094 _e_ac_23_0
101 1.440964052509518e-202 HP:0100749 _e_ac_29_0
98 1.3371968625024013e-196 MONDO:0011918 _e_ac_31_0
100 2.0939496560369885e-193 UMLS:C0687133 _e_ac_35_0
101 1.8615534567616384e-187 UMLS:C0852908 _e_ac_40_0
93 1.3472590433570499e-182 UMLS:C3160741 _e_ac_4

In [134]:
def create_proc_question(disease_curie, relation_type, answer_type):
    """Build a one-hop query message: unknown node ``n1`` --relation--> ``n0``.

    Args:
        disease_curie: Identifier pinned on node ``n0`` (the edge object).
        relation_type: Predicate placed on edge ``e00``.
        answer_type: Category requested for node ``n1`` (the edge subject).

    Returns:
        ``{'message': {'query_graph': {...}}}`` ready to be sent as JSON.
    """
    pinned_node = {'ids': [disease_curie], 'is_set': False, 'constraints': []}
    answer_node = {'categories': [answer_type], 'is_set': False, 'constraints': []}
    edge = {
        'subject': 'n1',
        'object': 'n0',
        'predicates': [relation_type],
        'attribute_constraints': [],
        'qualifier_constraints': [],
    }
    query_graph = {
        'nodes': {'n0': pinned_node, 'n1': answer_node},
        'edges': {'e00': edge},
    }
    return {'message': {'query_graph': query_graph}}

disease = "MONDO:0005148"   #T2D
edgetype="biolink:disrupts"
answer_type = 'biolink:BiologicalProcessOrActivity'
question = create_proc_question(disease,edgetype, answer_type)
# NOTE(review): the recorded run of this cell printed 404; in that case
# query_automat returns None, so `res` holds no message to work with.
res = query_automat(question,'uberon')

It took 0:00:00.092915 s to return
404
