# Answer Coalescence using only Automat

Strider finds and ranks all paths that fulfill the query.  Some of these answers can be very similar.  For instance, two answers may differ only by a gene node, but perhaps those genes are from the same family.  This permits a higher-level, more parsimonious description of an answer set.

In [1]:
import requests
import json
from doc_utils import get_enriched_results, get_enrichments2results

In [2]:
from datetime import datetime as dt
def query_automat(question, plate, timeout=None):
    """POST a query to a plate's ``/query`` endpoint and return the JSON reply.

    Despite the function name, the request is currently sent to
    ``https://aragorn.renci.org/{plate}/query``; the Automat URL is kept
    below only as a commented-out alternative.

    Args:
        question: JSON-serializable query body, sent as the request JSON.
        plate: Path segment inserted into the URL (e.g. ``'robokop'``).
        timeout: Optional ``requests`` timeout in seconds, or a
            ``(connect, read)`` tuple. ``None`` (the default) sets no
            timeout, which matches the original behavior.

    Returns:
        The decoded JSON response when the status code is 200. For any other
        status the code is printed and ``None`` is returned.

    Raises:
        requests.exceptions.Timeout: Only if ``timeout`` is given and the
            server does not answer in time.
    """
    # url = f'https://automat.renci.org/{plate}/reasonerapi'
    url = f'https://aragorn.renci.org/{plate}/query'
    t0 = dt.now()
    r = requests.post(url, json=question, timeout=timeout)
    t1 = dt.now()
    # t1 - t0 is a timedelta, so this prints as H:MM:SS.ffffff.
    print(f'It took {t1-t0} s to return')
    if r.status_code != 200:
        print(r.status_code)
        return None
    # Results coming straight from a KP carry no score although AC expects
    # one; the workaround that added a dummy score of 1 to each result is
    # currently disabled, so the payload is returned unmodified.
    return r.json()

In [3]:
def create_question(disease_curie, relation, answer_type, knowledge_type):
    """Build a one-hop query message linking an answer node to a disease.

    Args:
        disease_curie: Identifier pinned on node ``n0`` (placed in ``ids``).
        relation: Predicate required on edge ``e00`` (``n1`` -> ``n0``).
        answer_type: Category required of the unpinned node ``n1``.
        knowledge_type: If truthy, stored on the edge as ``knowledge_type``;
            if falsy, the key is omitted entirely.

    Returns:
        ``{"message": {"query_graph": {...}}}`` with nodes ``n1``/``n0`` and
        the single edge ``e00``.
    """
    answer_node = {"categories": [answer_type], "is_set": False, "constraints": []}
    disease_node = {"ids": [disease_curie], "is_set": False, "constraints": []}

    edge = {
        "subject": "n1",
        "object": "n0",
        "predicates": [relation],
        "attribute_constraints": [],
        "qualifier_constraints": [],
    }
    if knowledge_type:
        edge["knowledge_type"] = knowledge_type

    # Node order (n1 before n0) is kept so the serialized JSON is unchanged.
    query_graph = {
        "nodes": {"n1": answer_node, "n0": disease_node},
        "edges": {"e00": edge},
    }
    return {"message": {"query_graph": query_graph}}


In [4]:
def getlabel(curie):
    """Look up the label of ``curie`` via the node-normalization service.

    Sends a GET request with ``curie`` as the ``curie`` query parameter and
    reads ``response[curie]['id']['label']`` from the decoded JSON.

    Args:
        curie: Identifier to look up.

    Returns:
        The label string, or ``''`` when the response has no usable
        ``[curie]['id']['label']`` entry.

    Note:
        Network and JSON-decoding errors are not caught here (they were
        outside the ``try`` in the original as well) and propagate.
    """
    url = 'https://nodenormalization-sri.renci.org/get_normalized_nodes'
    r = requests.get(url, params={'curie': curie}).json()
    try:
        return r[curie]['id']['label']
    except (KeyError, TypeError):
        # KeyError: the curie, 'id' or 'label' key is missing.
        # TypeError: an intermediate value is not subscriptable (e.g. the
        # service mapped the curie to null).
        # A bare ``except`` would also have swallowed KeyboardInterrupt.
        return ''

## Property Coalescence

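Find drugs that treat type 2 diabetes. The query below is sent to the `robokop` plate; the `mychem` call is kept as a commented-out alternative.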

In [5]:
# disease = "MONDO:0004975"  #Alzheimer
# disease = 'MONDO:0005180'  #Parkinson
# disease = 'MONDO:0004979'  #Asthma
disease = "MONDO:0005148"   #T2D
knowledgetype= "inferred"
edge = "biolink:treats"
answer_type = "biolink:ChemicalEntity"
question = create_question(disease, edge, answer_type, knowledgetype)
# res = query_automat(question,'mychem')
res = query_automat(question,'robokop')

It took 0:00:03.086229 s to return


In [6]:
url = "https://answercoalesce.renci.org/1.4/coalesce/property"
r = requests.post(url,json=res)
print(r.status_code)

200


In [7]:
# Rebind `r` from the HTTP response to its decoded JSON body. From here on `r`
# is a dict, so this cell cannot be re-run without first re-running the POST
# cell above (a dict has no `.json()`).
r = r.json()
# Replace the results list with the output of the doc_utils helper.
# NOTE(review): presumably this is what attaches the 'enrichments' key seen on
# each result below — confirm in doc_utils.
r['message']['results'] = get_enriched_results(r['message']['results'])

In [8]:
r['message'].keys()

dict_keys(['query_graph', 'knowledge_graph', 'results', 'auxiliary_graphs'])

In [9]:
results = r['message']['results']

In [10]:
print(results[0])

{'node_bindings': {'n1': [{'id': 'PUBCHEM.COMPOUND:2723872'}], 'n0': [{'id': 'MONDO:0005148', 'qnode_id': 'MONDO:0005148'}]}, 'analyses': [{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': 'ccc53379-fce6-4c84-a1dd-f84dd8b470d4'}]}, 'score': 0.9999999891567859}], 'enrichments': ['_n_ac_14_0', '_n_ac_15_0']}


In [11]:
r['message']['auxiliary_graphs']['_n_ac_14_0']

{'edges': [],
 'attributes': [{'attribute_type_id': 'biolink:supporting_study_method_type',
   'value': 'property_enrichment'},
  {'attribute_type_id': 'biolink:p_value', 'value': 2.5266801155450817e-14},
  {'attribute_type_id': 'biolink:chemical_role',
   'value': 'CHEBI_ROLE_food_component'}]}

In [12]:
attributes = r['message']['auxiliary_graphs']['_n_ac_14_0']['attributes']
# NOTE: despite its name, `pvalue` collects the values of *all* attributes of
# this auxiliary graph (study method, p-value, enriched property), not only
# the p-value.
# Filter on `is not None` rather than truthiness so that a legitimate falsy
# value (e.g. a p-value that underflowed to 0.0) is not silently dropped.
pvalue = [attr['value'] for attr in attributes if attr.get('value') is not None]
pvalue

['property_enrichment', 2.5266801155450817e-14, 'CHEBI_ROLE_food_component']

In [13]:
enrichments_to_results  = get_enrichments2results(r)

In [14]:
print(results[0]['node_bindings'])

{'n1': [{'id': 'PUBCHEM.COMPOUND:2723872'}], 'n0': [{'id': 'MONDO:0005148', 'qnode_id': 'MONDO:0005148'}]}


In [15]:
print(results[0]['analyses'])

[{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': 'ccc53379-fce6-4c84-a1dd-f84dd8b470d4'}]}, 'score': 0.9999999891567859}]


In [16]:
auxg = r['message']['auxiliary_graphs']
for enrichment, chemnodes in enrichments_to_results.items():
    attributes = auxg[enrichment]['attributes']
    # values[1] / values[2] are picked by position (p-value and enriched
    # property in the sample auxiliary graph shown above), so no value may be
    # dropped from the list. Filter on `is not None` instead of truthiness:
    # a p-value of exactly 0.0 is falsy and would otherwise be removed,
    # shifting every later index.
    values = [attr['value'] for attr in attributes if attr.get('value') is not None]
    # Columns: number of results sharing the enrichment, values[1], values[2],
    # auxiliary-graph id.
    print(len(chemnodes), values[1], values[2], enrichment)

35 2.5266801155450817e-14 CHEBI_ROLE_food_component _n_ac_14_0
35 2.5266801155450817e-14 CHEBI_ROLE_physiological_role _n_ac_15_0
16 1.5915495481326194e-08 CHEBI_ROLE_algal_metabolite _n_ac_36_0
5 4.5972829883101113e-07 CHEBI_ROLE_Daphnia_galeata_metabolite _n_ac_47_0
12 9.072374919205088e-06 CHEBI_ROLE_Daphnia_metabolite _n_ac_58_0
12 1.0737919824210222e-05 CHEBI_ROLE_crustacean_metabolite _n_ac_59_0
60 1.3421872291871116e-05 CHEBI_ROLE_mouse_metabolite _n_ac_66_0
263 8.606351289464478e-54 CHEBI_ROLE_drug _n_ac_1_0
265 1.115447956700274e-53 CHEBI_ROLE_pharmaceutical _n_ac_2_0
121 5.57655298354839e-29 CHEBI_ROLE_pharmacological_role _n_ac_4_0
20 1.6064692033080184e-13 CHEBI_ROLE_hypoglycemic_agent _n_ac_16_0
110 1.9982662417591194e-12 CHEBI_ROLE_inhibitor _n_ac_19_0
99 7.21133884082788e-11 CHEBI_ROLE_enzyme_inhibitor _n_ac_23_0
38 2.7907139370550145e-08 CHEBI_ROLE_agonist _n_ac_38_0
6 5.5475027860406724e-05 CHEBI_ROLE_ferroptosis_inhibitor _n_ac_81_0
3 8.796833888765922e-05 CHEBI_ROLE_

## Graph Coalescence

In [17]:
url = 'https://answercoalesce.renci.org/1.4/coalesce/graph'
r = requests.post(url,json=res)
print(r.status_code)

200


In [96]:
# Rebind `r` from the HTTP response to its decoded JSON body. From here on `r`
# is a dict, so this cell cannot be re-run without first re-running the POST
# cell above (a dict has no `.json()`).
r = r.json()
# Replace the results list with the output of the doc_utils helper.
# NOTE(review): presumably this is what attaches the 'enrichments' key seen on
# each result below — confirm in doc_utils.
r['message']['results'] = get_enriched_results(r['message']['results'])

In [97]:
r['message'].keys()

dict_keys(['query_graph', 'knowledge_graph', 'results', 'auxiliary_graphs'])

In [98]:
results = r['message']['results']

In [99]:
print(results[0])

{'node_bindings': {'chemical': [{'id': 'MESH:D002800'}], 'disease': [{'id': 'MONDO:0004975', 'qnode_id': 'MONDO:0004975'}]}, 'analyses': [{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': '105682211'}]}, 'score': 0.9992083086664353}], 'enrichments': ['_e_ac_119_0', '_e_ac_258_0', '_e_ac_305_0']}


In [136]:
r['message']['knowledge_graph']['edges']

{'74178216': {'subject': 'PUBCHEM.COMPOUND:3547',
  'object': 'MONDO:0004975',
  'predicate': 'biolink:treats',
  'sources': [{'resource_id': 'infores:robokop',
    'resource_role': 'aggregator_knowledge_source',
    'upstream_resource_ids': ['infores:automat-robokop'],
    'source_record_urls': None},
   {'resource_id': 'infores:text-mining-provider-targeted',
    'resource_role': 'primary_knowledge_source',
    'upstream_resource_ids': None,
    'source_record_urls': None},
   {'resource_id': 'infores:automat-robokop',
    'resource_role': 'aggregator_knowledge_source',
    'upstream_resource_ids': ['infores:text-mining-provider-targeted'],
    'source_record_urls': None}],
  'qualifiers': None,
  'attributes': [{'attribute_type_id': 'biolink:Attribute',
    'value': 0.7511516283774782,
    'value_type_id': 'EDAM:data_0006',
    'original_attribute_name': 'biolink:tmkp_confidence_score',
    'value_url': None,
    'attribute_source': None,
    'description': None,
    'attributes': N

In [135]:
r['message']['knowledge_graph']['edges']['261180019156679031']

{'subject': 'PUBCHEM.COMPOUND:2995',
 'object': 'HP:0002018',
 'predicate': 'biolink:contributes_to',
 'sources': [{'resource_id': 'infores:ctd',
   'resource_role': 'primary_knowledge_source'},
  {'resource_id': 'infores:automat-robokop',
   'resource_role': 'aggregator_knowledge_source',
   'upstream_resource_ids': ['infores:ctd']},
  {'resource_id': 'infores:aragorn',
   'resource_role': 'aggregator_knowledge_source',
   'upstream_resource_ids': ['infores:automat-robokop']}],
 'attributes': []}

In [101]:
r['message']['auxiliary_graphs']['_e_ac_119_0']

{'edges': ['261180019156679031',
  '-1178194912137699869',
  '2373973440012019691',
  '-296947377203541552',
  '-7557434463906463919',
  '-7213441619002072108',
  '-4436849232629182109',
  '-1021565412192226330',
  '-6001358890101272806',
  '-5857090605758490335',
  '642225132184811163',
  '-8239036555484639314',
  '-2419163180936882588',
  '-2268327022151627742',
  '-5669979856187961082',
  '7208874078642277595',
  '510396625418501212',
  '-2344661864223075910',
  '-8925720489055150852',
  '-643177312624469975',
  '-645147382608452666',
  '1303562234024250170',
  '868110430189074435',
  '3753110098407304540',
  '-6576569211676167826',
  '4147111277180386074',
  '6003067312642224813',
  '8629642084935352774',
  '5178468072989176532',
  '-5558477613974146344',
  '-4404400423461290992',
  '3194349908320354591',
  '5305154082610386143',
  '4433629993932026726',
  '-2987203011131711162',
  '2240588603850827017',
  '1677642041280187206',
  '1405357775033545356',
  '-8182090656646464083',
  

In [102]:
enrichments_to_results  = (get_enrichments2results(r))

In [103]:
print(results[0]['node_bindings'])

{'chemical': [{'id': 'MESH:D002800'}], 'disease': [{'id': 'MONDO:0004975', 'qnode_id': 'MONDO:0004975'}]}


In [104]:
print(results[0]['analyses'])

[{'resource_id': 'infores:robokop', 'edge_bindings': {'e00': [{'id': '105682211'}]}, 'score': 0.9992083086664353}]


In [105]:
# Summarize each graph enrichment: one printed line per auxiliary graph.
# Note that `enrichment`, `chemnodes`, `attributes` and `values` remain defined
# at notebook level after the loop finishes.
auxg = r['message']['auxiliary_graphs']
for enrichment, chemnodes in enrichments_to_results.items():
    attributes = auxg[enrichment]['attributes']
    # Every attribute must carry a 'value' key here; a missing key raises
    # KeyError (unlike the property-coalescence loop, nothing is filtered).
    values = [attr['value'] for attr in attributes]
    # values[1] and values[2] are selected purely by position; in the output
    # below they print as a p-value-like float and a CURIE.
    # NOTE(review): assumes every auxiliary graph lists its attributes in the
    # same order — TODO confirm against the AC response.
    print(len(chemnodes), values[1], values[2], enrichment)

75 8.359139878997489e-142 HP:0002018 _e_ac_119_0
61 3.828522619594381e-107 MONDO:0001386 _e_ac_258_0
53 2.5557285917223835e-98 HP:0001662 _e_ac_305_0
6 6.2281083518185664e-27 NCBIGene:3972 _e_ac_401_0
5 1.5613719443452162e-18 NCBIGene:866 _e_ac_402_0
4 2.949413878732459e-18 NCBIGene:1081 _e_ac_403_0
4 2.388674650659292e-16 MONDO:0008487 _e_ac_404_0
5 3.184815334384216e-16 NCBIGene:6462 _e_ac_405_0
4 7.548837531496952e-16 NCBIGene:6462 _e_ac_407_0
4 2.8996209669348918e-15 MONDO:0007886 _e_ac_408_0
5 4.3887519650135e-15 UMLS:C0340865 _e_ac_409_0
151 8.913479289444605e-290 HP:0002013 _e_ac_1_0
146 5.064525069064484e-277 HP:0002315 _e_ac_3_0
108 1.2054501057196803e-216 HP:0000975 _e_ac_19_0
107 1.315356998737993e-209 HP:0002094 _e_ac_23_0
101 1.440964052509518e-202 HP:0100749 _e_ac_29_0
98 1.3371968625024013e-196 MONDO:0011918 _e_ac_31_0
100 2.0939496560369885e-193 UMLS:C0687133 _e_ac_35_0
101 1.8615534567616384e-187 UMLS:C0852908 _e_ac_40_0
93 1.3472590433570499e-182 UMLS:C3160741 _e_ac_4

In [166]:
def create_proc_question(disease_curie, relation_type, answer_type):
    """Build a one-hop query message from an answer node to a pinned disease.

    Args:
        disease_curie: Identifier pinned on node ``n0`` (placed in ``ids``).
        relation_type: Predicate required on edge ``e00`` (``n1`` -> ``n0``).
        answer_type: Category required of the unpinned node ``n1``.

    Returns:
        ``{'message': {'query_graph': {...}}}`` with nodes ``n0``/``n1`` and
        the single edge ``e00``. No ``knowledge_type`` is set on the edge.
    """
    pinned_node = {'ids': [disease_curie], 'is_set': False, 'constraints': []}
    answer_node = {'categories': [answer_type], 'is_set': False, 'constraints': []}

    edge = {
        'subject': 'n1',
        'object': 'n0',
        'predicates': [relation_type],
        'attribute_constraints': [],
        'qualifier_constraints': [],
    }

    # Node order (n0 before n1) is kept so the serialized JSON is unchanged.
    query_graph = {
        'nodes': {'n0': pinned_node, 'n1': answer_node},
        'edges': {'e00': edge},
    }
    return {'message': {'query_graph': query_graph}}

disease = "MONDO:0005148"   #T2D
edgetype="biolink:disrupts"
answer_type = 'biolink:BiologicalProcessOrActivity'
question = create_proc_question(disease,edgetype, answer_type)
# res = query_automat(question,'uberon')
res = query_automat(question,'aragorn')

It took 0:00:10.438542 s to return
