In [103]:
import requests
import json

In [104]:
from gamma_viewer import GammaViewer
from IPython.display import display

In [105]:
def one_hop_message(curiea,typea,typeb,edgetype,reverse=False):
    query_graph = {
    "nodes": [
        {
            "id": "a",
            "type": typea,
            "curie": curiea
        },
        {
            "id": "b",
            "type": typeb
        }
    ],
    "edges": [
        {
            "id": "ab",
            "source_id": "a",
            "target_id": "b"
        }
    ]
    }
    if edgetype is not None:
        query_graph['edges'][0]['type'] = edgetype
        if reverse:
            query_graph['edges'][0]['source_id'] = 'b'
            query_graph['edges'][0]['target_id'] = 'a'
    message = {"message": {"query_graph": query_graph,
                          'knowledge_graph':{"nodes": [], "edges": [],}, 
                           'results':[]}}
    return message

In [106]:
def automat(db,message):
    automat_url = f'https://automat.renci.org/{db}/query'
    response = requests.post(automat_url,json=message['message'])
    print(response.status_code)
    return response.json()

def post(name,url,message,params=None):
    if params is None:
        response = requests.post(url,json=message)
    else:
        response = requests.post(url,json=message,params=params)
    if not response.status_code == 200:
        print(name, 'error:',response.status_code)
        return {}
    return response.json()

def strider(message):
    url = 'http://robokop.renci.org:5781/query'
    strider_answer = post(strider,url,message)
    numanswers = len(strider_answer['results'])
    if (numanswers == 0) or ((numanswers == 1) and (len(strider_answer['results'][0]['node_bindings'])==0)):
        print('no answers')
        return {}
    #Strider for some reason doesn't return the query graph
    strider_answer['query_graph'] = message['message']['query_graph']
    return strider_answer
 
def striderandfriends(message):
    strider_answer = strider(message)    
    coalesced_answer = post('coalesce','https://answercoalesce.renci.org/coalesce/graph',{'message':strider_answer})
    omni_answer = post('omnicorp','https://aragorn-ranker.renci.org/omnicorp_overlay',{'message': coalesced_answer})
    weighted_answer = post('weight','https://aragorn-ranker.renci.org/weight_correctness',{'message': omni_answer})
    scored_answer = post('score','https://aragorn-ranker.renci.org/score',{'message': weighted_answer})
    return scored_answer

def aragorn(message, coalesce_type):
    if coalesce_type == 'xnone':
        answer = post('aragorn','https://aragorn.renci.org/query',message)
    else:
        answer = post('aragorn','https://aragorn.renci.org/query',message, params={'answer_coalesce_type':coalesce_type})
    return answer

# Drugs that Treat Schizophrenia

Send this message directly to aragorn without coalesence

In [107]:
message = one_hop_message('MONDO:0005090','disease','chemical_substance','treats',reverse=True)
aragorn_results=aragorn(message,'none')

The answer looks fine, but the first result "Thioridazine" has been recalled.  This is coming from CTD, and will be fixed in the next version, when we move CTD:Treats to CTD:Ameliorates...

In [108]:
GammaViewer(props={"data":aragorn_results})

Now do the same query, but coalesce on properties

In [110]:
message = one_hop_message('MONDO:0005090','disease','chemical_substance','treats',reverse=True)
aragorn_results_property=aragorn(message,'property')

Here we are printing the properties that are found in common across the answers.  Further parsing (not shown) can also pull out which of the results had each property.

In [111]:
props = []
for result in aragorn_results_property['results']:
    for pr,pv in zip(result['node_bindings'][1]['properties'],result['node_bindings'][1]['p_values']):
        props.append( (pv,pr) )
props.sort()
for pv,pr in props:
    print( pr, pv)

animal_metabolite 6.1604327618471825e-75
mammalian_metabolite 6.937977255750666e-72
metabolite 2.258575137625518e-67
eukaryotic_metabolite 7.480816272708632e-67
biological_role 4.1253752457842365e-66
human_metabolite 4.769104271088552e-66
biochemical_role 1.5928265013041155e-65
mouse_metabolite 6.806876244474352e-55
drugbank.approved 2.491922493462403e-45
chemical_role 4.8569098954635195e-45
Bronsted_acid 9.547125485999662e-42
acid 1.0031395011128767e-41
donor 5.131209350080362e-40
drugbank.nutraceutical 1.1792034713339206e-39
algal_metabolite 3.763944179487339e-38
Daphnia_metabolite 3.9690653243595796e-36
crustacean_metabolite 5.162694402043632e-36
Dietary Supplements 3.8362311954412027e-32
Supplements 4.6265572523507294e-32
application 1.7064000257342788e-29
Escherichia_coli_metabolite 5.160085874833554e-29
Daphnia_magna_metabolite 5.68151216905462e-29
Amino Acids, Peptides, and Proteins 8.764259786663723e-29
pharmaceutical 1.0052292107905075e-25
Amino Acids 8.457383669533408e-25
dru

Again, the same query but coalescing on ontology subclasses

In [112]:
message = one_hop_message('MONDO:0005090','disease','chemical_substance','treats',reverse=True)
aragorn_results_ontology=aragorn(message,'ontology')

In [113]:
#Extract the new nodes
nb = [r['node_bindings'] for r in aragorn_results_ontology['results'] ]
parents = [ [xi['kg_id'][0] for xi in x if (xi['qg_id']=='extra_qn_0')][0] for x in nb ]

#AC isn't returning labels (in process to be fixed) so get the labels from NN
result = requests.get('https://nodenormalization-sri.renci.org/get_normalized_nodes',
                     params={'curie':parents})
x = result.json()

#And print out the superclasses
for xid, xv in x.items():
    print(xid, xv['id']['label'])

CHEBI:72695 organic molecule
CHEBI:37577 heteroatomic molecular entity
CHEBI:24651 hydroxides
CHEBI:36586 carbonyl compound
CHEBI:64709 organic acid
CHEBI:33575 carboxylic acid
CHEBI:25384 monocarboxylic acid
CHEBI:27369 zwitterion
CHEBI:33704 alpha-amino acid
CHEBI:35366 fatty acid
CHEBI:15705 L-alpha-amino acid
CHEBI:38166 organic heteropolycyclic compound
CHEBI:35350 hydroxy steroid
CHEBI:25693 organic heteromonocyclic compound
CHEBI:35238 amino acid zwitterion
CHEBI:15904 long-chain fatty acid
CHEBI:83813 proteinogenic amino acid
CHEBI:35789 oxo steroid
CHEBI:26167 polar amino acid
CHEBI:59202 straight-chain fatty acid
CHEBI:61313 C21-steroid
CHEBI:39418 straight-chain saturated fatty acid
CHEBI:26144 piperazines
CHEBI:36885 20-oxo steroid
CHEBI:58168 1-O-acyl-sn-glycero-3-phosphocholine
CHEBI:22658 aspartate family amino acid
CHEBI:46848 N-arylpiperazine
CHEBI:55505 1,2-benzisothiazole
CHEBI:51545 1,2-benzoxazoles
CHEBI:145810 insulin


Finally, the same query but with graph coalescence

In [114]:
message = one_hop_message('MONDO:0005090','disease','chemical_substance','treats',reverse=True)
aragorn_results_graph=aragorn(message,'graph')

In [115]:
nb = [r['node_bindings'] for r in aragorn_results_graph['results'] ]
eb = [r['edge_bindings'] for r in aragorn_results_graph['results']]
extra_nodes = [ [xi['kg_id'][0] for xi in x if (xi['qg_id']=='extra_qn_0')][0] for x in nb ]
extra_edges = [ [xi['kg_id'] for xi in x if (xi['qg_id']=='extra_qe_0')] for x in eb ]

In [116]:
result = requests.get('https://nodenormalization-sri.renci.org/get_normalized_nodes',
                     params={'curie':extra_nodes})
x = result.json()

In [117]:
from collections import defaultdict
edges = {e['id']:e['type'] for e in aragorn_results_graph['knowledge_graph']['edges']}
groupy = defaultdict(list)
for extra_node, extra_edge in zip(extra_nodes, extra_edges):
    nn = x[extra_node]
    if nn is None or 'label' not in nn['id']:
        label = extra_node
    else:
        label = nn['id']
    #print(extra_edge)
    #print(label,edges[extra_edge[0]])
    groupy[edges[extra_edge[0]]].append(label)
    #print(extra_node, aragorn_results_graph['knowledge_graph']['edges'][extra_edge])
    
for etype,labels in groupy.items():
    print(etype)
    print('---------------------')
    for l in labels:
        print(l)
    print('')

related_to
---------------------
{'identifier': 'MONDO:0005090', 'label': 'schizophrenia (disease)'}
{'identifier': 'MONDO:0005335', 'label': 'colorectal neoplasm'}
UMLS:C0009402
{'identifier': 'MONDO:0005361', 'label': 'eosinophilic esophagitis'}
{'identifier': 'MONDO:0005101', 'label': 'ulcerative colitis (disease)'}
{'identifier': 'MONDO:0009960', 'label': 'inflammatory bowel disease 1'}
{'identifier': 'MONDO:0004975', 'label': 'Alzheimer disease'}
UMLS:C0032961
{'identifier': 'MONDO:0017276', 'label': 'frontotemporal dementia'}
{'identifier': 'MONDO:0007488', 'label': 'Lewy body dementia'}
{'identifier': 'MONDO:0005052', 'label': 'irritable bowel syndrome'}
UMLS:C3871280
{'identifier': 'MONDO:0005192', 'label': 'exocrine pancreatic carcinoma'}
{'identifier': 'MONDO:0011122', 'label': 'obesity disorder'}
{'identifier': 'MONDO:0005027', 'label': 'epilepsy'}
{'identifier': 'MONDO:0002635', 'label': 'periodontal disease'}
{'identifier': 'MONDO:0005059', 'label': 'leukemia (disease)'}
{

In [33]:
with open('treatsSchizophrenia_nc.json','w') as outf:
    json.dump(aragorn_results,outf,indent=2)

Fin.

In [15]:
#Target for aspirin
message= one_hop_message('CHEBI:15365','chemical_substance','gene','related_to')
results = striderandfriends(message)

In [16]:
print(json.dumps(results,indent=4))

{
    "query_graph": {
        "nodes": [
            {
                "id": "a",
                "curie": "CHEBI:15365",
                "type": "chemical_substance"
            },
            {
                "id": "b",
                "type": "gene",
                "set": true
            },
            {
                "id": "extra_qn_0",
                "type": [
                    "named_thing",
                    "ontology_class",
                    "organism_taxon"
                ]
            }
        ],
        "edges": [
            {
                "id": "ab",
                "source_id": "a",
                "target_id": "b",
                "type": "related_to"
            },
            {
                "id": "extra_qe_0",
                "source_id": "b",
                "target_id": "extra_qn_0"
            }
        ]
    },
    "knowledge_graph": {
        "nodes": [
            {
                "id": "CHEBI:15365",
                "type": "chemical_subst

In [17]:
#genes that cause cystic fibrosis
message= one_hop_message('MONDO:0009061','disease','gene','causes',reverse=True)
results = striderandfriends(message)

In [18]:
print(results['results'])

[{'node_bindings': [{'qg_id': 'a', 'kg_id': ['MONDO:0009061']}, {'qg_id': 'b', 'kg_id': ['NCBIGene:57491', 'NCBIGene:186', 'NCBIGene:4585', 'NCBIGene:22802', 'NCBIGene:6550', 'NCBIGene:6804', 'NCBIGene:51164', 'NCBIGene:6547', 'NCBIGene:1080'], 'p_value': 2.3634670232224677e-38, 'coalescence_method': 'graph_enrichment', 'enriched_nodes': ['MONDO:0009061']}, {'qg_id': 'extra_qn_0', 'kg_id': ['MONDO:0009061']}], 'edge_bindings': [{'qg_id': 'ab', 'kg_id': ['8ff819b0bf8041f7ffd624f954bd536a', 'fb317c29ae24e5886f22d3c7d6867081', '5bf84ae94cc85b1eb7f0da0d09a9428d', 'a0ddb3a5b0c5c536a14003a8a0786186', 'a8c45616851f027d6d630b42f28b919d', 'a9eac7fa54140a5bafc6b9d471479d46', '3db0c85b8e74b8510fc6fc076d104648', '7c5ad568fed616b223634fbff6860fb2', 'e123a21f4e0bab8e7c4266eddf18f43d']}, {'qg_id': 'extra_qe_0', 'kg_id': ['fb317c29ae24e5886f22d3c7d6867081', '8ff819b0bf8041f7ffd624f954bd536a', '5bf84ae94cc85b1eb7f0da0d09a9428d', 'a0ddb3a5b0c5c536a14003a8a0786186', 'a8c45616851f027d6d630b42f28b919d', 'a

In [19]:
#phenotypes of Charcot Marie Tooth
message = one_hop_message('MONDO:0015626','disease','phenotypic_feature','has_phenotype')
results = striderandfriends(message)

In [20]:
print(results['results'])

[{'node_bindings': [{'qg_id': 'a', 'kg_id': ['MONDO:0015626']}, {'qg_id': 'b', 'kg_id': ['HP:0003444', 'HP:0006460', 'HP:0003551', 'HP:0001155', 'HP:0003384', 'HP:0006915', 'HP:0002495', 'HP:0002936', 'HP:0003731', 'HP:0009046', 'HP:0003487', 'HP:0007010', 'HP:0003380', 'HP:0003383'], 'p_value': 5.436947985203358e-29, 'coalescence_method': 'graph_enrichment', 'enriched_nodes': ['MONDO:0012231']}, {'qg_id': 'extra_qn_0', 'kg_id': ['MONDO:0012231']}], 'edge_bindings': [{'qg_id': 'ab', 'kg_id': ['5378b4189d6899b577d8754d08402921', '858b4dcbe3ea4a09e2dc6ae3bdbf0b88', '2603d6380768d23bc9fa4e1374837a7d', 'a43ad0018340b3dfb7d6c0f628e82b58', 'c1097c284e40e2094880a7c0eb76a8c3', '7c23bc7b4e75213fbcae8366ff087b44', 'f7256a240e7bf4cbcf7b8272621a92e1', 'ef6e4453fba62ecfb7651c674a5b6063', 'bc1979f7b3f1cf82c08366002fab5801', '09a2583e853c3375160c0a47ef09216f', '021ff59cd842168b8bdc8a8a6712e1be', 'ed99bebc81071b73fe2b004542cbf4c9', '7edf9d6b836c239fb8fc034581773cc6', 'f41bc1ae7cdb3d334db67f47884e2aa1'

In [21]:
#What cells are in the brain?
message = one_hop_message('UBERON:0000955','anatomical_entity','cell','part_of',reverse=True)
results = striderandfriends(message)

In [23]:
print(results['results'])

[{'node_bindings': [{'qg_id': 'a', 'kg_id': ['UBERON:0000955']}, {'qg_id': 'b', 'kg_id': ['CL:2000043', 'CL:2000047', 'CL:2000004', 'CL:1001474', 'CL:2000097', 'CL:0002614', 'CL:1001579', 'CL:1001503', 'CL:1000042', 'CL:2000044', 'CL:1001602', 'CL:1001581', 'CL:0002610', 'CL:0013000', 'CL:0012001', 'CL:0010012', 'CL:0000652', 'CL:1000001', 'CL:0000706', 'CL:0002590', 'CL:1001582', 'CL:2000030', 'CL:0000708', 'CL:2000005', 'CL:0002549', 'CL:1001611'], 'p_value': 2.0528992289900936e-73, 'coalescence_method': 'graph_enrichment', 'enriched_nodes': ['UBERON:0000955']}, {'qg_id': 'extra_qn_0', 'kg_id': ['UBERON:0000955']}], 'edge_bindings': [{'qg_id': 'ab', 'kg_id': ['6288c88f27391d2e3a7dea2ef3af5afc', 'c21173bb57754d0957b89ca38e343999', '1c398352690fc02a9a61af269da460c7', '0e745690f9054ffc070a2659f0785256', '7b2c4325e24b99a753ecb1cbdf62807e', 'e2a4fac6c7a041db120126665c8b5263', 'a4c94ed3ac416a38d8b1c34156e7d2ab', '2d9a7118b254de5e8886e7bcea95db5e', 'cbad7918112146ca186b9120c54153dc', '904b8

In [10]:
#which genes enable glycolipid binding?
message = one_hop_message('GO:0051861','molecular_activity','gene','enabled_by')
results = striderandfriends(message)

In [17]:
#what part of the body does vasculitis affect?
message = one_hop_message('MONDO:0018882','disease','anatomical_entity','biolink:affects')
results = automat('uberongraph',message)
#results = striderandfriends(message)
print(message)
print(results)

500
{'message': {'query_graph': {'nodes': [{'id': 'a', 'type': 'disease', 'curie': 'MONDO:0018882'}, {'id': 'b', 'type': 'anatomical_entity'}], 'edges': [{'id': 'ab', 'source_id': 'a', 'target_id': 'b', 'type': 'biolink:affects'}]}, 'knowledge_graph': {'nodes': [], 'edges': []}, 'results': []}}
{'error': ''}


In [12]:
#what are the types of diabetes?
message = one_hop_message('MONDO:0005015','disease','disease','subclass_of',reverse=True)
results = striderandfriends(message)

no answers
{'query_graph': {'nodes': [{'id': 'a', 'curie': 'MONDO:0005015', 'type': 'disease'}, {'id': 'b', 'curie': None, 'type': 'disease'}], 'edges': [{'id': 'ab', 'source_id': 'b', 'target_id': 'a', 'type': 'subclass_of'}]}, 'knowledge_graph': {'nodes': [], 'edges': []}, 'results': []}
