# Setup

In [1]:
import json
import requests
import copy
from datetime import datetime as dt
from collections import defaultdict
import pandas as pd

In [2]:
#https://pypi.org/project/gamma-viewer/
from gamma_viewer import GammaViewer
from IPython.display import display, Markdown

In [3]:
def printjson(j):
    print(json.dumps(j,indent=4))
def print_json(j):
    printjson(j)

In [4]:
def post(name,url,message,params=None):
    """A simple function for posting to a URL and returning the json response"""
    if params is None:
        response = requests.post(url,json=message)
    else:
        response = requests.post(url,json=message,params=params)
    if not response.status_code == 200:
        print(name, 'error:',response.status_code)
        print(response)
        print(response.json())
        return {}
    return response.json()

In [5]:
def coalesce(em):
    return post('coalesce','https://answercoalesce.renci.org/coalesce/all',em)

In [6]:
def normalize(ids):
    j = {'curies':ids}
    result = post('nn','https://nodenormalization-sri.renci.org/get_normalized_nodes',j)
    new_ids = [ v['id']['identifier'] for k,v in result.items() if v is not None ]
    return new_ids

In [7]:
def enrich(biolink_category, identifiers):
    identifiers = normalize(identifiers)
    em={'message': 
         {
             'query_graph':
                  {
                    "nodes": {
                        "n1": {
                            "category": [ biolink_category ]
                            }
                    },
                    "edges":{}  
                  },
              'knowledge_graph': {
                  'nodes': {},
                  'edges': {}
                  },
              'results': []
        }
    }
    for n in identifiers:
        if n.startswith('NCBIGene'):
            n = f"NCBIGENE:{n.split(':')[1]}"
        em['message']['knowledge_graph']['nodes'][n]= {'category':[biolink_category]}
        em['message']['results'].append( {'node_bindings': {'n1':[{"id": n}]}, 'edge_bindings': {} })
    return coalesce(em)

In [8]:
def ac_to_table(aragorn_result,mnode):
    #scores = []
    answer_node_count = []
    merged_count = []
    method = []
    extra = []
    for res_i, result in enumerate(aragorn_result['message']['results']):
        #scores.append(result['score'])
        answer_node_count.append(len(result['node_bindings']))
        merged_count.append(len(result['node_bindings'][mnode]))
        try:
            method.append(result['node_bindings'][mnode][0]['coalescence_method'])
        except:
            method.append('Original')
    df = pd.DataFrame({'N_Answer_Nodes':answer_node_count, 'N_Merged_Nodes':merged_count, 'Method':method})
    return df

def filter_to_simple(aragorn_result,mnode):
    simple_result = copy.deepcopy(aragorn_result)
    simple_result['message']['results'] = list(
    filter( lambda x: 'coalescence_method' not in x['node_bindings'][mnode][0], 
           aragorn_result['message']['results'])
    )
    print(len(simple_result['message']['results']))
    return simple_result

def print_nodenames(simple_result,qnode):
    #Print the names of the answers
    for result in simple_result['message']['results']:
        #Each answer has an identifier:
        n1_id = result['node_bindings'][qnode][0]['id']
        #The information for that identifier is in the KG:
        node = simple_result['message']['knowledge_graph']['nodes'][n1_id]
        #Each node has a name
        print(node['name'])

def filter_to_coal(aragorn_result,mnode,method):
    #The results that have been coalesced:
    coalesced_results = list(
        filter( lambda x: 'coalescence_method'  in x['node_bindings'][mnode][0], 
               aragorn_result['message']['results'])
    )
    #Those that have been coalesced via a new node (graph coalescence)
    graph_coalesced_results = list(
        filter( lambda x: x['node_bindings'][mnode][0]['coalescence_method'] == method, coalesced_results)
    )
    print(len(graph_coalesced_results))
    simple_result = copy.deepcopy(aragorn_result)
    simple_result['message']['results'] = graph_coalesced_results
    return simple_result

def filter_to_gc(aragorn_result,mnode):
    return filter_to_coal(aragorn_result,mnode,'graph_enrichment')

def filter_to_pc(aragorn_result,mnode):
    return filter_to_coal(aragorn_result,mnode,'property_enrichment')

In [25]:
def print_gc_result(graph,gc_result):
    print('p_value:', gc_result['node_bindings']['n1'][0]['p_value'])
    for extra_edge in gc_result['edge_bindings']:
        for eb in gc_result['edge_bindings'][extra_edge]:        
            kge = graph['edges'][eb['id']]
            subject_node = kge['subject']
            object_node = kge['object']
            pred = kge['predicate']
            print( f"{graph['nodes'][subject_node]['name']} -[{pred}]-> {graph['nodes'][object_node]['name']}")
    
def print_pc_result(knowledge_graph,pc_result):
    print('p_value:', pc_result['node_bindings']['n1'][0]['p_values'])
    print('properties:', pc_result['node_bindings']['n1'][0]['properties'])
    for node in pc_result['node_bindings']['n1'][:5]:
        kgn = knowledge_graph['nodes'][node['id']]
        print( f"  {kgn['name']}")

In [12]:
#valid json file
with open('Step3a_MolecularKP_Output_DILI.json','r') as inf:
    result =json.load(inf)

In [18]:
chemicals = [x['object'] for x in result ]
#reformat prefix
#chemical_cc = [ f"CHEMBL.COMPOUND:{xid.split(':')[1]}" for xid in chemicals ]

In [19]:
chemicals

['CHEBI:5001',
 'CHEBI:9241',
 'CHEBI:63613',
 'PUBCHEM.COMPOUND:3177',
 'PUBCHEM.COMPOUND:11317348',
 'PUBCHEM.COMPOUND:41781',
 'PUBCHEM.COMPOUND:5281881',
 'PUBCHEM.COMPOUND:4336830',
 'PUBCHEM.COMPOUND:73707372',
 'PUBCHEM.COMPOUND:3540',
 'PUBCHEM.COMPOUND:5330286',
 'PUBCHEM.COMPOUND:9910224',
 'CHEBI:18405',
 'PUBCHEM.COMPOUND:5374',
 'PUBCHEM.COMPOUND:5281071',
 'PUBCHEM.COMPOUND:9887884',
 'PUBCHEM.COMPOUND:2253',
 'PUBCHEM.COMPOUND:10251',
 'PUBCHEM.COMPOUND:5479',
 'PUBCHEM.COMPOUND:4792',
 'PUBCHEM.COMPOUND:445580',
 'PUBCHEM.COMPOUND:104845',
 'PUBCHEM.COMPOUND:73707397',
 'PUBCHEM.COMPOUND:5614',
 'PUBCHEM.COMPOUND:5253',
 'PUBCHEM.COMPOUND:36462',
 'CHEBI:35237',
 'PUBCHEM.COMPOUND:21678144',
 'PUBCHEM.COMPOUND:57448257',
 'PUBCHEM.COMPOUND:4848',
 'PUBCHEM.COMPOUND:73707417',
 'PUBCHEM.COMPOUND:62824',
 'PUBCHEM.COMPOUND:11689583',
 'PUBCHEM.COMPOUND:6604926',
 'PUBCHEM.COMPOUND:3682',
 'PUBCHEM.COMPOUND:60773',
 'PUBCHEM.COMPOUND:441314',
 'PUBCHEM.COMPOUND:4810',
 'PU

In [20]:
answer = enrich('biolink:ChemicalSubstance',chemicals)

In [21]:
ac_to_table(answer,'n1')

Unnamed: 0,N_Answer_Nodes,N_Merged_Nodes,Method
0,1,183,property_enrichment
1,1,185,property_enrichment
2,1,186,property_enrichment
3,1,185,property_enrichment
4,1,192,property_enrichment
...,...,...,...
1646,1,1,Original
1647,1,1,Original
1648,1,1,Original
1649,1,1,Original


In [22]:
x = filter_to_gc(answer,'n1')

534


In [23]:
for i,result in enumerate(x['message']['results']):
    print('Result',i)
    print_gc_result(x['message']['knowledge_graph'],result)

Result 0
p_value: 5.812446102651206e-85
Propofol -[biolink:causes_adverse_event]-> delirium
Olmesartan -[biolink:causes_adverse_event]-> delirium
(3R)-1-[4,4-Bis(3-methylthiophen-2-yl)but-3-enyl]piperidin-1-ium-3-carboxylate -[biolink:causes_adverse_event]-> delirium
Spironolactone -[biolink:causes_adverse_event]-> delirium
Quetiapine -[biolink:causes_adverse_event]-> delirium
Axitinib -[biolink:causes_adverse_event]-> delirium
Dexmedetomidine -[biolink:causes_adverse_event]-> delirium
Prochlorperazine -[biolink:causes_adverse_event]-> delirium
Zotepine -[biolink:causes_adverse_event]-> delirium
(2S,3R)-2-Azaniumyl-3-(3,4-dihydroxyphenyl)-3-hydroxypropanoate -[biolink:causes_adverse_event]-> delirium
Cabergoline -[biolink:causes_adverse_event]-> delirium
Ziprasidone -[biolink:causes_adverse_event]-> delirium
Apomorphine -[biolink:causes_adverse_event]-> delirium
Rotigotine -[biolink:causes_adverse_event]-> delirium
Lamotrigine -[biolink:causes_adverse_event]-> delirium
Dihydroergocrist

Clarithromycin -[biolink:contributes_to]-> peripheral vertigo
Methamphetamine -[biolink:contributes_to]-> peripheral vertigo
Oxcarbazepine -[biolink:contributes_to]-> peripheral vertigo
Formaldehyde -[biolink:contributes_to]-> peripheral vertigo
Guanfacine -[biolink:contributes_to]-> peripheral vertigo
Neomycin -[biolink:contributes_to]-> peripheral vertigo
Haloperidol -[biolink:contributes_to]-> peripheral vertigo
Result 40
p_value: 2.2003661944351167e-51
Clotrimazole -[biolink:prevents]-> common cold
Paroxetine -[biolink:prevents]-> common cold
Aripiprazole -[biolink:prevents]-> common cold
Oxcarbazepine -[biolink:prevents]-> common cold
Capecitabine -[biolink:prevents]-> common cold
Irinotecan -[biolink:prevents]-> common cold
Haloperidol -[biolink:prevents]-> common cold
Chlorpromazine -[biolink:prevents]-> common cold
2-Hydroxy-4-[(1R)-1-hydroxy-2-(methylazaniumyl)ethyl]phenolate -[biolink:prevents]-> common cold
Torsemide -[biolink:prevents]-> common cold
Clonidine -[biolink:prev

Clotrimazole -[biolink:prevents]-> rheumatoid arthritis
2-Hydroxy-4-[(1R)-1-hydroxy-2-(methylazaniumyl)ethyl]phenolate -[biolink:prevents]-> rheumatoid arthritis
Haloperidol -[biolink:prevents]-> rheumatoid arthritis
Capecitabine -[biolink:prevents]-> rheumatoid arthritis
Paroxetine -[biolink:prevents]-> rheumatoid arthritis
Lamotrigine -[biolink:prevents]-> rheumatoid arthritis
Mesalamine -[biolink:prevents]-> rheumatoid arthritis
Ziprasidone -[biolink:prevents]-> rheumatoid arthritis
Oxcarbazepine -[biolink:prevents]-> rheumatoid arthritis
Glyburide -[biolink:prevents]-> rheumatoid arthritis
Nicorandil -[biolink:prevents]-> rheumatoid arthritis
Phenobarbital -[biolink:prevents]-> rheumatoid arthritis
Chlorpromazine -[biolink:prevents]-> rheumatoid arthritis
Risperidone -[biolink:prevents]-> rheumatoid arthritis
Propofol -[biolink:prevents]-> rheumatoid arthritis
Aripiprazole -[biolink:prevents]-> rheumatoid arthritis
Spironolactone -[biolink:prevents]-> rheumatoid arthritis
Torsemide

KeyError: 'name'

In [26]:
p = filter_to_pc(answer,'n1')
for i,result in enumerate(p['message']['results']):
    print('Result',i)
    print_pc_result(p['message']['knowledge_graph'],result)

534
Result 0
p_value: [1.1495370818557763e-270]
properties: ['drugbank.approved']
  Prazosin
  Paliperidone
  Olanzapine
  Amoxapine
  Tizanidine
Result 1
p_value: [6.59644127909771e-252]
properties: ['therapeutic_flag']
  Prazosin
  Linifanib
  Paliperidone
  Olanzapine
  Rescinnamine
Result 2
p_value: [6.015893583648995e-197]
properties: ['pharmaceutical']
  Prazosin
  Linifanib
  (2-(2',6'-Dimethoxy)phenoxyethylamino)methylbenzo-1,4-dioxane
  Olanzapine
  Rescinnamine
Result 3
p_value: [1.6424020007278837e-196]
properties: ['drug']
  Prazosin
  Linifanib
  (2-(2',6'-Dimethoxy)phenoxyethylamino)methylbenzo-1,4-dioxane
  Olanzapine
  Rescinnamine
Result 4
p_value: [5.453354578023201e-175]
properties: ['application']
  Prazosin
  Linifanib
  (2-(2',6'-Dimethoxy)phenoxyethylamino)methylbenzo-1,4-dioxane
  Olanzapine
  Rescinnamine
Result 5
p_value: [8.838117408888529e-156]
properties: ['Cytochrome P-450 Substrates']
  Paliperidone
  Dronabinol
  Olanzapine
  Amoxapine
  Tinidazole
Resul

  Zileuton
  Etoperidone
  Flupentixol
  Methapyrilene
  Pergolide
Result 165
p_value: [1.3174174352601868e-17]
properties: ['vasoconstrictor_agent']
  6-(Tert-butyl)-3-(2-imidazolin-2-ylmethyl)-2,4-dimethylphenol, chloride
  Zolmitriptan
  Ergotamine
  Norepinephrine
  Levonordefrin
Result 166
p_value: [1.4350069055385912e-17]
properties: ['Cytochrome P-450 CYP2C19 Inhibitors']
  Nortriptyline
  Midostaurin
  Olanzapine
  Bicalutamide
  Troglitazone
Result 167
p_value: [2.4842463910248092e-17]
properties: ['Sympathomimetics']
  Celiprolol
  Methamphetamine
  Norepinephrine
  Mephentermine
  (-)-Norephedrine
Result 168
p_value: [2.701009177630742e-17]
properties: ['EC_2.*_(transferase)_inhibitor']
  Bosutinib
  Palbociclib
  Midostaurin
  5-[(Z)-(5-Chloro-2-oxo-1,2-dihydro-3H-indol-3-ylidene)methyl]-N-[2-(diethylamino)ethyl]-2,4-dimethyl-1H-pyrrole-3-carboxamide
  Linifanib
Result 169
p_value: [4.154398065214315e-17]
properties: ['EC_2.7.*_(P_containing_group_transferase)_inhibitor']
 

  Midostaurin
  Staurosporine
  Carvedilol
  Zolmitriptan
Result 302
p_value: [1.0754228860208421e-09]
properties: ['Cytochrome P-450 CYP2C19 Inhibitors (weak)']
  Nortriptyline
  Midostaurin
  Olanzapine
  Fenofibrate
  Clozapine
Result 303
p_value: [1.0754228860208421e-09]
properties: ['OCT1 substrates']
  Cytarabine
  Norepinephrine
  Prazosin
  Spermine
  2-Hydroxy-4-[(1R)-1-hydroxy-2-(methylazaniumyl)ethyl]phenolate
Result 304
p_value: [1.1286276665287279e-09]
properties: ['Musculo-Skeletal System']
  Oxaprozin
  Nabumetone
  Diclofenac
  Flurbiprofen
  (3-Azaniumyl-1-hydroxy-1-phosphonopropyl)-hydroxyphosphinate
Result 305
p_value: [1.2594194635837179e-09]
properties: ['Cytochrome P-450 CYP2C8 Inhibitors (moderate)']
  Troglitazone
  Saquinavir
  Amitriptyline
  Spironolactone
  Clotrimazole
Result 306
p_value: [1.2594194635837179e-09]
properties: ['Isoquinolines']
  Amonafide
  Papaverine
  Apomorphine
  Saquinavir
  Debrisoquine
Result 307
p_value: [1.2898385384369211e-09]
prop

Result 396
p_value: [3.354543754660563e-07]
properties: ['EC_3.1.4.*_(phosphoric_diester_hydrolase)_inhibitor']
  (2R,3S)-3-(6-Amino-9H-purin-9-yl)nonan-2-ol
  Desipramine
  Milrinone
  5-[(Z)-(5-Chloro-2-oxo-1,2-dihydro-3H-indol-3-ylidene)methyl]-N-[2-(diethylamino)ethyl]-2,4-dimethyl-1H-pyrrole-3-carboxamide
Result 397
p_value: [3.665886090634281e-07]
properties: ['OAT1/SLC22A6 inhibitors']
  Latanoprost
  Glyburide
  Diclofenac
  Flurbiprofen
  Methotrexate
Result 398
p_value: [4.079073951797496e-07]
properties: ['Ethanolamines']
  Norepinephrine
  Sotalol
  Levonordefrin
  (2S,3R)-2-Azaniumyl-3-(3,4-dihydroxyphenyl)-3-hydroxypropanoate
Result 399
p_value: [4.079073951797496e-07]
properties: ['Fatty Acids, Unsaturated']
  Latanoprost
  Prostaglandin B2
  Docosahexaenoic acid
  Linoleic acid
Result 400
p_value: [4.152930982399474e-07]
properties: ['Adrenergic beta-1 Receptor Agonists']
  Norepinephrine
  (2S,3R)-2-Azaniumyl-3-(3,4-dihydroxyphenyl)-3-hydroxypropanoate
  2-Hydroxy-4-[(