# Setup

In [1]:
import ast
import copy
import json
from collections import defaultdict
from datetime import datetime as dt

import pandas as pd
import requests

In [2]:
#https://pypi.org/project/gamma-viewer/
from gamma_viewer import GammaViewer
from IPython.display import display, Markdown

In [3]:
def printjson(j):
    """Pretty-print ``j`` to stdout as JSON text indented by four spaces."""
    rendered = json.dumps(j, indent=4)
    print(rendered)
def print_json(j):
    """Snake-case alias for ``printjson``: forwards ``j`` to it unchanged."""
    printjson(j)
    return None

In [4]:
def post(name, url, message, params=None):
    """POST ``message`` as a JSON body to ``url`` and return the decoded JSON reply.

    Parameters
    ----------
    name : label used only to prefix the diagnostic printed on failure.
    url : endpoint to post to.
    message : payload handed to ``requests.post`` through its ``json=`` argument.
    params : optional query-string parameters; ``None`` sends none.

    Returns
    -------
    The decoded JSON body when the status code is 200. For any other status
    the status code, the response object and the error body are printed and
    an empty dict is returned.
    """
    # requests treats params=None as "no query string", so one call covers both cases.
    response = requests.post(url, json=message, params=params)
    if response.status_code != 200:
        print(name, 'error:', response.status_code)
        print(response)
        # Error bodies are frequently not JSON (e.g. an HTML gateway page).
        # Decoding failures surface as ValueError; fall back to the raw text
        # so the failure path still returns {} instead of raising.
        try:
            print(response.json())
        except ValueError:
            print(response.text)
        return {}
    return response.json()

In [5]:
def coalesce(em):
    """Send the message ``em`` to the answercoalesce.renci.org ``coalesce/all`` endpoint.

    Delegates to ``post`` and returns whatever it returns.
    """
    endpoint = 'https://answercoalesce.renci.org/coalesce/all'
    return post('coalesce', endpoint, em)

In [30]:
def normalize(ids):
    """Look up ``ids`` in the node-normalization service and return the identifiers it reports.

    The CURIEs are posted under the ``curies`` key. Every non-``None`` entry
    of the reply contributes its ``['id']['identifier']`` value, in the
    reply's own order; entries that come back as ``None`` are skipped.
    """
    reply = post(
        'nn',
        'https://nodenormalization-sri.renci.org/get_normalized_nodes',
        {'curies': ids},
    )
    preferred = []
    for entry in reply.values():
        if entry is not None:
            preferred.append(entry['id']['identifier'])
    return preferred

In [7]:
def enrich(biolink_category, identifiers):
    """Build a single-node message from ``identifiers`` and submit it to ``coalesce``.

    The identifiers are first passed through ``normalize``. Each returned
    identifier becomes a knowledge-graph node of category ``biolink_category``
    and one result binding query node ``n1`` to it. Identifiers starting with
    ``NCBIGene`` are rewritten with an upper-case ``NCBIGENE:`` prefix first.

    Returns whatever ``coalesce`` returns for the assembled message.
    """
    kg_nodes = {}
    bindings = []
    for curie in normalize(identifiers):
        if curie.startswith('NCBIGene'):
            # Keep only the local part after the first colon and re-prefix it.
            curie = 'NCBIGENE:' + curie.split(':')[1]
        kg_nodes[curie] = {'category': [biolink_category]}
        bindings.append({'node_bindings': {'n1': [{"id": curie}]}, 'edge_bindings': {}})

    query_graph = {
        "nodes": {"n1": {"category": [biolink_category]}},
        "edges": {},
    }
    message = {
        'query_graph': query_graph,
        'knowledge_graph': {'nodes': kg_nodes, 'edges': {}},
        'results': bindings,
    }
    return coalesce({'message': message})

In [8]:
def ac_to_table(aragorn_result, mnode):
    """Summarise every result of a message as one row of a DataFrame.

    Parameters
    ----------
    aragorn_result : dict with a ``['message']['results']`` list.
    mnode : key of the query node whose binding list is inspected.

    Returns
    -------
    pandas.DataFrame with one row per result and the columns
    ``N_Answer_Nodes`` (number of keys in the result's ``node_bindings``),
    ``N_Merged_Nodes`` (length of the ``mnode`` binding list) and ``Method``
    (the first binding's ``coalescence_method``, or ``'Original'`` when the
    binding list is empty or carries no such key).
    """
    answer_node_count = []
    merged_count = []
    method = []
    for result in aragorn_result['message']['results']:
        node_bindings = result['node_bindings']
        merged = node_bindings[mnode]
        answer_node_count.append(len(node_bindings))
        merged_count.append(len(merged))
        # Only "no first binding" and "no method key" mean an un-coalesced
        # result; any other error is a real problem and must propagate.
        try:
            method.append(merged[0]['coalescence_method'])
        except (KeyError, IndexError):
            method.append('Original')
    return pd.DataFrame({
        'N_Answer_Nodes': answer_node_count,
        'N_Merged_Nodes': merged_count,
        'Method': method,
    })

def filter_to_simple(aragorn_result, mnode):
    """Return a deep copy of ``aragorn_result`` keeping only un-coalesced results.

    A result is kept when the first binding of query node ``mnode`` has no
    ``coalescence_method`` key. The number of kept results is printed.

    The returned structure shares no objects with ``aragorn_result``, so
    mutating it leaves the input untouched.
    """
    simple_result = copy.deepcopy(aragorn_result)
    message = simple_result['message']
    # Filter the copied results rather than the caller's: taking them from
    # the input would alias its result objects and defeat the deep copy.
    message['results'] = [
        result for result in message['results']
        if 'coalescence_method' not in result['node_bindings'][mnode][0]
    ]
    print(len(message['results']))
    return simple_result

def print_nodenames(simple_result, qnode):
    """Print, one per line, the name of the node bound first to ``qnode`` in each result.

    For every result the ``id`` of the first ``qnode`` binding is looked up
    in the message's knowledge-graph nodes and that node's ``name`` printed.
    """
    message = simple_result['message']
    for answer in message['results']:
        bound_id = answer['node_bindings'][qnode][0]['id']
        # The knowledge graph maps the bound identifier to its node record.
        kg_node = message['knowledge_graph']['nodes'][bound_id]
        print(kg_node['name'])

def filter_to_coal(aragorn_result, mnode, method):
    """Return a deep copy of ``aragorn_result`` keeping results coalesced by ``method``.

    A result is kept when the first binding of query node ``mnode`` has a
    ``coalescence_method`` key whose value equals ``method``. The number of
    kept results is printed.

    The returned structure shares no objects with ``aragorn_result``, so
    mutating it leaves the input untouched.
    """
    selected = copy.deepcopy(aragorn_result)
    message = selected['message']
    kept = []
    # Walk the copied results: reusing the input's result objects would
    # alias them into the returned message and defeat the deep copy.
    for result in message['results']:
        first_binding = result['node_bindings'][mnode][0]
        # The explicit membership test keeps method=None from matching
        # bindings that simply lack the key.
        if 'coalescence_method' in first_binding and first_binding['coalescence_method'] == method:
            kept.append(result)
    print(len(kept))
    message['results'] = kept
    return selected

def filter_to_gc(aragorn_result, mnode):
    """Keep only results whose coalescence method is ``graph_enrichment`` (via ``filter_to_coal``)."""
    method = 'graph_enrichment'
    return filter_to_coal(aragorn_result, mnode, method)

def filter_to_pc(aragorn_result, mnode):
    """Keep only results whose coalescence method is ``property_enrichment`` (via ``filter_to_coal``)."""
    method = 'property_enrichment'
    return filter_to_coal(aragorn_result, mnode, method)

In [57]:
def print_gc_result(graph, gc_result, mnode='n1'):
    """Print the p-value of one result and every edge it binds.

    Parameters
    ----------
    graph : knowledge graph dict with ``nodes`` and ``edges`` mappings.
    gc_result : one result, with ``node_bindings`` and ``edge_bindings``.
    mnode : query node whose first binding carries ``p_value``. Defaults to
        ``'n1'``, the key that was previously hard-coded.

    Each bound edge is printed as ``<subject name> -[<predicate>]-> <object name>``.
    """
    print('p_value:', gc_result['node_bindings'][mnode][0]['p_value'])
    for bound_edges in gc_result['edge_bindings'].values():
        for eb in bound_edges:
            # Edge bindings hold only ids; the edge itself lives in the graph.
            kge = graph['edges'][eb['id']]
            subject_node = kge['subject']
            object_node = kge['object']
            pred = kge['predicate']
            print(f"{graph['nodes'][subject_node]['name']} -[{pred}]-> {graph['nodes'][object_node]['name']}")
    
def print_pc_result(knowledge_graph, pc_result, mnode='n1'):
    """Print the p-values, properties and member node names of one result.

    Parameters
    ----------
    knowledge_graph : knowledge graph dict with a ``nodes`` mapping.
    pc_result : one result, with ``node_bindings``.
    mnode : query node whose bindings are reported. Defaults to ``'n1'``,
        the key that was previously hard-coded.

    ``p_values`` and ``properties`` are read from the first binding; then the
    name of every bound node is printed, indented by two spaces.
    """
    bindings = pc_result['node_bindings'][mnode]
    print('p_value:', bindings[0]['p_values'])
    print('properties:', bindings[0]['properties'])
    for node in bindings:
        kgn = knowledge_graph['nodes'][node['id']]
        print(f"  {kgn['name']}")

In [49]:
# The file is not valid JSON, so json.load() cannot read it. Its text is
# parsed as a Python literal instead; ast.literal_eval only accepts literal
# syntax and does not execute code.
# NOTE(review): UTF-8 is assumed for the file's encoding - confirm.
with open('Step3a_ConnectionsKP_Output_DILI.json', 'r', encoding='utf-8') as inf:
    s = inf.read()

result = ast.literal_eval(s)

In [22]:
# Subject identifier of every record in the first element of the loaded output
chemicals = [record['subject'] for record in result[0]]
# Swap each identifier's prefix for CHEMBL.COMPOUND, keeping the part after the first colon
chemical_cc = ['CHEMBL.COMPOUND:' + curie.split(':')[1] for curie in chemicals]

In [23]:
# Display the re-prefixed identifiers as the cell output
chemical_cc

['CHEMBL.COMPOUND:CHEMBL88',
 'CHEMBL.COMPOUND:CHEMBL428647',
 'CHEMBL.COMPOUND:CHEMBL53463',
 'CHEMBL.COMPOUND:CHEMBL3545252',
 'CHEMBL.COMPOUND:CHEMBL83',
 'CHEMBL.COMPOUND:CHEMBL1200796',
 'CHEMBL.COMPOUND:CHEMBL92',
 'CHEMBL.COMPOUND:CHEMBL1201585',
 'CHEMBL.COMPOUND:CHEMBL1399',
 'CHEMBL.COMPOUND:CHEMBL185']

In [31]:
# Normalize the CHEMBL identifiers and submit them for coalescing as biolink:ChemicalSubstance nodes
answer = enrich('biolink:ChemicalSubstance',chemical_cc)

In [32]:
# One row per result: bound query-node count, size of the 'n1' binding, coalescence method
ac_to_table(answer,'n1')

Unnamed: 0,N_Answer_Nodes,N_Merged_Nodes,Method
0,1,7,property_enrichment
1,1,9,property_enrichment
2,1,8,property_enrichment
3,1,7,property_enrichment
4,1,5,property_enrichment
5,1,5,property_enrichment
6,1,6,property_enrichment
7,1,5,property_enrichment
8,1,4,property_enrichment
9,1,5,property_enrichment


In [46]:
# Keep the results whose first 'n1' binding was coalesced by graph_enrichment;
# the helper also prints how many results were kept
x = filter_to_gc(answer,'n1')

10


In [58]:
# The loop variable is deliberately not named `result`: that notebook-level
# name holds the data loaded from the KP output file, and rebinding it here
# would break re-running the cell that reads `result[0]`.
for i, gc_result in enumerate(x['message']['results']):
    print('Result', i)
    print_gc_result(x['message']['knowledge_graph'], gc_result)

Result 0
p_value: 3.5916218505919564e-20
Docetaxel trihydrate -[biolink:causes_adverse_event]-> Krukenberg carcinoma
Anastrozole -[biolink:causes_adverse_event]-> Krukenberg carcinoma
Paclitaxel -[biolink:causes_adverse_event]-> Krukenberg carcinoma
Doxorubicin -[biolink:causes_adverse_event]-> Krukenberg carcinoma
Tamoxifen -[biolink:causes_adverse_event]-> Krukenberg carcinoma
Result 1
p_value: 2.7129466315054048e-18
Tamoxifen -[biolink:treats]-> adenoid cystic breast carcinoma
Docetaxel trihydrate -[biolink:treats]-> adenoid cystic breast carcinoma
Anastrozole -[biolink:treats]-> adenoid cystic breast carcinoma
Doxorubicin -[biolink:treats]-> adenoid cystic breast carcinoma
Paclitaxel -[biolink:treats]-> adenoid cystic breast carcinoma
Result 2
p_value: 1.3014678485860711e-17
Paclitaxel -[biolink:causes_adverse_event]-> radiation injury
Anastrozole -[biolink:causes_adverse_event]-> radiation injury
Tamoxifen -[biolink:causes_adverse_event]-> radiation injury
Docetaxel trihydrate -[b

In [44]:
# Keep the results coalesced by property_enrichment (the helper prints the count)
p = filter_to_pc(answer, 'n1')
# The loop variable is deliberately not named `result`: that notebook-level
# name holds the data loaded from the KP output file and must not be rebound.
for i, pc_result in enumerate(p['message']['results']):
    print('Result', i)
    print_pc_result(p['message']['knowledge_graph'], pc_result)

33
Result 0
p_value: [3.4740732760049123e-28, 3.0150907252934714e-22, 9.19237936641059e-21, 1.4453940155679862e-17, 5.531687174713188e-15]
properties: ['Cardiotoxic antineoplastic agents', 'Antineoplastic and Immunomodulating Agents', 'Antineoplastic Agents', 'Cytochrome P-450 Substrates', 'drugbank.approved']
  Paclitaxel
  5-Fluorouracil
  Anastrozole
  Cyclophosphamide
  Docetaxel
  Tamoxifen
  Doxorubicin
Result 1
p_value: [5.66296300312727e-20]
properties: ['therapeutic_flag']
  5-Fluorouracil
  Cyclophosphamide
  Docetaxel
  Docetaxel trihydrate
  Cyclophosphamide monohydrate
  Paclitaxel
  Anastrozole
  Tamoxifen
  Doxorubicin
Result 2
p_value: [7.281244810173953e-19, 1.6899535519547302e-14, 1.893942766744108e-14, 4.072011963732734e-13]
properties: ['antineoplastic_agent', 'drug', 'pharmaceutical', 'application']
  Cyclophosphamide monohydrate
  Paclitaxel
  5-Fluorouracil
  Anastrozole
  Cyclophosphamide
  Docetaxel
  Tamoxifen
  Docetaxel trihydrate
Result 3
p_value: [6.400974