## Recapture knowledge from the KG about NPDIs and compare with NaPDI repo data (in vitro experiments)

The PheKnowLator and machine-reading (MR) graphs are merged in the original Path Search notebook; the resulting merged graph is loaded here.

In [None]:
# # uncomment and run to install any required modules from np-kg/requirements.txt if any of the packages below 
# give import errors
# import sys
# !{sys.executable} -m pip install -r requirements.txt

In [None]:
import os
import os.path
import networkx as nx
import json
import urllib
import traceback
from itertools import islice
from rdflib import Graph, URIRef, BNode, Namespace, Literal
from rdflib.namespace import RDF, OWL
from tqdm import tqdm
import json

In [None]:
import hashlib

In [None]:
import pickle
import pandas as pd
import numpy as np
import KG_path_searches

In [None]:
# Relative locations of the serialized knowledge-graph resources used by this notebook.
KG_PATH = '../resources/knowledge_graphs/'
KG_NAME_MERGED = 'PheKnowLator_machine_read_merged_instance_based_OWLNETS_v1.0.gpickle'
# Pickled lookup table of node labels; lives in the same directory as the graph.
NodeLabelsFile = '{}nodeLabels_v1.0.pickle'.format(KG_PATH)

In [None]:
# Load the pickled lookup table that the helper functions index by str(node) / str(edge key).
# NOTE: pickle executes code on load; only use files from a trusted source.
with open(NodeLabelsFile, mode='rb') as labels_fh:
    nodeLabels = pickle.load(labels_fh)

In [None]:
def get_graph_stats(kg):
    """Print summary statistics for a NetworkX graph.

    Writes to stdout: the node, edge and self-loop counts; the edge/node
    ratio (printed as "The Average Degree"); the six highest-degree nodes
    with their labels from the module-level ``nodeLabels`` mapping; and the
    graph density.

    Args:
        kg: NetworkX graph to summarize. Node identifiers are stringified
            before being looked up in ``nodeLabels``.

    Returns:
        None.
    """
    nodes = nx.number_of_nodes(kg)
    edges = nx.number_of_edges(kg)
    self_loops = nx.number_of_selfloops(kg)

    print('There are {} nodes, {} edges, and {} self-loop(s)'.format(nodes, edges, self_loops))
    # An empty graph has no meaningful ratio; report 0.0 instead of raising ZeroDivisionError.
    avg_degree = float(edges) / nodes if nodes else 0.0
    print('The Average Degree is {}'.format(avg_degree))

    print('Nodes with highest degree:')
    # kg.degree yields (node, degree) pairs; keep the six largest degrees.
    n_deg = sorted(
        ((str(node), degree) for node, degree in kg.degree),
        key=lambda pair: pair[1],
        reverse=True,
    )[:6]

    for node_id, degree in n_deg:
        # A node without a label entry is printed with an empty label rather than
        # aborting the whole report with a KeyError.
        print('Label: {}'.format(nodeLabels.get(node_id, '')))
        print('{} (degree={})'.format(node_id, degree))

    density = nx.density(kg)
    print('The density of the graph is: {}'.format(density))

In [None]:
## READ MERGED GRAPH
# nx.read_gpickle was removed in NetworkX 3.0. A .gpickle file is an ordinary pickle,
# so fall back to the pickle module when the NetworkX helper is unavailable.
# NOTE: pickle executes code on load; only use graph files from a trusted source.
merged_graph_file = KG_PATH + KG_NAME_MERGED
if hasattr(nx, 'read_gpickle'):
    nx_graph = nx.read_gpickle(merged_graph_file)
else:
    with open(merged_graph_file, 'rb') as graph_fh:
        nx_graph = pickle.load(graph_fh)

In [None]:
# Print node/edge/self-loop counts, the highest-degree nodes and the density of the merged graph.
get_graph_stats(nx_graph)

In [None]:
# output directory (created if it does not exist)
DIR_OUT = '../output_files/'
# makedirs(exist_ok=True) avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(DIR_OUT, exist_ok=True)

In [None]:
# rdflib namespaces; identifiers are built from them by attribute access (e.g. obo.RO_0002449).
obo = Namespace('http://purl.obolibrary.org/obo/')
napdi = Namespace('http://napdi.org/napdi_srs_imports:')

In [None]:
## Read the pickled dictionary of OBO identifiers.
# Each entry is indexed positionally below (e.g. obodict['CYP3A4'][0]) to obtain a graph node.
with open(KG_PATH + 'strToOBOdict.pickle', mode='rb') as obo_fh:
    obodict = pickle.load(obo_fh)
# Show how many names the dictionary contains.
len(obodict)

In [None]:
def get_neighbors(nx_graph, node):
    """Return one labelled row per edge between ``node`` and each of its neighbors.

    Args:
        nx_graph: NetworkX graph whose ``get_edge_data(node, neighbor)``
            returns a dict keyed by edge key.
        node: Node whose neighbors are listed.

    Returns:
        A list of rows ``[node label, edge label, neighbor label]``; each row
        is extended with the edge's ``'source_graph'`` and ``'timestamp'``
        attribute values (in that order) when those attributes are present.
        Nodes that have no entry in the module-level ``nodeLabels`` mapping
        are reported with an empty label, matching ``get_neighbors_filtered``.

    Raises:
        KeyError: If an edge key has no entry in ``nodeLabels``.
    """
    # The label of the source node is the same for every row, so look it up once.
    nodestr = nodeLabels.get(str(node), '')
    neighs_list = []
    for item in nx.neighbors(nx_graph, node):
        # Unlabelled neighbors get '' instead of aborting the whole listing.
        neighbor = nodeLabels.get(str(item), '')
        edge_result = nx_graph.get_edge_data(node, item)
        for edge, edgevalue in edge_result.items():
            edgestr = nodeLabels[str(edge)]
            result = [nodestr, edgestr, neighbor]
            # Optional provenance attributes are appended only when present.
            if 'source_graph' in edgevalue:
                result.append(edgevalue['source_graph'])
            if 'timestamp' in edgevalue:
                result.append(edgevalue['timestamp'])
            neighs_list.append(result)
    return neighs_list

In [None]:
# Edge keys accepted by get_neighbors_filtered. The grouping follows the variable names
# (inhibition / activation / interaction) -- TODO confirm each identifier against its ontology.
inhibit_preds = [
    obo.RO_0002449,
    obo.RO_0002212,
    obo.RO_0002599,
    obo.RO_0011010,
]
active_preds = [
    obo.RO_0002448,
    obo.RO_0002213,
    obo.RO_0011002,
    obo.RO_0002596,
    obo.RO_0002598,
    obo.RO_0011009,
    obo.RO_0002566,
]
interact_preds = [
    obo.DIDEO_00000041,
    obo.RO_0002434,
    obo.RO_0002436,
    obo.RO_0002020,
    obo.RO_0003000,
    obo.RO_0000085,
    URIRef('http://purl.obolibrary.org/obo/chebi#has_functional_parent'),
]

In [None]:
def get_neighbors_filtered(nx_graph, node):
    """Return labelled rows for the edges of ``node`` whose key is a selected predicate.

    Only edges whose key appears in the module-level ``inhibit_preds``,
    ``active_preds`` or ``interact_preds`` lists are reported.

    Args:
        nx_graph: NetworkX graph whose ``get_edge_data(node, neighbor)``
            returns a dict keyed by edge key.
        node: Node whose neighbors are listed.

    Returns:
        A list of rows ``[node label, edge label, neighbor label]``; each row
        is extended with the edge's ``'source_graph'`` and ``'timestamp'``
        attribute values (in that order) when those attributes are present.
        Nodes without an entry in the module-level ``nodeLabels`` mapping are
        reported with an empty label.

    Raises:
        KeyError: If a selected edge key has no entry in ``nodeLabels``.
    """
    # The label of the source node is the same for every row, so look it up once.
    nodestr = nodeLabels.get(str(node), '')
    neighs_list = []
    for item in nx.neighbors(nx_graph, node):
        # Resolve the label afresh for every neighbor so that an unlabelled neighbor
        # is reported as '' and never inherits the label of the previous neighbor.
        neighbor = nodeLabels.get(str(item), '')
        edge_result = nx_graph.get_edge_data(node, item)
        for edge, edgevalue in edge_result.items():
            if not (edge in inhibit_preds or edge in active_preds or edge in interact_preds):
                continue
            edgestr = nodeLabels[str(edge)]
            result = [nodestr, edgestr, neighbor]
            # Optional provenance attributes are appended only when present.
            if 'source_graph' in edgevalue:
                result.append(edgevalue['source_graph'])
            if 'timestamp' in edgevalue:
                result.append(edgevalue['timestamp'])
            neighs_list.append(result)
    return neighs_list

In [None]:
# List the edge keys between mitragynine and CYP3A4.
# get_edge_data returns None when the two nodes are not connected, so fall back to an
# empty dict and show [] instead of raising AttributeError.
list(nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['CYP3A4'][0]) or {})

## Kratom - Mechanistic Exploration Part 1

In [None]:
# Show the adjacency view (neighbors and their edge data) of the first kratom node.
nx_graph[obodict['kratom'][0]]

In [None]:
# Collect one labelled row per edge around the first mitragynine node and show how many there are.
mit_neighbors = get_neighbors(nx_graph, obodict['mitragynine'][0])
len(mit_neighbors)

In [None]:
# Same as above, restricted to edges whose key is in inhibit_preds, active_preds or interact_preds.
mit_neighbors_filt = get_neighbors_filtered(nx_graph, obodict['mitragynine'][0])
len(mit_neighbors_filt)

In [None]:
# For each CYP3A4 identifier, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['CYP3A4'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each CYP2D6 identifier, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['CYP2D6'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each CYP2C19 identifier, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['CYP2C19'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each CYP2C9 identifier, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['CYP2C9'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each CYP1A2 identifier, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['CYP1A2'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each of the three UGT identifiers, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1, 2):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['UGT'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each UGT1A1 identifier, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['UGT1A1'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each UGT2B7 identifier, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['UGT2B7'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each of the four PGP identifiers, print the label and attributes of every edge from mitragynine.
for target_idx in range(4):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['PGP'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each glycoprotein identifier, print the label and attributes of every edge from mitragynine.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['mitragynine'][0], obodict['glycoprotein'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# Display every labelled edge row around the first hydroxy_mitragynine identifier.
get_neighbors(nx_graph, obodict['hydroxy_mitragynine'][0])

In [None]:
# Display every labelled edge row around the second hydroxy_mitragynine identifier.
get_neighbors(nx_graph, obodict['hydroxy_mitragynine'][1])

In [None]:
# For each CYP2D6 identifier, print the label and attributes of every edge from the
# first hydroxy_mitragynine identifier.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['hydroxy_mitragynine'][0], obodict['CYP2D6'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each CYP2D6 identifier, print the label and attributes of every edge from the
# second hydroxy_mitragynine identifier.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['hydroxy_mitragynine'][1], obodict['CYP2D6'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

## Green Tea - Mechanistic Exploration Part 1

#### Green tea leaf

In [None]:
# Print the label and attributes of every edge from green tea to two fixed OBO nodes.
for target in (obo.CHEBI_17089, obo.PR_000037069):
    edges = nx_graph.get_edge_data(obodict['greentea'][0], target)
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each of the three UGT identifiers, print the label and attributes of every edge from green tea.
for target_idx in (0, 1, 2):
    edges = nx_graph.get_edge_data(obodict['greentea'][0], obodict['UGT'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# Print the label and attributes of every edge from green tea to the fourth PGP identifier.
edges = nx_graph.get_edge_data(obodict['greentea'][0], obodict['PGP'][3])
# get_edge_data yields None when there is no edge; treat that as "nothing to print".
for predicate, attrs in (edges or {}).items():
    print(nodeLabels[str(predicate)])
    print(attrs)

In [None]:
# Display the green tea edge rows whose key is in inhibit_preds, active_preds or interact_preds.
get_neighbors_filtered(nx_graph, obodict['greentea'][0])

#### EGCG

In [None]:
# Print the label and attributes of every edge for each (EGCG, CYP3A4) identifier pair.
# The target index varies slowest so the output order matches the unrolled lookups.
for target_idx in (0, 1):
    for source_idx in (0, 1):
        edges = nx_graph.get_edge_data(obodict['EGCG'][source_idx], obodict['CYP3A4'][target_idx])
        # get_edge_data yields None when there is no edge; treat that as "nothing to print".
        for predicate, attrs in (edges or {}).items():
            print(nodeLabels[str(predicate)])
            print(attrs)

In [None]:
# Print the label and attributes of every edge for each (EGCG, CYP1A1) identifier pair.
# The target index varies slowest so the output order matches the unrolled lookups.
for target_idx in (0, 1):
    for source_idx in (0, 1):
        edges = nx_graph.get_edge_data(obodict['EGCG'][source_idx], obodict['CYP1A1'][target_idx])
        # get_edge_data yields None when there is no edge; treat that as "nothing to print".
        for predicate, attrs in (edges or {}).items():
            print(nodeLabels[str(predicate)])
            print(attrs)

In [None]:
# Print the label and attributes of every edge for each (EGCG, CYP2E1) identifier pair.
# The target index varies slowest so the output order matches the unrolled lookups.
for target_idx in (0, 1):
    for source_idx in (0, 1):
        edges = nx_graph.get_edge_data(obodict['EGCG'][source_idx], obodict['CYP2E1'][target_idx])
        # get_edge_data yields None when there is no edge; treat that as "nothing to print".
        for predicate, attrs in (edges or {}).items():
            print(nodeLabels[str(predicate)])
            print(attrs)

In [None]:
# For each EGCG identifier, print the label and attributes of every edge to the first UGT identifier.
for source_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['EGCG'][source_idx], obodict['UGT'][0])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)


In [None]:
# Print the label and attributes of every edge for each (EGCG, UGT1A8) identifier pair.
# The target index varies slowest so the output order matches the unrolled lookups.
for target_idx in (0, 1):
    for source_idx in (0, 1):
        edges = nx_graph.get_edge_data(obodict['EGCG'][source_idx], obodict['UGT1A8'][target_idx])
        # get_edge_data yields None when there is no edge; treat that as "nothing to print".
        for predicate, attrs in (edges or {}).items():
            print(nodeLabels[str(predicate)])
            print(attrs)

In [None]:
# Print the label and attributes of every edge for each (EGCG, OATP1B3) identifier pair.
# The target index varies slowest so the output order matches the unrolled lookups.
for target_idx in (0, 1):
    for source_idx in (0, 1):
        edges = nx_graph.get_edge_data(obodict['EGCG'][source_idx], obodict['OATP1B3'][target_idx])
        # get_edge_data yields None when there is no edge; treat that as "nothing to print".
        for predicate, attrs in (edges or {}).items():
            print(nodeLabels[str(predicate)])
            print(attrs)

In [None]:

# Print the label and attributes of every edge from the second EGCG identifier to two fixed OBO nodes.
for target in (obo.CHEBI_17089, obo.PR_000037069):
    edges = nx_graph.get_edge_data(obodict['EGCG'][1], target)
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each of the three UGT identifiers, print the label and attributes of every edge
# from the second EGCG identifier.
for target_idx in (0, 1, 2):
    edges = nx_graph.get_edge_data(obodict['EGCG'][1], obodict['UGT'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each EGCG identifier, print the label and attributes of every edge to the fourth PGP identifier.
for source_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['EGCG'][source_idx], obodict['PGP'][3])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

#### ECG

In [None]:
# For each CYP2E1 identifier, print the label and attributes of every edge from ECG.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['ECG'][0], obodict['CYP2E1'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each UGT1A8 identifier, print the label and attributes of every edge from ECG.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['ECG'][0], obodict['UGT1A8'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# Print the label and attributes of every edge from ECG to the node obo.PR_P78381.
edges = nx_graph.get_edge_data(obodict['ECG'][0], obo.PR_P78381)
# get_edge_data yields None when there is no edge; treat that as "nothing to print".
for predicate, attrs in (edges or {}).items():
    print(nodeLabels[str(predicate)])
    print(attrs)

In [None]:
# For each of the four PGP identifiers, print the label and attributes of every edge from ECG.
for target_idx in range(4):
    edges = nx_graph.get_edge_data(obodict['ECG'][0], obodict['PGP'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# Display the ECG edge rows whose key is in inhibit_preds, active_preds or interact_preds.
get_neighbors_filtered(nx_graph, obodict['ECG'][0])

#### Epicatechin

In [None]:
# For each CYP1A1 identifier, print the label and attributes of every edge from epicatechin.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['epicatechin'][0], obodict['CYP1A1'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# For each UGT1A8 identifier, print the label and attributes of every edge from epicatechin.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['epicatechin'][0], obodict['UGT1A8'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

#### Epigallocatechin

In [None]:
# For the first two UGT identifiers, print the label and attributes of every edge from epigallocatechin.
for target_idx in (0, 1):
    edges = nx_graph.get_edge_data(obodict['epigallocatechin'][0], obodict['UGT'][target_idx])
    # get_edge_data yields None when there is no edge; treat that as "nothing to print".
    for predicate, attrs in (edges or {}).items():
        print(nodeLabels[str(predicate)])
        print(attrs)

In [None]:
# Display the epigallocatechin edge rows whose key is in inhibit_preds, active_preds or interact_preds.
get_neighbors_filtered(nx_graph, obodict['epigallocatechin'][0])

#### Catechin

In [None]:
# Print the label and attributes of every edge for each (catechin, UGT1A8) identifier pair.
# The catechin index varies slowest so the output order matches the unrolled lookups.
for source_idx in (0, 1):
    for target_idx in (0, 1):
        edges = nx_graph.get_edge_data(obodict['catechin'][source_idx], obodict['UGT1A8'][target_idx])
        # get_edge_data yields None when there is no edge; treat that as "nothing to print".
        for predicate, attrs in (edges or {}).items():
            print(nodeLabels[str(predicate)])
            print(attrs)

In [None]:
# Count the filtered edge rows around the first catechin identifier.
len(get_neighbors_filtered(nx_graph, obodict['catechin'][0]))

In [None]:
# Count the filtered edge rows around the second catechin identifier.
len(get_neighbors_filtered(nx_graph, obodict['catechin'][1]))

#### Gallocatechin

In [None]:
# Display every labelled edge row around the second gallocatechin identifier.
get_neighbors(nx_graph, obodict['gallocatechin'][1])

In [None]:
# Print the label and attributes of every edge for each (gallocatechin, UGT) identifier pair.
# Each pair is looked up exactly once. Looking up UGT[2] twice per gallocatechin identifier
# only printed duplicate output, so the three UGT identifiers are covered instead,
# mirroring the other UGT cells in this notebook.
for source_idx in (0, 1):
    for target_idx in (0, 1, 2):
        edges = nx_graph.get_edge_data(obodict['gallocatechin'][source_idx], obodict['UGT'][target_idx])
        # get_edge_data yields None when there is no edge; treat that as "nothing to print".
        for predicate, attrs in (edges or {}).items():
            print(nodeLabels[str(predicate)])
            print(attrs)