## Notebook to search metapaths in the KG for clinical case studies.

### Case Studies (Natural Product-Drug Pairs)
1. Green tea-raloxifene
2. Green tea-nadolol
3. Kratom-midazolam
4. Kratom-quetiapine
5. Kratom-venlafaxine

## Meta Path Searches - with predicate restrictions

In [None]:
# # uncomment and run to install any required modules from np-kg/requirements.txt if any of the packages below 
# give import errors
# import sys
# !{sys.executable} -m pip install -r requirements.txt

In [None]:
import os
import os.path
import networkx as nx
import json
import urllib
import traceback
from itertools import islice
from rdflib import Graph, URIRef, BNode, Namespace, Literal
from rdflib.namespace import RDF, OWL
from tqdm import tqdm
import json

In [None]:
import hashlib

In [None]:
import pickle
import pandas as pd
import numpy as np
import KG_path_searches

In [None]:
KG_PATH = '../resources/knowledge_graphs/'
NodeLabelsFile = KG_PATH + 'nodeLabels_v1.0.pickle'
KG_NAME_MERGED = 'PheKnowLator_machine_read_merged_instance_based_OWLNETS_v1.0.gpickle'

In [None]:
with open(NodeLabelsFile, 'rb') as filep:
    nodeLabels = pickle.load(filep)

In [None]:
## READ MERGED GRAPH
# nx.read_gpickle was deprecated in NetworkX 2.6 and removed in 3.0; it was a
# thin wrapper around pickle.load, so the pickle is loaded directly here.
# NOTE: only unpickle graph files obtained from a trusted source.
with open(KG_PATH + KG_NAME_MERGED, 'rb') as graph_file:
    nx_graph = pickle.load(graph_file)

In [None]:
# output directory (created, together with any missing parents, if it does not exist)
DIR_OUT = '../output_files/'
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
os.makedirs(DIR_OUT, exist_ok=True)

In [None]:
obo = Namespace('http://purl.obolibrary.org/obo/')
napdi = Namespace('http://napdi.org/napdi_srs_imports:')

In [None]:
##read in pickle dictionary with OBO identifiers
with open(KG_PATH+'strToOBOdict.pickle','rb') as filep:
    obodict = pickle.load(filep)
len(obodict)

## Green Tea-Raloxifene

In [None]:
#list of green tea nodes
gtnodes = [
    obodict['greentea'][0],
    obodict['EGCG'][0],
    obodict['EGCG'][1],
    obodict['ECG'][0],
    obodict['epicatechin'][0],
    obodict['catechin'][0],
    obodict['catechin'][1],
    obodict['epigallocatechin'][0],
    obodict['gallocatechin'][0],
    obodict['gallocatechin'][1]
]
raloxifene = obo.CHEBI_8772

In [None]:
# RELATIONS OF INTEREST (predicates accepted on each hop of the metapath)
#   RO_0002610      correlated with
#   RO_0002434      interacts with
#   RO_0002213      positively regulates
#   RO_0002448      directly regulates activity of
#   RO_0002436      molecularly interacts with
#   RO_0011002      regulates activity of
#   RO_0002596      capable of regulating
#   RO_0002598      capable of positively regulating
#   RO_0002449      (no label in the original notes; presumably
#                   "directly negatively regulates activity of" - TODO confirm)
#   DIDEO_00000041  is substrate of
#   RO_0002020      transports
relations = [
    obo.RO_0002610,
    obo.RO_0002434,
    obo.RO_0002213,
    obo.RO_0002448,
    obo.RO_0002436,
    obo.RO_0011002,
    obo.RO_0002596,
    obo.RO_0002598,
    obo.RO_0002449,
    obo.DIDEO_00000041,
    obo.RO_0002020,
]

In [None]:
# get all neighbors of a node, one record per connecting edge
def get_neighbors_meta(nx_graph, node):
    """Return one record per edge between *node* and each of its neighbors.

    Args:
        nx_graph: graph exposing ``neighbors(node)`` and
            ``get_edge_data(u, v)``; the latter is expected to map each edge
            key to that edge's attribute dict (callers compare the edge key
            against predicate IRIs).
        node: node whose neighbors are enumerated.

    Returns:
        A list of ``[node, edge_key, neighbor]`` lists, each extended with the
        edge's ``'source_graph'`` and then ``'timestamp'`` attribute when the
        attribute is present.
    """
    neighs_list = []
    # nx_graph.neighbors(node) is what nx.neighbors(nx_graph, node) returns
    for neighbor in nx_graph.neighbors(node):
        for edge, edgevalue in nx_graph.get_edge_data(node, neighbor).items():
            # Raw identifiers are returned, so no label lookup is needed here;
            # an unlabeled predicate must not abort the whole traversal.
            result = [node, edge, neighbor]
            result.extend(
                edgevalue[field]
                for field in ('source_graph', 'timestamp')
                if field in edgevalue
            )
            neighs_list.append(result)
    return neighs_list

In [None]:
## for green tea and each green tea constituent node, collect every neighboring
## triple, keyed by the node's label (or its IRI when unlabeled); filtering by
## relation and object type is done in a later cell
gtneighs = {}
for node in gtnodes:
    nodestr = nodeLabels[str(node)] if str(node) in nodeLabels else str(node)
    # other cells in this notebook unwrap non-str label entries via ['label'];
    # do the same here so the label is always usable as a dict key
    if not isinstance(nodestr, str):
        nodestr = nodestr['label']
    neighbors = get_neighbors_meta(nx_graph, node)
    gtneighs.setdefault(nodestr, []).extend(neighbors)
len(gtneighs)

In [None]:
for key in gtneighs:
    print(key, len(gtneighs[key]))

In [None]:
## checking if any green tea node has a direct edge to raloxifene
for node in gtnodes:
    edge_res = nx_graph.get_edge_data(node, raloxifene)
    if edge_res:
        # fall back to the IRI when the node has no label entry
        print(nodeLabels[str(node)] if str(node) in nodeLabels else str(node))
        print(node)
        print(edge_res)

In [None]:
# keep, per green tea node, only the triples whose predicate is a relation of
# interest and whose object IRI carries the 'PR' prefix
gtneighs_filt = {
    label: [
        triple
        for triple in triples
        if triple[1] in relations
        and str(triple[2]).split('/')[-1].split('_')[0] == 'PR'
    ]
    for label, triples in gtneighs.items()
}

In [None]:
for key in gtneighs_filt:
    print(key, len(gtneighs_filt[key]))

In [None]:
with open(DIR_OUT+'greentea_metapath2.pickle', 'wb') as fileo:
    pickle.dump(gtneighs_filt, fileo)

In [None]:
prlist = []
for key in gtneighs_filt:
    for item in gtneighs_filt[key]:
        prlist.append(item[2])
len(prlist)

In [None]:
prots = list(set(prlist))

In [None]:
len(prots)

In [None]:
# resolve every protein IRI to a readable label; IRIs without a nodeLabels
# entry are printed and kept verbatim
protslabels = []
for prot in prots:
    iri = str(prot)
    if iri not in nodeLabels:
        print(prot)
        protslabels.append(iri)
        continue
    entry = nodeLabels[iri]
    protslabels.append(entry if isinstance(entry, str) else entry['label'])
len(protslabels)

In [None]:
for item in protslabels:
    if isinstance(item, dict):
        print(item)

In [None]:
with open(DIR_OUT+'greentea_protein_labels_with_inhibits.txt', 'w') as file2:
    for label in protslabels:
        file2.write(label)
        file2.write('\n')

In [None]:
## for each protein reached from green tea, collect every neighboring triple,
## keyed by the protein's label (or its IRI when unlabeled)
protneighs = {}
for node in prots:
    iri = str(node)
    if iri not in nodeLabels:
        nodestr = iri
    else:
        entry = nodeLabels[iri]
        nodestr = entry if isinstance(entry, str) else entry['label']
    triples = get_neighbors_meta(nx_graph, node)
    protneighs.setdefault(nodestr, []).extend(triples)
len(protneighs)

In [None]:
# labels of proteins that have a relation-of-interest edge to raloxifene
# (a label appears once per matching edge)
prots_filt = [
    label
    for label, triples in protneighs.items()
    for triple in triples
    if triple[1] in relations and triple[2] == raloxifene
]
len(prots_filt)

In [None]:
prots_filt

In [None]:
def get_neighbors(nx_graph, node):
    """Return label-based edge records for every neighbor of *node*.

    Each record is ``[node_label, edge_label, neighbor_label]`` (all looked up
    in ``nodeLabels``), followed by the edge's ``'source_graph'``,
    ``'timestamp'`` and ``'pmid'`` attributes, in that order, when present.
    A missing ``nodeLabels`` entry raises ``KeyError``.
    """
    optional_fields = ('source_graph', 'timestamp', 'pmid')
    records = []
    for neighbor in nx.neighbors(nx_graph, node):
        source_label = nodeLabels[str(node)]
        neighbor_label = nodeLabels[str(neighbor)]
        for edge_key, attrs in nx_graph.get_edge_data(node, neighbor).items():
            record = [source_label, nodeLabels[str(edge_key)], neighbor_label]
            record += [attrs[field] for field in optional_fields if field in attrs]
            records.append(record)
    return records

In [None]:
len(prots)

In [None]:
raloneighs = get_neighbors_meta(nx_graph, raloxifene)
# objects raloxifene reaches through any relation of interest
relevant = [triple[2] for triple in raloneighs if triple[1] in relations]
# raloprots1: those with a 'PR' IRI prefix; raloprots2: those also in prots
raloprots1 = [obj for obj in relevant if str(obj).split('/')[-1].split('_')[0] == 'PR']
raloprots2 = [obj for obj in relevant if obj in prots]
print(len(raloprots1), len(raloprots2))

In [None]:
###prots1 = raloxifene - predicate - PR
##prots2 = raloxifene - predicate - PR, gt - predicate - PR
for item in raloprots1:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(raloxifene, item))

In [None]:
for item in raloprots2:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(raloxifene, item))

In [None]:
# print every edge from a green tea node to a protein in raloprots1
for node in gtnodes:
    for prot in raloprots1:
        edge_res = nx_graph.get_edge_data(node, prot)
        if not edge_res:
            continue
        print(nodeLabels[str(node)], nodeLabels[str(prot)])
        print(edge_res)
In [None]:
# print every edge from a green tea node to a protein in raloprots2
for node in gtnodes:
    for prot in raloprots2:
        edge_res = nx_graph.get_edge_data(node, prot)
        if not edge_res:
            continue
        print(nodeLabels[str(node)], nodeLabels[str(prot)])
        print(edge_res)

## UGT searches

In [None]:
##get interacting drugs for UGT
obodict['UGT']

In [None]:
get_neighbors(nx_graph, obodict['UGT'][1])

In [None]:
len(get_neighbors(nx_graph, obodict['UGT'][2]))

In [None]:
# objects linked from the UGT node at index 2 by any relation of interest
# (the raloxifene variable names are reused here)
raloneighs = get_neighbors_meta(nx_graph, obodict['UGT'][2])
raloprots1 = [triple[2] for triple in raloneighs if triple[1] in relations]
raloprots1

In [None]:
for item in raloprots1:
    print(nodeLabels[str(item)])

In [None]:
# print predicate and object labels for every neighbor with a 'CHEBI' IRI prefix
for triple in raloneighs:
    pred, obj = triple[1], triple[2]
    if str(obj).split('/')[-1].split('_')[0] != 'CHEBI':
        continue
    print(nodeLabels[str(pred)])
    print(nodeLabels[str(obj)])

## Green tea-nadolol

In [None]:
nadolol = obo.CHEBI_7444
nodeLabels[str(nadolol)]

In [None]:
##Metapath - 
#Green tea | Green tea constituent - 
#interacts with | associated with | molecularly interacts with - 
#Enzyme or Transporter - 
#interacts with | associated with | molecularly interacts with | - 
#Nadolol

In [None]:
#list of green tea nodes
gtnodes = [
    obodict['greentea'][0],
    obodict['EGCG'][0],
    obodict['EGCG'][1],
    obodict['ECG'][0],
    obodict['epicatechin'][0],
    obodict['catechin'][0],
    obodict['catechin'][1],
    obodict['epigallocatechin'][0],
    obodict['gallocatechin'][0],
    obodict['gallocatechin'][1]
]

In [None]:
# RELATIONS OF INTEREST (predicates accepted on each hop of the metapath)
#   RO_0002610      correlated with
#   RO_0002434      interacts with
#   RO_0002213      positively regulates
#   RO_0002448      directly regulates activity of
#   RO_0002436      molecularly interacts with
#   RO_0011002      regulates activity of
#   RO_0002596      capable of regulating
#   RO_0002598      capable of positively regulating
#   RO_0002449      (no label in the original notes; presumably
#                   "directly negatively regulates activity of" - TODO confirm)
#   DIDEO_00000041  is substrate of
#   RO_0002020      transports
relations = [
    obo.RO_0002610,
    obo.RO_0002434,
    obo.RO_0002213,
    obo.RO_0002448,
    obo.RO_0002436,
    obo.RO_0011002,
    obo.RO_0002596,
    obo.RO_0002598,
    obo.RO_0002449,
    obo.DIDEO_00000041,
    obo.RO_0002020,
]

In [None]:
# get all neighbors of a node, one record per connecting edge
def get_neighbors_meta(nx_graph, node):
    """Return one record per edge between *node* and each of its neighbors.

    Args:
        nx_graph: graph exposing ``neighbors(node)`` and
            ``get_edge_data(u, v)``; the latter is expected to map each edge
            key to that edge's attribute dict (callers compare the edge key
            against predicate IRIs).
        node: node whose neighbors are enumerated.

    Returns:
        A list of ``[node, edge_key, neighbor]`` lists, each extended with the
        edge's ``'source_graph'`` and then ``'timestamp'`` attribute when the
        attribute is present.
    """
    neighs_list = []
    # nx_graph.neighbors(node) is what nx.neighbors(nx_graph, node) returns
    for neighbor in nx_graph.neighbors(node):
        for edge, edgevalue in nx_graph.get_edge_data(node, neighbor).items():
            # Raw identifiers are returned, so no label lookup is needed here;
            # an unlabeled predicate must not abort the whole traversal.
            result = [node, edge, neighbor]
            result.extend(
                edgevalue[field]
                for field in ('source_graph', 'timestamp')
                if field in edgevalue
            )
            neighs_list.append(result)
    return neighs_list

In [None]:
## for green tea and each green tea constituent node, collect every neighboring
## triple, keyed by the node's label (or its IRI when unlabeled); filtering by
## relation and object type is done in a later cell
gtneighs = {}
for node in gtnodes:
    nodestr = nodeLabels[str(node)] if str(node) in nodeLabels else str(node)
    # other cells in this notebook unwrap non-str label entries via ['label'];
    # do the same here so the label is always usable as a dict key
    if not isinstance(nodestr, str):
        nodestr = nodestr['label']
    neighbors = get_neighbors_meta(nx_graph, node)
    gtneighs.setdefault(nodestr, []).extend(neighbors)
len(gtneighs)

In [None]:
for key in gtneighs:
    print(key, len(gtneighs[key]))

In [None]:
##checking if direct edge to nadolol
for node in gtnodes:
    edge_res = nx_graph.get_edge_data(node, nadolol)
    if edge_res:
        print(node)
        print(edge_res)

In [None]:
# keep, per green tea node, only the triples whose predicate is a relation of
# interest and whose object IRI carries the 'PR' prefix
gtneighs_filt = {
    label: [
        triple
        for triple in triples
        if triple[1] in relations
        and str(triple[2]).split('/')[-1].split('_')[0] == 'PR'
    ]
    for label, triples in gtneighs.items()
}

In [None]:
for key in gtneighs_filt:
    print(key, len(gtneighs_filt[key]))

In [None]:
with open(DIR_OUT+'greentea_metapath2.pickle', 'wb') as fileo:
    pickle.dump(gtneighs_filt, fileo)

In [None]:
prlist = []
for key in gtneighs_filt:
    for item in gtneighs_filt[key]:
        prlist.append(item[2])
len(prlist)

In [None]:
prots = list(set(prlist))

In [None]:
len(prots)

In [None]:
# resolve every protein IRI to a readable label; IRIs without a nodeLabels
# entry are printed and kept verbatim
protslabels = []
for prot in prots:
    iri = str(prot)
    if iri not in nodeLabels:
        print(prot)
        protslabels.append(iri)
        continue
    entry = nodeLabels[iri]
    protslabels.append(entry if isinstance(entry, str) else entry['label'])
len(protslabels)

In [None]:
for item in protslabels:
    if isinstance(item, dict):
        print(item)

In [None]:
with open(DIR_OUT+'greentea_protein_labels_with_inhibits.txt', 'w') as file2:
    for label in protslabels:
        file2.write(label)
        file2.write('\n')

In [None]:
## for each protein reached from green tea, collect every neighboring triple,
## keyed by the protein's label (or its IRI when unlabeled)
protneighs = {}
for node in prots:
    iri = str(node)
    if iri not in nodeLabels:
        nodestr = iri
    else:
        entry = nodeLabels[iri]
        nodestr = entry if isinstance(entry, str) else entry['label']
    triples = get_neighbors_meta(nx_graph, node)
    protneighs.setdefault(nodestr, []).extend(triples)
len(protneighs)

In [None]:
# labels of proteins that have a relation-of-interest edge to nadolol
# (a label appears once per matching edge)
prots_filt = [
    label
    for label, triples in protneighs.items()
    for triple in triples
    if triple[1] in relations and triple[2] == nadolol
]
len(prots_filt)

In [None]:
prots_filt

In [None]:
def get_neighbors(nx_graph, node):
    """Return label-based edge records for every neighbor of *node*.

    Each record is ``[node_label, edge_label, neighbor_label]`` (all looked up
    in ``nodeLabels``), followed by the edge's ``'source_graph'``,
    ``'timestamp'`` and ``'pmid'`` attributes, in that order, when present.
    A missing ``nodeLabels`` entry raises ``KeyError``.
    """
    optional_fields = ('source_graph', 'timestamp', 'pmid')
    records = []
    for neighbor in nx.neighbors(nx_graph, node):
        source_label = nodeLabels[str(node)]
        neighbor_label = nodeLabels[str(neighbor)]
        for edge_key, attrs in nx_graph.get_edge_data(node, neighbor).items():
            record = [source_label, nodeLabels[str(edge_key)], neighbor_label]
            record += [attrs[field] for field in optional_fields if field in attrs]
            records.append(record)
    return records

In [None]:
len(prots)

In [None]:
nadoneighs = get_neighbors_meta(nx_graph, nadolol)
# objects nadolol reaches through any relation of interest
relevant = [triple[2] for triple in nadoneighs if triple[1] in relations]
# nadoprots1: those with a 'PR' IRI prefix; nadoprots2: those also in prots
nadoprots1 = [obj for obj in relevant if str(obj).split('/')[-1].split('_')[0] == 'PR']
nadoprots2 = [obj for obj in relevant if obj in prots]
print(len(nadoprots1), len(nadoprots2))

In [None]:
# write one "IRI<TAB>label" line per protein in nadoprots2
with open(DIR_OUT+'gt-pred-pr-nadolol-pred-pr-protein_list.txt', 'w') as fileo:
    for item in nadoprots2:
        # start from the IRI so an unlabeled protein never inherits the label
        # left over from a previous iteration (or an earlier cell)
        nodestr = str(item)
        if nodestr in nodeLabels:
            entry = nodeLabels[nodestr]
            nodestr = entry if isinstance(entry, str) else entry['label']
        fileo.write(str(item)+'\t'+nodestr+'\n')

In [None]:
# write one "IRI<TAB>label" line per protein in nadoprots1
with open(DIR_OUT+'nadolol-pred-pr-protein_list.txt', 'w') as fileo2:
    for item in nadoprots1:
        # start from the IRI so an unlabeled protein never inherits the label
        # left over from a previous iteration (or an earlier cell)
        nodestr = str(item)
        if nodestr in nodeLabels:
            entry = nodeLabels[nodestr]
            nodestr = entry if isinstance(entry, str) else entry['label']
        fileo2.write(str(item)+'\t'+nodestr+'\n')

In [None]:
nadoneighs[0]

In [None]:
## Nadolol-green tea direct edges (relation of interest, object is a green tea node)
nadoneighs = get_neighbors_meta(nx_graph, nadolol)
direct = [
    triple
    for triple in nadoneighs
    if triple[1] in relations and triple[2] in gtnodes
]
len(direct)

In [None]:
direct

## Kratom-midazolam

In [None]:
midazolam = obo.CHEBI_6931
nodeLabels[str(midazolam)]

In [None]:
obodict.keys()

In [None]:
# list of kratom nodes: the obodict entries for kratom, mitragynine and
# hydroxy_mitragynine
ktnodes = [
    obodict['kratom'][0],
    obodict['mitragynine'][0],
    obodict['hydroxy_mitragynine'][0],
    obodict['hydroxy_mitragynine'][1]
]

In [None]:
# RELATIONS OF INTEREST (predicates accepted on each hop of the metapath)
#   RO_0002610      correlated with
#   RO_0002434      interacts with
#   RO_0002213      positively regulates
#   RO_0002448      directly regulates activity of
#   RO_0002436      molecularly interacts with
#   RO_0011002      regulates activity of
#   RO_0002596      capable of regulating
#   RO_0002598      capable of positively regulating
#   RO_0002449      (no label in the original notes; presumably
#                   "directly negatively regulates activity of" - TODO confirm)
#   DIDEO_00000041  is substrate of
#   RO_0002020      transports
# NOTE(review): the original notes for this section listed only the first eight
# relations although the list also holds the last three - confirm intent.
relations = [
    obo.RO_0002610,
    obo.RO_0002434,
    obo.RO_0002213,
    obo.RO_0002448,
    obo.RO_0002436,
    obo.RO_0011002,
    obo.RO_0002596,
    obo.RO_0002598,
    obo.RO_0002449,
    obo.DIDEO_00000041,
    obo.RO_0002020,
]

In [None]:
# get all neighbors of a node, one record per connecting edge
def get_neighbors_meta(nx_graph, node):
    """Return one record per edge between *node* and each of its neighbors.

    Args:
        nx_graph: graph exposing ``neighbors(node)`` and
            ``get_edge_data(u, v)``; the latter is expected to map each edge
            key to that edge's attribute dict (callers compare the edge key
            against predicate IRIs).
        node: node whose neighbors are enumerated.

    Returns:
        A list of ``[node, edge_key, neighbor]`` lists, each extended with the
        edge's ``'source_graph'`` and then ``'timestamp'`` attribute when the
        attribute is present.
    """
    neighs_list = []
    # nx_graph.neighbors(node) is what nx.neighbors(nx_graph, node) returns
    for neighbor in nx_graph.neighbors(node):
        for edge, edgevalue in nx_graph.get_edge_data(node, neighbor).items():
            # Raw identifiers are returned, so no label lookup is needed here;
            # an unlabeled predicate must not abort the whole traversal.
            result = [node, edge, neighbor]
            result.extend(
                edgevalue[field]
                for field in ('source_graph', 'timestamp')
                if field in edgevalue
            )
            neighs_list.append(result)
    return neighs_list

In [None]:
## for kratom and each kratom constituent node, collect every neighboring
## triple, keyed by the node's label (or its IRI when unlabeled)
ktneighs = {}
for node in ktnodes:
    iri = str(node)
    if iri not in nodeLabels:
        nodestr = iri
    else:
        entry = nodeLabels[iri]
        nodestr = entry if isinstance(entry, str) else entry['label']
    triples = get_neighbors_meta(nx_graph, node)
    ktneighs.setdefault(nodestr, []).extend(triples)
len(ktneighs)

In [None]:
for key in ktneighs:
    print(key, len(ktneighs[key]))

In [None]:
##checking if direct edge to midazolam
for node in ktnodes:
    edge_res = nx_graph.get_edge_data(node, midazolam)
    if edge_res:
        print(node)
        print(edge_res)

In [None]:
# keep, per kratom node, only the triples whose predicate is a relation of
# interest and whose object IRI carries the 'PR' prefix
ktneighs_filt = {
    label: [
        triple
        for triple in triples
        if triple[1] in relations
        and str(triple[2]).split('/')[-1].split('_')[0] == 'PR'
    ]
    for label, triples in ktneighs.items()
}

In [None]:
for key in ktneighs_filt:
    print(key, len(ktneighs_filt[key]))

In [None]:
with open(DIR_OUT+'kratom_metapath1.pickle', 'wb') as fileo:
    pickle.dump(ktneighs_filt, fileo)

In [None]:
prlist = []
for key in ktneighs_filt:
    for item in ktneighs_filt[key]:
        prlist.append(item[2])
len(prlist)

In [None]:
prots = list(set(prlist))

In [None]:
len(prots)

In [None]:
# resolve every protein IRI to a readable label; IRIs without a nodeLabels
# entry are printed and kept verbatim
protslabels = []
for prot in prots:
    iri = str(prot)
    if iri not in nodeLabels:
        print(prot)
        protslabels.append(iri)
        continue
    entry = nodeLabels[iri]
    protslabels.append(entry if isinstance(entry, str) else entry['label'])
len(protslabels)

In [None]:
for item in protslabels:
    if isinstance(item, dict):
        print(item)

In [None]:
with open(DIR_OUT+'kratom_protein_labels.txt', 'w') as file2:
    for label in protslabels:
        file2.write(label)
        file2.write('\n')

In [None]:
## for each protein reached from kratom, collect every neighboring triple,
## keyed by the protein's label (or its IRI when unlabeled)
protneighs = {}
for node in prots:
    iri = str(node)
    if iri not in nodeLabels:
        nodestr = iri
    else:
        entry = nodeLabels[iri]
        nodestr = entry if isinstance(entry, str) else entry['label']
    triples = get_neighbors_meta(nx_graph, node)
    protneighs.setdefault(nodestr, []).extend(triples)
len(protneighs)

In [None]:
# labels of proteins that have a relation-of-interest edge to midazolam
# (a label appears once per matching edge)
prots_filt = [
    label
    for label, triples in protneighs.items()
    for triple in triples
    if triple[1] in relations and triple[2] == midazolam
]
len(prots_filt)

In [None]:
prots_filt

In [None]:
def get_neighbors(nx_graph, node):
    """Return label-based edge records for every neighbor of *node*.

    Each record is ``[node_label, edge_label, neighbor_label]`` (all looked up
    in ``nodeLabels``), followed by the edge's ``'source_graph'``,
    ``'timestamp'`` and ``'pmid'`` attributes, in that order, when present.
    A missing ``nodeLabels`` entry raises ``KeyError``.
    """
    optional_fields = ('source_graph', 'timestamp', 'pmid')
    records = []
    for neighbor in nx.neighbors(nx_graph, node):
        source_label = nodeLabels[str(node)]
        neighbor_label = nodeLabels[str(neighbor)]
        for edge_key, attrs in nx_graph.get_edge_data(node, neighbor).items():
            record = [source_label, nodeLabels[str(edge_key)], neighbor_label]
            record += [attrs[field] for field in optional_fields if field in attrs]
            records.append(record)
    return records

In [None]:
len(prots)

In [None]:
neighs = get_neighbors_meta(nx_graph, midazolam)
# prots3: every object midazolam reaches through a relation of interest
prots3 = [triple[2] for triple in neighs if triple[1] in relations]
# prots1: the prots3 objects whose IRI carries the 'PR' prefix
prots1 = [obj for obj in prots3 if str(obj).split('/')[-1].split('_')[0] == 'PR']
# prots2: the prots3 objects that are also in prots (reached from kratom)
prots2 = [obj for obj in prots3 if obj in prots]
print(len(prots1), len(prots2), len(prots3))

In [None]:
##prots1 = midazolam - predicate - PR
##prots2 = midazolam - predicate - PR, kratom - predicate - PR
##prots3 = midazolam - predicate - object

In [None]:
for item in prots1:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(midazolam, item))

In [None]:
for item in prots2:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(midazolam, item))

In [None]:
for node in ktnodes:
    for item in prots1:
        edge_res = nx_graph.get_edge_data(node, item)
        if edge_res:
            print(nodeLabels[str(node)], nodeLabels[str(item)])
            print(edge_res)

In [None]:
for node in ktnodes:
    for item in prots2:
        edge_res = nx_graph.get_edge_data(node, item)
        if edge_res:
            print(nodeLabels[str(node)], nodeLabels[str(item)])
            print(edge_res)

In [None]:
## Midazolam-kratom direct edges (relation of interest, object is a kratom node)
midazoneighs = get_neighbors_meta(nx_graph, midazolam)
direct = [
    triple
    for triple in midazoneighs
    if triple[1] in relations and triple[2] in ktnodes
]
len(direct)

In [None]:
direct

In [None]:
bupr = obo.CHEBI_3216
nodeLabels[str(bupr)]

In [None]:
# list of kratom nodes: the obodict entries for kratom, mitragynine and
# hydroxy_mitragynine
ktnodes = [
    obodict['kratom'][0],
    obodict['mitragynine'][0],
    obodict['hydroxy_mitragynine'][0],
    obodict['hydroxy_mitragynine'][1]
]

In [None]:
# RELATIONS OF INTEREST (predicates accepted on each hop of the metapath)
#   RO_0002610  correlated with
#   RO_0002434  interacts with
#   RO_0002213  positively regulates
#   RO_0002448  directly regulates activity of
#   RO_0002436  molecularly interacts with
#   RO_0011002  regulates activity of
#   RO_0002596  capable of regulating
#   RO_0002598  capable of positively regulating
#   RO_0002449  (no label in the original notes; presumably
#               "directly negatively regulates activity of" - TODO confirm)
relations = [
    obo.RO_0002610,
    obo.RO_0002434,
    obo.RO_0002213,
    obo.RO_0002448,
    obo.RO_0002436,
    obo.RO_0011002,
    obo.RO_0002596,
    obo.RO_0002598,
    obo.RO_0002449,
]

In [None]:
# get all neighbors of a node, one record per connecting edge
def get_neighbors_meta(nx_graph, node):
    """Return one record per edge between *node* and each of its neighbors.

    Args:
        nx_graph: graph exposing ``neighbors(node)`` and
            ``get_edge_data(u, v)``; the latter is expected to map each edge
            key to that edge's attribute dict (callers compare the edge key
            against predicate IRIs).
        node: node whose neighbors are enumerated.

    Returns:
        A list of ``[node, edge_key, neighbor]`` lists, each extended with the
        edge's ``'source_graph'`` and then ``'timestamp'`` attribute when the
        attribute is present.
    """
    neighs_list = []
    # nx_graph.neighbors(node) is what nx.neighbors(nx_graph, node) returns
    for neighbor in nx_graph.neighbors(node):
        for edge, edgevalue in nx_graph.get_edge_data(node, neighbor).items():
            # Raw identifiers are returned, so no label lookup is needed here;
            # an unlabeled predicate must not abort the whole traversal.
            result = [node, edge, neighbor]
            result.extend(
                edgevalue[field]
                for field in ('source_graph', 'timestamp')
                if field in edgevalue
            )
            neighs_list.append(result)
    return neighs_list

In [None]:
# for kratom and each kratom constituent node, collect every neighboring
# triple, keyed by the node's label (or its IRI when unlabeled)
ktneighs = {}
for node in ktnodes:
    iri = str(node)
    if iri not in nodeLabels:
        nodestr = iri
    else:
        entry = nodeLabels[iri]
        nodestr = entry if isinstance(entry, str) else entry['label']
    triples = get_neighbors_meta(nx_graph, node)
    ktneighs.setdefault(nodestr, []).extend(triples)
len(ktneighs)

In [None]:
for key in ktneighs:
    print(key, len(ktneighs[key]))

In [None]:
## checking if any kratom node has a direct edge to bupr
for node in ktnodes:
    edge_res = nx_graph.get_edge_data(node, bupr)
    if not edge_res:
        continue
    print(node)
    print(edge_res)

In [None]:
# keep, per kratom node, only the triples whose predicate is a relation of
# interest and whose object IRI carries the 'PR' prefix
ktneighs_filt = {
    label: [
        triple
        for triple in triples
        if triple[1] in relations
        and str(triple[2]).split('/')[-1].split('_')[0] == 'PR'
    ]
    for label, triples in ktneighs.items()
}

In [None]:
for key in ktneighs_filt:
    print(key, len(ktneighs_filt[key]))

In [None]:
with open(DIR_OUT+'kratom_metapath1.pickle', 'wb') as fileo:
    pickle.dump(ktneighs_filt, fileo)

In [None]:
prlist = []
for key in ktneighs_filt:
    for item in ktneighs_filt[key]:
        prlist.append(item[2])
len(prlist)

In [None]:
prots = list(set(prlist))

In [None]:
len(prots)

In [None]:
# resolve every protein IRI to a readable label; IRIs without a nodeLabels
# entry are printed and kept verbatim
protslabels = []
for prot in prots:
    iri = str(prot)
    if iri not in nodeLabels:
        print(prot)
        protslabels.append(iri)
        continue
    entry = nodeLabels[iri]
    protslabels.append(entry if isinstance(entry, str) else entry['label'])
len(protslabels)

In [None]:
for item in protslabels:
    if isinstance(item, dict):
        print(item)

In [None]:
with open(DIR_OUT+'kratom_protein_labels.txt', 'w') as file2:
    for label in protslabels:
        file2.write(label)
        file2.write('\n')

In [None]:
## for each protein reached from kratom, collect every neighboring triple,
## keyed by the protein's label (or its IRI when unlabeled)
protneighs = {}
for node in prots:
    iri = str(node)
    if iri not in nodeLabels:
        nodestr = iri
    else:
        entry = nodeLabels[iri]
        nodestr = entry if isinstance(entry, str) else entry['label']
    triples = get_neighbors_meta(nx_graph, node)
    protneighs.setdefault(nodestr, []).extend(triples)
len(protneighs)

In [None]:
# labels of proteins that have a relation-of-interest edge to bupr
# (a label appears once per matching edge)
prots_filt = [
    label
    for label, triples in protneighs.items()
    for triple in triples
    if triple[1] in relations and triple[2] == bupr
]
len(prots_filt)

In [None]:
prots_filt

In [None]:
def get_neighbors(nx_graph, node):
    """Return label-based edge records for every neighbor of *node*.

    Each record is ``[node_label, edge_label, neighbor_label]`` (all looked up
    in ``nodeLabels``), followed by the edge's ``'source_graph'``,
    ``'timestamp'`` and ``'pmid'`` attributes, in that order, when present.
    A missing ``nodeLabels`` entry raises ``KeyError``.
    """
    optional_fields = ('source_graph', 'timestamp', 'pmid')
    records = []
    for neighbor in nx.neighbors(nx_graph, node):
        source_label = nodeLabels[str(node)]
        neighbor_label = nodeLabels[str(neighbor)]
        for edge_key, attrs in nx_graph.get_edge_data(node, neighbor).items():
            record = [source_label, nodeLabels[str(edge_key)], neighbor_label]
            record += [attrs[field] for field in optional_fields if field in attrs]
            records.append(record)
    return records

In [None]:
len(prots)

In [None]:
neighs = get_neighbors_meta(nx_graph, bupr)
# prots3: every object bupr reaches through a relation of interest
prots3 = [triple[2] for triple in neighs if triple[1] in relations]
# prots1: the prots3 objects whose IRI carries the 'PR' prefix
prots1 = [obj for obj in prots3 if str(obj).split('/')[-1].split('_')[0] == 'PR']
# prots2: the prots3 objects that are also in prots (reached from kratom)
prots2 = [obj for obj in prots3 if obj in prots]
print(len(prots1), len(prots2), len(prots3))

In [None]:
##prots1 = bupr - predicate - PR
##prots2 = bupr - predicate - PR, kratom - predicate - PR
##prots3 = bupr - predicate - object

In [None]:
for item in prots1:
    print(nodeLabels[str(item)])

In [None]:
for item in prots2:
    print(nodeLabels[str(item)])

In [None]:
for item in prots3:
    print(nodeLabels[str(item)])

In [None]:
# bupr-kratom direct edges (relation of interest, object is a kratom node)
buprneighs = get_neighbors_meta(nx_graph, bupr)
direct = [
    triple
    for triple in buprneighs
    if triple[1] in relations and triple[2] in ktnodes
]
len(direct)

In [None]:
direct

In [None]:
# NOTE(review): CHEBI_3216 is the same identifier assigned to `bupr` earlier in
# this notebook, whereas `midazolam` was previously bound to CHEBI_6931.
# Rebinding the name here changes what every later `midazolam` reference points
# at - confirm which drug this section is meant to analyse.
midazolam = obo.CHEBI_3216
nodeLabels[str(midazolam)]

In [None]:
# list of kratom nodes: the obodict entries for kratom, mitragynine and
# hydroxy_mitragynine
ktnodes = [
    obodict['kratom'][0],
    obodict['mitragynine'][0],
    obodict['hydroxy_mitragynine'][0],
    obodict['hydroxy_mitragynine'][1]
]

In [None]:
# RELATIONS OF INTEREST (predicates accepted on each hop of the metapath)
#   RO_0002610  correlated with
#   RO_0002434  interacts with
#   RO_0002213  positively regulates
#   RO_0002448  directly regulates activity of
#   RO_0002436  molecularly interacts with
#   RO_0011002  regulates activity of
#   RO_0002596  capable of regulating
#   RO_0002598  capable of positively regulating
#   RO_0002449  (no label in the original notes; presumably
#               "directly negatively regulates activity of" - TODO confirm)
relations = [
    obo.RO_0002610,
    obo.RO_0002434,
    obo.RO_0002213,
    obo.RO_0002448,
    obo.RO_0002436,
    obo.RO_0011002,
    obo.RO_0002596,
    obo.RO_0002598,
    obo.RO_0002449,
]

In [None]:
# get all neighbors of a node, one record per connecting edge
def get_neighbors_meta(nx_graph, node):
    """Return one record per edge between *node* and each of its neighbors.

    Args:
        nx_graph: graph exposing ``neighbors(node)`` and
            ``get_edge_data(u, v)``; the latter is expected to map each edge
            key to that edge's attribute dict (callers compare the edge key
            against predicate IRIs).
        node: node whose neighbors are enumerated.

    Returns:
        A list of ``[node, edge_key, neighbor]`` lists, each extended with the
        edge's ``'source_graph'`` and then ``'timestamp'`` attribute when the
        attribute is present.
    """
    neighs_list = []
    # nx_graph.neighbors(node) is what nx.neighbors(nx_graph, node) returns
    for neighbor in nx_graph.neighbors(node):
        for edge, edgevalue in nx_graph.get_edge_data(node, neighbor).items():
            # Raw identifiers are returned, so no label lookup is needed here;
            # an unlabeled predicate must not abort the whole traversal.
            result = [node, edge, neighbor]
            result.extend(
                edgevalue[field]
                for field in ('source_graph', 'timestamp')
                if field in edgevalue
            )
            neighs_list.append(result)
    return neighs_list

In [None]:
## for kratom and each kratom constituent node, collect every neighboring
## triple, keyed by the node's label (or its IRI when unlabeled); filtering by
## relation and object type is done in a later cell
ktneighs = {}
for node in ktnodes:
    nodestr = nodeLabels[str(node)] if str(node) in nodeLabels else str(node)
    # other cells in this notebook unwrap non-str label entries via ['label'];
    # do the same here so the label is always usable as a dict key
    if not isinstance(nodestr, str):
        nodestr = nodestr['label']
    neighbors = get_neighbors_meta(nx_graph, node)
    ktneighs.setdefault(nodestr, []).extend(neighbors)
len(ktneighs)

In [None]:
for key in ktneighs:
    print(key, len(ktneighs[key]))

In [None]:
##checking if direct edge to midazolam
for node in ktnodes:
    edge_res = nx_graph.get_edge_data(node, midazolam)
    if edge_res:
        print(node)
        print(edge_res)

In [None]:
# Per kratom node label, keep only the triples whose predicate is one of
# `relations` and whose object identifier (last '/'-segment, text before the
# first '_') is 'PR', i.e. the objects treated as proteins in this notebook.
ktneighs_filt = {
    label: [
        triple for triple in triples
        if triple[1] in relations
        and str(triple[2]).split('/')[-1].split('_')[0] == 'PR'
    ]
    for label, triples in ktneighs.items()
}

In [None]:
for key in ktneighs_filt:
    print(key, len(ktneighs_filt[key]))

In [None]:
with open(DIR_OUT+'kratom_metapath1.pickle', 'wb') as fileo:
    pickle.dump(ktneighs_filt, fileo)

In [None]:
prlist = []
for key in ktneighs_filt:
    for item in ktneighs_filt[key]:
        prlist.append(item[2])
len(prlist)

In [None]:
prots = list(set(prlist))

In [None]:
len(prots)

In [None]:
# Readable label for every protein node. Nodes with no entry in nodeLabels are
# printed and represented by their string form instead.
protslabels = []
for prot_node in prots:
    node_key = str(prot_node)
    if node_key not in nodeLabels:
        print(prot_node)
        protslabels.append(node_key)
        continue
    entry = nodeLabels[node_key]
    # an entry is either the label string itself or a mapping with a 'label' key
    protslabels.append(entry if isinstance(entry, str) else entry['label'])
len(protslabels)

In [None]:
for item in protslabels:
    if isinstance(item, dict):
        print(item)

In [None]:
with open(DIR_OUT+'kratom_protein_labels.txt', 'w') as file2:
    for label in protslabels:
        file2.write(label)
        file2.write('\n')

In [None]:
## for each protein linked to the kratom nodes (prots), get all of its
## neighboring triples, grouped by protein label
protneighs = {}
for node in prots:
    if str(node) in nodeLabels:
        if isinstance(nodeLabels[str(node)], str):
            nodestr = nodeLabels[str(node)]
        else:
            nodestr = nodeLabels[str(node)]['label']
    else:
        nodestr = str(node)
    neighbors = get_neighbors_meta(nx_graph, node)
    if nodestr in protneighs:
        protneighs[nodestr].extend(neighbors)
    else:
        protneighs[nodestr] = neighbors
len(protneighs)

In [None]:
#collect the labels of proteins that have an edge with a relation of interest to midazolam
prots_filt = []
for key in protneighs:
    for item in protneighs[key]:
        pred = item[1]
        obj = item[2]
        if pred in relations and obj == midazolam:
            prots_filt.append(key)
len(prots_filt)

In [None]:
prots_filt

In [None]:
def get_neighbors(nx_graph, node):
    """List the edges around ``node`` as label-based rows.

    Every node and edge key is translated through the notebook-level
    ``nodeLabels`` mapping, so a missing label raises ``KeyError``.

    Returns:
        list of ``[node label, edge label, neighbor label]`` rows, each
        followed by the edge's ``source_graph``, ``timestamp`` and ``pmid``
        values (in that order) for whichever of them the edge carries.
    """
    optional_fields = ('source_graph', 'timestamp', 'pmid')
    rows = []
    for adjacent in nx.neighbors(nx_graph, node):
        # labels are resolved once per neighbor, source node first
        source_label = nodeLabels[str(node)]
        adjacent_label = nodeLabels[str(adjacent)]
        parallel_edges = nx_graph.get_edge_data(node, adjacent)
        for edge_key in list(parallel_edges):
            row = [source_label, nodeLabels[str(edge_key)], adjacent_label]
            attrs = parallel_edges[edge_key]
            row.extend(attrs[field] for field in optional_fields if field in attrs)
            rows.append(row)
    return rows

In [None]:
len(prots)

In [None]:
# Objects reached from midazolam through a relation of interest:
#   prots3 - every such object
#   prots1 - those whose identifier prefix is 'PR'
#   prots2 - those that are also among the kratom-linked proteins (prots)
neighs = get_neighbors_meta(nx_graph, midazolam)
prots3 = [triple[2] for triple in neighs if triple[1] in relations]
prots1 = [
    target for target in prots3
    if str(target).split('/')[-1].split('_')[0] == 'PR'
]
prots2 = [target for target in prots3 if target in prots]
print(len(prots1), len(prots2), len(prots3))

In [None]:
##prots1 = midazolam - predicate - PR
##prots2 = midazolam - predicate - PR, kratom - predicate - PR
##prots3 = midazolam - predicate - object

In [None]:
for item in prots1:
    print(nodeLabels[str(item)])

In [None]:
for item in prots2:
    print(nodeLabels[str(item)])

In [None]:
for item in prots3:
    print(nodeLabels[str(item)])

In [None]:
##Midazolam-kratom direct edges
midazoneighs = get_neighbors_meta(nx_graph, midazolam)
direct = []
for item in midazoneighs:
    pred = item[1]
    obj = item[2]
    if pred in relations and obj in ktnodes:
        direct.append(item)
len(direct)

In [None]:
direct

In [None]:
naloxone = obo.CHEBI_7459
nodeLabels[str(naloxone)]

In [None]:
#list of kratom nodes (kratom and its constituents)
ktnodes = [
    obodict['kratom'][0],
    obodict['mitragynine'][0],
    obodict['hydroxy_mitragynine'][0],
    obodict['hydroxy_mitragynine'][1]
]

In [None]:
# RELATIONS OF INTEREST
# Predicates a metapath edge is allowed to use. The label next to each entry
# comes from the notebook's original note, which paired every label with the
# numeric part of its RO identifier.
relations = [
    obo.RO_0002610,  # correlated with
    obo.RO_0002434,  # interacts with
    obo.RO_0002213,  # positively regulates
    obo.RO_0002448,  # directly regulates activity of
    obo.RO_0002436,  # molecularly interacts with
    obo.RO_0011002,  # regulates activity of
    obo.RO_0002596,  # capable of regulating
    obo.RO_0002598,  # capable of positively regulating
    # NOTE(review): not described in the original note; presumably
    # "directly negatively regulates activity of" -- confirm against RO.
    obo.RO_0002449,
]

In [None]:
#get all neighbors of a node
def get_neighbors_meta(nx_graph, node):
    """Return one raw triple per edge between ``node`` and each of its neighbors.

    Args:
        nx_graph: graph exposing ``neighbors(n)`` and a ``get_edge_data(u, v)``
            that returns a mapping of edge key -> attribute dict (the callers
            in this notebook treat the edge key as the predicate).
        node: node whose neighborhood is listed.

    Returns:
        list of lists ``[node, edge_key, neighbor]``, each followed by the
        edge's ``source_graph`` value and then its ``timestamp`` value when
        the edge carries those attributes.
    """
    neighs_list = []
    # nx_graph.neighbors(node) is what nx.neighbors(nx_graph, node) delegates to.
    for neighbor in nx_graph.neighbors(node):
        edges = nx_graph.get_edge_data(node, neighbor)
        for edge_key, attrs in edges.items():
            # No label lookup here: the previous, unused nodeLabels[str(edge)]
            # lookup only made the call fail for edge keys without a label.
            triple = [node, edge_key, neighbor]
            for field in ('source_graph', 'timestamp'):
                if field in attrs:
                    triple.append(attrs[field])
            neighs_list.append(triple)
    return neighs_list

In [None]:
# Group the raw neighbor triples of every kratom node under that node's label.
ktneighs = {}
for node in ktnodes:
    node_key = str(node)
    if node_key in nodeLabels:
        entry = nodeLabels[node_key]
        # an entry is either the label string or a mapping holding it under 'label'
        nodestr = entry if isinstance(entry, str) else entry['label']
    else:
        # unlabeled node: fall back to its string form
        nodestr = node_key
    neighbors = get_neighbors_meta(nx_graph, node)
    if nodestr not in ktneighs:
        ktneighs[nodestr] = neighbors
    else:
        # another node already used this label: merge the triples
        ktneighs[nodestr].extend(neighbors)
len(ktneighs)

In [None]:
for key in ktneighs:
    print(key, len(ktneighs[key]))

In [None]:
##checking if direct edge to naloxone
for node in ktnodes:
    edge_res = nx_graph.get_edge_data(node, naloxone)
    if edge_res:
        print(node)
        print(edge_res)

In [None]:
#filter neighboring triples to only relations of interest and proteins as objects
ktneighs_filt = {}
for key in ktneighs:
    ktneighs_filt[key] = []
    for item in ktneighs[key]:
        pred = item[1]
        obj_id = str(item[2]).split('/')[-1].split('_')[0]
        if pred in relations and obj_id == 'PR':
            ktneighs_filt[key].append(item)

In [None]:
for key in ktneighs_filt:
    print(key, len(ktneighs_filt[key]))

In [None]:
with open(DIR_OUT+'kratom_metapath1.pickle', 'wb') as fileo:
    pickle.dump(ktneighs_filt, fileo)

In [None]:
prlist = []
for key in ktneighs_filt:
    for item in ktneighs_filt[key]:
        prlist.append(item[2])
len(prlist)

In [None]:
prots = list(set(prlist))

In [None]:
len(prots)

In [None]:
protslabels = []
for item in prots:
    if str(item) in nodeLabels:
        if isinstance(nodeLabels[str(item)], str):
            protslabels.append(nodeLabels[str(item)])
        else:
            protslabels.append(nodeLabels[str(item)]['label'])
    else:
        print(item)
        protslabels.append(str(item))
len(protslabels)

In [None]:
for item in protslabels:
    if isinstance(item, dict):
        print(item)

In [None]:
with open(DIR_OUT+'kratom_protein_labels.txt', 'w') as file2:
    for label in protslabels:
        file2.write(label)
        file2.write('\n')

In [None]:
## for each protein linked to the kratom nodes (prots), get all of its
## neighboring triples, grouped by protein label
protneighs = {}
for node in prots:
    if str(node) in nodeLabels:
        if isinstance(nodeLabels[str(node)], str):
            nodestr = nodeLabels[str(node)]
        else:
            nodestr = nodeLabels[str(node)]['label']
    else:
        nodestr = str(node)
    neighbors = get_neighbors_meta(nx_graph, node)
    if nodestr in protneighs:
        protneighs[nodestr].extend(neighbors)
    else:
        protneighs[nodestr] = neighbors
len(protneighs)

In [None]:
#collect the labels of proteins that have an edge with a relation of interest to naloxone
prots_filt = []
for key in protneighs:
    for item in protneighs[key]:
        pred = item[1]
        obj = item[2]
        if pred in relations and obj == naloxone:
            prots_filt.append(key)
len(prots_filt)

In [None]:
prots_filt

In [None]:
def get_neighbors(nx_graph, node):
    """List the edges around ``node`` as label-based rows.

    Every node and edge key is translated through the notebook-level
    ``nodeLabels`` mapping, so a missing label raises ``KeyError``.

    Returns:
        list of ``[node label, edge label, neighbor label]`` rows, each
        followed by the edge's ``source_graph``, ``timestamp`` and ``pmid``
        values (in that order) for whichever of them the edge carries.
    """
    optional_fields = ('source_graph', 'timestamp', 'pmid')
    rows = []
    for adjacent in nx.neighbors(nx_graph, node):
        # labels are resolved once per neighbor, source node first
        source_label = nodeLabels[str(node)]
        adjacent_label = nodeLabels[str(adjacent)]
        parallel_edges = nx_graph.get_edge_data(node, adjacent)
        for edge_key in list(parallel_edges):
            row = [source_label, nodeLabels[str(edge_key)], adjacent_label]
            attrs = parallel_edges[edge_key]
            row.extend(attrs[field] for field in optional_fields if field in attrs)
            rows.append(row)
    return rows

In [None]:
len(prots)

In [None]:
neighs = get_neighbors_meta(nx_graph, naloxone)
prots1 = []
prots2 = []
prots3 = []
for item in neighs:
    pred = item[1]
    obj = item[2]
    obj_id = str(obj).split('/')[-1].split('_')[0]
    
    if pred in relations and obj_id == 'PR':
        prots1.append(obj)
    if pred in relations and obj in prots:
        prots2.append(obj)
    if pred in relations:
        prots3.append(obj)
print(len(prots1), len(prots2), len(prots3))

In [None]:
##prots1 = naloxone - predicate - PR
##prots2 = naloxone - predicate - PR, kratom - predicate - PR
##prots3 = naloxone - predicate - object

In [None]:
for item in prots1:
    print(nodeLabels[str(item)])

In [None]:
for item in prots2:
    print(nodeLabels[str(item)])

In [None]:
for item in prots3:
    print(nodeLabels[str(item)])

In [None]:
naloneighs = get_neighbors_meta(nx_graph, naloxone)
direct = []
for item in naloneighs:
    pred = item[1]
    obj = item[2]
    if pred in relations and obj in ktnodes:
        direct.append(item)
len(direct)

In [None]:
direct

## Kratom-quetiapine

In [None]:
quet = obo.CHEBI_8707
nodeLabels[str(quet)]

In [None]:
#list of kratom nodes (kratom and its constituents)
ktnodes = [
    obodict['kratom'][0],
    obodict['mitragynine'][0],
    obodict['hydroxy_mitragynine'][0],
    obodict['hydroxy_mitragynine'][1]
]

In [None]:
# RELATIONS OF INTEREST
# Predicates a metapath edge is allowed to use. The label next to each entry
# comes from the notebook's original note, which paired every label with the
# numeric part of its RO identifier.
relations = [
    obo.RO_0002610,  # correlated with
    obo.RO_0002434,  # interacts with
    obo.RO_0002213,  # positively regulates
    obo.RO_0002448,  # directly regulates activity of
    obo.RO_0002436,  # molecularly interacts with
    obo.RO_0011002,  # regulates activity of
    obo.RO_0002596,  # capable of regulating
    obo.RO_0002598,  # capable of positively regulating
    # NOTE(review): the three predicates below were not described in the
    # original note -- confirm their labels against RO / DIDEO.
    obo.RO_0002449,  # presumably "directly negatively regulates activity of"
    obo.DIDEO_00000041,
    obo.RO_0002020,
]

In [None]:
#get all neighbors of a node
def get_neighbors_meta(nx_graph, node):
    """Return one raw triple per edge between ``node`` and each of its neighbors.

    Args:
        nx_graph: graph exposing ``neighbors(n)`` and a ``get_edge_data(u, v)``
            that returns a mapping of edge key -> attribute dict (the callers
            in this notebook treat the edge key as the predicate).
        node: node whose neighborhood is listed.

    Returns:
        list of lists ``[node, edge_key, neighbor]``, each followed by the
        edge's ``source_graph`` value and then its ``timestamp`` value when
        the edge carries those attributes.
    """
    neighs_list = []
    # nx_graph.neighbors(node) is what nx.neighbors(nx_graph, node) delegates to.
    for neighbor in nx_graph.neighbors(node):
        edges = nx_graph.get_edge_data(node, neighbor)
        for edge_key, attrs in edges.items():
            # No label lookup here: the previous, unused nodeLabels[str(edge)]
            # lookup only made the call fail for edge keys without a label.
            triple = [node, edge_key, neighbor]
            for field in ('source_graph', 'timestamp'):
                if field in attrs:
                    triple.append(attrs[field])
            neighs_list.append(triple)
    return neighs_list

In [None]:
ktneighs = {}
for node in ktnodes:
    if str(node) in nodeLabels:
        if isinstance(nodeLabels[str(node)], str):
            nodestr = nodeLabels[str(node)]
        else:
            nodestr = nodeLabels[str(node)]['label']
    else:
        nodestr = str(node)
    neighbors = get_neighbors_meta(nx_graph, node)
    if nodestr in ktneighs:
        ktneighs[nodestr].extend(neighbors)
    else:
        ktneighs[nodestr] = neighbors
len(ktneighs)

In [None]:
for key in ktneighs:
    print(key, len(ktneighs[key]))

In [None]:
##checking if direct edge
for node in ktnodes:
    edge_res = nx_graph.get_edge_data(node, quet)
    if edge_res:
        print(node)
        print(edge_res)

In [None]:
for node in ktnodes:
    edge_res = nx_graph.get_edge_data(quet, node)
    if edge_res:
        print(node)
        print(edge_res)

In [None]:
#filter neighboring triples to only relations of interest and proteins as objects
ktneighs_filt = {}
for key in ktneighs:
    ktneighs_filt[key] = []
    for item in ktneighs[key]:
        pred = item[1]
        obj_id = str(item[2]).split('/')[-1].split('_')[0]
        if pred in relations and obj_id == 'PR':
            ktneighs_filt[key].append(item)

In [None]:
for key in ktneighs_filt:
    print(key, len(ktneighs_filt[key]))

In [None]:
for item in ktneighs_filt['Mitragynine']:
    pred = item[1]
    obj = item[2]
    print(nodeLabels[str(pred)], nodeLabels[str(obj)])

In [None]:
with open(DIR_OUT+'kratom_metapath1.pickle', 'wb') as fileo:
    pickle.dump(ktneighs_filt, fileo)

In [None]:
prlist = []
for key in ktneighs_filt:
    for item in ktneighs_filt[key]:
        prlist.append(item[2])
len(prlist)

In [None]:
prots = list(set(prlist))

In [None]:
len(prots)

In [None]:
protslabels = []
for item in prots:
    if str(item) in nodeLabels:
        if isinstance(nodeLabels[str(item)], str):
            protslabels.append(nodeLabels[str(item)])
        else:
            protslabels.append(nodeLabels[str(item)]['label'])
    else:
        print(item)
        protslabels.append(str(item))
len(protslabels)

In [None]:
for item in protslabels:
    if isinstance(item, dict):
        print(item)

In [None]:
with open(DIR_OUT+'kratom_protein_labels.txt', 'w') as file2:
    for label in protslabels:
        file2.write(label)
        file2.write('\n')

In [None]:
## for each protein linked to the kratom nodes (prots), get all of its
## neighboring triples, grouped by protein label
protneighs = {}
for node in prots:
    if str(node) in nodeLabels:
        if isinstance(nodeLabels[str(node)], str):
            nodestr = nodeLabels[str(node)]
        else:
            nodestr = nodeLabels[str(node)]['label']
    else:
        nodestr = str(node)
    neighbors = get_neighbors_meta(nx_graph, node)
    if nodestr in protneighs:
        protneighs[nodestr].extend(neighbors)
    else:
        protneighs[nodestr] = neighbors
len(protneighs)

In [None]:
#collect the labels of proteins that have an edge with a relation of interest to quetiapine
prots_filt = []
for key in protneighs:
    for item in protneighs[key]:
        pred = item[1]
        obj = item[2]
        if pred in relations and obj == quet:
            prots_filt.append(key)
len(prots_filt)

In [None]:
prots_filt

In [None]:
def get_neighbors(nx_graph, node):
    """List the edges around ``node`` as label-based rows.

    Every node and edge key is translated through the notebook-level
    ``nodeLabels`` mapping, so a missing label raises ``KeyError``.

    Returns:
        list of ``[node label, edge label, neighbor label]`` rows, each
        followed by the edge's ``source_graph``, ``timestamp`` and ``pmid``
        values (in that order) for whichever of them the edge carries.
    """
    optional_fields = ('source_graph', 'timestamp', 'pmid')
    rows = []
    for adjacent in nx.neighbors(nx_graph, node):
        # labels are resolved once per neighbor, source node first
        source_label = nodeLabels[str(node)]
        adjacent_label = nodeLabels[str(adjacent)]
        parallel_edges = nx_graph.get_edge_data(node, adjacent)
        for edge_key in list(parallel_edges):
            row = [source_label, nodeLabels[str(edge_key)], adjacent_label]
            attrs = parallel_edges[edge_key]
            row.extend(attrs[field] for field in optional_fields if field in attrs)
            rows.append(row)
    return rows

In [None]:
len(prots)

In [None]:
neighs = get_neighbors_meta(nx_graph, quet)
prots1 = []
prots2 = []
prots3 = []
for item in neighs:
    pred = item[1]
    obj = item[2]
    obj_id = str(obj).split('/')[-1].split('_')[0]
    
    if pred in relations and obj_id == 'PR':
        prots1.append(obj)
    if pred in relations and obj in prots:
        prots2.append(obj)
    if pred in relations:
        prots3.append(obj)
print(len(prots1), len(prots2), len(prots3))

In [None]:
##prots1 = quetiapine - predicate - PR
##prots2 = quetiapine - predicate - PR, kratom - predicate - PR
##prots3 = quetiapine - predicate - object

In [None]:
for item in prots1:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(quet, item))

In [None]:
for item in prots2:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(quet, item))

In [None]:
for item in prots3:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(quet, item))

In [None]:
quetneigh = get_neighbors_meta(nx_graph, quet)
direct = []
for item in quetneigh:
    pred = item[1]
    obj = item[2]
    if pred in relations and obj in ktnodes:
        direct.append(item)
len(direct)

In [None]:
direct

## Kratom-venlafaxine

In [None]:
venla = obo.CHEBI_9943
nodeLabels[str(venla)]

In [None]:
#list of kratom nodes (kratom and its constituents)
ktnodes = [
    obodict['kratom'][0],
    obodict['mitragynine'][0],
    obodict['hydroxy_mitragynine'][0],
    obodict['hydroxy_mitragynine'][1]
]

In [None]:
# RELATIONS OF INTEREST
# Predicates a metapath edge is allowed to use. The label next to each entry
# comes from the notebook's original note, which paired every label with the
# numeric part of its RO identifier.
relations = [
    obo.RO_0002610,  # correlated with
    obo.RO_0002434,  # interacts with
    obo.RO_0002213,  # positively regulates
    obo.RO_0002448,  # directly regulates activity of
    obo.RO_0002436,  # molecularly interacts with
    obo.RO_0011002,  # regulates activity of
    obo.RO_0002596,  # capable of regulating
    obo.RO_0002598,  # capable of positively regulating
    # NOTE(review): the three predicates below were not described in the
    # original note -- confirm their labels against RO / DIDEO.
    obo.RO_0002449,  # presumably "directly negatively regulates activity of"
    obo.DIDEO_00000041,
    obo.RO_0002020,
]

In [None]:
#get all neighbors of a node
def get_neighbors_meta(nx_graph, node):
    """Return one raw triple per edge between ``node`` and each of its neighbors.

    Args:
        nx_graph: graph exposing ``neighbors(n)`` and a ``get_edge_data(u, v)``
            that returns a mapping of edge key -> attribute dict (the callers
            in this notebook treat the edge key as the predicate).
        node: node whose neighborhood is listed.

    Returns:
        list of lists ``[node, edge_key, neighbor]``, each followed by the
        edge's ``source_graph`` value and then its ``timestamp`` value when
        the edge carries those attributes.
    """
    neighs_list = []
    # nx_graph.neighbors(node) is what nx.neighbors(nx_graph, node) delegates to.
    for neighbor in nx_graph.neighbors(node):
        edges = nx_graph.get_edge_data(node, neighbor)
        for edge_key, attrs in edges.items():
            # No label lookup here: the previous, unused nodeLabels[str(edge)]
            # lookup only made the call fail for edge keys without a label.
            triple = [node, edge_key, neighbor]
            for field in ('source_graph', 'timestamp'):
                if field in attrs:
                    triple.append(attrs[field])
            neighs_list.append(triple)
    return neighs_list

In [None]:
ktneighs = {}
for node in ktnodes:
    if str(node) in nodeLabels:
        if isinstance(nodeLabels[str(node)], str):
            nodestr = nodeLabels[str(node)]
        else:
            nodestr = nodeLabels[str(node)]['label']
    else:
        nodestr = str(node)
    neighbors = get_neighbors_meta(nx_graph, node)
    if nodestr in ktneighs:
        ktneighs[nodestr].extend(neighbors)
    else:
        ktneighs[nodestr] = neighbors
len(ktneighs)

In [None]:
for key in ktneighs:
    print(key, len(ktneighs[key]))

In [None]:
##checking if direct edge
for node in ktnodes:
    edge_res = nx_graph.get_edge_data(node, venla)
    if edge_res:
        print(node)
        print(edge_res)

In [None]:
# Reverse direction of the direct-edge check: venlafaxine -> kratom node.
# The previous version queried get_edge_data(quet, venla) on every iteration,
# a leftover from the quetiapine section that never looked at `node`.
for node in ktnodes:
    edge_res = nx_graph.get_edge_data(venla, node)
    if edge_res:
        print(node)
        print(edge_res)

In [None]:
#filter neighboring triples to only relations of interest and proteins as objects
ktneighs_filt = {}
for key in ktneighs:
    ktneighs_filt[key] = []
    for item in ktneighs[key]:
        pred = item[1]
        obj_id = str(item[2]).split('/')[-1].split('_')[0]
        if pred in relations and obj_id == 'PR':
            ktneighs_filt[key].append(item)

In [None]:
for key in ktneighs_filt:
    print(key, len(ktneighs_filt[key]))

In [None]:
for item in ktneighs_filt['Mitragynine']:
    pred = item[1]
    obj = item[2]
    print(nodeLabels[str(pred)], nodeLabels[str(obj)])

In [None]:
with open(DIR_OUT+'kratom_metapath1.pickle', 'wb') as fileo:
    pickle.dump(ktneighs_filt, fileo)

In [None]:
prlist = []
for key in ktneighs_filt:
    for item in ktneighs_filt[key]:
        prlist.append(item[2])
len(prlist)

In [None]:
prots = list(set(prlist))

In [None]:
len(prots)

In [None]:
protslabels = []
for item in prots:
    if str(item) in nodeLabels:
        if isinstance(nodeLabels[str(item)], str):
            protslabels.append(nodeLabels[str(item)])
        else:
            protslabels.append(nodeLabels[str(item)]['label'])
    else:
        print(item)
        protslabels.append(str(item))
len(protslabels)

In [None]:
for item in protslabels:
    if isinstance(item, dict):
        print(item)

In [None]:
with open(DIR_OUT+'kratom_protein_labels.txt', 'w') as file2:
    for label in protslabels:
        file2.write(label)
        file2.write('\n')

In [None]:
## for each protein linked to the kratom nodes (prots), get all of its
## neighboring triples, grouped by protein label
protneighs = {}
for node in prots:
    if str(node) in nodeLabels:
        if isinstance(nodeLabels[str(node)], str):
            nodestr = nodeLabels[str(node)]
        else:
            nodestr = nodeLabels[str(node)]['label']
    else:
        nodestr = str(node)
    neighbors = get_neighbors_meta(nx_graph, node)
    if nodestr in protneighs:
        protneighs[nodestr].extend(neighbors)
    else:
        protneighs[nodestr] = neighbors
len(protneighs)

In [None]:
#collect the labels of proteins that have an edge with a relation of interest to venlafaxine
prots_filt = []
for key in protneighs:
    for item in protneighs[key]:
        pred = item[1]
        obj = item[2]
        if pred in relations and obj == venla:
            prots_filt.append(key)
len(prots_filt)

In [None]:
prots_filt

In [None]:
def get_neighbors(nx_graph, node):
    """List the edges around ``node`` as label-based rows.

    Every node and edge key is translated through the notebook-level
    ``nodeLabels`` mapping, so a missing label raises ``KeyError``.

    Returns:
        list of ``[node label, edge label, neighbor label]`` rows, each
        followed by the edge's ``source_graph``, ``timestamp`` and ``pmid``
        values (in that order) for whichever of them the edge carries.
    """
    optional_fields = ('source_graph', 'timestamp', 'pmid')
    rows = []
    for adjacent in nx.neighbors(nx_graph, node):
        # labels are resolved once per neighbor, source node first
        source_label = nodeLabels[str(node)]
        adjacent_label = nodeLabels[str(adjacent)]
        parallel_edges = nx_graph.get_edge_data(node, adjacent)
        for edge_key in list(parallel_edges):
            row = [source_label, nodeLabels[str(edge_key)], adjacent_label]
            attrs = parallel_edges[edge_key]
            row.extend(attrs[field] for field in optional_fields if field in attrs)
            rows.append(row)
    return rows

In [None]:
len(prots)

In [None]:
neighs = get_neighbors_meta(nx_graph, venla)
prots1 = []
prots2 = []
prots3 = []
for item in neighs:
    pred = item[1]
    obj = item[2]
    obj_id = str(obj).split('/')[-1].split('_')[0]
    
    if pred in relations and obj_id == 'PR':
        prots1.append(obj)
    if pred in relations and obj in prots:
        prots2.append(obj)
    if pred in relations:
        prots3.append(obj)
print(len(prots1), len(prots2), len(prots3))

In [None]:
##prots1 = venlafaxine - predicate - PR
##prots2 = venlafaxine - predicate - PR, kratom - predicate - PR
##prots3 = venlafaxine - predicate - object

In [None]:
for item in prots1:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(venla, item))

In [None]:
for item in prots2:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(venla, item))

In [None]:
for item in prots3:
    print(nodeLabels[str(item)])
    print(nx_graph.get_edge_data(venla, item))

In [None]:
venlaneigh = get_neighbors_meta(nx_graph, venla)
direct = []
for item in venlaneigh:
    pred = item[1]
    obj = item[2]
    if pred in relations and obj in ktnodes:
        direct.append(item)
len(direct)

In [None]:
direct

In [None]:
prot