In [1]:
import sklearn
from sklearn.externals import joblib
from bionev.utils import load_embedding
import numpy as np
from sklearn.linear_model import LogisticRegression
import pandas as pd
import networkx as nx
import os
from se_kge.find_relations import find_new_relations

# Predicting with model

In [2]:
# Path to the embeddings file inside the sibling ``resources`` directory
# (judging by the file name, node2vec embeddings of the full graph).
embedding_filepath = os.path.join(
    os.pardir,
    "resources",
    "240719_node2vec_fullgraph.embeddings",
)
# Read the embeddings with bionev's loader; later cells index the result by
# node-id string (e.g. embeddings_node2vec['8499']).
embeddings_node2vec = load_embedding(embedding_filepath)

In [3]:
# Path to the serialized prediction model inside the sibling ``resources`` directory.
node2vec_model_filepath = os.path.join(
    os.pardir,
    "resources",
    "prediction_model_node2vec_final.pkl",
)
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
node2vec_model = joblib.load(node2vec_model_filepath)

# Examples of different kinds of predictions with literature evidence

## side effect - target association

In [4]:
# Pair under test: EGFR (node '8499') and papulopustular rash (node '7702').
# Literature evidence: PMID 18165622
node1 = np.array(embeddings_node2vec['8499'])  # EGFR
node2 = np.array(embeddings_node2vec['7702'])  # Papulopustular rash

# The pair's feature vector is the element-wise product of both embeddings.
x1 = node1 * node2

# The model is given a list holding this single feature row.
x = [x1.tolist()]

# Column 1 of predict_proba -- presumably the probability of the positive
# (association exists) class; confirm against the model's classes_.
node2vec_model.predict_proba(x)[:, 1]

array([0.93904571])

In [5]:
# Pair under test: histamine receptor H1 (node '9523') and drowsiness (node '394').
# Literature evidence: PMID 26626077
# NOTE(review): the original cell carried a stray "#9713" comment -- possibly an
# alternative node id; it is not used by the code.
node1 = np.array(embeddings_node2vec['9523'])  # Histamine receptor H1
node2 = np.array(embeddings_node2vec['394'])  # Drowsiness

# The pair's feature vector is the element-wise product of both embeddings.
x1 = node1 * node2

# The model is given a list holding this single feature row.
x = [x1.tolist()]

# Column 1 of predict_proba -- presumably the probability of the positive
# (association exists) class; confirm against the model's classes_.
node2vec_model.predict_proba(x)[:, 1]

array([0.96786309])

## drug - side effect association

In [6]:
# Pair under test: diazepam (node '4234') and libido decreased (node '827').
# Literature evidence: PMID 29888057
node1 = np.array(embeddings_node2vec['4234'])  # diazepam
node2 = np.array(embeddings_node2vec['827'])  # Libido decreased

# The pair's feature vector is the element-wise product of both embeddings.
x1 = node1 * node2

# The model is given a list holding this single feature row.
x = [x1.tolist()]

# Column 1 of predict_proba -- presumably the probability of the positive
# (association exists) class; confirm against the model's classes_.
node2vec_model.predict_proba(x)[:, 1]

array([0.88557845])

In [7]:
# Pair under test: cytarabine (node '10341') and anaemia megaloblastic (node '224').
# Literature evidence: PMID 23157436
node1 = np.array(embeddings_node2vec['10341'])  # Cytarabine
node2 = np.array(embeddings_node2vec['224'])  # Anaemia megaloblastic

# The pair's feature vector is the element-wise product of both embeddings.
x1 = node1 * node2

# The model is given a list holding this single feature row.
x = [x1.tolist()]

# Column 1 of predict_proba -- presumably the probability of the positive
# (association exists) class; confirm against the model's classes_.
node2vec_model.predict_proba(x)[:, 1]

array([0.87790016])

## drug-target association

In [8]:
# Pair under test: sertindole (node '12693') and the CHRM1 receptor (node '9460').
# Literature evidence: PMID 29942259
node1 = np.array(embeddings_node2vec['12693'])  # Sertindole
node2 = np.array(embeddings_node2vec['9460'])  # CHRM1 receptor

# The pair's feature vector is the element-wise product of both embeddings.
x1 = node1 * node2

# The model is given a list holding this single feature row.
x = [x1.tolist()]

# Column 1 of predict_proba -- presumably the probability of the positive
# (association exists) class; confirm against the model's classes_.
node2vec_model.predict_proba(x)[:, 1]

array([0.97239379])

# Example of predicting relations using node2vec model and embeddings

In [9]:
# Path to the edge-list file of the graph inside the sibling ``resources`` directory.
graph_filepath = os.path.join(
    os.pardir,
    "resources",
    "fullgraph.edgelist",
)
# Parse the edge list into a networkx graph using read_edgelist's default settings.
graph = nx.read_edgelist(graph_filepath)

In [10]:
# Path to the node mapping table inside the sibling ``resources`` directory.
node_mapping_filepath = os.path.join(
    os.pardir,
    "resources",
    "fullgraph_nodes_mapping.tsv",
)
# NOTE(review): the file has a .tsv extension but is parsed as comma-separated;
# confirm the actual delimiter of the file before changing ``sep``.
node_mapping = pd.read_csv(
    node_mapping_filepath,
    sep=',',
)

In [11]:
# Ask for the k=30 top-scoring 'phenotype' predictions for entity '85',
# using the node2vec embeddings, the loaded model, the graph and the node
# mapping prepared in the cells above.
find_new_relations(
    # what to query
    entity='85',
    entity_type='phenotype',
    k=30,
    # resources loaded earlier in the notebook
    embeddings=embeddings_node2vec,
    saved_model=node2vec_model,
    graph=graph,
    node_mapping=node_mapping,
)

creating relations list: 100%|██████████████████████████████████████████████████| 13730/13730 [00:45<00:00, 301.37it/s]


The 30 highest phenotype predictions for 85


{'umls:Myasthenia': 0.9994092570284426,
 'umls:Fluid overload': 0.999370077459267,
 'umls:Parathyroid disorder': 0.99912533339172,
 'umls:Carnitine deficiency': 0.9987993824223084,
 'umls:Hypotension': 0.9987104681086778,
 'umls:Bronchoconstriction': 0.9986888252702472,
 'umls:Abdominal cramps': 0.998663108300242,
 'umls:Tachycardia': 0.9985530065574599,
 'umls:Bloated feeling': 0.998499177051145,
 'umls:Tearing eyes': 0.9984385825411238,
 'umls:Urinary incontinence': 0.9981212925863255,
 'umls:Micturition urgency': 0.9979719359673718,
 'umls:Sputum increased': 0.9979274910169542,
 'umls:Feeling cold': 0.997687225989656,
 'umls:Salivation': 0.9972371019595818,
 'umls:Inborn error of metabolism': 0.9970704256052745,
 'umls:Gastrointestinal symptom NOS': 0.9963205354167338,
 'umls:Salivary hypersecretion': 0.9963189190875407,
 'umls:Cycloplegia': 0.9961189119005688,
 'umls:Encephalitis': 0.9960874825190096,
 'umls:Airway obstruction NOS': 0.9957899351091318,
 'umls:Hypotonia': 0.99578068