In [1]:
import os

import networkx as nx
import numpy as np
import pandas as pd
import sklearn
from sklearn.linear_model import LogisticRegression

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23.
# Prefer the vendored copy when it still exists (keeps the original behavior),
# otherwise fall back to the standalone joblib package.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib

from bionev.utils import load_embedding
from se_kge.find_relations import find_new_relations

# Predicting with model

In [2]:
# Load the node embeddings from ../resources (the file name suggests node2vec
# embeddings of the full graph). Later cells index the result by node-id string.
embedding_filepath = os.path.join(
    os.pardir,
    "resources",
    "240719_node2vec_fullgraph.embeddings",
)
embeddings_node2vec = load_embedding(embedding_filepath)

In [3]:
# Load the serialized prediction model from ../resources; later cells call
# predict_proba on it.
node2vec_model_filepath = os.path.join(
    os.pardir,
    "resources",
    "prediction_model_node2vec_final.pkl",
)
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
node2vec_model = joblib.load(node2vec_model_filepath)

# Examples of different kinds of predictions with literature evidence

## side effect - target association

In [4]:
# Literature evidence for this pair: PMID 18165622
node1 = np.array(embeddings_node2vec['8499'])  # EGFR
node2 = np.array(embeddings_node2vec['7702'])  # Papulopustular rash
# Feature vector for the pair = element-wise product of the two node embeddings.
x1 = np.multiply(node1, node2)
# The model is given a list holding one feature vector (a single row).
x = [x1.tolist()]
# Show column 1 of the class probabilities (presumably the "association exists"
# class -- confirm against node2vec_model.classes_).
node2vec_model.predict_proba(x)[:, 1]

array([0.93904571])

In [5]:
# Literature evidence for this pair: PMID 26626077
# NOTE(review): the original cell carried a bare "#9713" comment with no
# explanation -- possibly an alternative node id; confirm or remove.
node1 = np.array(embeddings_node2vec['9523'])  # Histamine receptor H1
node2 = np.array(embeddings_node2vec['394'])  # Drowsiness
# Feature vector for the pair = element-wise product of the two node embeddings.
x1 = np.multiply(node1, node2)
# The model is given a list holding one feature vector (a single row).
x = [x1.tolist()]
# Show column 1 of the class probabilities (presumably the "association exists"
# class -- confirm against node2vec_model.classes_).
node2vec_model.predict_proba(x)[:, 1]

array([0.96786309])

## drug - side effect association

In [6]:
# Literature evidence for this pair: PMID 29888057
node1 = np.array(embeddings_node2vec['4234'])  # diazepam
node2 = np.array(embeddings_node2vec['827'])  # Libido decreased
# Feature vector for the pair = element-wise product of the two node embeddings.
x1 = np.multiply(node1, node2)
# The model is given a list holding one feature vector (a single row).
x = [x1.tolist()]
# Show column 1 of the class probabilities (presumably the "association exists"
# class -- confirm against node2vec_model.classes_).
node2vec_model.predict_proba(x)[:, 1]

array([0.88557845])

In [7]:
# Literature evidence for this pair: PMID 23157436
node1 = np.array(embeddings_node2vec['10341'])  # Cytarabine
node2 = np.array(embeddings_node2vec['224'])  # Anaemia megaloblastic
# Feature vector for the pair = element-wise product of the two node embeddings.
x1 = np.multiply(node1, node2)
# The model is given a list holding one feature vector (a single row).
x = [x1.tolist()]
# Show column 1 of the class probabilities (presumably the "association exists"
# class -- confirm against node2vec_model.classes_).
node2vec_model.predict_proba(x)[:, 1]

array([0.87790016])

## drug-target association

In [8]:
# Literature evidence for this pair: PMID 29942259
node1 = np.array(embeddings_node2vec['12693'])  # Sertindole
node2 = np.array(embeddings_node2vec['9460'])  # CHRM1 receptor
# Feature vector for the pair = element-wise product of the two node embeddings.
x1 = np.multiply(node1, node2)
# The model is given a list holding one feature vector (a single row).
x = [x1.tolist()]
# Show column 1 of the class probabilities (presumably the "association exists"
# class -- confirm against node2vec_model.classes_).
node2vec_model.predict_proba(x)[:, 1]

array([0.97239379])

# Example of predicting relations using node2vec model and embeddings

In [9]:
# Read the graph from its edge-list file under ../resources, using
# nx.read_edgelist with its default arguments.
graph_filepath = os.path.join(
    os.pardir,
    "resources",
    "fullgraph.edgelist",
)
graph = nx.read_edgelist(graph_filepath)

In [10]:
# Load the node-mapping table that is passed to find_new_relations below.
node_mapping_filepath = os.path.join(
    os.pardir,
    "resources",
    "fullgraph_nodes_mapping.tsv",
)
# NOTE(review): the file has a .tsv extension but is parsed as comma-separated;
# confirm the delimiter matches the file on disk.
node_mapping = pd.read_csv(node_mapping_filepath, sep=',')

In [11]:
# Ask for the k=10 highest-probability 'phenotype' predictions for the entity
# with identifier '85', using the loaded embeddings, model, graph and mapping.
# The call is the cell's last expression, so its return value is displayed.
find_new_relations(
    entity_identifier='85',
    entity_type='phenotype',
    k=10,
    embeddings=embeddings_node2vec,
    saved_model=node2vec_model,
    graph=graph,
    node_mapping=node_mapping,
)

creating relations list: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13730/13730 [01:07<00:00, 203.51it/s]


The 10 highest phenotype predictions for {'node_id': 1, 'namespace': 'pubchem', 'identifier': '85', 'name': '(3-carboxy-2-hydroxypropyl)-trimethylazanium'}


{1: {'node_id': 732,
  'namespace': 'umls',
  'identifier': 'C0027796',
  'name': 'Neuralgia',
  'probability': 0.9999851353377665},
 2: {'node_id': 7689,
  'namespace': 'umls',
  'identifier': 'C0007020',
  'name': 'Carbon monoxide poisoning',
  'probability': 0.9999745568983085},
 3: {'node_id': 2333,
  'namespace': 'umls',
  'identifier': 'C0003873',
  'name': 'Rheumatoid arthritis',
  'probability': 0.9999717702747399},
 4: {'node_id': 8196,
  'namespace': 'umls',
  'identifier': 'C0524851',
  'name': 'Neurodegenerative disorder',
  'probability': 0.99995738772176},
 5: {'node_id': 7695,
  'namespace': 'umls',
  'identifier': 'C0025149',
  'name': 'Medulloblastoma',
  'probability': 0.9999252830537907},
 6: {'node_id': 642,
  'namespace': 'umls',
  'identifier': 'C0233407',
  'name': 'Disorientation',
  'probability': 0.9999161758764343},
 7: {'node_id': 3026,
  'namespace': 'umls',
  'identifier': 'C1619733',
  'name': 'Gout flare',
  'probability': 0.9999048399686196},
 8: {'node