# Using the Prediction Model

## Environment

In [1]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from se_kge.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_MODEL_PATH,
)

In [2]:
# Record the Python interpreter version used for this run.
print(sys.version)

3.7.3 (default, Mar 27 2019, 09:23:15) 
[Clang 10.0.1 (clang-1001.0.46.3)]


In [3]:
# Record the local date and time at which the notebook was executed.
print(time.asctime())

Fri Aug  9 13:50:05 2019


In [4]:
# Record the login name of the user who executed the notebook.
print(getpass.getuser())

cthoyt


# Loading the Data

In [5]:
from se_kge.default_predictor import predictor

In [6]:
# Print the default resource paths imported from se_kge.constants,
# one labelled line per path.
path_report = "\n".join([
    "Loaded default predictor using paths:",
    "",
    f"embeddings: {DEFAULT_EMBEDDINGS_PATH}",
    f"graph:      {DEFAULT_GRAPH_PATH}",
    f"model:      {DEFAULT_MODEL_PATH}",
    f"mapping:    {DEFAULT_MAPPING_PATH}",
    "",  # keeps the trailing newline that closed the original message
])
print(path_report)

Loaded default predictor using paths:

embeddings: /Users/cthoyt/dev/SE_KGE/resources/predictive_model/070819_node2vec_embeddings_complete01.embeddings
graph:      /Users/cthoyt/dev/SE_KGE/resources/chemsim_50_graphs/fullgraph_with_chemsim_50.edgelist
model:      /Users/cthoyt/dev/SE_KGE/resources/predictive_model/070819_node2vec_model_complete01.pkl
mapping:    /Users/cthoyt/dev/SE_KGE/resources/mapping/fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## side effect - target association

In [7]:
# Query the predictor for a relation between two entities given by name,
# then pretty-print the returned result as JSON.
r = predictor.find_new_relation(
    entity_name_1='EGFR',
    entity_name_2='Papulopustular rash',
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 18165622

{
  "entity_1": {
    "node_id": 8499,
    "namespace": "uniprot",
    "name": "EGFR",
    "identifier": "P00533"
  },
  "entity_2": {
    "node_id": 6791,
    "namespace": "umls",
    "name": "Papulopustular rash",
    "identifier": "C2609319"
  },
  "probability": 0.02084655412288938,
  "mlp": 1.680965722421321
}


In [8]:
# Same kind of query as above, but the two entities are given by node ID
# instead of by name.
r = predictor.find_new_relation(
    node_id_1=9438, # Histamine receptor H1
    node_id_2=331, # Drowsiness
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 26626077

{
  "entity_1": {
    "node_id": 9438,
    "namespace": "uniprot",
    "name": "HRH1",
    "identifier": "P35367"
  },
  "entity_2": {
    "node_id": 331,
    "namespace": "umls",
    "name": "Drowsiness",
    "identifier": "C0013144"
  },
  "probability": 0.022773732615890885,
  "mlp": 1.6425657826640947
}


## drug - side effect association

In [9]:
# Query the predictor for a relation between a drug and a side effect,
# both given by node ID, then pretty-print the result as JSON.
# Node IDs are passed as integers, matching the other node-ID queries in
# this notebook and the integer `node_id` values echoed in the results.
r = predictor.find_new_relation(
    node_id_1=3534,  # diazepam
    node_id_2=670,  # Libido decreased
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29888057

{
  "entity_1": {
    "node_id": 3534,
    "namespace": "pubchem",
    "name": "7-chloro-1-methyl-5-phenyl-3H-1,4-benzodiazepin-2-one",
    "identifier": "3016"
  },
  "entity_2": {
    "node_id": 670,
    "namespace": "umls",
    "name": "Libido decreased",
    "identifier": "C0011124"
  },
  "probability": 0.9969995536047346,
  "mlp": 0.0013050361387393866
}


In [10]:
# Query the predictor for a relation between a drug and a side effect,
# both given by node ID, then pretty-print the result as JSON.
r = predictor.find_new_relation(
    node_id_1=10270,  # Cytarabine
    node_id_2=1149,  # Anaemia megaloblastic
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 23157436

{
  "entity_1": {
    "node_id": 10270,
    "namespace": "pubchem",
    "name": "4-amino-1-[(2R,3S,4S,5R)-3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one",
    "identifier": "6253"
  },
  "entity_2": {
    "node_id": 1149,
    "namespace": "umls",
    "name": "Anaemia megaloblastic",
    "identifier": "C0002888"
  },
  "probability": 0.0007503847088747362,
  "mlp": 3.1247160244677263
}


## drug-target association

In [11]:
# Query the predictor for a relation between a drug and a target,
# both given by node ID, then pretty-print the result as JSON.
r = predictor.find_new_relation(
    node_id_1=12765,  # Sertindole
    node_id_2=9372,   # CHRM1 receptor
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29942259

{
  "entity_1": {
    "node_id": 12765,
    "namespace": "pubchem",
    "name": "1-[2-[4-[5-chloro-1-(4-fluorophenyl)indol-3-yl]piperidin-1-yl]ethyl]imidazolidin-2-one",
    "identifier": "60149"
  },
  "entity_2": {
    "node_id": 9372,
    "namespace": "uniprot",
    "name": "CHRM1",
    "identifier": "P11229"
  },
  "probability": 0.014544440338331974,
  "mlp": 1.8373029854963625
}


# Example of predicting relations using the node2vec model and embeddings

In [12]:
# Call find_new_relations for identifier '145742' with
# entity_type='phenotype' and k=10.
results = predictor.find_new_relations(entity_identifier='145742', entity_type='phenotype', k=10)

# Echo the 'query' part of the response as indented JSON.
query_json = json.dumps(results['query'], indent=2)
print(query_json)

# Load the 'predictions' part into a DataFrame; the bare last expression
# makes the notebook render it as a table.
results_df = pd.DataFrame(data=results['predictions'])
results_df

creating relations list: 100%|██████████| 13730/13730 [00:20<00:00, 657.58it/s] 


{
  "entity": {
    "node_id": 9310,
    "namespace": "pubchem",
    "identifier": "145742",
    "name": "(2S)-pyrrolidine-2-carboxylic acid"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,identifier,mlp,name,namespace,node_id,probability
0,C0302859,1.155488,Euthyroid goiter,umls,4479,0.069906
1,C0024473,1.194602,Magnesium deficiency,umls,7586,0.063885
2,C0040524,1.214516,Toxemia,umls,7587,0.061022
3,C0429098,1.230885,Electrocardiogram QRS complex,umls,606,0.058764
4,C0233718,1.23445,Pressure of speech,umls,6303,0.058284
5,C0149875,1.235635,Primary dysmenorrhea,umls,7614,0.058125
6,C0424332,1.241167,Breath holding attack,umls,4339,0.05739
7,C0220983,1.244772,Metabolic alkalosis,umls,1373,0.056915
8,C0235575,1.251142,Hemolytic reaction,umls,6040,0.056086
9,C0079584,1.251759,Ichthyosis vulgaris,umls,7562,0.056007
