# Using the Prediction Model

## Environment

In [1]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from se_kge.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_MODEL_PATH,
)

In [2]:
# Record the Python interpreter version string this notebook was executed with.
print(sys.version)

3.7.3 (default, Mar 27 2019, 09:23:15) 
[Clang 10.0.1 (clang-1001.0.46.3)]


In [3]:
# Record the local wall-clock time at which this notebook was executed.
print(time.asctime())

Fri Aug  9 16:36:15 2019


In [4]:
# Record the login name of the account that executed this notebook.
# NOTE(review): the saved output exposes a local username; consider clearing it before sharing.
print(getpass.getuser())

cthoyt


# Loading the Data

In [5]:
from se_kge.default_predictor import predictor

In [6]:
# Report the resource locations exposed by se_kge.constants, one per line.
# The trailing empty entry keeps the blank line that ends the report.
path_report_lines = [
    "Loaded default predictor using paths:",
    "",
    f"embeddings: {DEFAULT_EMBEDDINGS_PATH}",
    f"graph:      {DEFAULT_GRAPH_PATH}",
    f"model:      {DEFAULT_MODEL_PATH}",
    f"mapping:    {DEFAULT_MAPPING_PATH}",
    "",
]
print("\n".join(path_report_lines))

Loaded default predictor using paths:

embeddings: /Users/cthoyt/dev/SE_KGE/resources/predictive_model/070819_node2vec_embeddings_complete01.embeddings
graph:      /Users/cthoyt/dev/SE_KGE/resources/chemsim_50_graphs/fullgraph_with_chemsim_50.edgelist
model:      /Users/cthoyt/dev/SE_KGE/resources/predictive_model/070819_node2vec_model_complete01.pkl
mapping:    /Users/cthoyt/dev/SE_KGE/resources/mapping/fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## side effect - target association

In [7]:
# Literature evidence: PMID 18165622
# Query by node name rather than by internal node id.
egfr_rash_relation = predictor.find_new_relation(source_name='EGFR', target_name='Papulopustular rash')
egfr_rash_json = json.dumps(egfr_rash_relation, indent=2)
print(egfr_rash_json)

{
  "source": {
    "node_id": "8499",
    "namespace": "uniprot",
    "identifier": "P00533",
    "name": "EGFR"
  },
  "target": {
    "node_id": "6791",
    "namespace": "umls",
    "identifier": "C2609319",
    "name": "Papulopustular rash"
  },
  "p": 0.979,
  "mlp": 0.009
}


In [8]:
# Literature evidence: PMID 26626077
# Node ids: 9438 = Histamine receptor H1, 331 = Drowsiness
hrh1_drowsiness_relation = predictor.find_new_relation(source_id='9438', target_id='331')
hrh1_drowsiness_json = json.dumps(hrh1_drowsiness_relation, indent=2)
print(hrh1_drowsiness_json)

{
  "source": {
    "node_id": "9438",
    "namespace": "uniprot",
    "identifier": "P35367",
    "name": "HRH1"
  },
  "target": {
    "node_id": "331",
    "namespace": "umls",
    "identifier": "C0013144",
    "name": "Drowsiness"
  },
  "p": 0.977,
  "mlp": 0.01
}


## drug - side effect association

In [9]:
# Literature evidence: PMID 29888057
# Node ids: 3534 = diazepam, 670 = Libido decreased
diazepam_libido_relation = predictor.find_new_relation(source_id='3534', target_id='670')
diazepam_libido_json = json.dumps(diazepam_libido_relation, indent=2)
print(diazepam_libido_json)

{
  "source": {
    "node_id": "3534",
    "namespace": "pubchem",
    "identifier": "3016",
    "name": "7-chloro-1-methyl-5-phenyl-3H-1,4-benzodiazepin-2-one"
  },
  "target": {
    "node_id": "670",
    "namespace": "umls",
    "identifier": "C0011124",
    "name": "Libido decreased"
  },
  "p": 0.003,
  "mlp": 2.523
}


In [10]:
# Literature evidence: PMID 23157436
# Node ids: 10270 = Cytarabine, 1149 = Anaemia megaloblastic
cytarabine_anaemia_relation = predictor.find_new_relation(source_id='10270', target_id='1149')
cytarabine_anaemia_json = json.dumps(cytarabine_anaemia_relation, indent=2)
print(cytarabine_anaemia_json)

{
  "source": {
    "node_id": "10270",
    "namespace": "pubchem",
    "identifier": "6253",
    "name": "4-amino-1-[(2R,3S,4S,5R)-3,4-dihydroxy-5-(hydroxymethyl)oxolan-2-yl]pyrimidin-2-one"
  },
  "target": {
    "node_id": "1149",
    "namespace": "umls",
    "identifier": "C0002888",
    "name": "Anaemia megaloblastic"
  },
  "p": 0.999,
  "mlp": 0.0
}


## drug-target association

In [11]:
# Literature evidence: PMID 29942259
# Node ids: 12765 = Sertindole, 9372 = CHRM1 receptor
sertindole_chrm1_relation = predictor.find_new_relation(source_id='12765', target_id='9372')
sertindole_chrm1_json = json.dumps(sertindole_chrm1_relation, indent=2)
print(sertindole_chrm1_json)

{
  "source": {
    "node_id": "12765",
    "namespace": "pubchem",
    "identifier": "60149",
    "name": "1-[2-[4-[5-chloro-1-(4-fluorophenyl)indol-3-yl]piperidin-1-yl]ethyl]imidazolidin-2-one"
  },
  "target": {
    "node_id": "9372",
    "namespace": "uniprot",
    "identifier": "P11229",
    "name": "CHRM1"
  },
  "p": 0.985,
  "mlp": 0.006
}


# Example of predicting relations using node2vec model and embeddings

In [12]:
def get_phenotype_df(curie, k=10):
    """Query the default predictor for phenotype relations of a node and tabulate them.

    :param curie: Identifier of the query node in ``namespace:identifier`` form,
        e.g. ``'pubchem:1983'``. Passed to ``predictor.find_new_relations`` as ``node_curie``.
    :param k: Value forwarded to ``predictor.find_new_relations`` as ``k``
        (presumably the number of predictions to return; the example outputs show
        ten rows for ``k=10``). Defaults to 10, the value previously hard-coded.
    :return: A pair ``(query, results_df)`` where ``query`` is the ``'query'`` entry of
        the predictor's response and ``results_df`` is a :class:`pandas.DataFrame` of the
        ``'predictions'`` entry restricted to the columns
        ``node_id, namespace, identifier, name, p, mlp`` in that order.
    :raises KeyError: If non-empty predictions lack one of the expected columns.
    """
    columns = ['node_id', 'namespace', 'identifier', 'name', 'p', 'mlp']
    results = predictor.find_new_relations(
        node_curie=curie,
        result_type='phenotype',
        k=k,
    )
    results_df = pd.DataFrame(results['predictions'])
    if results_df.empty:
        # No predictions: selecting columns on a column-less frame would raise
        # KeyError, so return an empty frame that still has the expected schema.
        results_df = pd.DataFrame(columns=columns)
    else:
        results_df = results_df[columns]
    return results['query'], results_df

In [13]:
# Phenotype predictions for PubChem compound 145742.
phenotype_result_145742 = get_phenotype_df('pubchem:145742')
query_145742 = phenotype_result_145742[0]
phenotypes_145742 = phenotype_result_145742[1]
print(json.dumps(query_145742, indent=2))
# Bare last expression so the frame gets the notebook's rich table display.
phenotypes_145742

{
  "entity": {
    "node_id": "9310",
    "namespace": "pubchem",
    "identifier": "145742",
    "name": "(2S)-pyrrolidine-2-carboxylic acid"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,4479,umls,C0302859,Euthyroid goiter,0.93,0.031
1,7586,umls,C0024473,Magnesium deficiency,0.936,0.029
2,7587,umls,C0040524,Toxemia,0.939,0.027
3,606,umls,C0429098,Electrocardiogram QRS complex,0.941,0.026
4,7614,umls,C0149875,Primary dysmenorrhea,0.942,0.026
5,6303,umls,C0233718,Pressure of speech,0.942,0.026
6,1373,umls,C0220983,Metabolic alkalosis,0.943,0.025
7,4339,umls,C0424332,Breath holding attack,0.943,0.026
8,7562,umls,C0079584,Ichthyosis vulgaris,0.944,0.025
9,6040,umls,C0235575,Hemolytic reaction,0.944,0.025


In [14]:
# Phenotype predictions for PubChem compound 1983.
phenotype_result_1983 = get_phenotype_df('pubchem:1983')
query_1983 = phenotype_result_1983[0]
phenotypes_1983 = phenotype_result_1983[1]
print(json.dumps(query_1983, indent=2))
# Bare last expression so the frame gets the notebook's rich table display.
phenotypes_1983

{
  "entity": {
    "node_id": "1887",
    "namespace": "pubchem",
    "identifier": "1983",
    "name": "N-(4-hydroxyphenyl)acetamide"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,649,umls,C0004134,Ataxia,0.005,2.266
1,23,umls,C0013378,Dysgeusia,0.007,2.142
2,99,umls,C0020580,Hypoaesthesia,0.008,2.074
3,10,umls,C0003811,Arrhythmia,0.01,1.99
4,49,umls,C0030554,Paraesthesia,0.01,1.989
5,342,umls,C0040264,Tinnitus,0.011,1.96
6,180,umls,C0041657,Loss of consciousness,0.013,1.897
7,59,umls,C0042571,Vertigo,0.014,1.845
8,893,umls,C0542571,Face oedema,0.014,1.854
9,167,umls,C0038362,Stomatitis,0.018,1.745
