# Using the Prediction Model

## Environment

In [6]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from se_kge.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_MODEL_PATH,
)

In [2]:
# Record the Python interpreter version used for this run.
print(sys.version)

3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]


In [3]:
# Record the local time at which the notebook was executed.
print(time.asctime())

Fri Aug 16 09:36:12 2019


In [4]:
# Record which local account ran the notebook.
# NOTE(review): this writes the local username into the saved cell output;
# clear the output before sharing if that is not intended.
print(getpass.getuser())

RanaAldisi


# Loading the Data

In [4]:
from se_kge.default_predictor import predictor

In [7]:
# Show the default resource paths exported by se_kge.constants.
# NOTE(review): presumably these are the files the default predictor was built
# from — confirm against se_kge.default_predictor. The saved output contains
# absolute, user-specific paths.
print(f"""Loaded default predictor using paths:

embeddings: {DEFAULT_EMBEDDINGS_PATH}
graph:      {DEFAULT_GRAPH_PATH}
model:      {DEFAULT_MODEL_PATH}
mapping:    {DEFAULT_MAPPING_PATH}
""")

Loaded default predictor using paths:

embeddings: c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\1508_node2vec_emb.txt
graph:      c:\users\rana aldisi\documents\github\se_kge\resources\chemsim_50_graphs\fullgraph_with_chemsim_50.edgelist
model:      c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\1508_node2vec.pkl
mapping:    c:\users\rana aldisi\documents\github\se_kge\resources\mapping\fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## side effect - target association

In [8]:
# Look up both nodes by name and print the predicted relation as JSON.
r = predictor.find_new_relation(
    source_name='EGFR_HUMAN',
    target_name='Papulopustular rash',
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 18165622

{
  "source": {
    "node_id": "9635",
    "namespace": "uniprot",
    "identifier": "P00533",
    "name": "EGFR_HUMAN"
  },
  "target": {
    "node_id": "6791",
    "namespace": "umls",
    "identifier": "C2609319",
    "name": "Papulopustular rash"
  },
  "p": 0.876,
  "mlp": 0.057
}


In [9]:
# Look up both nodes by node id and print the predicted relation as JSON.
# NOTE(review): the recorded output resolves node 9438 to PDE6C_HUMAN
# (uniprot:P51160), not the histamine H1 receptor named in the inline comment.
# The node id looks stale — confirm it against the current mapping file before
# relying on this example or on the cited PMID.
r = predictor.find_new_relation(
    source_id='9438', # Histamine receptor H1 (intended; see NOTE above)
    target_id='331', # Drowsiness
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 26626077

{
  "source": {
    "node_id": "9438",
    "namespace": "uniprot",
    "identifier": "P51160",
    "name": "PDE6C_HUMAN"
  },
  "target": {
    "node_id": "331",
    "namespace": "umls",
    "identifier": "C0013144",
    "name": "Drowsiness"
  },
  "p": 0.989,
  "mlp": 0.005
}


## drug - side effect association

In [9]:
# Look up both nodes by node id and print the predicted relation as JSON.
r = predictor.find_new_relation(
    source_id='3534',  # Diazepam
    target_id='670',  # Libido decreased
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29888057

{
  "source": {
    "node_id": "3534",
    "namespace": "pubchem.compound",
    "identifier": "3016",
    "name": "Diazepam"
  },
  "target": {
    "node_id": "670",
    "namespace": "umls",
    "identifier": "C0011124",
    "name": "Libido decreased"
  },
  "p": 0.024,
  "mlp": 1.623
}


In [10]:
# Look up both nodes by node id and print the predicted relation as JSON.
# NOTE(review): the recorded output resolves node 10270 to OBP2A_HUMAN
# (uniprot:Q9NY56), a protein, not the drug cytarabine named in the inline
# comment. The node id looks stale — confirm it against the current mapping
# file; as recorded, this is not a drug - side effect example.
r = predictor.find_new_relation(
    source_id='10270',  # Cytarabine (intended; see NOTE above)
    target_id='1149',  # Anaemia megaloblastic
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 23157436

{
  "source": {
    "node_id": "10270",
    "namespace": "uniprot",
    "identifier": "Q9NY56",
    "name": "OBP2A_HUMAN"
  },
  "target": {
    "node_id": "1149",
    "namespace": "umls",
    "identifier": "C0002888",
    "name": "Anaemia megaloblastic"
  },
  "p": 0.999,
  "mlp": 0.0
}


## drug-target association

In [11]:
# Look up both nodes by node id and print the predicted relation as JSON.
# NOTE(review): neither id matches its inline comment in the recorded output:
# node 12765 resolves to "2-Amino-3-Oxo-4-Sulfo-Butyric Acid"
# (pubchem.compound:131704218) and node 9372 resolves to GBRA4_HUMAN
# (uniprot:P48169). Both ids look stale — confirm them against the current
# mapping file before relying on this example or on the cited PMID.
r = predictor.find_new_relation(
    source_id='12765',  # Sertindole (intended; see NOTE above)
    target_id='9372',   # CHRM1 receptor (intended; see NOTE above)
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29942259

{
  "source": {
    "node_id": "12765",
    "namespace": "pubchem.compound",
    "identifier": "131704218",
    "name": "2-Amino-3-Oxo-4-Sulfo-Butyric Acid"
  },
  "target": {
    "node_id": "9372",
    "namespace": "uniprot",
    "identifier": "P48169",
    "name": "GBRA4_HUMAN"
  },
  "p": 1.0,
  "mlp": 0.0
}


# Example of predicting relations using node2vec model and embeddings

In [10]:
def get_predictions_df(curie, results_type=None, k=10):
    """Query the default predictor for new relations of one entity.

    :param curie: Compact identifier of the query node in the form
        ``namespace:identifier``, e.g. ``'pubchem.compound:1983'``.
    :param results_type: Optional filter forwarded unchanged to
        ``predictor.find_new_relations`` (this notebook uses ``'phenotype'``).
    :param k: Number of predictions to request. Defaults to 10, the value
        that used to be hard-coded, so existing calls behave as before.
    :return: A pair ``(query, predictions_df)``. ``query`` is the ``'query'``
        entry of the predictor's result; ``predictions_df`` is a
        ``pandas.DataFrame`` built from the ``'predictions'`` entry with the
        columns ``node_id, namespace, identifier, name, p, mlp`` in that order.
    """
    columns = ['node_id', 'namespace', 'identifier', 'name', 'p', 'mlp']
    results = predictor.find_new_relations(
        node_curie=curie,
        results_type=results_type,
        k=k,
    )
    # Passing ``columns`` to the constructor selects and orders the columns and
    # still yields an empty, correctly labelled frame when there are no
    # predictions; selecting these labels from an empty frame afterwards would
    # raise ``KeyError``.
    results_df = pd.DataFrame(results['predictions'], columns=columns)
    return results['query'], results_df

In [11]:
# Request phenotype predictions for pubchem.compound:145742
# (resolved to "Proline" in the recorded output).
query, df = get_predictions_df('pubchem.compound:145742', 'phenotype')
print(json.dumps(query, indent=2))
df  # bare last expression so the frame is shown as the cell result

{
  "entity": {
    "node_id": "9294",
    "namespace": "pubchem.compound",
    "identifier": "145742",
    "name": "Proline"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,7199,umls,C1735914,Recurrent pulmonary embolism,0.388,0.411
1,8453,umls,C1400513,Transmural myocardial infarction,0.433,0.364
2,8452,umls,C0340293,Anterior myocardial infarction,0.459,0.338
3,8451,umls,C0032807,Postphlebitic syndrome,0.547,0.262
4,7495,umls,C0035920,Rubella,0.561,0.251
5,7493,umls,C0001768,Agammaglobulinemia,0.654,0.184
6,8450,umls,C0026269,Mitral valve stenosis,0.691,0.161
7,7497,umls,C0221026,Bruton's agammaglobulinaemia,0.694,0.158
8,7965,umls,C0152545,Primary tuberculous infection,0.752,0.124
9,26,umls,C0015230,Rash,0.77,0.114


In [14]:
# Request phenotype predictions for pubchem.compound:1983
# (resolved to "Acetaminophen" in the recorded output).
query, df = get_predictions_df('pubchem.compound:1983', 'phenotype')
print(json.dumps(query, indent=2))
df  # bare last expression so the frame is shown as the cell result

{
  "entity": {
    "node_id": "1887",
    "namespace": "pubchem.compound",
    "identifier": "1983",
    "name": "Acetaminophen"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,1102,umls,C0002962,Angina pectoris,0.004,2.426
1,12,umls,C0004238,Atrial fibrillation,0.007,2.186
2,49,umls,C0030554,Paraesthesia,0.009,2.057
3,330,umls,C0004245,Atrioventricular block,0.01,1.995
4,342,umls,C0040264,Tinnitus,0.01,1.99
5,678,umls,C0012569,Diplopia,0.01,1.99
6,23,umls,C0013378,Dysgeusia,0.011,1.971
7,180,umls,C0041657,Loss of consciousness,0.011,1.978
8,59,umls,C0042571,Vertigo,0.012,1.906
9,1366,umls,C0025637,Methaemoglobinaemia,0.012,1.926


In [13]:
# Request phenotype predictions for uniprot:P05067
# (resolved to "A4_HUMAN" in the recorded output).
query, df = get_predictions_df('uniprot:P05067', 'phenotype')
print(json.dumps(query, indent=2))
df  # bare last expression so the frame is shown as the cell result

{
  "entity": {
    "node_id": "10264",
    "namespace": "uniprot",
    "identifier": "P05067",
    "name": "A4_HUMAN"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,8148,umls,C0019202,Hepato-lenticular degeneration,0.775,0.111
1,3425,umls,C2747813,Bacterial colonisation,0.835,0.078
2,8062,umls,C0433445,Burns third degree,0.862,0.065
3,1123,umls,C0853960,Bacterial disease carrier,0.882,0.055
4,5975,umls,C0000817,Abortion infected,0.904,0.044
5,8345,umls,C0030421,Paraganglion neoplasm,0.928,0.032
6,8204,umls,C1141927,Wound sepsis,0.932,0.03
7,8260,umls,C0277730,Sexual assault victim,0.938,0.028
8,7943,umls,C0241597,Uterine relaxation,0.943,0.025
9,2037,umls,C0863090,Allergic skin reaction,0.948,0.023


In [27]:
# Request phenotype predictions for uniprot:P10636
# (resolved to "TAU_HUMAN" in the recorded output).
query, df = get_predictions_df('uniprot:P10636', 'phenotype')
print(json.dumps(query, indent=2))
df  # bare last expression so the frame is shown as the cell result

{
  "entity": {
    "node_id": "9892",
    "namespace": "uniprot",
    "identifier": "P10636",
    "name": "TAU_HUMAN"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,1373,umls,C0220983,Metabolic alkalosis,0.951,0.022
1,3586,umls,C0423271,Scleral hyperaemia,0.953,0.021
2,1367,umls,C0027709,Nephrocalcinosis,0.956,0.02
3,1945,umls,C0032914,Pre-eclampsia,0.957,0.019
4,1024,umls,C0162566,Porphyria cutanea tarda,0.957,0.019
5,1123,umls,C0853960,Bacterial disease carrier,0.957,0.019
6,7614,umls,C0149875,Primary dysmenorrhea,0.957,0.019
7,3991,umls,C0151595,Digitalis intoxication (NOS),0.958,0.019
8,2670,umls,C1561643,Chronic kidney disease,0.958,0.019
9,439,umls,C0036679,Separation,0.959,0.018


In [40]:
# Request phenotype predictions for pubchem.compound:6234.
# NOTE(review): the recorded output reports the entity as
# pubchem.compound:5360515 ("Naltrexone"), which differs from the identifier
# queried here. Either the saved output is stale or the identifier is remapped
# inside the predictor — re-run and confirm.
query, df = get_predictions_df('pubchem.compound:6234', 'phenotype')
print(json.dumps(query, indent=2))
df  # bare last expression so the frame is shown as the cell result
# Literature evidence: PMID 29241812

{
  "entity": {
    "node_id": "10219",
    "namespace": "pubchem.compound",
    "identifier": "5360515",
    "name": "Naltrexone"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,6121,umls,C0853150,Nerve conduction studies abnormal,0.189,0.724
1,3395,umls,C0542044,Incoherent,0.191,0.719
2,4145,umls,C0858950,Mental aberration,0.196,0.708
3,6119,umls,C0235698,Abdominal distension gaseous,0.197,0.707
4,8320,umls,C0577866,Poor venous access,0.214,0.669
5,7231,umls,C1697973,Infusion site paraesthesia,0.214,0.669
6,8364,umls,C0341736,Detrusor hyperreflexia,0.218,0.661
7,8382,umls,C0038868,Progressive supranuclear palsy,0.23,0.639
8,7230,umls,C1695896,Infusion site hypersensitivity,0.234,0.63
9,6048,umls,C0520757,Delayed recovery from anaesthesia,0.236,0.626


In [14]:
# Look up both nodes by node id and print the predicted relation as JSON.
# Entity names in the inline comments are taken from the recorded output.
r = predictor.find_new_relation(
    source_id='2071',  # Amantadine
    target_id='2248',  # Parkinson's disease
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146
# NOTE(review): the same PMID is cited for two other drug/condition pairs in
# this notebook — confirm it covers each of them.

{
  "source": {
    "node_id": "2071",
    "namespace": "pubchem.compound",
    "identifier": "2130",
    "name": "Amantadine"
  },
  "target": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease"
  },
  "p": 0.2,
  "mlp": 0.698
}


In [15]:
# Look up both nodes by node id and print the predicted relation as JSON.
# Entity names in the inline comments are taken from the recorded output.
r = predictor.find_new_relation(
    source_id='5346',  # Ropinirole
    target_id='1348',  # Restless legs syndrome
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146
# NOTE(review): the same PMID is cited for two other drug/condition pairs in
# this notebook — confirm it covers each of them.

{
  "source": {
    "node_id": "5346",
    "namespace": "pubchem.compound",
    "identifier": "5095",
    "name": "Ropinirole"
  },
  "target": {
    "node_id": "1348",
    "namespace": "umls",
    "identifier": "C0035258",
    "name": "Restless legs syndrome"
  },
  "p": 0.013,
  "mlp": 1.878
}


In [16]:
# Look up both nodes by node id and print the predicted relation as JSON.
# Entity names in the inline comments are taken from the recorded output.
r = predictor.find_new_relation(
    source_id='3627',  # Disulfiram
    target_id='2318',  # Malignant melanoma
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146
# NOTE(review): the same PMID is cited for two other drug/condition pairs in
# this notebook — confirm it covers each of them.

{
  "source": {
    "node_id": "3627",
    "namespace": "pubchem.compound",
    "identifier": "3117",
    "name": "Disulfiram"
  },
  "target": {
    "node_id": "2318",
    "namespace": "umls",
    "identifier": "C0025202",
    "name": "Malignant melanoma"
  },
  "p": 0.354,
  "mlp": 0.451
}
