# Using the Prediction Model

## Environment

In [1]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from se_kge.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_MODEL_PATH,
)

In [2]:
# Record the Python interpreter version this notebook was run with.
print(sys.version)

3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]


In [3]:
# Record the local date and time at which the notebook was executed.
print(time.asctime())

Thu Aug 15 13:12:36 2019


In [4]:
# Record the login name of the user who ran the notebook.
# Note: the name is stored in the saved cell output.
print(getpass.getuser())

RanaAldisi


# Loading the Data

In [5]:
from se_kge.default_predictor import predictor

In [6]:
# Show which resource files the default predictor was configured with.
# Each argument is one output line; the trailing empty string reproduces
# the blank line at the end of the banner.
print(
    "Loaded default predictor using paths:",
    "",
    f"embeddings: {DEFAULT_EMBEDDINGS_PATH}",
    f"graph:      {DEFAULT_GRAPH_PATH}",
    f"model:      {DEFAULT_MODEL_PATH}",
    f"mapping:    {DEFAULT_MAPPING_PATH}",
    "",
    sep="\n",
)

Loaded default predictor using paths:

embeddings: c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\070819_node2vec_embeddings_complete01.embeddings
graph:      c:\users\rana aldisi\documents\github\se_kge\resources\chemsim_50_graphs\fullgraph_with_chemsim_50.edgelist
model:      c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\070819_node2vec_model_complete01.pkl
mapping:    c:\users\rana aldisi\documents\github\se_kge\resources\mapping\fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## side effect - target association

In [14]:
# Query the predictor for the relation between two nodes, identified here by
# name rather than by node ID, and pretty-print the returned result as JSON.
r = predictor.find_new_relation(
    source_name='EGFR_HUMAN',
    target_name='Papulopustular rash',
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 18165622

{
  "source": {
    "node_id": "9635",
    "namespace": "uniprot",
    "identifier": "P00533",
    "name": "EGFR_HUMAN"
  },
  "target": {
    "node_id": "6791",
    "namespace": "umls",
    "identifier": "C2609319",
    "name": "Papulopustular rash"
  },
  "p": 0.981,
  "mlp": 0.008
}


In [15]:
# Query the relation between two nodes given by their internal node IDs.
# NOTE(review): the recorded output below resolves node 9438 to PDE6C_HUMAN
# (uniprot:P51160), not to the histamine H1 receptor named in the inline
# comment -- confirm the node ID against the current node mapping.
r = predictor.find_new_relation(
    source_id='9438', # Histamine receptor H1
    target_id='331', # Drowsiness
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 26626077

{
  "source": {
    "node_id": "9438",
    "namespace": "uniprot",
    "identifier": "P51160",
    "name": "PDE6C_HUMAN"
  },
  "target": {
    "node_id": "331",
    "namespace": "umls",
    "identifier": "C0013144",
    "name": "Drowsiness"
  },
  "p": 0.977,
  "mlp": 0.01
}


## drug - side effect association

In [16]:
# Query the relation between a drug node and a side-effect node, both given
# by internal node ID; the recorded output confirms the two labels below.
r = predictor.find_new_relation(
    source_id='3534',  # diazepam
    target_id='670',  # Libido decreased
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29888057

{
  "source": {
    "node_id": "3534",
    "namespace": "pubchem.compound",
    "identifier": "3016",
    "name": "Diazepam"
  },
  "target": {
    "node_id": "670",
    "namespace": "umls",
    "identifier": "C0011124",
    "name": "Libido decreased"
  },
  "p": 0.003,
  "mlp": 2.523
}


In [17]:
# Query the relation between two nodes given by their internal node IDs.
# NOTE(review): the recorded output below resolves node 10270 to OBP2A_HUMAN
# (uniprot:Q9NY56), a protein rather than the drug Cytarabine named in the
# inline comment -- confirm the node ID against the current node mapping.
r = predictor.find_new_relation(
    source_id='10270',  # Cytarabine
    target_id='1149',  # Anaemia megaloblastic
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 23157436

{
  "source": {
    "node_id": "10270",
    "namespace": "uniprot",
    "identifier": "Q9NY56",
    "name": "OBP2A_HUMAN"
  },
  "target": {
    "node_id": "1149",
    "namespace": "umls",
    "identifier": "C0002888",
    "name": "Anaemia megaloblastic"
  },
  "p": 0.999,
  "mlp": 0.0
}


## drug-target association

In [33]:
# Query the relation between two nodes given by their internal node IDs.
# NOTE(review): neither label matches the recorded output below, which resolves
# node 12765 to "2-Amino-3-Oxo-4-Sulfo-Butyric Acid" (pubchem.compound:131704218)
# and node 9372 to GBRA4_HUMAN (uniprot:P48169) -- confirm both node IDs
# against the current node mapping.
r = predictor.find_new_relation(
    source_id='12765',  # Sertindole
    target_id='9372',   # CHRM1 receptor
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29942259

{
  "source": {
    "node_id": "12765",
    "namespace": "pubchem.compound",
    "identifier": "131704218",
    "name": "2-Amino-3-Oxo-4-Sulfo-Butyric Acid"
  },
  "target": {
    "node_id": "9372",
    "namespace": "uniprot",
    "identifier": "P48169",
    "name": "GBRA4_HUMAN"
  },
  "p": 0.985,
  "mlp": 0.006
}


# Example of predicting relations using node2vec model and embeddings

In [22]:
def get_predictions_df(curie, results_type=None, k=10):
    """Fetch the predictor's top predictions for one node and tabulate them.

    :param curie: CURIE of the query node (e.g. ``'uniprot:P05067'``), passed to
        ``predictor.find_new_relations`` as ``node_curie``.
    :param results_type: Forwarded unchanged as ``results_type``; the cells in
        this notebook pass ``'phenotype'``.
    :param k: Number of predictions to request. Defaults to 10, the value that
        was previously hard-coded.
    :return: A pair ``(query, results_df)`` where ``query`` is the ``'query'``
        entry of the predictor's response and ``results_df`` is a DataFrame of
        the ``'predictions'`` entry restricted to the columns ``node_id``,
        ``namespace``, ``identifier``, ``name``, ``p`` and ``mlp``.
    """
    results = predictor.find_new_relations(
        node_curie=curie,
        results_type=results_type,
        k=k,
    )
    columns = ['node_id', 'namespace', 'identifier', 'name', 'p', 'mlp']
    # Selecting the columns in the constructor (instead of indexing afterwards)
    # keeps the frame well-formed when the predictor returns no predictions;
    # indexing an empty frame by these names would raise a KeyError.
    results_df = pd.DataFrame(results['predictions'], columns=columns)
    return results['query'], results_df

In [24]:
# Phenotype predictions for pubchem.compound:145742, which the recorded output
# resolves to "Proline". Echo the query, then display the table.
query, df = get_predictions_df('pubchem.compound:145742', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "9294",
    "namespace": "pubchem.compound",
    "identifier": "145742",
    "name": "Proline"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,2146,umls,C0234133,Extrapyramidal symptoms,0.791,0.102
1,908,umls,C0686347,Tardive dyskinesia,0.807,0.093
2,728,umls,C0026837,Muscle rigidity,0.821,0.086
3,2930,umls,C0241442,Protrusion tongue,0.823,0.084
4,862,umls,C0392156,Akathisia,0.832,0.08
5,777,umls,C0085623,Akinesia,0.842,0.075
6,687,umls,C0015371,Extrapyramidal disorder,0.843,0.074
7,1036,umls,C0235660,Galactorrhoea,0.844,0.074
8,2074,umls,C0085637,Oculogyric crisis,0.85,0.071
9,2371,umls,C0151818,Opisthotonus,0.856,0.068


In [25]:
# Phenotype predictions for pubchem.compound:1983, which the recorded output
# resolves to "Acetaminophen". Echo the query, then display the table.
query, df = get_predictions_df('pubchem.compound:1983', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "1887",
    "namespace": "pubchem.compound",
    "identifier": "1983",
    "name": "Acetaminophen"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,649,umls,C0004134,Ataxia,0.005,2.266
1,23,umls,C0013378,Dysgeusia,0.007,2.142
2,99,umls,C0020580,Hypoaesthesia,0.008,2.074
3,10,umls,C0003811,Arrhythmia,0.01,1.99
4,49,umls,C0030554,Paraesthesia,0.01,1.989
5,342,umls,C0040264,Tinnitus,0.011,1.96
6,180,umls,C0041657,Loss of consciousness,0.013,1.897
7,59,umls,C0042571,Vertigo,0.014,1.845
8,893,umls,C0542571,Face oedema,0.014,1.854
9,167,umls,C0038362,Stomatitis,0.018,1.745


In [26]:
# Phenotype predictions for uniprot:P05067, which the recorded output
# resolves to "A4_HUMAN". Echo the query, then display the table.
query, df = get_predictions_df('uniprot:P05067', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "10264",
    "namespace": "uniprot",
    "identifier": "P05067",
    "name": "A4_HUMAN"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,2146,umls,C0234133,Extrapyramidal symptoms,0.082,1.086
1,818,umls,C0233477,Dysphoria,0.087,1.059
2,728,umls,C0026837,Muscle rigidity,0.098,1.008
3,908,umls,C0686347,Tardive dyskinesia,0.098,1.009
4,862,umls,C0392156,Akathisia,0.122,0.913
5,784,umls,C0152128,Drug withdrawal syndrome,0.13,0.888
6,1420,umls,C0038587,Withdrawal syndrome,0.141,0.851
7,687,umls,C0015371,Extrapyramidal disorder,0.151,0.821
8,2180,umls,C0152198,Accommodation disorder,0.159,0.798
9,2930,umls,C0241442,Protrusion tongue,0.16,0.795


In [27]:
# Phenotype predictions for uniprot:P10636, which the recorded output
# resolves to "TAU_HUMAN". Echo the query, then display the table.
query, df = get_predictions_df('uniprot:P10636', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "9892",
    "namespace": "uniprot",
    "identifier": "P10636",
    "name": "TAU_HUMAN"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,1373,umls,C0220983,Metabolic alkalosis,0.951,0.022
1,3586,umls,C0423271,Scleral hyperaemia,0.953,0.021
2,1367,umls,C0027709,Nephrocalcinosis,0.956,0.02
3,1945,umls,C0032914,Pre-eclampsia,0.957,0.019
4,1024,umls,C0162566,Porphyria cutanea tarda,0.957,0.019
5,1123,umls,C0853960,Bacterial disease carrier,0.957,0.019
6,7614,umls,C0149875,Primary dysmenorrhea,0.957,0.019
7,3991,umls,C0151595,Digitalis intoxication (NOS),0.958,0.019
8,2670,umls,C1561643,Chronic kidney disease,0.958,0.019
9,439,umls,C0036679,Separation,0.959,0.018


In [32]:
# Phenotype predictions for pubchem.compound:6234, which the recorded output
# resolves to "Cycloserine". Echo the query, then display the table.
query, df = get_predictions_df('pubchem.compound:6234', 'phenotype')
print(json.dumps(query, indent=2))
df
# Literature evidence: PMID 29241812

{
  "entity": {
    "node_id": "9590",
    "namespace": "pubchem.compound",
    "identifier": "6234",
    "name": "Cycloserine"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,2214,umls,C1257843,Pseudomembranous colitis,0.968,0.014
1,7755,umls,C0043388,Yaws,0.97,0.013
2,8350,umls,C0004771,Bartonellosis,0.97,0.013
3,281,umls,C0036690,Septicemia,0.971,0.013
4,2761,umls,C0085437,Meningitis bacterial,0.971,0.013
5,7716,umls,C0035436,Rheumatic fever,0.971,0.013
6,7744,umls,C1096399,Necrotising ulcerative gingivostomatitis,0.971,0.013
7,7531,umls,C0029291,Psittacosis,0.972,0.012
8,4830,umls,C3495832,Rheumatic disorder,0.972,0.012
9,757,umls,C0036830,Serum sickness,0.972,0.012


In [35]:
# Query the relation between two nodes given by their internal node IDs.
# The labels below are the names the recorded output resolves these IDs to.
r = predictor.find_new_relation(
    source_id='2071',  # Amantadine
    target_id='2248',  # Parkinson's disease
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146

{
  "source": {
    "node_id": "2071",
    "namespace": "pubchem.compound",
    "identifier": "2130",
    "name": "Amantadine"
  },
  "target": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease"
  },
  "p": 0.216,
  "mlp": 0.666
}


In [36]:
# Query the relation between two nodes given by their internal node IDs.
# The labels below are the names the recorded output resolves these IDs to.
r = predictor.find_new_relation(
    source_id='5346',  # Ropinirole
    target_id='1348',  # Restless legs syndrome
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146

{
  "source": {
    "node_id": "5346",
    "namespace": "pubchem.compound",
    "identifier": "5095",
    "name": "Ropinirole"
  },
  "target": {
    "node_id": "1348",
    "namespace": "umls",
    "identifier": "C0035258",
    "name": "Restless legs syndrome"
  },
  "p": 0.006,
  "mlp": 2.199
}


In [43]:
# Query the relation between two nodes given by their internal node IDs.
# The labels below are the names the recorded output resolves these IDs to.
r = predictor.find_new_relation(
    source_id='3627',  # Disulfiram
    target_id='2318',  # Malignant melanoma
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146

{
  "source": {
    "node_id": "3627",
    "namespace": "pubchem.compound",
    "identifier": "3117",
    "name": "Disulfiram"
  },
  "target": {
    "node_id": "2318",
    "namespace": "umls",
    "identifier": "C0025202",
    "name": "Malignant melanoma"
  },
  "p": 0.64,
  "mlp": 0.194
}
