# Using the Prediction Model

## Environment

In [1]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from se_kge.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_MODEL_PATH,
)

In [2]:
# Record the Python interpreter version this notebook was run with.
print(sys.version)

3.7.2 (tags/v3.7.2:9a3ffc0492, Dec 23 2018, 23:09:28) [MSC v.1916 64 bit (AMD64)]


In [3]:
# Record the local date and time at which this cell was executed.
print(time.asctime())

Mon Aug 26 09:42:16 2019


In [4]:
# Record which OS user ran the notebook.
# NOTE(review): the saved output exposes the local account name; consider
# clearing this cell's output before sharing the notebook.
print(getpass.getuser())

RanaAldisi


# Loading the Data

In [5]:
from se_kge.default_predictor import predictor

In [6]:
# Print the default embeddings / graph / model / mapping paths imported from
# se_kge.constants, so the reader can see which resource files are referenced.
print(f"""Loaded default predictor using paths:

embeddings: {DEFAULT_EMBEDDINGS_PATH}
graph:      {DEFAULT_GRAPH_PATH}
model:      {DEFAULT_MODEL_PATH}
mapping:    {DEFAULT_MAPPING_PATH}
""")

Loaded default predictor using paths:

embeddings: c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\2208_node2vec_emb.embeddings
graph:      c:\users\rana aldisi\documents\github\se_kge\resources\chemsim_50_graphs\fullgraph_with_chemsim_50.edgelist
model:      c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\2208_node2vec_model.pkl
mapping:    c:\users\rana aldisi\documents\github\se_kge\resources\mapping\fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## side effect-target association

In [7]:
# Query a single target -> side effect pair, addressing both nodes by name,
# and print the returned record (source, target, p, mlp) as indented JSON.
r = predictor.find_new_relation(
    source_name='EGFR_HUMAN',
    target_name='Papulopustular rash',
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 18165622

{
  "source": {
    "node_id": "9587",
    "namespace": "uniprot",
    "identifier": "P00533",
    "name": "EGFR_HUMAN"
  },
  "target": {
    "node_id": "6791",
    "namespace": "umls",
    "identifier": "C2609319",
    "name": "Papulopustular rash"
  },
  "p": 0.878,
  "mlp": 0.056
}


In [8]:
# Same kind of query as above, but addressing the nodes by their graph node
# ids instead of by name.
r = predictor.find_new_relation(
    source_id='9451', # Histamine receptor H1 (HRH1_HUMAN)
    target_id='331', # Drowsiness
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 26626077

{
  "source": {
    "node_id": "9451",
    "namespace": "uniprot",
    "identifier": "P35367",
    "name": "HRH1_HUMAN"
  },
  "target": {
    "node_id": "331",
    "namespace": "umls",
    "identifier": "C0013144",
    "name": "Drowsiness"
  },
  "p": 0.061,
  "mlp": 1.213
}


In [48]:
# Target -> side effect pair queried by node id; the record is printed as
# indented JSON.
r = predictor.find_new_relation(
    source_id='9325', # SC6A2_HUMAN
    target_id='56', # Tachycardia
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 30952858

{
  "source": {
    "node_id": "9325",
    "namespace": "uniprot",
    "identifier": "P23975",
    "name": "SC6A2_HUMAN"
  },
  "target": {
    "node_id": "56",
    "namespace": "umls",
    "identifier": "C0039231",
    "name": "Tachycardia"
  },
  "p": 0.449,
  "mlp": 0.348
}


In [49]:
# Target -> side effect pair queried by node id; the record is printed as
# indented JSON.
r = predictor.find_new_relation(
    source_id='8670', # ACES_HUMAN
    target_id='309', # Bradycardia
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 30952858

{
  "source": {
    "node_id": "8670",
    "namespace": "uniprot",
    "identifier": "P22303",
    "name": "ACES_HUMAN"
  },
  "target": {
    "node_id": "309",
    "namespace": "umls",
    "identifier": "C0428977",
    "name": "Bradycardia"
  },
  "p": 0.998,
  "mlp": 0.001
}


## drug-side effect association

In [9]:
# Drug -> side effect pair queried by node id; the record is printed as
# indented JSON.
r = predictor.find_new_relation(
    source_id='3534',  # Diazepam
    target_id='670',  # Libido decreased
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 29888057

{
  "source": {
    "node_id": "3534",
    "namespace": "pubchem.compound",
    "identifier": "3016",
    "name": "Diazepam"
  },
  "target": {
    "node_id": "670",
    "namespace": "umls",
    "identifier": "C0011124",
    "name": "Libido decreased"
  },
  "p": 0.005,
  "mlp": 2.312
}


In [10]:
# Drug -> side effect pair queried by node id; the record is printed as
# indented JSON.
# NOTE(review): the recorded output resolves node 1148 to "Cytidine"
# (pubchem.compound:6175), not Cytarabine as the inline comment states.
# Confirm which compound was intended and correct either the id or the comment.
r = predictor.find_new_relation(
    source_id='1148',  # Cytarabine (output shows Cytidine; see note above)
    target_id='1149',  # Anaemia megaloblastic
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 23157436

{
  "source": {
    "node_id": "1148",
    "namespace": "pubchem.compound",
    "identifier": "6175",
    "name": "Cytidine"
  },
  "target": {
    "node_id": "1149",
    "namespace": "umls",
    "identifier": "C0002888",
    "name": "Anaemia megaloblastic"
  },
  "p": 0.043,
  "mlp": 1.364
}


## drug-target association

In [11]:
# Drug -> target pair queried by node id; the record is printed as indented
# JSON.
r = predictor.find_new_relation(
    source_id='14672',  # Sertindole
    target_id='9350',   # CHRM1 receptor (ACM1_HUMAN)
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 29942259

{
  "source": {
    "node_id": "14672",
    "namespace": "pubchem.compound",
    "identifier": "60149",
    "name": "Sertindole"
  },
  "target": {
    "node_id": "9350",
    "namespace": "uniprot",
    "identifier": "P11229",
    "name": "ACM1_HUMAN"
  },
  "p": 0.705,
  "mlp": 0.152
}


# Example of predicting relations using node2vec model and embeddings

In [12]:
def get_predictions_df(curie, results_type=None, k=10):
    """Return the query and the predictions for one node as a data frame.

    Calls ``predictor.find_new_relations`` and arranges the predictions it
    returns into a frame with a fixed column order.

    :param curie: CURIE of the query node, e.g. ``'pubchem.compound:2159'``.
    :param results_type: Forwarded unchanged as ``results_type``
        (e.g. ``'phenotype'`` or ``'chemical'``); ``None`` is passed as-is.
    :param k: Number of predictions to request. Defaults to 10, the value
        that used to be hard-coded, so existing calls behave as before.
    :return: A pair ``(query, results_df)`` where ``query`` is the
        ``'query'`` entry of the predictor's result and ``results_df`` has
        the columns ``node_id, namespace, identifier, name, p, mlp``.
    """
    columns = ['node_id', 'namespace', 'identifier', 'name', 'p', 'mlp']
    results = predictor.find_new_relations(
        node_curie=curie,
        results_type=results_type,
        k=k,
    )
    results_df = pd.DataFrame(results['predictions'])
    if results_df.empty:
        # With no predictions the frame has no columns at all, so selecting
        # them would raise KeyError; return an empty frame that still has
        # the expected columns instead.
        results_df = pd.DataFrame(columns=columns)
    else:
        results_df = results_df[columns]
    return results['query'], results_df

In [37]:
# Request 'phenotype' predictions for pubchem.compound:2159 (Amisulpride
# according to the echoed query), print the query, then show the table.
query, df = get_predictions_df('pubchem.compound:2159', 'phenotype')
print(json.dumps(query, indent=2))
# Bare last expression so the frame is rendered as the cell output.
df

{
  "entity": {
    "node_id": "2173",
    "namespace": "pubchem.compound",
    "identifier": "2159",
    "name": "Amisulpride"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,6,umls,C0002871,Anaemia,0.0,3.618
1,90,umls,C0013604,Oedema,0.0,4.644
2,764,umls,C0038661,Suicide,0.0,3.477
3,1912,umls,C0085602,Polydipsia,0.0,3.438
4,26,umls,C0015230,Rash,0.001,3.194
5,293,umls,C0086525,Lassitude,0.001,2.869
6,504,umls,C3665347,Visual impairment,0.001,3.098
7,568,umls,C0018939,Blood disorder,0.001,3.073
8,682,umls,C0013428,Dysuria,0.001,2.953
9,777,umls,C0085623,Akinesia,0.001,2.985


In [14]:
# Request 'phenotype' predictions for pubchem.compound:1983 (Acetaminophen
# according to the echoed query), print the query, then show the table.
query, df = get_predictions_df('pubchem.compound:1983', 'phenotype')
print(json.dumps(query, indent=2))
# Bare last expression so the frame is rendered as the cell output.
df

{
  "entity": {
    "node_id": "1887",
    "namespace": "pubchem.compound",
    "identifier": "1983",
    "name": "Acetaminophen"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,1297,umls,C0018794,Block heart,0.082,1.085
1,1952,umls,C0232197,Cardiac fibrillation,0.082,1.086
2,444,umls,C0039240,Supraventricular tachycardia,0.09,1.046
3,3664,umls,C0684249,Carcinoma of lung,0.09,1.045
4,1318,umls,C0034642,Rales,0.091,1.039
5,925,umls,C0917799,Hypersomnia,0.097,1.014
6,2748,umls,C1760428,Suicidal behaviour,0.098,1.008
7,1366,umls,C0025637,Methaemoglobinaemia,0.102,0.992
8,361,umls,C0340464,Extrasystoles,0.105,0.98
9,362,umls,C0233485,Apprehension,0.106,0.974


In [39]:
# Request 'phenotype' predictions for uniprot:P08588 (ADRB1_HUMAN according
# to the echoed query), print the query, then show the table.
query, df = get_predictions_df('uniprot:P08588', 'phenotype')
print(json.dumps(query, indent=2))
# Bare last expression so the frame is rendered as the cell output.
df

{
  "entity": {
    "node_id": "8733",
    "namespace": "uniprot",
    "identifier": "P08588",
    "name": "ADRB1_HUMAN"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,330,umls,C0004245,Atrioventricular block,0.005,2.283
1,858,umls,C0277925,Peripheral coldness,0.005,2.323
2,1297,umls,C0018794,Block heart,0.007,2.147
3,361,umls,C0340464,Extrasystoles,0.008,2.125
4,1879,umls,C0007398,Catatonia,0.008,2.105
5,341,umls,C0039239,Sinus tachycardia,0.011,1.947
6,7667,umls,C0008677,Bronchitis chronic,0.011,1.969
7,1894,umls,C0152032,Urinary hesitation,0.012,1.908
8,336,umls,C0033036,Supraventricular extrasystoles,0.014,1.865
9,3919,umls,C0034067,Emphysema,0.014,1.847


In [16]:
# Request 'chemical' predictions for uniprot:P22303 (ACES_HUMAN according to
# the echoed query), print the query, then show the table.
query, df = get_predictions_df('uniprot:P22303', 'chemical')
print(json.dumps(query, indent=2))
# Bare last expression so the frame is rendered as the cell output.
df

{
  "entity": {
    "node_id": "8670",
    "namespace": "uniprot",
    "identifier": "P22303",
    "name": "ACES_HUMAN"
  },
  "k": 10,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,13877,pubchem.compound,15433,Lauryl Dimethylamine-N-Oxide,0.001,2.908
1,10279,pubchem.compound,3601,Hexafluronium,0.002,2.652
2,10938,pubchem.compound,2681,Cetrimonium,0.002,2.607
3,12448,pubchem.compound,8153,Dodecane-Trimethylamine,0.002,2.768
4,17053,pubchem.compound,3604,Hexamethonium,0.003,2.501
5,14193,pubchem.compound,249,"[Formylmethyl]Trimethyl-Ammonium, N,N,N-Trimet...",0.004,2.348
6,11134,pubchem.compound,16028,Tetrabutylammonium Ion,0.006,2.218
7,13991,pubchem.compound,16958,Didecyldimethylammonium,0.006,2.252
8,15676,pubchem.compound,4112111,"UNDECYLAMINE-N,N-DIMETHYL-N-OXIDE",0.006,2.213
9,17062,pubchem.compound,10240,Captodiame,0.006,2.251


In [51]:
# Request 'chemical' predictions for umls:C0030567 (Parkinson's disease
# according to the echoed query), print the query, then show the table.
query, df = get_predictions_df("umls:C0030567", 'chemical')
print(json.dumps(query, indent=2))
# Bare last expression so the frame is rendered as the cell output.
df

{
  "entity": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease"
  },
  "k": 10,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,5232,pubchem.compound,5002,Quetiapine,0.004,2.406
1,15935,pubchem.compound,3000493,Talviraline,0.007,2.139
2,2223,pubchem.compound,2182,Anagrelide,0.01,2.004
3,6150,pubchem.compound,54840,Atomoxetine HCL,0.01,1.998
4,3534,pubchem.compound,3016,Diazepam,0.011,1.974
5,4499,pubchem.compound,3964,Loxapine,0.011,1.956
6,4451,pubchem.compound,72287,Methotrimeprazine,0.017,1.766
7,4712,pubchem.compound,3085218,Esmirtazapine,0.02,1.695
8,3988,pubchem.compound,3083544,Arformoterol,0.021,1.669
9,2439,pubchem.compound,60657,Levobetaxolol,0.021,1.669


In [18]:
# Request 'phenotype' predictions for pubchem.compound:6234 (Cycloserine
# according to the echoed query), print the query, then show the table.
query, df = get_predictions_df('pubchem.compound:6234', 'phenotype')
print(json.dumps(query, indent=2))
# Bare last expression so the frame is rendered as the cell output; the
# trailing comment below does not change that.
df
# Literature reference: PMID 29241812

{
  "entity": {
    "node_id": "635",
    "namespace": "pubchem.compound",
    "identifier": "6234",
    "name": "Cycloserine"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,1310,umls,C0599918,Nephrotoxicity,0.012,1.904
1,175,umls,C0746883,Febrile neutropenia,0.013,1.875
2,1309,umls,C0595916,Nephropathy toxic,0.013,1.889
3,1115,umls,C0239134,Productive cough,0.027,1.572
4,662,umls,C0476280,Musculoskeletal chest pain,0.03,1.526
5,1604,umls,C0231856,Breath sounds abnormal,0.034,1.473
6,1244,umls,C0004610,Bacteraemia,0.036,1.44
7,1629,umls,C0023467,Acute myeloid leukaemia,0.037,1.438
8,274,umls,C0030232,Pallor,0.049,1.313
9,1795,umls,C0948976,Leukaemia cutis,0.05,1.302


In [19]:
# Drug -> disease pair queried by node id; the record is printed as indented
# JSON.
r = predictor.find_new_relation(
    source_id='2071', # Amantadine
    target_id='2248', # Parkinson's disease
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 21654146

{
  "source": {
    "node_id": "2071",
    "namespace": "pubchem.compound",
    "identifier": "2130",
    "name": "Amantadine"
  },
  "target": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease"
  },
  "p": 0.179,
  "mlp": 0.747
}


In [20]:
# Drug -> disease pair queried by node id; the record is printed as indented
# JSON.
r = predictor.find_new_relation(
    source_id='5346', # Ropinirole
    target_id='1348', # Restless legs syndrome
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 21654146

{
  "source": {
    "node_id": "5346",
    "namespace": "pubchem.compound",
    "identifier": "5095",
    "name": "Ropinirole"
  },
  "target": {
    "node_id": "1348",
    "namespace": "umls",
    "identifier": "C0035258",
    "name": "Restless legs syndrome"
  },
  "p": 0.005,
  "mlp": 2.343
}


In [21]:
# Drug -> disease pair queried by node id; the record is printed as indented
# JSON.
r = predictor.find_new_relation(
    source_id='3627', # Disulfiram
    target_id='2318', # Malignant melanoma
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 21654146

{
  "source": {
    "node_id": "3627",
    "namespace": "pubchem.compound",
    "identifier": "3117",
    "name": "Disulfiram"
  },
  "target": {
    "node_id": "2318",
    "namespace": "umls",
    "identifier": "C0025202",
    "name": "Malignant melanoma"
  },
  "p": 0.717,
  "mlp": 0.144
}


In [22]:
# Pair queried by node id; the record is printed as indented JSON.
# NOTE(review): the recorded output resolves node 17528 to "PMYT1_HUMAN"
# (uniprot:Q99640), a protein, not Brigatinib as the inline comment states.
# Confirm the intended node id before relying on this example.
r = predictor.find_new_relation(
    source_id='17528', # Brigatinib (output shows PMYT1_HUMAN; see note above)
    target_id='5148', # Colorectal cancer
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 31410188

{
  "source": {
    "node_id": "17528",
    "namespace": "uniprot",
    "identifier": "Q99640",
    "name": "PMYT1_HUMAN"
  },
  "target": {
    "node_id": "5148",
    "namespace": "umls",
    "identifier": "C1527249",
    "name": "Colorectal cancer"
  },
  "p": 0.991,
  "mlp": 0.004
}


In [23]:
# Drug -> disease pair queried by node id; the record is printed as indented
# JSON.
r = predictor.find_new_relation(
    source_id='6995', # Dasatinib
    target_id='1179', # Diffuse large B-cell lymphoma
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 31383760

{
  "source": {
    "node_id": "6995",
    "namespace": "pubchem.compound",
    "identifier": "3062316",
    "name": "Dasatinib"
  },
  "target": {
    "node_id": "1179",
    "namespace": "umls",
    "identifier": "C0079744",
    "name": "Diffuse large B-cell lymphoma"
  },
  "p": 0.796,
  "mlp": 0.099
}


In [24]:
# Drug -> disease pair queried by node id; the record is printed as indented
# JSON.
r = predictor.find_new_relation(
    source_id='5265', # Ribavirin
    target_id='947', # Candida infection
)
print(json.dumps(r, indent=2))
# Literature reference for this pair: PMID 31307986

{
  "source": {
    "node_id": "5265",
    "namespace": "pubchem.compound",
    "identifier": "37542",
    "name": "Ribavirin"
  },
  "target": {
    "node_id": "947",
    "namespace": "umls",
    "identifier": "C0006840",
    "name": "Candida infection"
  },
  "p": 0.0,
  "mlp": 3.392
}
