# Using the Prediction Model

## Environment

In [1]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from se_kge.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_MODEL_PATH,
)

In [2]:
# Record the Python interpreter version this notebook was executed with.
print(sys.version)

3.7.2 (tags/v3.7.2:9a3ffc0492, Dec 23 2018, 23:09:28) [MSC v.1916 64 bit (AMD64)]


In [3]:
# Record the local wall-clock time of this run.
print(time.asctime())

Mon Sep  9 11:55:23 2019


In [4]:
# Record which OS user executed the notebook.
# NOTE(review): the saved output exposes a local username; consider clearing
# it before publishing the notebook.
print(getpass.getuser())

RanaAldisi


# Loading the Data

In [5]:
from se_kge.default_predictor import predictor

In [6]:
# Print the default resource paths imported from se_kge.constants.
# NOTE(review): presumably these are the files the default predictor was
# loaded from; this cell only prints the constants - confirm.
print(f"""Loaded default predictor using paths:

embeddings: {DEFAULT_EMBEDDINGS_PATH}
graph:      {DEFAULT_GRAPH_PATH}
model:      {DEFAULT_MODEL_PATH}
mapping:    {DEFAULT_MAPPING_PATH}
""")

Loaded default predictor using paths:

embeddings: c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\0809_line_emb.embeddings
graph:      c:\users\rana aldisi\documents\github\se_kge\resources\basic_graphs\fullgraph_with_chemsim.edgelist
model:      c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\0809_line_model.pkl
mapping:    c:\users\rana aldisi\documents\github\se_kge\resources\mapping\fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## Side effect - target association

In [7]:
# Query a single candidate pair by node name: protein EGFR_HUMAN vs. the
# side effect "Papulopustular rash"; the result is printed as JSON.
r = predictor.find_new_relation(
    source_name='EGFR_HUMAN',
    target_name='Papulopustular rash',
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 18165622

{
  "source": {
    "node_id": "9587",
    "namespace": "uniprot",
    "identifier": "P00533",
    "name": "EGFR_HUMAN"
  },
  "target": {
    "node_id": "6791",
    "namespace": "umls",
    "identifier": "C2609319",
    "name": "Papulopustular rash"
  },
  "p": 0.996,
  "mlp": 0.002
}


In [8]:
# Query a single candidate pair by graph node id (instead of by name).
r = predictor.find_new_relation(
    source_id='9451', # Histamine receptor H1 (HRH1_HUMAN)
    target_id='331', # Drowsiness
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 26626077

{
  "source": {
    "node_id": "9451",
    "namespace": "uniprot",
    "identifier": "P35367",
    "name": "HRH1_HUMAN"
  },
  "target": {
    "node_id": "331",
    "namespace": "umls",
    "identifier": "C0013144",
    "name": "Drowsiness"
  },
  "p": 0.934,
  "mlp": 0.029
}


In [9]:
# Query a single protein / side-effect pair by graph node id.
r = predictor.find_new_relation(
    source_id='9325', # SC6A2_HUMAN
    target_id='56', # Tachycardia
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 30952858

{
  "source": {
    "node_id": "9325",
    "namespace": "uniprot",
    "identifier": "P23975",
    "name": "SC6A2_HUMAN"
  },
  "target": {
    "node_id": "56",
    "namespace": "umls",
    "identifier": "C0039231",
    "name": "Tachycardia"
  },
  "p": 0.999,
  "mlp": 0.0
}


In [10]:
# Query a single protein / side-effect pair by graph node id.
r = predictor.find_new_relation(
    source_id='8670', # ACES_HUMAN
    target_id='309', # Bradycardia
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 30952858

{
  "source": {
    "node_id": "8670",
    "namespace": "uniprot",
    "identifier": "P22303",
    "name": "ACES_HUMAN"
  },
  "target": {
    "node_id": "309",
    "namespace": "umls",
    "identifier": "C0428977",
    "name": "Bradycardia"
  },
  "p": 0.999,
  "mlp": 0.0
}


## Drug - side effect association

In [11]:
# Query a single drug / side-effect pair by graph node id.
r = predictor.find_new_relation(
    source_id='3534',  # Diazepam
    target_id='670',  # Libido decreased
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29888057

{
  "source": {
    "node_id": "3534",
    "namespace": "pubchem.compound",
    "identifier": "3016",
    "name": "Diazepam"
  },
  "target": {
    "node_id": "670",
    "namespace": "umls",
    "identifier": "C0011124",
    "name": "Libido decreased"
  },
  "p": 0.003,
  "mlp": 2.499
}


In [12]:
# Query a single drug / side-effect pair by graph node id.
# NOTE(review): the source was originally labelled "Cytarabine", but the
# recorded output shows node 1148 resolves to Cytidine
# (pubchem.compound:6175). If cytarabine was intended, the node id (and the
# recorded result) must be updated - confirm against the cited paper.
r = predictor.find_new_relation(
    source_id='1148',  # Cytidine (pubchem.compound:6175)
    target_id='1149',  # Anaemia megaloblastic
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 23157436

{
  "source": {
    "node_id": "1148",
    "namespace": "pubchem.compound",
    "identifier": "6175",
    "name": "Cytidine"
  },
  "target": {
    "node_id": "1149",
    "namespace": "umls",
    "identifier": "C0002888",
    "name": "Anaemia megaloblastic"
  },
  "p": 0.085,
  "mlp": 1.068
}


## Drug - target association

In [13]:
# Query a single drug / protein-target pair by graph node id.
r = predictor.find_new_relation(
    source_id='14672',  # Sertindole
    target_id='9350',   # CHRM1 receptor (ACM1_HUMAN, uniprot:P11229)
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29942259

{
  "source": {
    "node_id": "14672",
    "namespace": "pubchem.compound",
    "identifier": "60149",
    "name": "Sertindole"
  },
  "target": {
    "node_id": "9350",
    "namespace": "uniprot",
    "identifier": "P11229",
    "name": "ACM1_HUMAN"
  },
  "p": 0.506,
  "mlp": 0.296
}


# Example of predicting relations using node2vec model and embeddings

In [14]:
def get_predictions_df(curie, results_type=None, k=30):
    """Fetch the predicted relations for one node as a tidy DataFrame.

    Relies on the module-level ``predictor`` object and forwards the
    arguments to ``predictor.find_new_relations``.

    :param curie: CURIE of the query node, e.g. ``'pubchem.compound:2159'``
        or ``'uniprot:P11229'``.
    :param results_type: Optional filter passed through unchanged as
        ``results_type`` (the notebook uses ``'phenotype'`` and
        ``'chemical'``); ``None`` applies no explicit filter here.
    :param k: Number of predictions to request. Defaults to 30, the value
        that used to be hard-coded.
    :return: A ``(query, predictions_df)`` tuple. ``query`` is the
        ``'query'`` entry of the predictor's response; ``predictions_df``
        holds the ``'predictions'`` entry restricted to the columns
        ``node_id, namespace, identifier, name, p, mlp, novel``.
    """
    columns = ['node_id', 'namespace', 'identifier', 'name', 'p', 'mlp', 'novel']
    results = predictor.find_new_relations(
        node_curie=curie,
        results_type=results_type,
        k=k,
    )
    # Passing ``columns`` to the constructor (rather than indexing afterwards)
    # selects and orders the columns and also yields a well-formed empty frame
    # when the predictor returns no predictions, instead of raising KeyError.
    results_df = pd.DataFrame(results['predictions'], columns=columns)
    return results['query'], results_df

In [15]:
# Phenotype predictions for pubchem.compound:2159 (Amisulpride per the
# recorded query); print the resolved query, then display the table.
query, df = get_predictions_df('pubchem.compound:2159', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "2173",
    "namespace": "pubchem.compound",
    "identifier": "2159",
    "name": "Amisulpride"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp,novel
0,88,umls,C0004936,Mental disorder,0.0,3.432,False
1,103,umls,C0027769,Nervousness,0.0,3.349,False
2,209,umls,C0021053,Immune system disorder,0.0,3.445,False
3,216,umls,C0032961,Pregnancy,0.0,3.324,False
4,224,umls,C0080274,Urinary retention,0.0,3.675,True
5,271,umls,C0020651,Orthostatic hypotension,0.0,3.302,False
6,331,umls,C0013144,Drowsiness,0.0,4.151,True
7,343,umls,C0040479,Torsade de pointes,0.0,4.628,False
8,348,umls,C0085612,Ventricular arrhythmia,0.0,4.597,False
9,366,umls,C0236071,Throat tightness,0.0,3.814,True


In [16]:
# Phenotype predictions for uniprot:P11229 (ACM1_HUMAN per the recorded
# query); print the resolved query, then display the table.
query, df = get_predictions_df('uniprot:P11229', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "9350",
    "namespace": "uniprot",
    "identifier": "P11229",
    "name": "ACM1_HUMAN"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp,novel
0,2181,umls,C0235293,Taste peculiar,0.086,1.065,True
1,848,umls,C0241934,Hypomania,0.151,0.82,True
2,935,umls,C2584688,Testicular swelling,0.152,0.817,True
3,2185,umls,C0424823,Parotid swelling,0.155,0.81,True
4,2200,umls,C0001416,Adenitis,0.258,0.588,True
5,2183,umls,C0264886,Conduction disorder,0.329,0.483,True
6,2186,umls,C0341047,Parotid gland enlargement,0.365,0.438,True
7,2180,umls,C0152198,Accommodation disorder,0.444,0.353,True
8,1535,umls,C0338614,Psychotic episode,0.447,0.35,True
9,3568,umls,C0235238,Cycloplegia,0.455,0.342,True


In [17]:
# Phenotype predictions for uniprot:P08588 (ADRB1_HUMAN per the recorded
# query); print the resolved query, then display the table.
query, df = get_predictions_df('uniprot:P08588', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "8733",
    "namespace": "uniprot",
    "identifier": "P08588",
    "name": "ADRB1_HUMAN"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp,novel
0,2471,umls,C0034735,Raynaud's phenomenon,0.077,1.114,True
1,1884,umls,C0701811,Short-term memory loss,0.097,1.014,True
2,1297,umls,C0018794,Block heart,0.112,0.949,True
3,2181,umls,C0235293,Taste peculiar,0.113,0.947,True
4,4348,umls,C0232201,Sinus rhythm,0.12,0.92,True
5,8111,umls,C0154723,Migraine with aura,0.21,0.678,True
6,2479,umls,C0235462,Anginal attack,0.213,0.673,True
7,3619,umls,C0750197,Sustained ventricular tachycardia,0.214,0.669,True
8,1325,umls,C0857121,Hypertensive,0.231,0.637,True
9,7709,umls,C0340288,Stable angina pectoris,0.25,0.602,True


In [18]:
# Chemical predictions for uniprot:P22303 (ACES_HUMAN per the recorded
# query); print the resolved query, then display the table.
query, df = get_predictions_df('uniprot:P22303', 'chemical')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "8670",
    "namespace": "uniprot",
    "identifier": "P22303",
    "name": "ACES_HUMAN"
  },
  "k": 30,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp,novel
0,17717,pubchem.compound,167159,Ferrous glycine sulfate,0.004,2.391,True
1,13415,pubchem.compound,6604901,N-Ethyl-5'-Carboxamido Adenosine,0.006,2.204,True
2,11601,pubchem.compound,12085802,Chlorophyll A,0.011,1.953,True
3,16656,pubchem.compound,24871491,[4-({5-(AMINOCARBONYL)-4-[(3-METHYLPHENYL)AMIN...,0.012,1.932,True
4,16874,pubchem.compound,2737071,3-(4-nitrophenyl)-1H-pyrazole,0.015,1.823,True
5,11180,pubchem.compound,445999,"1,4-dithio-beta-D-glucopyranose",0.021,1.679,True
6,13136,pubchem.compound,1174,Uracil,0.024,1.617,True
7,16123,pubchem.compound,25138280,4-[(2-{4-[(CYCLOPROPYLCARBAMOYL)AMINO]-1H-PYRA...,0.029,1.542,True
8,10419,pubchem.compound,2131,Ambenonium,0.029,1.544,False
9,14335,pubchem.compound,446622,"2,6-Diamino-8-(1h-Imidazol-2-Ylsulfanylmethyl)...",0.03,1.519,True


In [19]:
# Chemical predictions for uniprot:Q9UBN7 (HDAC6_HUMAN per the recorded
# query); print the resolved query, then display the table.
query, df = get_predictions_df('uniprot:Q9UBN7', 'chemical')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "12164",
    "namespace": "uniprot",
    "identifier": "Q9UBN7",
    "name": "HDAC6_HUMAN"
  },
  "k": 30,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp,novel
0,4011,pubchem.compound,57515973,Romidepsin,0.0,4.444,False
1,1148,pubchem.compound,6175,Cytidine,0.001,3.1,True
2,5451,pubchem.compound,5311,Vorinostat,0.001,3.099,False
3,1796,pubchem.compound,1875,AC1L1CFF,0.001,2.835,True
4,3562,pubchem.compound,3040,"6-({[3-(2,6-dichlorophenyl)-5-methyl-1,2-oxazo...",0.001,3.22,True
5,5722,pubchem.compound,31307,Triamcinolone,0.001,2.88,True
6,7180,pubchem.compound,6918638,Belinostat,0.001,3.282,False
7,5382,pubchem.compound,18283,Stavudine,0.002,2.66,True
8,14727,pubchem.compound,6918837,Panobinostat,0.002,2.632,False
9,4064,pubchem.compound,60750,Gemcitabine,0.003,2.534,True


In [20]:
# Chemical predictions for umls:C0030567 (Parkinson's disease per the
# recorded query); print the resolved query, then display the table.
query, df = get_predictions_df("umls:C0030567", 'chemical')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease"
  },
  "k": 30,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp,novel
0,2503,pubchem.compound,31101,Bromocriptine,0.0,4.192,False
1,5232,pubchem.compound,5002,Quetiapine,0.0,5.14,True
2,6553,pubchem.compound,115237,Paliperidone,0.0,3.572,False
3,2173,pubchem.compound,2159,Amisulpride,0.0,3.409,True
4,6537,pubchem.compound,104778,Duodopa,0.0,3.663,False
5,2750,pubchem.compound,34359,Carbidopa,0.0,4.808,False
6,3272,pubchem.compound,2801,Clomipramine,0.0,3.396,False
7,6316,pubchem.compound,60853,138982-67-9,0.0,3.555,True
8,4915,pubchem.compound,4585,Olanzapine,0.0,3.463,False
9,5388,pubchem.compound,26757,Selegiline,0.0,3.578,False


In [21]:
# Phenotype predictions for pubchem.compound:6234 (Cycloserine per the
# recorded query); print the resolved query, then display the table.
query, df = get_predictions_df('pubchem.compound:6234', 'phenotype')
print(json.dumps(query, indent=2))
df
# Literature evidence: PMID 29241812

{
  "entity": {
    "node_id": "635",
    "namespace": "pubchem.compound",
    "identifier": "6234",
    "name": "Cycloserine"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp,novel
0,54,umls,C0036572,Convulsion,0.0,3.325,False
1,331,umls,C0013144,Drowsiness,0.0,3.375,False
2,625,umls,C0018524,Hallucination,0.001,2.953,True
3,640,umls,C0033975,Psychotic disorder,0.001,3.111,False
4,1467,umls,C0002878,Haemolytic anaemia,0.001,2.829,True
5,549,umls,C0011206,Delirium,0.002,2.809,True
6,641,umls,C0151889,Hyperreflexia,0.002,2.817,False
7,797,umls,C0338831,Mania,0.002,2.664,True
8,1317,umls,C0022658,Nephropathy,0.003,2.593,False
9,1395,umls,C0023052,Laryngeal oedema,0.003,2.461,True


In [22]:
# Query a single drug / phenotype pair by graph node id.
r = predictor.find_new_relation(
    source_id='2071', # Amantadine
    target_id='2248', # Parkinson's disease
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146

{
  "source": {
    "node_id": "2071",
    "namespace": "pubchem.compound",
    "identifier": "2130",
    "name": "Amantadine"
  },
  "target": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease"
  },
  "p": 0.002,
  "mlp": 2.701
}


In [23]:
# Query a single drug / phenotype pair by graph node id.
r = predictor.find_new_relation(
    source_id='5346', # Ropinirole
    target_id='1348', # Restless legs syndrome
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146

{
  "source": {
    "node_id": "5346",
    "namespace": "pubchem.compound",
    "identifier": "5095",
    "name": "Ropinirole"
  },
  "target": {
    "node_id": "1348",
    "namespace": "umls",
    "identifier": "C0035258",
    "name": "Restless legs syndrome"
  },
  "p": 0.009,
  "mlp": 2.055
}


In [24]:
# Query a single drug / phenotype pair by graph node id.
r = predictor.find_new_relation(
    source_id='3627', # Disulfiram
    target_id='2318', # Malignant melanoma
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146

{
  "source": {
    "node_id": "3627",
    "namespace": "pubchem.compound",
    "identifier": "3117",
    "name": "Disulfiram"
  },
  "target": {
    "node_id": "2318",
    "namespace": "umls",
    "identifier": "C0025202",
    "name": "Malignant melanoma"
  },
  "p": 0.928,
  "mlp": 0.032
}


In [25]:
# Query a single pair by graph node id.
# NOTE(review): the source was originally labelled "Brigatinib", but the
# recorded output shows node 17528 resolves to the protein PMYT1_HUMAN
# (uniprot:Q99640), not a pubchem.compound entry. If brigatinib was intended,
# the node id (and the recorded result) must be updated - confirm.
r = predictor.find_new_relation(
    source_id='17528', # PMYT1_HUMAN (uniprot:Q99640)
    target_id='5148', # Colorectal cancer
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 31410188

{
  "source": {
    "node_id": "17528",
    "namespace": "uniprot",
    "identifier": "Q99640",
    "name": "PMYT1_HUMAN"
  },
  "target": {
    "node_id": "5148",
    "namespace": "umls",
    "identifier": "C1527249",
    "name": "Colorectal cancer"
  },
  "p": 1.0,
  "mlp": 0.0
}


In [26]:
# Query a single drug / phenotype pair by graph node id.
r = predictor.find_new_relation(
    source_id='6995', # Dasatinib
    target_id='1179', # Diffuse large B-cell lymphoma
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 31383760

{
  "source": {
    "node_id": "6995",
    "namespace": "pubchem.compound",
    "identifier": "3062316",
    "name": "Dasatinib"
  },
  "target": {
    "node_id": "1179",
    "namespace": "umls",
    "identifier": "C0079744",
    "name": "Diffuse large B-cell lymphoma"
  },
  "p": 0.973,
  "mlp": 0.012
}


In [27]:
# Query a single drug / phenotype pair by graph node id.
r = predictor.find_new_relation(
    source_id='5265', # Ribavirin
    target_id='947', # Candida infection
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 31307986

{
  "source": {
    "node_id": "5265",
    "namespace": "pubchem.compound",
    "identifier": "37542",
    "name": "Ribavirin"
  },
  "target": {
    "node_id": "947",
    "namespace": "umls",
    "identifier": "C0006840",
    "name": "Candida infection"
  },
  "p": 0.103,
  "mlp": 0.989
}
