# Using the Prediction Model

## Environment

In [2]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from seffnet.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_PREDICTIVE_MODEL_PATH,
)

In [3]:
# Record the Python interpreter version this notebook was executed with.
print(sys.version)

3.7.4 (default, Aug 13 2019, 11:17:01) 
[GCC 4.8.5 20150623 (Red Hat 4.8.5-36)]


In [4]:
# Record the local wall-clock time of this run.
print(time.asctime())

Thu Oct 24 10:44:09 2019


In [5]:
# Record which OS user executed the notebook.
# NOTE(review): the username is stored in the saved cell output; clear it before sharing if that is a concern.
print(getpass.getuser())

raldisi


# Loading the Data

In [6]:
from seffnet.default_predictor import predictor

In [7]:
# Report the resource paths associated with the default predictor.
print(
    "Loaded default predictor using paths:\n"
    "\n"
    f"embeddings: {DEFAULT_EMBEDDINGS_PATH}\n"
    f"graph:      {DEFAULT_GRAPH_PATH}\n"
    f"model:      {DEFAULT_PREDICTIVE_MODEL_PATH}\n"
    f"mapping:    {DEFAULT_MAPPING_PATH}\n"
)

Loaded default predictor using paths:

embeddings: /home/raldisi/seffnet/resources/embeddings/1209_node2vec_emb.embeddings
graph:      /home/raldisi/seffnet/resources/basic_graphs/fullgraph_with_chemsim.edgelist
model:      /home/raldisi/seffnet/resources/predictive_models/1209_node2vec_predictive_model.pkl
mapping:    /home/raldisi/seffnet/resources/mapping/fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## side effect - target association

In [8]:
# EGFR_HUMAN -> Papulopustular rash, looked up by node name; PMID: 18165622.
r = predictor.find_new_relation(source_name='EGFR_HUMAN', target_name='Papulopustular rash')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "9587",
    "namespace": "uniprot",
    "identifier": "P00533",
    "name": "EGFR_HUMAN"
  },
  "target": {
    "node_id": "6791",
    "namespace": "umls",
    "identifier": "C2609319",
    "name": "Papulopustular rash"
  },
  "lor": 0.853
}


In [9]:
# Histamine receptor H1 (node 9451) -> Drowsiness (node 331); PMID: 26626077.
r = predictor.find_new_relation(source_id='9451', target_id='331')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "9451",
    "namespace": "uniprot",
    "identifier": "P35367",
    "name": "HRH1_HUMAN"
  },
  "target": {
    "node_id": "331",
    "namespace": "umls",
    "identifier": "C0013144",
    "name": "Drowsiness"
  },
  "lor": 0.438
}


In [10]:
# SC6A2 (node 9325) -> Tachycardia (node 56); PMID: 30952858.
r = predictor.find_new_relation(source_id='9325', target_id='56')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "9325",
    "namespace": "uniprot",
    "identifier": "P23975",
    "name": "SC6A2_HUMAN"
  },
  "target": {
    "node_id": "56",
    "namespace": "umls",
    "identifier": "C0039231",
    "name": "Tachycardia"
  },
  "lor": 0.328
}


In [11]:
# ACES_HUMAN (node 8670) -> Bradycardia (node 309); PMID: 30952858.
r = predictor.find_new_relation(source_id='8670', target_id='309')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "8670",
    "namespace": "uniprot",
    "identifier": "P22303",
    "name": "ACES_HUMAN"
  },
  "target": {
    "node_id": "309",
    "namespace": "umls",
    "identifier": "C0428977",
    "name": "Bradycardia"
  },
  "lor": 0.937
}


## drug - side effect association

In [12]:
# Diazepam (node 3534) -> Libido decreased (node 670); PMID: 29888057.
r = predictor.find_new_relation(source_id='3534', target_id='670')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "3534",
    "namespace": "pubchem.compound",
    "identifier": "3016",
    "name": "Diazepam"
  },
  "target": {
    "node_id": "670",
    "namespace": "umls",
    "identifier": "C0011124",
    "name": "Libido decreased"
  },
  "lor": 0.008
}


In [13]:
# Score the association between node 1148 and "Anaemia megaloblastic" (node 1149).
# NOTE(review): this cell was labelled Cytarabine, but the captured output resolves node 1148
# to Cytidine (pubchem.compound:6175) — confirm which compound (and node id) was intended.
r = predictor.find_new_relation(
    source_id='1148',  # labelled Cytarabine; resolves to Cytidine in the output
    target_id='1149',  # Anaemia megaloblastic
)
print(json.dumps(r, indent=2))
# PMID: 23157436

{
  "source": {
    "node_id": "1148",
    "namespace": "pubchem.compound",
    "identifier": "6175",
    "name": "Cytidine"
  },
  "target": {
    "node_id": "1149",
    "namespace": "umls",
    "identifier": "C0002888",
    "name": "Anaemia megaloblastic"
  },
  "lor": 0.198
}


## drug-target association

In [14]:
# Sertindole (node 14672) -> CHRM1 receptor (node 9350); PMID: 29942259.
r = predictor.find_new_relation(source_id='14672', target_id='9350')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "14672",
    "namespace": "pubchem.compound",
    "identifier": "60149",
    "name": "Sertindole"
  },
  "target": {
    "node_id": "9350",
    "namespace": "uniprot",
    "identifier": "P11229",
    "name": "ACM1_HUMAN"
  },
  "lor": 0.051
}


# Example of predicting relations using node2vec model and embeddings

In [18]:
def get_predictions_df(curie, results_type=None, k=30):
    """Query the default predictor for new relations of a node and tabulate them.

    :param curie: CURIE of the query node, e.g. ``'pubchem.compound:2159'``;
        forwarded as ``node_curie`` to ``predictor.find_new_relations``.
    :param results_type: Forwarded unchanged as ``results_type``
        (this notebook uses ``'phenotype'`` and ``'chemical'``).
    :param k: Forwarded unchanged as ``k``; defaults to 30, the value that was
        previously hard-coded. Presumably the number of predictions requested —
        confirm against ``find_new_relations``.
    :return: A pair ``(query, predictions_df)``: ``results['query']`` as returned
        by the predictor, and a DataFrame of ``results['predictions']`` restricted
        to the columns node_id, namespace, identifier, name, lor, novel.
    """
    columns = ['node_id', 'namespace', 'identifier', 'name', 'lor', 'novel']
    results = predictor.find_new_relations(
        node_curie=curie,
        results_type=results_type,
        k=k,
    )
    predictions = results['predictions']
    if predictions:
        results_df = pd.DataFrame(predictions)[columns]
    else:
        # A frame built from no records has no columns, so selecting them would
        # raise KeyError; return an empty frame with the expected columns instead.
        results_df = pd.DataFrame(columns=columns)
    return results['query'], results_df

In [19]:
# Phenotype predictions for pubchem.compound:2159.
query, df = get_predictions_df(curie='pubchem.compound:2159', results_type='phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "2173",
    "namespace": "pubchem.compound",
    "identifier": "2159",
    "name": "Amisulpride"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,27,umls,C0011603,Dermatitis,0.001,True
1,831,umls,C0235146,Euphoric mood,0.001,True
2,331,umls,C0013144,Drowsiness,0.002,True
3,427,umls,C0026961,Mydriasis,0.002,True
4,818,umls,C0233477,Dysphoria,0.002,True
5,852,umls,C0242422,Parkinsonism,0.002,True
6,734,umls,C0028084,Nightmare,0.002,True
7,797,umls,C0338831,Mania,0.002,True
8,26,umls,C0015230,Rash,0.003,True
9,182,umls,C0009676,Confusional state,0.003,True


In [20]:
# Phenotype predictions for pubchem.compound:4585.
query, df = get_predictions_df(curie='pubchem.compound:4585', results_type='phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "4915",
    "namespace": "pubchem.compound",
    "identifier": "4585",
    "name": "Olanzapine"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,1539,umls,C0006384,Bundle branch block,0.0,True
1,1710,umls,C0575090,Balance disorder,0.0,True
2,1210,umls,C0878544,Cardiomyopathy,0.001,True
3,826,umls,C0233794,Memory impairment,0.001,True
4,1714,umls,C0004239,Atrial flutter,0.001,True
5,795,umls,C0160390,Liver injury,0.001,True
6,1289,umls,C0020676,Hypothyroidism,0.001,True
7,403,umls,C0002884,Hypochromic anaemia,0.001,True
8,1599,umls,C0034069,Pulmonary fibrosis,0.001,True
9,818,umls,C0233477,Dysphoria,0.001,True


In [21]:
# Phenotype predictions for uniprot:P08172.
query, df = get_predictions_df(curie='uniprot:P08172', results_type='phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "9429",
    "namespace": "uniprot",
    "identifier": "P08172",
    "name": "ACM2_HUMAN"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,680,umls,C0013384,Dyskinesia,0.009,True
1,687,umls,C0015371,Extrapyramidal disorder,0.013,True
2,728,umls,C0026837,Muscle rigidity,0.015,True
3,2146,umls,C0234133,Extrapyramidal symptoms,0.022,True
4,427,umls,C0026961,Mydriasis,0.023,True
5,331,umls,C0013144,Drowsiness,0.025,True
6,292,umls,C0085631,Agitation,0.025,True
7,908,umls,C0686347,Tardive dyskinesia,0.038,True
8,852,umls,C0242422,Parkinsonism,0.044,True
9,462,umls,C0235063,Respiratory depression,0.053,True


In [22]:
# Render the current `df` as a LaTeX table.
# NOTE: `df` is whatever the most recently executed get_predictions_df cell assigned
# (uniprot:P08172 when the notebook is run top to bottom).
print(df.to_latex())

\begin{tabular}{lllllrl}
\toprule
{} & node\_id & namespace & identifier &                               name &    lor &  novel \\
\midrule
0  &     680 &      umls &   C0013384 &                         Dyskinesia &  0.009 &   True \\
1  &     687 &      umls &   C0015371 &            Extrapyramidal disorder &  0.013 &   True \\
2  &     728 &      umls &   C0026837 &                    Muscle rigidity &  0.015 &   True \\
3  &    2146 &      umls &   C0234133 &            Extrapyramidal symptoms &  0.022 &   True \\
4  &     427 &      umls &   C0026961 &                          Mydriasis &  0.023 &   True \\
5  &     331 &      umls &   C0013144 &                         Drowsiness &  0.025 &   True \\
6  &     292 &      umls &   C0085631 &                          Agitation &  0.025 &   True \\
7  &     908 &      umls &   C0686347 &                 Tardive dyskinesia &  0.038 &   True \\
8  &     852 &      umls &   C0242422 &                       Parkinsonism &  0.044 &   True

In [23]:
# Phenotype predictions for uniprot:P08588.
query, df = get_predictions_df(curie='uniprot:P08588', results_type='phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "8733",
    "namespace": "uniprot",
    "identifier": "P08588",
    "name": "ADRB1_HUMAN"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,218,umls,C0040822,Tremor,0.001,True
1,331,umls,C0013144,Drowsiness,0.003,True
2,851,umls,C0242350,Erectile dysfunction,0.006,True
3,1500,umls,C0233571,Excitement,0.006,True
4,734,umls,C0028084,Nightmare,0.009,True
5,164,umls,C0015672,Fatigue,0.01,True
6,181,umls,C0036974,Shock,0.01,True
7,1324,umls,C0234192,Feeling cold,0.012,True
8,316,umls,C2830004,Somnolence,0.014,True
9,427,umls,C0026961,Mydriasis,0.014,True


In [24]:
# Phenotype predictions for uniprot:P22303.
query, df = get_predictions_df(curie='uniprot:P22303', results_type='phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "8670",
    "namespace": "uniprot",
    "identifier": "P22303",
    "name": "ACES_HUMAN"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,45,umls,C0027497,Nausea,0.002,True
1,109,umls,C0037274,Skin disorder,0.024,True
2,26,umls,C0015230,Rash,0.026,True
3,11,umls,C0004093,Asthenia,0.029,True
4,209,umls,C0021053,Immune system disorder,0.038,True
5,129,umls,C0178298,Unspecified disorder of skin and subcutaneous ...,0.054,True
6,18,umls,C0009806,Constipation,0.097,True
7,2,umls,C0000729,Abdominal cramps,0.179,True
8,260,umls,C0009782,Connective tissue disorder,0.225,True
9,66,umls,C0151735,Injection site reaction,0.25,True


In [25]:
# Chemical predictions for uniprot:Q9UBN7.
query, df = get_predictions_df(curie='uniprot:Q9UBN7', results_type='chemical')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "12164",
    "namespace": "uniprot",
    "identifier": "Q9UBN7",
    "name": "HDAC6_HUMAN"
  },
  "k": 30,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,3187,pubchem.compound,1548953,Enclomiphene,0.039,True
1,3042,pubchem.compound,146570,Escitalopram,0.439,True
2,1616,pubchem.compound,1690,AC1L1C0O,0.463,True
3,5727,pubchem.compound,5565,Triethylenetetramine,0.551,True
4,6633,pubchem.compound,5486971,Pregabalin,0.552,True
5,3799,pubchem.compound,3333,Felodipine,0.579,True
6,3562,pubchem.compound,3040,"6-({[3-(2,6-dichlorophenyl)-5-methyl-1,2-oxazo...",0.624,True
7,3857,pubchem.compound,3355,AC1L1FQW,0.63,True
8,5382,pubchem.compound,18283,Stavudine,0.633,True
9,4507,pubchem.compound,4030,Mebendazole,0.644,True


In [26]:
# Chemical predictions for umls:C0030567.
query, df = get_predictions_df(curie='umls:C0030567', results_type='chemical')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease"
  },
  "k": 30,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,3042,pubchem.compound,146570,Escitalopram,0.0,True
1,5232,pubchem.compound,5002,Quetiapine,0.001,True
2,6633,pubchem.compound,5486971,Pregabalin,0.001,True
3,5391,pubchem.compound,68617,Sertraline,0.002,True
4,5799,pubchem.compound,5719,Zaleplon,0.002,True
5,6316,pubchem.compound,60853,138982-67-9,0.002,True
6,3811,pubchem.compound,3345,Fentanyl,0.002,True
7,5401,pubchem.compound,5210,Sibutramine,0.002,True
8,2365,pubchem.compound,44602,Arbaclofen,0.003,True
9,4647,pubchem.compound,154101,Dexmethylphenidate,0.003,True


In [27]:
# Phenotype predictions for pubchem.compound:5095; PMID: 29241812.
query, df = get_predictions_df(curie='pubchem.compound:5095', results_type='phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "5346",
    "namespace": "pubchem.compound",
    "identifier": "5095",
    "name": "Ropinirole"
  },
  "k": 30,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,5,umls,C0002418,Amblyopia,0.0,True
1,1210,umls,C0878544,Cardiomyopathy,0.0,True
2,225,umls,C0151315,Neck stiffness,0.0,True
3,227,umls,C1320474,Nuchal rigidity,0.0,True
4,278,umls,C0032326,Pneumothorax,0.0,True
5,419,umls,C0020557,Hypertriglyceridaemia,0.0,True
6,1276,umls,C0151883,Vesiculobullous rash,0.0,True
7,1282,umls,C0836924,Thrombocytosis,0.0,True
8,346,umls,C0085606,Micturition urgency,0.0,True
9,1714,umls,C0004239,Atrial flutter,0.0,True


In [28]:
# Amantadine (node 2071) -> Parkinson's disease (node 2248); PMID: 21654146.
r = predictor.find_new_relation(source_id='2071', target_id='2248')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "2071",
    "namespace": "pubchem.compound",
    "identifier": "2130",
    "name": "Amantadine"
  },
  "target": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease"
  },
  "lor": 0.014
}


In [29]:
# Ropinirole (node 5346) -> Restless legs syndrome (node 1348); PMID: 21654146.
r = predictor.find_new_relation(source_id='5346', target_id='1348')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "5346",
    "namespace": "pubchem.compound",
    "identifier": "5095",
    "name": "Ropinirole"
  },
  "target": {
    "node_id": "1348",
    "namespace": "umls",
    "identifier": "C0035258",
    "name": "Restless legs syndrome"
  },
  "lor": 0.002
}


In [30]:
# Disulfiram (node 3627) -> Malignant melanoma (node 2318); PMID: 21654146.
r = predictor.find_new_relation(source_id='3627', target_id='2318')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "3627",
    "namespace": "pubchem.compound",
    "identifier": "3117",
    "name": "Disulfiram"
  },
  "target": {
    "node_id": "2318",
    "namespace": "umls",
    "identifier": "C0025202",
    "name": "Malignant melanoma"
  },
  "lor": 0.886
}


In [31]:
# Score the association between node 17528 and "Colorectal cancer" (node 5148).
# NOTE(review): this cell was labelled Brigatinib, but the captured output resolves node 17528
# to PMYT1_HUMAN (uniprot:Q99640), a protein rather than a compound — confirm the intended node id.
r = predictor.find_new_relation(
    source_id='17528', # labelled Brigatinib; resolves to PMYT1_HUMAN in the output
    target_id='5148', # Colorectal cancer
)
print(json.dumps(r, indent=2))
# PMID: 31410188

{
  "source": {
    "node_id": "17528",
    "namespace": "uniprot",
    "identifier": "Q99640",
    "name": "PMYT1_HUMAN"
  },
  "target": {
    "node_id": "5148",
    "namespace": "umls",
    "identifier": "C1527249",
    "name": "Colorectal cancer"
  },
  "lor": 0.998
}


In [32]:
# Dasatinib (node 6995) -> Diffuse large B-cell lymphoma (node 1179); PMID: 31383760.
r = predictor.find_new_relation(source_id='6995', target_id='1179')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "6995",
    "namespace": "pubchem.compound",
    "identifier": "3062316",
    "name": "Dasatinib"
  },
  "target": {
    "node_id": "1179",
    "namespace": "umls",
    "identifier": "C0079744",
    "name": "Diffuse large B-cell lymphoma"
  },
  "lor": 0.536
}


In [33]:
# Ribavirin (node 5265) -> Candida infection (node 947); PMID: 31307986.
r = predictor.find_new_relation(source_id='5265', target_id='947')
print(json.dumps(r, indent=2))

{
  "source": {
    "node_id": "5265",
    "namespace": "pubchem.compound",
    "identifier": "37542",
    "name": "Ribavirin"
  },
  "target": {
    "node_id": "947",
    "namespace": "umls",
    "identifier": "C0006840",
    "name": "Candida infection"
  },
  "lor": 0.025
}
