# Using the Prediction Model

## Environment

In [1]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from seffnet.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_PREDICTIVE_MODEL_PATH,
)
from seffnet.literature import query_europe_pmc

In [2]:
# Record the Python interpreter version this notebook was executed with.
print(sys.version)

3.7.4 (default, Aug 13 2019, 11:17:01) 
[GCC 4.8.5 20150623 (Red Hat 4.8.5-36)]


In [3]:
# Record the local date and time at which this notebook was executed.
print(time.asctime())

Mon Nov  4 14:23:17 2019


In [4]:
# Record the login name of the user who executed this notebook.
print(getpass.getuser())

raldisi


# Loading the Data

In [5]:
from seffnet.default_predictor import predictor

In [6]:
# Print the default resource locations exported by seffnet.constants.
# NOTE(review): presumably these are the files the default predictor was
# built from - confirm against seffnet.default_predictor.
print(f"""Loaded default predictor using paths:

embeddings: {DEFAULT_EMBEDDINGS_PATH}
graph:      {DEFAULT_GRAPH_PATH}
model:      {DEFAULT_PREDICTIVE_MODEL_PATH}
mapping:    {DEFAULT_MAPPING_PATH}
""")

Loaded default predictor using paths:

embeddings: /home/raldisi/seffnet/resources/embeddings/0411_weighted_node2vec_emb.embeddings
graph:      /home/raldisi/seffnet/resources/basic_graphs/fullgraph_with_chemsim.edgelist
model:      /home/raldisi/seffnet/resources/predictive_models/0411_weighted_node2vec_predictive_model.pkl
mapping:    /home/raldisi/seffnet/resources/mapping/fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## Side effect - target association

In [7]:
# Score the EGFR_HUMAN -> "Papulopustular rash" pair, addressing both nodes
# by name rather than by graph node id.
r = predictor.find_new_relation(
    source_name='EGFR_HUMAN',
    target_name='Papulopustular rash',
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 18165622

{
  "source": {
    "node_id": "9587",
    "namespace": "uniprot",
    "identifier": "P00533",
    "name": "EGFR_HUMAN",
    "entity_type": "target"
  },
  "target": {
    "node_id": "6791",
    "namespace": "umls",
    "identifier": "C2609319",
    "name": "Papulopustular rash",
    "entity_type": "phenotype"
  },
  "lor": 0.841
}


In [8]:
# Score the HRH1_HUMAN -> Drowsiness pair, addressing both nodes by their
# graph node ids.
r = predictor.find_new_relation(
    source_id='9451', # Histamine receptor H1
    target_id='331', # Drowsiness
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 26626077

{
  "source": {
    "node_id": "9451",
    "namespace": "uniprot",
    "identifier": "P35367",
    "name": "HRH1_HUMAN",
    "entity_type": "target"
  },
  "target": {
    "node_id": "331",
    "namespace": "umls",
    "identifier": "C0013144",
    "name": "Drowsiness",
    "entity_type": "phenotype"
  },
  "lor": 0.009
}


In [9]:
# Score the SC6A2_HUMAN -> Tachycardia pair by graph node id.
r = predictor.find_new_relation(
    source_id='9325', # SC6A2
    target_id='56', # Tachycardia
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 30952858

{
  "source": {
    "node_id": "9325",
    "namespace": "uniprot",
    "identifier": "P23975",
    "name": "SC6A2_HUMAN",
    "entity_type": "target"
  },
  "target": {
    "node_id": "56",
    "namespace": "umls",
    "identifier": "C0039231",
    "name": "Tachycardia",
    "entity_type": "phenotype"
  },
  "lor": 0.14
}


In [10]:
# Score the ACES_HUMAN -> Bradycardia pair by graph node id.
r = predictor.find_new_relation(
    source_id='8670', # ACES_HUMAN
    target_id='309', # Bradycardia
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 30952858

{
  "source": {
    "node_id": "8670",
    "namespace": "uniprot",
    "identifier": "P22303",
    "name": "ACES_HUMAN",
    "entity_type": "target"
  },
  "target": {
    "node_id": "309",
    "namespace": "umls",
    "identifier": "C0428977",
    "name": "Bradycardia",
    "entity_type": "phenotype"
  },
  "lor": 0.445
}


## Drug - side effect association

In [11]:
# Score the Diazepam -> "Libido decreased" pair by graph node id.
r = predictor.find_new_relation(
    source_id='3534',  # diazepam
    target_id='670',  # Libido decreased
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29888057

{
  "source": {
    "node_id": "3534",
    "namespace": "pubchem.compound",
    "identifier": "3016",
    "name": "Diazepam",
    "entity_type": "approved drug"
  },
  "target": {
    "node_id": "670",
    "namespace": "umls",
    "identifier": "C0011124",
    "name": "Libido decreased",
    "entity_type": "phenotype"
  },
  "lor": 0.038
}


In [12]:
# NOTE(review): node 1148 resolves to Cytidine (pubchem.compound:6175, an
# "experimental drug") in the recorded output, not to Cytarabine as this cell
# was originally labelled. Confirm which compound was intended; the cited
# PMID may describe cytarabine rather than cytidine.
r = predictor.find_new_relation(
    source_id='1148',  # Cytidine (originally labelled "Cytarabine")
    target_id='1149',  # Anaemia megaloblastic
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 23157436

{
  "source": {
    "node_id": "1148",
    "namespace": "pubchem.compound",
    "identifier": "6175",
    "name": "Cytidine",
    "entity_type": "experimental drug"
  },
  "target": {
    "node_id": "1149",
    "namespace": "umls",
    "identifier": "C0002888",
    "name": "Anaemia megaloblastic",
    "entity_type": "phenotype"
  },
  "lor": 0.309
}


## Drug - target association

In [13]:
# Score the Sertindole -> ACM1_HUMAN (CHRM1) pair by graph node id.
r = predictor.find_new_relation(
    source_id='14672',  # Sertindole
    target_id='9350',   # CHRM1 receptor
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29942259

{
  "source": {
    "node_id": "14672",
    "namespace": "pubchem.compound",
    "identifier": "60149",
    "name": "Sertindole",
    "entity_type": "approved drug"
  },
  "target": {
    "node_id": "9350",
    "namespace": "uniprot",
    "identifier": "P11229",
    "name": "ACM1_HUMAN",
    "entity_type": "target"
  },
  "lor": 0.025
}


# Examples of predicting relations using the node2vec model and embeddings

In [14]:
def get_predictions_df(curie, results_type=None, k=10):
    """Ask the default predictor for new relations of one node.

    :param curie: identifier of the query node written as
        ``namespace:identifier``, e.g. ``'pubchem.compound:2159'``.
    :param results_type: forwarded unchanged to the predictor as
        ``results_type``; this notebook passes ``'phenotype'`` or
        ``'chemical'``. ``None`` is forwarded as-is.
    :param k: number of predictions to request. Defaults to 10, the value
        that was previously hard-coded.
    :return: a ``(query, predictions_df)`` tuple. ``query`` is the ``'query'``
        entry of the predictor's response; ``predictions_df`` holds the
        ``'predictions'`` entry restricted to the columns ``node_id``,
        ``namespace``, ``identifier``, ``name``, ``lor`` and ``novel``.
    :raises KeyError: if the predictor returns predictions that lack one of
        those columns.
    """
    columns = ['node_id', 'namespace', 'identifier', 'name', 'lor', 'novel']
    results = predictor.find_new_relations(
        node_curie=curie,
        results_type=results_type,
        k=k,
    )
    predictions = results['predictions']
    if len(predictions) == 0:
        # A frame built from no records has no columns, so selecting them
        # would raise KeyError; return an empty table with the usual header.
        return results['query'], pd.DataFrame(columns=columns)
    results_df = pd.DataFrame(predictions)[columns]
    return results['query'], results_df

In [15]:
# Request 10 phenotype predictions for Amisulpride (pubchem.compound:2159),
# echo the resolved query, then display the predictions table.
query, df = get_predictions_df('pubchem.compound:2159', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "2173",
    "namespace": "pubchem.compound",
    "identifier": "2159",
    "name": "Amisulpride",
    "entity_type": "approved drug"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,1062,umls,C0497327,Dementia,0.0,True
1,6249,umls,C0036337,Schizoaffective disorder,0.0,True
2,821,umls,C0233632,Thinking abnormal,0.001,True
3,1074,umls,C0575081,Gait disturbance,0.001,True
4,797,umls,C0338831,Mania,0.001,True
5,28,umls,C0015397,Eye disorder,0.002,True
6,41,umls,C0023218,Cramps of lower extremities,0.002,True
7,181,umls,C0036974,Shock,0.002,True
8,816,umls,C0232461,Increased appetite,0.002,True
9,568,umls,C0018939,Blood disorder,0.002,True


In [16]:
# Request 10 phenotype predictions for Olanzapine (pubchem.compound:4585),
# echo the resolved query, then display the predictions table.
query, df = get_predictions_df('pubchem.compound:4585', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "4915",
    "namespace": "pubchem.compound",
    "identifier": "4585",
    "name": "Olanzapine",
    "entity_type": "approved drug"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,287,umls,C0042401,Vasodilation,0.0,True
1,735,umls,C0028734,Nocturia,0.0,True
2,834,umls,C0235198,Cerebration impaired,0.0,True
3,1016,umls,C0151766,Liver function test abnormal,0.0,True
4,870,umls,C0424000,Suicidal ideation,0.0,True
5,1391,umls,C0019270,Hernia,0.0,True
6,2460,umls,C0162316,Iron deficiency anaemia,0.001,True
7,474,umls,C0427008,Stiffness,0.001,True
8,774,umls,C0043387,Yawning,0.001,True
9,371,umls,C0014549,Grand mal convulsion,0.001,True


In [17]:
# Request 10 phenotype predictions for the target ACM2_HUMAN (uniprot:P08172),
# echo the resolved query, then display the predictions table.
query, df = get_predictions_df('uniprot:P08172', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "9429",
    "namespace": "uniprot",
    "identifier": "P08172",
    "name": "ACM2_HUMAN",
    "entity_type": "target"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,198,umls,C0006266,Bronchospasm,0.009,True
1,705,umls,C1322281,Rhinitis seasonal,0.014,True
2,1823,umls,C0559031,Functional gastrointestinal disorder,0.015,True
3,398,umls,C0423153,Lacrimation,0.016,True
4,957,umls,C0009443,Common cold,0.02,True
5,2043,umls,C2607914,Rhinitis allergic,0.021,True
6,81,umls,C0002792,Anaphylactic shock,0.024,True
7,674,umls,C0011334,Dental caries,0.024,True
8,673,umls,C0011253,Delusion,0.025,True
9,728,umls,C0026837,Muscle rigidity,0.03,True


In [18]:
# Request 10 phenotype predictions for the target ADRB1_HUMAN (uniprot:P08588),
# echo the resolved query, then display the predictions table.
query, df = get_predictions_df('uniprot:P08588', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "8733",
    "namespace": "uniprot",
    "identifier": "P08588",
    "name": "ADRB1_HUMAN",
    "entity_type": "target"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,1325,umls,C0857121,Hypertensive,0.002,True
1,7641,umls,C0017612,Open angle glaucoma,0.003,True
2,38,umls,C0020538,Hypertension,0.005,True
3,444,umls,C0039240,Supraventricular tachycardia,0.007,True
4,7640,umls,C0339573,Primary open angle glaucoma,0.009,True
5,198,umls,C0006266,Bronchospasm,0.011,True
6,7709,umls,C0340288,Stable angina pectoris,0.012,True
7,7642,umls,C0857070,Chronic open angle glaucoma,0.013,True
8,1714,umls,C0004239,Atrial flutter,0.014,True
9,103,umls,C0027769,Nervousness,0.015,True


In [19]:
# Request 10 phenotype predictions for the target ACES_HUMAN (uniprot:P22303),
# echo the resolved query, then display the predictions table.
query, df = get_predictions_df('uniprot:P22303', 'phenotype')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "8670",
    "namespace": "uniprot",
    "identifier": "P22303",
    "name": "ACES_HUMAN",
    "entity_type": "target"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,3284,umls,C0151500,Anticholinergic syndrome,0.005,True
1,224,umls,C0080274,Urinary retention,0.013,True
2,1306,umls,C0028738,Nystagmus,0.014,True
3,2178,umls,C0030446,Ileus paralytic,0.016,True
4,649,umls,C0004134,Ataxia,0.017,True
5,542,umls,C0235309,Upset stomach,0.018,True
6,7542,umls,C0024517,"Major depressive disorder, single episode",0.022,True
7,1072,umls,C0554976,Bradyphrenia,0.023,True
8,42,umls,C0037763,Muscle spasms,0.024,True
9,672,umls,C0011168,Dysphagia,0.024,True


In [20]:
# Request 10 chemical predictions for the target HDAC6_HUMAN (uniprot:Q9UBN7),
# echo the resolved query, then display the predictions table.
query, df = get_predictions_df('uniprot:Q9UBN7', 'chemical')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "12164",
    "namespace": "uniprot",
    "identifier": "Q9UBN7",
    "name": "HDAC6_HUMAN",
    "entity_type": "target"
  },
  "k": 10,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,17378,pubchem.compound,11671467,Fostamatinib,0.015,True
1,1616,pubchem.compound,1690,AC1L1C0O,0.017,True
2,16237,pubchem.compound,10250490,5-CHLORO-N-(2-(4-(2-OXOPYRIDIN-1(2H)-YL)BENZAM...,0.03,True
3,14968,pubchem.compound,6914612,"(1R,2R,3R,4S,5R)-4-(BENZYLAMINO)-5-(METHYLTHIO...",0.034,True
4,15758,pubchem.compound,854023,Epibatidine,0.041,True
5,14559,pubchem.compound,456390,Halofuginone,0.041,True
6,17357,pubchem.compound,9865515,Mocetinostat,0.044,True
7,1412,pubchem.compound,89594,Nicotine,0.046,True
8,6474,pubchem.compound,82146,Bexarotene,0.049,True
9,14072,pubchem.compound,444732,"7-[4-(Dimethylamino)Phenyl]-N-Hydroxy-4,6-Dime...",0.055,True


In [31]:
# Request 10 chemical predictions for Parkinson's disease (umls:C0030567),
# echo the resolved query, then display the predictions table.
# `df` from this cell is the input of the literature look-up cell below.
query, df = get_predictions_df("umls:C0030567", 'chemical')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease",
    "entity_type": "phenotype"
  },
  "k": 10,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,4517,pubchem.compound,6279,Medroxyprogesterone acetate,0.001,True
1,5192,pubchem.compound,5994,Progesterone,0.001,True
2,3629,pubchem.compound,3121,Valproic Acid,0.001,True
3,4499,pubchem.compound,3964,Loxapine,0.001,True
4,4970,pubchem.compound,4634,Oxybutynin,0.001,True
5,5753,pubchem.compound,60773,Valaciclovir,0.001,True
6,2044,pubchem.compound,2118,Alprazolam,0.001,True
7,3691,pubchem.compound,3157,Doxazosin,0.001,True
8,3697,pubchem.compound,667468,Cidoxepin,0.001,True
9,5617,pubchem.compound,443879,Tolterodine,0.001,True


In [None]:
# For every predicted chemical in `df`, look up Europe PMC articles that
# mention it together with the target entity, and store the number of hits
# alongside their PMC ids in a new column.
# NOTE(review): `df` must still be the Parkinson's disease predictions table
# (umls:C0030567) when this cell runs - confirm the cell order before
# re-executing.
target_curie = 'umls:C0030567'  # Parkinson's disease

cooccurrences = []
for name in df['name']:
    articles = query_europe_pmc(
        query_entity=name,
        target_entities=[target_curie],
    )
    pmcids = [article['pmcid'] for article in articles]
    cooccurrences.append((len(pmcids), pmcids))
    # Name the compound so that progress is traceable during the slow look-ups.
    print(f'{name}: {len(pmcids)} article(s)')

# Each entry is a (count, [pmcid, ...]) tuple, one per row of `df`.
df['co-occurrence'] = cooccurrences

In [None]:
# Display the predictions table again, now including the literature column
# added by the previous cell.
df

In [38]:
# Request 10 phenotype predictions for Ropinirole (pubchem.compound:5095),
# echo the resolved query, then display the predictions table.
query, df = get_predictions_df('pubchem.compound:5095', 'phenotype')
print(json.dumps(query, indent=2))
df
# Literature evidence: PMID 29241812

{
  "entity": {
    "node_id": "5346",
    "namespace": "pubchem.compound",
    "identifier": "5095",
    "name": "Ropinirole",
    "entity_type": "approved drug"
  },
  "k": 10,
  "type": "phenotype"
}


Unnamed: 0,node_id,namespace,identifier,name,lor,novel
0,763,umls,C0038325,Stevens-Johnson syndrome,0.0,True
1,1484,umls,C0014518,Toxic epidermal necrolysis,0.0,True
2,1139,umls,C0853034,Blood creatine phosphokinase increased,0.0,True
3,291,umls,C0085605,Hepatic failure,0.0,True
4,536,umls,C0037317,Sleep disturbance,0.0,True
5,7667,umls,C0008677,Bronchitis chronic,0.001,True
6,903,umls,C0574067,Cough increased,0.001,True
7,1104,umls,C0024115,Lung disorder,0.001,True
8,862,umls,C0392156,Akathisia,0.001,True
9,273,umls,C0026858,Musculoskeletal pain,0.001,True


In [39]:
# Score the Amantadine -> Parkinson's disease pair by graph node id.
r = predictor.find_new_relation(
    source_id='2071', #Amantadine
    target_id='2248', #Parkinson's disease
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146

{
  "source": {
    "node_id": "2071",
    "namespace": "pubchem.compound",
    "identifier": "2130",
    "name": "Amantadine",
    "entity_type": "approved drug"
  },
  "target": {
    "node_id": "2248",
    "namespace": "umls",
    "identifier": "C0030567",
    "name": "Parkinson's disease",
    "entity_type": "phenotype"
  },
  "lor": 0.005
}


In [25]:
# Score the Ropinirole -> Restless legs syndrome pair by graph node id.
r = predictor.find_new_relation(
    source_id='5346', #Ropinirole
    target_id='1348', #Restless legs syndrome
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146
# NOTE(review): the same PMID is cited by the Amantadine / Parkinson's disease
# cell - confirm it is the intended reference here and not a copy-paste.

{
  "source": {
    "node_id": "5346",
    "namespace": "pubchem.compound",
    "identifier": "5095",
    "name": "Ropinirole",
    "entity_type": "approved drug"
  },
  "target": {
    "node_id": "1348",
    "namespace": "umls",
    "identifier": "C0035258",
    "name": "Restless legs syndrome",
    "entity_type": "phenotype"
  },
  "lor": 0.007
}


In [26]:
# Score the Disulfiram -> Malignant melanoma pair by graph node id.
r = predictor.find_new_relation(
    source_id='3627', #Disulfiram
    target_id='2318', #Malignant melanoma
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 21654146
# NOTE(review): the same PMID is cited by the Amantadine / Parkinson's disease
# cell - confirm it is the intended reference here and not a copy-paste.

{
  "source": {
    "node_id": "3627",
    "namespace": "pubchem.compound",
    "identifier": "3117",
    "name": "Disulfiram",
    "entity_type": "approved drug"
  },
  "target": {
    "node_id": "2318",
    "namespace": "umls",
    "identifier": "C0025202",
    "name": "Malignant melanoma",
    "entity_type": "phenotype"
  },
  "lor": 0.511
}


In [27]:
# NOTE(review): node 17528 resolves to the protein target PMYT1_HUMAN
# (uniprot:Q99640) in the recorded output, not to the drug Brigatinib as this
# cell was originally labelled. Confirm the intended node id; the cited PMID
# may refer to Brigatinib rather than to this target.
r = predictor.find_new_relation(
    source_id='17528', # PMYT1_HUMAN (originally labelled "Brigatinib")
    target_id='5148', #Colorectal cancer
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 31410188

{
  "source": {
    "node_id": "17528",
    "namespace": "uniprot",
    "identifier": "Q99640",
    "name": "PMYT1_HUMAN",
    "entity_type": "target"
  },
  "target": {
    "node_id": "5148",
    "namespace": "umls",
    "identifier": "C1527249",
    "name": "Colorectal cancer",
    "entity_type": "phenotype"
  },
  "lor": 0.821
}


In [28]:
# Score the Dasatinib -> Diffuse large B-cell lymphoma pair by graph node id.
r = predictor.find_new_relation(
    source_id='6995', #dasatinib
    target_id='1179', #Diffuse large B-cell lymphoma
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 31383760

{
  "source": {
    "node_id": "6995",
    "namespace": "pubchem.compound",
    "identifier": "3062316",
    "name": "Dasatinib",
    "entity_type": "approved drug"
  },
  "target": {
    "node_id": "1179",
    "namespace": "umls",
    "identifier": "C0079744",
    "name": "Diffuse large B-cell lymphoma",
    "entity_type": "phenotype"
  },
  "lor": 0.836
}


In [29]:
# Score the Ribavirin -> Candida infection pair by graph node id.
r = predictor.find_new_relation(
    source_id='5265', #ribavirin
    target_id='947', #Candida infection
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 31307986

{
  "source": {
    "node_id": "5265",
    "namespace": "pubchem.compound",
    "identifier": "37542",
    "name": "Ribavirin",
    "entity_type": "approved drug"
  },
  "target": {
    "node_id": "947",
    "namespace": "umls",
    "identifier": "C0006840",
    "name": "Candida infection",
    "entity_type": "phenotype"
  },
  "lor": 0.129
}
