# Using the Prediction Model

## Environment

In [1]:
import getpass
import json
import os
import sys
import time

import pandas as pd

from se_kge.constants import (
    DEFAULT_EMBEDDINGS_PATH, DEFAULT_GRAPH_PATH,
    DEFAULT_MAPPING_PATH, DEFAULT_MODEL_PATH,
)

In [2]:
# Record the Python interpreter version used for this run.
print(sys.version)

3.7.3 (default, Apr 24 2019, 15:29:51) [MSC v.1915 64 bit (AMD64)]


In [3]:
# Record the local wall-clock time at which the notebook was executed.
print(time.asctime())

Thu Aug 15 12:03:23 2019


In [4]:
# Record the login name of the user who ran the notebook.
# NOTE(review): the name is persisted in the saved cell output — clear it before sharing if that is not wanted.
print(getpass.getuser())

RanaAldisi


# Loading the Data

In [5]:
from se_kge.default_predictor import predictor

In [6]:
# Echo the default resource paths imported from se_kge.constants.
# NOTE(review): presumably these are the files the default predictor was built from —
# confirm in se_kge.default_predictor.
print(f"""Loaded default predictor using paths:

embeddings: {DEFAULT_EMBEDDINGS_PATH}
graph:      {DEFAULT_GRAPH_PATH}
model:      {DEFAULT_MODEL_PATH}
mapping:    {DEFAULT_MAPPING_PATH}
""")

Loaded default predictor using paths:

embeddings: c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\070819_node2vec_embeddings_complete01.embeddings
graph:      c:\users\rana aldisi\documents\github\se_kge\resources\chemsim_50_graphs\fullgraph_with_chemsim_50.edgelist
model:      c:\users\rana aldisi\documents\github\se_kge\resources\predictive_model\070819_node2vec_model_complete01.pkl
mapping:    c:\users\rana aldisi\documents\github\se_kge\resources\mapping\fullgraph_nodes_mapping.tsv



# Examples of different kinds of predictions with literature evidence

## side effect - target association

In [14]:
# Query the predictor for a target - side effect relation, looking both nodes up by name.
r = predictor.find_new_relation(
    source_name='EGFR_HUMAN',
    target_name='Papulopustular rash',
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 18165622

{
  "source": {
    "node_id": "9635",
    "namespace": "uniprot",
    "identifier": "P00533",
    "name": "EGFR_HUMAN"
  },
  "target": {
    "node_id": "6791",
    "namespace": "umls",
    "identifier": "C2609319",
    "name": "Papulopustular rash"
  },
  "p": 0.981,
  "mlp": 0.008
}


In [15]:
# Query the predictor for a target - side effect relation, addressing both nodes by graph node ID.
# NOTE(review): the saved output resolves node 9438 to PDE6C_HUMAN (uniprot P51160), not to the
# histamine H1 receptor this example is meant to show — verify the node ID against the current
# mapping file (or look the node up by name, as the previous example does).
r = predictor.find_new_relation(
    source_id='9438', # intended: Histamine receptor H1 (output disagrees, see note above)
    target_id='331', # Drowsiness
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 26626077

{
  "source": {
    "node_id": "9438",
    "namespace": "uniprot",
    "identifier": "P51160",
    "name": "PDE6C_HUMAN"
  },
  "target": {
    "node_id": "331",
    "namespace": "umls",
    "identifier": "C0013144",
    "name": "Drowsiness"
  },
  "p": 0.977,
  "mlp": 0.01
}


## drug - side effect association

In [16]:
# Query the predictor for a drug - side effect relation, addressing both nodes by graph node ID.
# NOTE(review): the saved output reports p = 0.003 for this pair, far lower than the other
# examples in this section — confirm this pair is the intended illustration.
r = predictor.find_new_relation(
    source_id='3534',  # diazepam
    target_id='670',  # Libido decreased
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29888057

{
  "source": {
    "node_id": "3534",
    "namespace": "pubchem.compound",
    "identifier": "3016",
    "name": "Diazepam"
  },
  "target": {
    "node_id": "670",
    "namespace": "umls",
    "identifier": "C0011124",
    "name": "Libido decreased"
  },
  "p": 0.003,
  "mlp": 2.523
}


In [17]:
# Query the predictor for a drug - side effect relation, addressing both nodes by graph node ID.
# NOTE(review): the saved output resolves node 10270 to OBP2A_HUMAN (uniprot Q9NY56), a protein,
# not to the drug cytarabine this example is meant to show — verify the node ID against the
# current mapping file.
r = predictor.find_new_relation(
    source_id='10270',  # intended: Cytarabine (output disagrees, see note above)
    target_id='1149',  # Anaemia megaloblastic
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 23157436

{
  "source": {
    "node_id": "10270",
    "namespace": "uniprot",
    "identifier": "Q9NY56",
    "name": "OBP2A_HUMAN"
  },
  "target": {
    "node_id": "1149",
    "namespace": "umls",
    "identifier": "C0002888",
    "name": "Anaemia megaloblastic"
  },
  "p": 0.999,
  "mlp": 0.0
}


## drug-target association

In [18]:
# Query the predictor for a drug - target relation, addressing both nodes by graph node ID.
# NOTE(review): neither node resolves to the intended entity in the saved output:
#   12765 -> "2-Amino-3-Oxo-4-Sulfo-Butyric Acid" (pubchem.compound 131704218), not sertindole
#   9372  -> GBRA4_HUMAN (uniprot P48169), not the CHRM1 receptor
# Verify both node IDs against the current mapping file.
r = predictor.find_new_relation(
    source_id='12765',  # intended: Sertindole (output disagrees, see note above)
    target_id='9372',   # intended: CHRM1 receptor (output disagrees, see note above)
)
print(json.dumps(r, indent=2))
# Literature evidence: PMID 29942259

{
  "source": {
    "node_id": "12765",
    "namespace": "pubchem.compound",
    "identifier": "131704218",
    "name": "2-Amino-3-Oxo-4-Sulfo-Butyric Acid"
  },
  "target": {
    "node_id": "9372",
    "namespace": "uniprot",
    "identifier": "P48169",
    "name": "GBRA4_HUMAN"
  },
  "p": 0.985,
  "mlp": 0.006
}


# Example of predicting relations using node2vec model and embeddings

In [9]:
def get_phenotype_df(curie, k=10):
    """Request the top-``k`` phenotype predictions for a node and tabulate them.

    :param curie: CURIE of the query node, e.g. ``'pubchem.compound:1983'``.
    :param k: number of predictions to request from the predictor. Defaults to 10,
        the value that used to be hard-coded.
    :return: a pair ``(query, results_df)``: the ``'query'`` entry of the predictor's
        response, and a DataFrame built from its ``'predictions'`` entry with the
        columns node_id, namespace, identifier, name, p, mlp (in that order).
    """
    columns = ['node_id', 'namespace', 'identifier', 'name', 'p', 'mlp']
    # NOTE(review): the outputs saved with this notebook list pubchem.compound rows
    # rather than phenotypes — confirm that 'phenotype' is the intended results_type.
    results = predictor.find_new_relations(
        node_curie=curie,
        results_type='phenotype',
        k=k,
    )
    # Passing ``columns`` to the constructor fixes the column order and still produces
    # a correctly labelled (empty) frame when no predictions come back; selecting the
    # columns from an empty frame afterwards would raise KeyError. A field missing from
    # the records shows up as NaN instead of raising.
    results_df = pd.DataFrame(results['predictions'], columns=columns)
    return results['query'], results_df

In [11]:
# Fetch the predictions for PubChem compound 145742 (resolved as "Proline" in the saved output),
# print the echoed query, and display the prediction table.
query, df = get_phenotype_df('pubchem.compound:145742')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "9294",
    "namespace": "pubchem.compound",
    "identifier": "145742",
    "name": "Proline"
  },
  "k": 10,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,3042,pubchem.compound,2771,Citalopram,0.01,1.986
1,3272,pubchem.compound,2801,Clomipramine,0.017,1.759
2,5346,pubchem.compound,5095,Ropinirole,0.031,1.503
3,5191,pubchem.compound,4917,Prochlorperazine,0.031,1.509
4,4712,pubchem.compound,4205,Mirtazapine,0.038,1.42
5,4915,pubchem.compound,4585,Olanzapine,0.041,1.391
6,4514,pubchem.compound,4034,Meclizine,0.042,1.377
7,7235,pubchem.compound,9966051,VORTIOXETINE,0.047,1.327
8,3359,pubchem.compound,2818,Clozapine,0.048,1.315
9,4137,pubchem.compound,3559,Haloperidol,0.049,1.312


In [12]:
# Fetch the predictions for PubChem compound 1983 (resolved as "Acetaminophen" in the saved output),
# print the echoed query, and display the prediction table.
query, df = get_phenotype_df('pubchem.compound:1983')
print(json.dumps(query, indent=2))
df

{
  "entity": {
    "node_id": "1887",
    "namespace": "pubchem.compound",
    "identifier": "1983",
    "name": "Acetaminophen"
  },
  "k": 10,
  "type": "chemical"
}


Unnamed: 0,node_id,namespace,identifier,name,p,mlp
0,12602,pubchem.compound,447701,"N-Alpha-Acetyl-3,5-Diiodotyrosylglycine",0.003,2.535
1,11824,pubchem.compound,448006,"2-(1,1'-Biphenyl-4-Yl)Propanoic Acid",0.003,2.508
2,9863,pubchem.compound,636405,Cefpiramide,0.004,2.382
3,13015,pubchem.compound,9543429,Ndelta-(N'-Sulphodiaminophosphinyl)-L-Ornithine,0.004,2.406
4,13013,pubchem.compound,5287989,"(5-Chloropyrazolo[1,5-a]Pyrimidin-7-Yl)-(4-Met...",0.006,2.201
5,12118,pubchem.compound,3371,Flufenamic Acid,0.007,2.175
6,13277,pubchem.compound,448108,U-Pi-a-Pi,0.007,2.151
7,12091,pubchem.compound,8076,2-Ethoxyethanol,0.009,2.04
8,11854,pubchem.compound,4290,N-(2-Flouro-Benzyl)-4-Sulfamoyl-Benzamide,0.009,2.028
9,12880,pubchem.compound,65533,alpha-D-glucose-1-phosphate,0.013,1.879
