### Outlier Drug Pathway Lookup
Loads the results of an outlier analysis directly from MedBook's Mongo databse into a python pandas data frame and then looks up interactions in DGIdb and pathways in Reactome, KEGG and Wiki Pathways. 

To run replace the outlier analysis id (mGRmfK9rdZgpW6K53) with the id in the browser address bar when viewing the results of the outlier analysis in MedBook.

In [None]:
# import a variety of packages
import requests
import yaml
from IPython.core.display import display, HTML
import numpy as np
import pandas as pd
!pip2 install --quiet pypathway # not instandard jupyter container so add manually
from pypathway import *
import pymongo

In [14]:
# Connect to the database
db = pymongo.MongoClient("mongo").MedBook

In [15]:
# Find all jobs from a given sample name
jobs = db.jobs.find({"args.sample_label": "ckcc/TH03_0118_S01_RNASeq"})
for job in jobs:
    print job["_id"],":", job["args"]["sample_label"],"against", job["args"]["sample_group_name"]

GsaN5gNNrusKL8DtX : ckcc/TH03_0118_S01_RNASeq against --[TC2] acute myeloid leukemia
KFN3WuD6vRdun3ih3 : ckcc/TH03_0118_S01_RNASeq against --[TC2] AML+ALAL+ALL
hzTfpy7kXzftFMJxe : ckcc/TH03_0118_S01_RNASeq against Compendium 2 final samples 2016/10/24
zmkf3FzT9ajMPeWLC : ckcc/TH03_0118_S01_RNASeq against AML


In [21]:
# Lookup the outlier job information and extract up, down and top 5% expressed into dataframes
job = db.jobs.find_one({"_id": "hzTfpy7kXzftFMJxe"})
up = pd.DataFrame(job["output"]["up_genes"])
down = pd.DataFrame(job["output"]["down_genes"])
top5percent = pd.DataFrame(job["output"]["top5percent_genes"])
print "Outlier analysis for", job["args"]["sample_label"],"against", job["args"]["sample_group_name"]

Outlier analysis for ckcc/TH03_0118_S01_RNASeq against Compendium 2 final samples 2016/10/24


In [22]:
print "Up regulated genes (5)"
up.head()

Up regulated genes (5)


Unnamed: 0,background_median,gene_label,sample_value
0,8.09185,ACAP1,13.259877
1,8.376,ADA,13.776777
2,4.80645,ADGRE1,13.109611
3,6.3069,ADGRG5,12.579483
4,2.4183,AE000661.37,11.97697


In [8]:
# Search for drug interactions - see http://dgidb.genome.wustl.edu/api
endpoint = "http://dgidb.genome.wustl.edu/api/v1/interactions.json"
query = "?genes={}".format(",".join(up["gene_label"].values))  # just inhibitors
matches = requests.get(endpoint + query).json()
druggable_genes = {m["geneName"]: list(set([i["drugName"] 
                                            for i in m["interactions"]])) for m in matches["matchedTerms"]}
print "Up regulated genes with drug targets:"
print yaml.safe_dump(druggable_genes, default_flow_style=False)

Up regulated genes with drug targets:
ADA:
- PENTOSTATIN
- 1-((1R,2S)-1-{2-[2-(4-CHLOROPHENYL)-1,3-BENZOXAZOL-7-YL]ETHYL}-2-HYDROXYPROPYL)-1H-IMIDAZOLE-4-CARBOXAMIDE
- 1-((1R)-1-(HYDROXYMETHYL)-3-{6-[(3-PHENYLPROPANOYL)AMINO]-1H-INDOL-1-YL}PROPYL)-1H-IMIDAZOLE-4-CARBOXAMIDE
- 1-{(1R,2S)-2-HYDROXY-1-[2-(2-NAPHTHYLOXY)ETHYL]PROPYL}-1H-IMIDAZONE-4-CARBOXAMIDE
- FLUDARABINE
- CLADRIBINE
- EHNA
- DIPYRIDAMOLE
- 6-HYDROXY-7,8-DIHYDRO PURINE NUCLEOSIDE
- 6-HYDROXY-1,6-DIHYDRO PURINE NUCLEOSIDE
APAF1:
- ADENOSINE TRIPHOSPHATE
BTK:
- AVL-292
- INOSITOL 1,3,4,5-TETRAKISPHOSPHATE
- IBRUTINIB
CFD:
- 3,4-DICHLOROISOCOUMARIN
- ISATOIC ANHYDRIDE
CSF3R:
- RUXOLITINIB
CTSG:
- BIS-NAPTHYL BETA-KETOPHOSPHONIC ACID
- DERMOLASTIN
- 2-[3-({METHYL[1-(2-NAPHTHOYL)PIPERIDIN-4-YL]AMINO}CARBONYL)-2-NAPHTHYL]-1-(1-NAPHTHYL)-2-OXOETHYLPHOSPHONIC
  ACID
DNTT:
- CORDYCEPIN
FLT3:
- LESTAURTINIB
- QUIZARTINIB
- GTP-14564
- SORAFENIB
- FLT-3 INHIBITOR II
- CRENOLANIB
- 4SC-202
- RGB-286638
- CEDIRANIB
- CLOFARABINE
- S

In [9]:
reactome = []
for gene in druggable_genes.keys():
    reactome.extend(PublicDatabase.search_reactome(gene))
print "Found", len(reactome),"pathways in reactome,"
for p in reactome:
    display(HTML("{} <a target='_blank' href='http://www.reactome.org/content/detail/{}'>{}</a>".format(
                p.description, p.id, p.id)))
    if p.SBGN:
        display(HTML("Has SBGN Diagram"))

Found 14 pathways in reactome,


In [11]:
kegg = []
for gene in druggable_genes.keys():
    kegg.extend(PublicDatabase.search_kegg(gene, organism="hsa"))
print "Found", len(kegg),"pathways in kegg,"
for p in kegg:
    display(HTML("{} <a target='_blank' href='http://www.reactome.org/content/detail/{}'>{}</a>".format(
                p.description, p.id, p.id)))

Found 8 pathways in kegg,


In [12]:
wiki_pathways = []
for gene in druggable_genes.keys():
    wiki_pathways.extend(PublicDatabase.search_wp(gene, species="Homo_sapiens"))
print "Found", len(wiki_pathways),"pathways in wiki pathways,"
for p in wiki_pathways:
    display(HTML("{} <a target='_blank' href='http://wikipathways.org/index.php/Pathway:{}'>{}</a>".format(
                p.description, p.id, p.id)))

Found 106 pathways in wiki pathways,
