In [63]:
from rnaseq_lib.web import openfda_get_drugs_by_query
from rnaseq_lib.web import openfda_drug_label
from rnaseq_lib.web import _rget
from rnaseq_lib.utils.expando import Expando

import pandas as pd
import os
from collections import defaultdict

from progressbar import ProgressBar

# Table

1. Get list of drugs and targets from Cancerrxgene
2. Check for FDA listing
    1. Collect indications_of_usage, mechanism_of_action, openfda.brand_name, openfda.generic_name
3. Derive tissue from information

In [2]:
df = pd.read_excel('Cancerrxgene/screened-compounds.xlsx')
print df.shape; df.head()

(265, 5)


Unnamed: 0,Drug ID,Drug Name,Synonyms,Target,Target Pathway
0,1,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-50823...",EGFR,EGFR signaling
1,3,Rapamycin,"AY-22989, Sirolimus, WY-090217, Torisel, Rapamune",MTORC1,PI3K/MTOR signaling
2,5,Sunitinib,"Sutent, Sunitinib Malate, SU-11248","PDGFR, KIT, VEGFR, FLT3, RET, CSF1R",RTK signaling
3,6,PHA-665752,"PHA665752, PHA 665752",MET,RTK signaling
4,9,MG-132,"LLL cpd, MG 132, MG132","Proteasome, CAPN1",Protein stability and degradation


Query drug name in OpenFDA for additional information

In [106]:
info = defaultdict(list)

bar = ProgressBar()
for i in bar(xrange(len(df))):
    row = df.iloc[i]
    
    # Collect all aliases for Drug
    aliases = row.Synonyms.split(', ') if type(row.Synonyms) == list else [str(row.Synonyms)]
    aliases = None if 'nan' in aliases[0].lower() else aliases
    drug = [str(row['Drug Name']).split()[0]]
    aliases =  drug + aliases if aliases else drug
    
    # Check if any aliases return OpenFDA results
    r = None
    for name in aliases:
        if not '-' in name:
            r = openfda_drug_label(str(name))
            if r: break
    
    # If no results
    features  = ['usage', 'mech_action', 'brand_name', 'generic_name']
    if not r:
        #print 'No query for: {}'.format(row['Drug Name'])
        for f in features:
            info[f].append(None)
    
    else:
        # Pull out result
        e = r.json(object_hook=Expando)['results'][0]
        
        for f, a in zip(features, ['indications_and_usage', 'mechanism_of_action', 
                         'openfda.brand_name', 'openfda.generic_name']):    
            try:
                attribute = e[a] if not a.startswith('openfda') else e['openfda'][a.split('.')[1]]
                info[f].append('. '.join(attribute))
            except KeyError:
                info[f].append(None)

100% |########################################################################|


Add values to dataframe

In [107]:
df['usage'] = info['usage']
df['mech_action'] = info['mech_action']
df['brand_name'] = info['brand_name']
df['generic_name'] = info['generic_name']

In [108]:
df.head()

Unnamed: 0,Drug ID,Drug Name,Synonyms,Target,Target Pathway,usage,mech_action,brand_name,generic_name
0,1,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-50823...",EGFR,EGFR signaling,1. INDICATIONS AND USAGE Enter section text he...,12.1 Mechanism of Action The mechanism of clin...,Tarceva,ERLOTINIB HYDROCHLORIDE
1,3,Rapamycin,"AY-22989, Sirolimus, WY-090217, Torisel, Rapamune",MTORC1,PI3K/MTOR signaling,,,,
2,5,Sunitinib,"Sutent, Sunitinib Malate, SU-11248","PDGFR, KIT, VEGFR, FLT3, RET, CSF1R",RTK signaling,1 INDICATIONS AND USAGE SUTENT is a kinase inh...,12.1 Mechanism of Action Sunitinib is a small ...,SUTENT,SUNITINIB MALATE
3,6,PHA-665752,"PHA665752, PHA 665752",MET,RTK signaling,,,,
4,9,MG-132,"LLL cpd, MG 132, MG132","Proteasome, CAPN1",Protein stability and degradation,,,,


Drop rows whose generic or brand name does not match the drug name or synonyms

In [109]:
rows_to_drop = []
for i in xrange(len(df)):
    row = df.iloc[i]
    aliases = row.Synonyms.split(', ') if type(row.Synonyms) == list else [str(row.Synonyms)]
    drug = [str(row['Drug Name']).split()[0]]
    aliases =  drug + aliases if aliases else drug
    
    # If missing brand and generic name, drop
    if not row.generic_name and not row.brand_name:
        rows_to_drop.append(i)
        continue
    
    # If drug name or synonyms don't match, drop
    if not any([x for x in aliases if x.lower() in row.generic_name.lower()]):
        if not any([x for x in aliases if x.lower() in row.brand_name.lower()]):
            rows_to_drop.append(i)

In [110]:
df.drop(rows_to_drop)

Unnamed: 0,Drug ID,Drug Name,Synonyms,Target,Target Pathway,usage,mech_action,brand_name,generic_name
0,1,Erlotinib,"Tarceva, RG-1415, CP-358774, OSI-774, Ro-50823...",EGFR,EGFR signaling,1. INDICATIONS AND USAGE Enter section text he...,12.1 Mechanism of Action The mechanism of clin...,Tarceva,ERLOTINIB HYDROCHLORIDE
2,5,Sunitinib,"Sutent, Sunitinib Malate, SU-11248","PDGFR, KIT, VEGFR, FLT3, RET, CSF1R",RTK signaling,1 INDICATIONS AND USAGE SUTENT is a kinase inh...,12.1 Mechanism of Action Sunitinib is a small ...,SUTENT,SUNITINIB MALATE
5,11,Paclitaxel,"BMS-181339-01, Taxol, Onxol, Paxene, Praxel, A...",Microtubule stabiliser,Mitosis,INDICATIONS AND USAGE Paclitaxel Injection is ...,,paclitaxel,PACLITAXEL
8,30,Sorafenib,"Nexavar, 284461-73-0, BAY 43-9006","PDGFR, KIT, VEGFR, RAF",RTK signaling,1 INDICATIONS AND USAGE NEXAVAR is a kinase in...,12.1 Mechanism of Action Sorafenib is a kinase...,Nexavar,SORAFENIB
10,34,Imatinib,"Gleevec, STI-571","ABL, KIT, PDGFR",RTK signaling,1 INDICATIONS AND USAGE Imatinib mesylate is a...,12.1 Mechanism of Action Imatinib mesylate is ...,imatinib mesylate,IMATINIB MESYLATE
12,37,Crizotinib,"Xalkori, PF2341066, PF-2341066, PF 2341066","MET, ALK, ROS1",RTK signaling,1 INDICATIONS AND USAGE XALKORI is indicated f...,12.1 Mechanism of Action Crizotinib is an inhi...,Xalkori,CRIZOTINIB
16,51,Dasatinib,"BMS-354825-03, BMS-354825, Sprycel","ABL, SRC, Ephrins, PDGFR, KIT",Other,1 INDICATIONS AND USAGE SPRYCEL® (dasatinib) i...,"12.1 Mechanism of Action Dasatinib, at nanomol...",SPRYCEL,DASATINIB
26,64,CMK,KIN001-128,RSK2,ERK MAPK signaling,Indications and usage Indications and usage: 1...,,THE YEON CMK PORE BLEMISH BC 02 NATURAL BEIGE,"TITANIUM DIOXIDE, OCTINOXATE"
27,71,Pyrimethamine,"Daraprim, Chloridine",Dihydrofolate reductase (DHFR),Other,,,Pyrimethamine Leucovorin,PYRIMETHAMINE LEUCOVORIN
35,104,Bortezomib,"PS-341, LDP-341, Velcade",Proteasome,Protein stability and degradation,1 INDICATIONS AND USAGE VELCADE is a proteasom...,12.1 Mechanism of Action Bortezomib is a rever...,VELCADE,BORTEZOMIB
