In [16]:
# Imports
import os
import numpy as np
import pandas as pd
import sqlalchemy as sa
import matplotlib.pyplot as plt
import pickle as pickle
from sklearn import metrics

import predictor

from IPython.display import display, HTML

In [17]:
# Styling
def print2(a, b, *args, x=60):
    """Print ``a`` in a field of minimum width ``x``, followed by ``b`` and any extras.

    ``a`` is formatted with the format spec ``'%d' % x`` (a minimum field
    width), ``b`` follows it directly with no separator, and every value in
    ``args`` is appended after a single space.
    """
    width_spec = '%d' % x
    pieces = [format(a, width_spec) + '{}'.format(b)]
    pieces.extend(str(extra) for extra in args)
    print(' '.join(pieces))
    


### Scratch work: mapping PDX drugs to STITCH protein targets and partner-drug pairs

In [18]:
# Load data: read the sheet at index 1 of the PDX drug-list workbook and give
# the `Drug` / `CID` columns lower-case names.
pdx = pd.read_excel('kaist/PDX_DrugList_20150729.xlsx', 1).rename(
    columns={'Drug': 'drug', 'CID': 'cid'}
)

# Show the source file name, a heading, a three-row preview and the row count.
print('PDX_DrugList_20150729.xlsx')
display(HTML("<h4>pdx</h4>"))
display(pdx.head(3))
print2("Number of rows:", len(pdx))

PDX_DrugList_20150729.xlsx


Unnamed: 0,drug,target,cid
1,Linifacnib (ABT-869),ATP-competitive VEGFR/PDGFR inhibitor,11485656
2,Afatinib (BIBW 2992),EGFR/HER2 inhibitor,10184653
3,BMS-536924,ATP-competitive IGF-1R/IR inhibitor,11353973


Number of rows:                                             105


In [19]:
# Find protein targets for query CIDs using STITCH.
#
# The URL uses the `postgresql://` dialect name because the legacy
# `postgres://` alias is rejected by SQLAlchemy 1.4 and later. Setting the
# `KIMLAB_DB_URL` environment variable overrides the connection target so that
# credentials do not have to live in the notebook.
# NOTE(review): the fallback still embeds the original credentials; drop it
# once the environment variable is configured everywhere this notebook runs.
engine = sa.create_engine(
    os.environ.get(
        'KIMLAB_DB_URL',
        'postgresql://postgres:postgres@192.168.6.19:5432/kimlab',
    )
)

# Every CID is coerced to a plain int before being interpolated, so only
# integer literals can reach the SQL text; sorting makes the query text
# reproducible from run to run.
sql_query = """
select *
from stitch.protein_chemical_links_human_nostereo_hc
where cid in ({})
""".format(", ".join(str(cid) for cid in sorted({int(cid) for cid in pdx.cid})))
cid2enst = pd.read_sql_query(sql_query, engine)

# Preview the mapping and report how many links / distinct CIDs came back.
display(HTML("<h4>cid2enst</h4>"))
display(cid2enst.head(3))
print2("Number of CID -> ENST mappings:", cid2enst.shape[0])
print2("Number of unique CIDs mapped to proteins:", len(set(cid2enst['cid'])))

Unnamed: 0,cid,ensp
0,2244,354612
1,2244,356438
2,2346,241337


Number of CID -> ENST mappings:                             156
Number of unique CIDs mapped to proteins:                   75


In [20]:
# Add `enst` ids to `pdx` data. The merge uses the default inner join on
# `cid`, so drugs without a row in `cid2enst` do not appear in the result.
pdx_wenst = pd.merge(pdx, cid2enst, on='cid')

# Preview the merged frame, then report its size and id coverage.
display(HTML("<h4>pdx_wenst</h4>"))
display(pdx_wenst.head(3))
print2("Number of rows:", len(pdx_wenst))
print2("Number of unmapped CIDs:", len(set(pdx.cid).difference(pdx_wenst.cid)))
print2("Number of unique CIDs:", len(set(pdx_wenst['cid'])))
print2("Number of unique ENSTs:", pdx_wenst['ensp'].nunique())

Unnamed: 0,drug,target,cid,ensp
0,Linifacnib (ABT-869),ATP-competitive VEGFR/PDGFR inhibitor,11485656,241453
1,Linifacnib (ABT-869),ATP-competitive VEGFR/PDGFR inhibitor,11485656,286301
2,Afatinib (BIBW 2992),EGFR/HER2 inhibitor,10184653,269571


Number of rows:                                             156
Number of unmapped CIDs:                                    30
Number of unique CIDs:                                      75
Number of unique ENSTs:                                     109


In [27]:
# CIDs for which the STITCH query returned at least one row.
cid2enst_cids = set(cid2enst['cid'])
# Keep the drugs whose CID is not in that set and copy them to the clipboard.
pdx_noenst = pdx.loc[~pdx['cid'].isin(cid2enst_cids)]
pdx_noenst.to_clipboard()

In [32]:
# With no target buffer `to_csv` returns the CSV text, which is printed as-is
# (row index omitted).
print(pdx_noenst.to_csv(path_or_buf=None, index=False))

drug,target,cid
BMS-536924, ATP-competitive IGF-1R/IR inhibitor,11353973
MK-2206 2HCl,highly selective inhibitor of Akt1/2/3,46930998
PD0332991 HCl, highly selective inhibitor of CDK4/6,11431660
Cisplatin,inhibit DNA synthesis,441203
MK-2866, selective androgen receptor modulator (SARM),11326715
AZD6482 (KIN-193),PI3Kβ inhibitor,44137675
AZD7762, inhibitor of Chk1,11152667
Cytarabine,antimetabolic agent and DNA synthesisinhibitor,6253
Gimeracil,inhibitor ofdihydropyrimidine dehydrogenase,54679224
"LDE225 (NVP-LDE225, Erismodeglb)","Smoothened(Smo) antagonist, inhibiting Hedgehog (Hh) signaling",24775005
"CAL-101 (GS-1101, Idelalisib)", p110δ inhibitor ,11625818
Trametinib (GSK1120212),MEK1/2 inhibitor,11707110
Tivantinib (ARQ197),first non-ATP-competitive c-Metinhibitor,11494412
Dovitinib Dilactic acid (TKI258 Dilactic acid),"multitargeted RTK inhibitorFLT3/c-Kit, FGFR1/3, VEGFR1-4",66553150
GW441756, inhibitor of TrkA,16219400
Icotinib (BPI-2009H),EGFR inhibitor,22024915
Salubrinal, i

In [21]:
# Create a dataframe containing `borrelidin` and `halofuginone`:
# one row per (partner drug, partner protein id).
borrelidin_ensts = [265112, 502553, 455217, 506040, 514259, 626210, 627006]
halofuginone_ents = [324331, 274680]

# The scalar drug name is broadcast against the list of protein ids.
borrelidin_df = pd.DataFrame(
    {'partner_drug': 'borrelidin', 'partner_ensp': borrelidin_ensts},
    columns=['partner_drug', 'partner_ensp'],
)
halofuginone_df = pd.DataFrame(
    {'partner_drug': 'halofuginone', 'partner_ensp': halofuginone_ents},
    columns=['partner_drug', 'partner_ensp'],
)

# Stack both frames under a fresh 0..n-1 index.
partner_df = pd.concat([borrelidin_df, halofuginone_df], axis=0, ignore_index=True)

display(HTML("<h4>partner_df</h4>"))
display(partner_df)

Unnamed: 0,partner_drug,partner_ensp
0,borrelidin,265112
1,borrelidin,502553
2,borrelidin,455217
3,borrelidin,506040
4,borrelidin,514259
5,borrelidin,626210
6,borrelidin,627006
7,halofuginone,324331
8,halofuginone,274680


In [22]:
# Join with partner enst.
#
# Every drug/protein row is paired with every partner protein of both partner
# drugs: the frame is duplicated once per partner drug, tagged with that drug's
# name, and merged with `partner_df` on `partner_drug`.
pdx_wenst_1 = pdx_wenst.assign(partner_drug='borrelidin')
pdx_wenst_2 = pdx_wenst.assign(partner_drug='halofuginone')
pdx_wenst_3 = pd.concat([pdx_wenst_1, pdx_wenst_2], ignore_index=True)

pdx_wenst_wpartner = pdx_wenst_3.merge(partner_df, on=['partner_drug'])

# Order each protein pair as (smaller id, larger id). Row-wise min/max gives
# the sorted pair directly; the earlier `zip(*...apply(sorted))` form was left
# with an unclosed parenthesis and could not be parsed.
pdx_wenst_wpartner['ensp_1'] = pdx_wenst_wpartner[['ensp', 'partner_ensp']].min(axis=1)
pdx_wenst_wpartner['ensp_2'] = pdx_wenst_wpartner[['ensp', 'partner_ensp']].max(axis=1)

# Text form "(a, b)" of the ordered pair, used to build SQL tuple lists.
pdx_wenst_wpartner['ensp_pair'] = (
    "("
    + pdx_wenst_wpartner['ensp_1'].astype(str)
    + ", "
    + pdx_wenst_wpartner['ensp_2'].astype(str)
    + ")"
)

display(HTML("<h4>pdx_wenst_wpartner</h4>"))
display(pdx_wenst_wpartner.head(3))
print2("Number of rows:", pdx_wenst_wpartner.shape[0])
print2("Number of unique CIDs:", len(set(pdx_wenst_wpartner['cid'])))
print2("Number of unique ENSTs:", len(set(pdx_wenst_wpartner['ensp'])))

Unnamed: 0,drug,target,cid,ensp,partner_drug,partner_ensp,ensp_pair
0,Linifacnib (ABT-869),ATP-competitive VEGFR/PDGFR inhibitor,11485656,241453,borrelidin,265112,"(241453, 265112)"
1,Linifacnib (ABT-869),ATP-competitive VEGFR/PDGFR inhibitor,11485656,241453,borrelidin,502553,"(241453, 502553)"
2,Linifacnib (ABT-869),ATP-competitive VEGFR/PDGFR inhibitor,11485656,241453,borrelidin,455217,"(241453, 455217)"


Number of rows:                                             1404
Number of unique CIDs:                                      75
Number of unique ENSTs:                                     109


In [23]:
# Get scores from the database.
#
# `ensp_pair` repeats whenever several drug rows share a protein target, so
# each distinct pair is sent only once; the rows matched by the IN-list are
# unchanged, but the query text is shorter.
sql_query = """
select *
from chemical_interactions_v2.predictor_2_all_unused_pairs_scored
where (ensp_1, ensp_2) in ({})
""".format(", ".join(pdx_wenst_wpartner['ensp_pair'].drop_duplicates()))
result = pd.read_sql_query(sql_query, engine)

KeyboardInterrupt: 

In [24]:
# Display the frame assigned to `result` by the scored-pairs query cell.
result

NameError: name 'result' is not defined

In [None]:
# Empty query placeholder: the string holds nothing but two newlines.
sql_query = "\n\n"

In [25]:
# List the contents of the `kaist` directory via the explicit line magic.
%ls kaist


PDX_DrugList_20150729.xlsx


In [None]:
# Empty query placeholder: the string holds nothing but two newlines.
sql_query = "\n\n"

In [None]:


# Preview the first five rows of the borrelidin partner frame.
borrelidin_df.head(n=5)

In [None]:
# Display the list of protein ids assigned to `borrelidin_ensts`.
borrelidin_ensts

In [None]:
# NOTE(review): no assignment to `df2` appears in the visible cells — confirm
# where it is defined before relying on this cell.
df2.shape

In [None]:
# Distinct CIDs from the drug list. The load cell renames the `CID` column to
# `cid`, so the lower-case name is the one present on `pdx`.
query_cids = set(pdx['cid'])

In [None]:
# Look up the query CIDs in the table of all tested drugs. Each CID is coerced
# to a plain int so only integer literals are interpolated into the SQL text.
sql_query = """
select *
from chemical_interactions_v2.all_tested_drugs
where pubchem_cid_sub in ({})
""".format(", ".join(str(int(cid)) for cid in query_cids))

# `postgresql://` is used because the legacy `postgres://` alias is rejected by
# SQLAlchemy 1.4 and later; `KIMLAB_DB_URL` can override the connection target.
# NOTE(review): the fallback still embeds the original credentials.
engine = sa.create_engine(
    os.environ.get(
        'KIMLAB_DB_URL',
        'postgresql://postgres:postgres@192.168.6.19:5432/kimlab',
    )
)
db_cids = pd.read_sql_query(sql_query, engine)

In [None]:
# Show the first row, then print the column dtypes and the frame's shape.
display(db_cids.iloc[:1])
print(db_cids.dtypes, db_cids.shape, sep='\n')

In [None]:
# Fetch predictor_1 rows in which either compound is one of the query CIDs.
# Each CID is coerced to a plain int so only integer literals are interpolated
# into the SQL text; the same list fills both IN clauses.
sql_query = """
select *
from chemical_interactions_v2.predictor_1
where cid_1 in ({0})
or cid_2 in ({0})
""".format(", ".join(str(int(cid)) for cid in query_cids))

# `postgresql://` is used because the legacy `postgres://` alias is rejected by
# SQLAlchemy 1.4 and later; `KIMLAB_DB_URL` can override the connection target.
# NOTE(review): the fallback still embeds the original credentials.
engine = sa.create_engine(
    os.environ.get(
        'KIMLAB_DB_URL',
        'postgresql://postgres:postgres@192.168.6.19:5432/kimlab',
    )
)
db_cids = pd.read_sql_query(sql_query, engine)

In [None]:
# Show the first row, then print the column dtypes and the frame's shape.
display(db_cids.iloc[:1])
print(db_cids.dtypes, db_cids.shape, sep='\n')
# Distinct CIDs that occur in either compound column, and how many there are.
db_cids_set = set(db_cids['cid_1']).union(db_cids['cid_2'])
print(len(db_cids_set))

In [None]:
# Fetch scored pairs whose first protein is in the first id list or whose
# second protein is in the second id list. The second IN-list previously ended
# with a dangling comma, which is a SQL syntax error.
sql_query = """
select *
from chemical_interactions_v2.predictor_2_all_unused_pairs_scored
where ensp_1 in (324331, 274680)
or ensp_2 in (265112, 502553, 455217, 506040, 514259, 626210, 627006);
"""
# `postgresql://` is used because the legacy `postgres://` alias is rejected by
# SQLAlchemy 1.4 and later; `KIMLAB_DB_URL` can override the connection target.
# NOTE(review): the fallback still embeds the original credentials.
engine = sa.create_engine(
    os.environ.get(
        'KIMLAB_DB_URL',
        'postgresql://postgres:postgres@192.168.6.19:5432/kimlab',
    )
)
result_df = pd.read_sql_query(sql_query, engine)

In [None]:
# Preview the first five rows of the query result.
result_df.head(n=5)

In [None]:
# Histogram of the `score_predictor_2` column.
result_df.hist(column='score_predictor_2')

In [None]:
# Show the (rows, columns) shape of the query result.
result_df.shape

In [None]:
# Display `result_df`; as the cell's last expression it is rendered by the notebook.
result_df

In [None]:
# Pull at most 100 rows of the tested-drugs table through the existing engine.
df = pd.read_sql_query(
    sql="select * from chemical_interactions_v2.all_tested_drugs limit 100",
    con=engine,
)

In [None]:
# Preview the first five rows of the sampled table.
df.head(n=5)