# Intro

In [2]:
import os
import os.path as op

import numpy as np
import scipy as sp
import scipy.stats
import pandas as pd
import sqlalchemy as sa

from IPython.display import display, HTML
import matplotlib.pyplot as plt

import qgrid
# One-time qgrid setup for this kernel session.
# NOTE(review): `nbinstall` presumably copies qgrid's notebook-extension files and
# `overwrite=True` replaces an existing copy — confirm against the installed qgrid version.
qgrid.nbinstall(overwrite=True)
# Session-wide qgrid defaults (remote_js / precision are passed straight to qgrid;
# their exact effect is not visible here — verify against the qgrid docs).
qgrid.set_defaults(remote_js=True, precision=4)

from common import dat



In [3]:
import pandas as pd

# Raise pandas' display limits (rows, columns, line width) in a single call;
# `set_option` accepts alternating (option name, value) pairs.
pd.set_option(
    'display.max_rows', 500,
    'display.max_columns', 500,
    'display.width', 1000,
)

In [4]:
import seaborn as sns

# Select seaborn's 'notebook' plotting context with font_scale set to 2.
sns.set_context(context='notebook', font_scale=2)



In [5]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and set it to mode 2, so edited modules
# (e.g. the local `common` package) are reloaded before each cell runs.
%load_ext autoreload
%autoreload 2

# Load data

In [24]:
# Load drug info data

# Server URL without a database name. It can be overridden through the
# AZ_DREAM_DB_URL environment variable so that host and credentials are not
# tied to this notebook; when the variable is unset the original server is used.
db_server_url = os.environ.get('AZ_DREAM_DB_URL', 'mysql://strokach:@192.168.6.19:3306')

# az_dream
engine = sa.create_engine(db_server_url + '/az_dream')
drug_info_release_3 = pd.read_sql_table('drug_info_release_3', engine)
drug_to_cid = pd.read_sql_table('drug_to_cid', engine)
drug_to_target = pd.read_sql_table('drug_to_target', engine)

# az_dream_data
# NOTE: `engine` is rebound here, so after this cell it points at `az_dream_data`.
engine = sa.create_engine(db_server_url + '/az_dream_data')
training_gby_drug = pd.read_sql_table('training_gby_drug', engine)
training_gby_drugpair = pd.read_sql_table('training_gby_drugpair', engine)
training_gby_cell = pd.read_sql_table('training_gby_cell', engine)

# training data (can also be validation data)
training = pd.read_sql_table('training', engine)

In [84]:
# Independent (deep) copy of the loaded `training` table; `deep=True` is the pandas default.
main_df = training.copy(deep=True)

In [27]:
# Preview the first five rows (5 is the default for `head`).
main_df.head(n=5)

Unnamed: 0,CELL_LINE,COMPOUND_A,COMPOUND_B,MAX_CONC_A,MAX_CONC_B,IC50_A,H_A,Einf_A,IC50_B,H_B,Einf_B,SYNERGY_SCORE,QA,COMBINATION_ID
0,BT-20,ADAM17,AKT,1,75,1.0,0.809002,59.122436,9.639714,0.757977,91.593425,29.54039,1,ADAM17.AKT
1,CAL-120,ADAM17,AKT,1,75,0.183214,2.503678,60.411999,1.0,0.0,100.0,4.40141,-1,ADAM17.AKT
2,CAL-51,ADAM17,AKT,1,75,1.0,0.726984,11.150843,75.0,0.375043,76.656479,0.315422,1,ADAM17.AKT
3,DU-4475,ADAM17,AKT,1,75,0.321533,10.0,58.599487,75.0,1.995866,6.171007,-41.73409,-1,ADAM17.AKT
4,HCC1143,ADAM17,AKT,1,75,0.398673,10.0,89.098894,1.0,0.0,100.0,35.53277,-1,ADAM17.AKT


## Drug pairs

In [73]:
# Coverage of `chemical_interactions_v2.drug_atc_similarity`: number of distinct
# `unique_id` values in `all_drug_pairs` whose COMPOUND_A / COMPOUND_B names map
# (through `challenge_name_to_cid`) to a (cid_1, cid_2) row of that table.
# NOTE(review): only the orientation A -> cid_1, B -> cid_2 is matched; if the table
# stores each pair in a single orientation, reversed pairs are not counted — confirm.
# The server URL can be overridden with AZ_DREAM_DB_URL; the default is the original server.
engine = sa.create_engine(
    os.environ.get('AZ_DREAM_DB_URL', 'mysql://strokach:@192.168.6.19:3306') + '/az_dream'
)
sql_query = """
select count(distinct unique_id)
from all_drug_pairs a
join challenge_name_to_cid d1 on (a.COMPOUND_A = d1.`ChallengeName`)
join challenge_name_to_cid d2 on (a.COMPOUND_B = d2.`ChallengeName`)
join chemical_interactions_v2.drug_atc_similarity f on (f.cid_1 = d1.cid and f.cid_2 = d2.cid);
"""
df = pd.read_sql_query(sql_query, engine)
print(df)

   count(distinct unique_id)
0                         70


In [74]:
# Coverage of `chemical_interactions_v2.drug_chemical_similarity`: number of distinct
# `unique_id` values in `all_drug_pairs` whose COMPOUND_A / COMPOUND_B names map
# (through `challenge_name_to_cid`) to a (cid_1, cid_2) row of that table.
# NOTE(review): only the orientation A -> cid_1, B -> cid_2 is matched; if the table
# stores each pair in a single orientation, reversed pairs are not counted — confirm.
# The server URL can be overridden with AZ_DREAM_DB_URL; the default is the original server.
engine = sa.create_engine(
    os.environ.get('AZ_DREAM_DB_URL', 'mysql://strokach:@192.168.6.19:3306') + '/az_dream'
)
sql_query = """
select count(distinct unique_id)
from all_drug_pairs a
join challenge_name_to_cid d1 on (a.COMPOUND_A = d1.`ChallengeName`)
join challenge_name_to_cid d2 on (a.COMPOUND_B = d2.`ChallengeName`)
join chemical_interactions_v2.drug_chemical_similarity f on (f.cid_1 = d1.cid and f.cid_2 = d2.cid);
"""
df = pd.read_sql_query(sql_query, engine)
print(df)

   count(distinct unique_id)
0                          7


In [75]:
# Coverage of `chemical_interactions_v2.drug_side_effect_similarity`: number of distinct
# `unique_id` values in `all_drug_pairs` whose COMPOUND_A / COMPOUND_B names map
# (through `challenge_name_to_cid`) to a (cid_1, cid_2) row of that table.
# NOTE(review): only the orientation A -> cid_1, B -> cid_2 is matched; if the table
# stores each pair in a single orientation, reversed pairs are not counted — confirm.
# The server URL can be overridden with AZ_DREAM_DB_URL; the default is the original server.
engine = sa.create_engine(
    os.environ.get('AZ_DREAM_DB_URL', 'mysql://strokach:@192.168.6.19:3306') + '/az_dream'
)
sql_query = """
select count(distinct unique_id)
from all_drug_pairs a
join challenge_name_to_cid d1 on (a.COMPOUND_A = d1.`ChallengeName`)
join challenge_name_to_cid d2 on (a.COMPOUND_B = d2.`ChallengeName`)
join chemical_interactions_v2.drug_side_effect_similarity f on (f.cid_1 = d1.cid and f.cid_2 = d2.cid);
"""
df = pd.read_sql_query(sql_query, engine)
print(df)

   count(distinct unique_id)
0                         22


## Target pairs

In [77]:
# Engine for the `az_dream` database; the target-pair queries below run against it.
# The server URL can be overridden with AZ_DREAM_DB_URL; the default is the original server.
engine = sa.create_engine(
    os.environ.get('AZ_DREAM_DB_URL', 'mysql://strokach:@192.168.6.19:3306') + '/az_dream'
)

In [78]:
# Coverage of `chemical_interactions_v2.biogrid_topo`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.biogrid_topo f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5522


In [79]:
# Coverage of `chemical_interactions_v2.biogrid_topo_eb`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.biogrid_topo_eb f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5443


In [80]:
# Coverage of `chemical_interactions_v2.biogrid_topo_nsp`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.biogrid_topo_nsp f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5522


In [81]:
# Coverage of `chemical_interactions_v2.gene_coexpression`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.gene_coexpression f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5534


In [83]:
# Coverage of `chemical_interactions_v2.gene_essentiality`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A matches a row of
# that table on `ensp`.
# NOTE(review): the t2 join only requires COMPOUND_B to have some `drug_to_target` row;
# its targets are not matched against gene_essentiality — confirm this asymmetry is intended.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.gene_essentiality f on (f.ensp = t1.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       6910


In [84]:
# Coverage of `chemical_interactions_v2.getint_topo`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.getint_topo f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5112


In [85]:
# Coverage of `chemical_interactions_v2.getint_topo_eb`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.getint_topo_eb f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5063


In [86]:
# Coverage of `chemical_interactions_v2.getint_topo_nsp`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.getint_topo_nsp f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5112


In [87]:
# Coverage of `chemical_interactions_v2.go_all`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.go_all f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5417


In [88]:
# Coverage of `chemical_interactions_v2.go_bp`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.go_bp f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5416


In [89]:
# Coverage of `chemical_interactions_v2.go_cc`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.go_cc f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5336


In [90]:
# Coverage of `chemical_interactions_v2.go_mf`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.go_mf f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5417


In [91]:
# Coverage of `chemical_interactions_v2.go_slim`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.go_slim f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5417


In [92]:
# Coverage of `chemical_interactions_v2.phylo`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.phylo f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       4715


In [93]:
# Coverage of `chemical_interactions_v2.string_topo`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.string_topo f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5534


In [94]:
# Coverage of `chemical_interactions_v2.string_topo_eb`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.string_topo_eb f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5534


In [95]:
# Coverage of `chemical_interactions_v2.string_topo_nsp`: distinct UNIQUE_IDs in
# `all_drug_pairs` for which a `drug_to_target` entry of COMPOUND_A and one of
# COMPOUND_B match an (ensp_1, ensp_2) row of that table.
sql_query = """
select count(distinct UNIQUE_ID)
from all_drug_pairs a
join drug_to_target t1 on (a.COMPOUND_A = t1.drug)
join drug_to_target t2 on (a.COMPOUND_B = t2.drug)
join chemical_interactions_v2.string_topo_nsp f on (f.ensp_1 = t1.ensp_idx and f.ensp_2 = t2.ensp_idx);
"""
df = pd.read_sql_query(sql=sql_query, con=engine)
print(df)

   count(distinct UNIQUE_ID)
0                       5534
