# Load packages

In [1]:
import os
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [2]:
current = os.getcwd()
os.chdir('../modelEval')

In [3]:
from scripts.evalModel import evalFullModel

In [4]:
os.chdir(current)

# Load training cdr

In [5]:
trainRNA = pd.read_csv('../../data/processed/RNA_test_cancergenes.csv', index_col=0)
trainCellLines = list(trainRNA.index)

trainCDR = pd.read_csv('../../data/processed/drugCellLinePairsData.csv')
trainCDR = trainCDR[trainCDR.DepMap_ID.isin(trainCellLines)]

# Define 

In [31]:
# datasets to examine
datasets = ['test', 'newcancer']

# baseline path and best model
dscPath = '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/DeepDSC/customEffScore/'
dscBest = 'DeepDSC_Encoder_Hidden_64_32_AFlkyu_LR1e-05_DR99_DS10_GC1_DNN_Hidden_128_16_DO3_AFrelu_LR001_DR96_DS5'

# out best path and model
fsPath = '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot/cdr/embedDrug_embedRNA_concat/'
fsBest = 'Unfused-FewShotCDREmbedDrugEmbedCell_NL64_DO0-3_AFsigmoid_LR0-01_DR0-99_DS50'

# DeepDSC Model

In [7]:
dscEval = evalFullModel(dscPath, modelType='DeepDSC')

In [8]:
dscPreds, _, dscCounts = dscEval.iterateModels(datasets[0], modelName=dscBest)

Average Cell Line precision @ k
Model: DeepDSC_Encoder_Hidden_64_32_AFlkyu_LR1e-05_DR99_DS10_GC1_DNN_Hidden_128_16_DO3_AFrelu_LR001_DR96_DS5
	Precision@1: 0.902
	Precision@2: 0.7843
	Precision@3: 0.6863
	Precision@4: 0.6029
	Precision@5: 0.5412
	Precision@10: 0.5051

Top ranked drug for each cell line:
No true effective drugs identified in top 3 for ACH-000563 (top drug: alvespimycin)

# cell lines without highly effective drug among top-3 predictions: 1

# of unique drugs among top-3 predictions: 13


# FS-CDR Model

In [32]:
fsEval = evalFullModel(fsPath, modelType='fsCDR')

In [33]:
fsPreds, _, fsCounts = fsEval.iterateModels(datasets[0], modelName=fsBest)

Average Cell Line precision @ k
Model: Unfused-FewShotCDREmbedDrugEmbedCell_NL64_DO0-3_AFsigmoid_LR0-01_DR0-99_DS50
	Precision@1: 0.9804
	Precision@2: 0.9314
	Precision@3: 0.9085
	Precision@4: 0.8578
	Precision@5: 0.851
	Precision@10: 0.8256

Top ranked drug for each cell line:

# cell lines without highly effective drug among top-3 predictions: 0

# of unique drugs among top-3 predictions: 15


# Explore training pairs

In [18]:
trainCDR.head()

Unnamed: 0,DepMap_ID,cancer_type,name,moa,target,indication,phase,r2,ic50,auc,lower_limit,effectiveCont,effective
0,ACH-000320,Pancreatic Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.972544,0.007879,0.336463,0.10514,6.692422,0
9,ACH-000846,Head and Neck Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.883318,0.052711,0.492464,0.101684,4.808385,0
16,ACH-000305,Esophageal Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.786261,0.190516,0.693634,0.33356,2.625589,0
26,ACH-000747,Lung Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.90381,0.370394,0.676512,0.104899,3.086931,0
35,ACH-000368,Brain Cancer,floxuridine,DNA synthesis inhibitor,TYMS,colorectal cancer,Launched,0.76943,0.058917,0.486676,0.04548,5.423301,0


In [19]:
totalEff = pd.DataFrame(index=trainCDR.name.unique(), columns=['total'])
for drug, subdf in trainCDR.groupby(by='name'):
    totalEff.loc[drug, 'total'] =  subdf.effective.sum()
    
totalEff = totalEff.sort_values(by='total', ascending=False).reset_index().rename(columns={'index':'drug'})
totalEff.head(10)

Unnamed: 0,drug,total
0,YM-155,35
1,epothilone-b,34
2,dolastatin-10,34
3,echinomycin,33
4,10-hydroxycamptothecin,33
5,genz-644282,32
6,romidepsin,31
7,alvespimycin,29
8,nemorubicin,25
9,cabazitaxel,20


In [20]:
print(len(totalEff))

893


## DSC overlap

In [21]:
totalEff[totalEff.drug.isin(list(dscRecs))]

Unnamed: 0,drug,total
0,YM-155,35
2,dolastatin-10,34
5,genz-644282,32
6,romidepsin,31
7,alvespimycin,29
8,nemorubicin,25
10,BGT226,19
18,epothilone-d,12
21,peruvoside,11
51,tanespimycin,4


In [22]:
trainCDR[trainCDR.name == 'dronedarone']

Unnamed: 0,DepMap_ID,cancer_type,name,moa,target,indication,phase,r2,ic50,auc,lower_limit,effectiveCont,effective
26738,ACH-000846,Head and Neck Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.78583,1.933891,0.832369,0.012248,4.25513,0
26755,ACH-000896,Bladder Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.774087,1.879819,0.832005,0.040924,3.068232,0
26759,ACH-000510,Lung Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.746311,2.242921,0.84985,0.039495,3.035108,0
26760,ACH-000164,Pancreatic Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.731916,1.617709,0.812753,0.00482,5.258224,0
26761,ACH-000650,Skin Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.733432,2.286427,0.848956,0.010305,4.364812,0
26763,ACH-000788,Skin Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.869245,2.371103,0.859649,0.074868,2.382377,0
26775,ACH-000776,Brain Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.774155,2.402427,0.86685,0.123133,1.889295,0
26777,ACH-000486,Bladder Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.736756,2.028855,0.836425,0.006949,4.802217,0
26782,ACH-000280,Ovarian Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.7279,1.720118,0.81866,0.004434,5.316335,0
26787,ACH-000573,Breast Cancer,dronedarone,adrenergic receptor antagonist,"ADRA1A, ADRA1B, ADRA1D, ADRA2A, ADRA2B, ADRA2C...",atrial fibrillation (AF),Launched,0.713348,3.322323,0.886485,0.002084,5.837862,0


In [23]:
dscPreds[dscPreds.drug == 'dronedarone']

Unnamed: 0,cell_line,cancer_type,drug,true,pred
3270,ACH-000510,Lung Cancer,dronedarone,0,0.179848
3274,ACH-000776,Brain Cancer,dronedarone,0,0.170917
3268,ACH-000846,Head and Neck Cancer,dronedarone,0,0.17048
3275,ACH-000486,Bladder Cancer,dronedarone,0,0.169782
3272,ACH-000650,Skin Cancer,dronedarone,0,0.159348
3269,ACH-000896,Bladder Cancer,dronedarone,0,0.156198
3276,ACH-000280,Ovarian Cancer,dronedarone,0,0.152792
3278,ACH-000665,Lung Cancer,dronedarone,0,0.150507
3277,ACH-000573,Breast Cancer,dronedarone,0,0.146644
3271,ACH-000164,Pancreatic Cancer,dronedarone,0,0.146087


In [41]:
for cellLine, subdf in dscPreds.groupby(by='cell_line'):
    top3 = subdf.reset_index(drop=True).iloc[:3, :]
    if 'dronedarone' in list(top3.drug):
        print(cellLine)
        break
    
subdf.head(15)

ACH-000164


Unnamed: 0,cell_line,cancer_type,drug,true,pred
2087,ACH-000164,Pancreatic Cancer,ouabain,0,0.147241
791,ACH-000164,Pancreatic Cancer,genz-644282,1,0.146865
3271,ACH-000164,Pancreatic Cancer,dronedarone,0,0.146087
746,ACH-000164,Pancreatic Cancer,paclitaxel,0,0.145062
607,ACH-000164,Pancreatic Cancer,docetaxel,0,0.14298
5482,ACH-000164,Pancreatic Cancer,10-hydroxycamptothecin,0,0.142493
6241,ACH-000164,Pancreatic Cancer,oprozomib,1,0.142077
399,ACH-000164,Pancreatic Cancer,lanatoside-c,0,0.140697
3244,ACH-000164,Pancreatic Cancer,digoxin,1,0.139669
2396,ACH-000164,Pancreatic Cancer,NVP-BEZ235,0,0.139129


In [25]:
subdf.true.sum()

5

In [26]:
subdf.reset_index(drop=True, inplace=True)
subdf[subdf.true == 1]

Unnamed: 0,cell_line,cancer_type,drug,true,pred
1,ACH-000164,Pancreatic Cancer,genz-644282,1,0.146865
6,ACH-000164,Pancreatic Cancer,oprozomib,1,0.142077
8,ACH-000164,Pancreatic Cancer,digoxin,1,0.139669
23,ACH-000164,Pancreatic Cancer,doxorubicin,1,0.133552
35,ACH-000164,Pancreatic Cancer,anguidine,1,0.129751


## fsCDR overlap

In [27]:
totalEff[totalEff.drug.isin(list(fsRecs))]

Unnamed: 0,drug,total
0,YM-155,35
1,epothilone-b,34
2,dolastatin-10,34
3,echinomycin,33
4,10-hydroxycamptothecin,33
5,genz-644282,32
6,romidepsin,31
7,alvespimycin,29
8,nemorubicin,25
10,BGT226,19


# Examine drug recs by model

## DeepDSC

In [9]:
dscCounts.sort_values(by='total', ascending=False, inplace=True)
dscCounts

Unnamed: 0,1,2,3,total
alvespimycin,12,26,0,38
dolastatin-10,35,0,0,35
BGT226,4,13,17,34
romidepsin,0,7,13,20
tanespimycin,0,2,13,15
genz-644282,0,1,2,3
cephalomannine,0,0,3,3
ouabain,1,0,1,2
peruvoside,0,2,0,2
nemorubicin,0,1,0,1


In [10]:
dscRecs = set(dscCounts.index)

## fsCDR

In [34]:
fsCounts.sort_values(by='total', ascending=False, inplace=True)
fsCounts

Unnamed: 0,1,2,3,total
dolastatin-10,23,11,1,35
YM-155,3,13,17,33
romidepsin,6,17,8,31
echinomycin,0,4,13,17
triptolide,11,2,0,13
10-hydroxycamptothecin,2,1,4,7
maytansinol-isobutyrate,6,0,0,6
genz-644282,0,2,2,4
alvespimycin,0,1,2,3
epothilone-b,0,0,2,2


In [35]:
fsRecs = set(fsCounts.index)

# Model drug rec overlap

In [117]:
recOverlap = fsRecs.intersection(dscRecs)
# Sort by order of training rank
recOverlap = [d for d in totalEff.drug if d in recOverlap]
recOverlap

['YM-155',
 'dolastatin-10',
 'genz-644282',
 'romidepsin',
 'alvespimycin',
 'nemorubicin',
 'epothilone-d']

In [120]:
fsCounts.loc[recOverlap, :]

Unnamed: 0,1,2,3,total
YM-155,3,13,17,33
dolastatin-10,23,11,1,35
genz-644282,0,2,2,4
romidepsin,6,17,8,31
alvespimycin,0,1,2,3
nemorubicin,0,1,0,1
epothilone-d,0,0,1,1


In [121]:
dscCounts.loc[recOverlap, :]

Unnamed: 0,1,2,3,total
YM-155,0,0,1,1
dolastatin-10,35,0,0,35
genz-644282,0,1,2,3
romidepsin,0,7,13,20
alvespimycin,12,26,0,38
nemorubicin,0,1,0,1
epothilone-d,0,0,1,1


# Examine range of preds by drug

## DeepDSC

In [125]:
dscPredRange = {'drug': [], 'predCount': [], 'predRange': []}

for drug, subdf in dscPreds.groupby(by='drug'):
    dscPredRange['drug'].append(drug)
    dscPredRange['predCount'].append(len(subdf))
    minPred = subdf.pred.min()
    maxPred = subdf.pred.max()
    dscPredRange['predRange'].append(maxPred-minPred)
    
dscPredRange = pd.DataFrame(dscPredRange).sort_values(by='predRange', ascending=False)
dscPredRange.reset_index(drop=True, inplace=True)
dscPredRange.head(10)

Unnamed: 0,drug,predCount,predRange
0,dolastatin-10,35,0.086776
1,BGT226,34,0.082672
2,cephalomannine,21,0.082106
3,paclitaxel,40,0.080732
4,YM-155,41,0.080058
5,docetaxel,37,0.079843
6,vinflunine,33,0.078515
7,litronesib,32,0.078102
8,digoxin,24,0.077961
9,cabazitaxel,32,0.077668


In [127]:
dscPredRange.sort_values(by='predCount', ascending=False).head(10)

Unnamed: 0,drug,predCount,predRange
21,alvocidib,46,0.073501
26,bortezomib,44,0.072207
31,selinexor,43,0.071065
237,FK-866,42,0.045808
35,napabucasin,42,0.070659
43,genz-644282,41,0.066778
12,epothilone-b,41,0.076978
46,echinomycin,41,0.066098
32,ganetespib,41,0.071032
60,LY3023414,41,0.06292


## fsCDR

In [124]:
fsPredRange = {'drug': [], 'predCount': [], 'predRange': []}

for drug, subdf in fsPreds.groupby(by='drug'):
    fsPredRange['drug'].append(drug)
    fsPredRange['predCount'].append(len(subdf))
    minPred = subdf.pred.min()
    maxPred = subdf.pred.max()
    fsPredRange['predRange'].append(maxPred-minPred)
    
fsPredRange = pd.DataFrame(fsPredRange).sort_values(by='predRange', ascending=False)
fsPredRange.reset_index(drop=True, inplace=True)
fsPredRange.head(10)

Unnamed: 0,drug,predCount,predRange
0,GZD824,8,0.005369
1,panobinostat,39,0.002608
2,LY2874455,15,0.002165
3,cabazitaxel,32,0.002025
4,epothilone-b,41,0.001987
5,mitoxantrone,34,0.001822
6,givinostat,19,0.00178
7,brilliant-green,25,0.001778
8,WP1130,26,0.00171
9,danusertib,30,0.00162


In [129]:
fsPredRange.sort_values(by='predCount', ascending=False).head(10)

Unnamed: 0,drug,predCount,predRange
176,alvocidib,46,0.000221
23,bortezomib,44,0.001051
190,selinexor,43,0.000202
62,FK-866,42,0.000632
569,napabucasin,42,5e-06
415,ganetespib,41,3.8e-05
96,echinomycin,41,0.000414
156,genz-644282,41,0.00027
218,LY3023414,41,0.000165
4,epothilone-b,41,0.001987


# Examine similarity of recommended drugs

## import packages

In [46]:
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.models import load_model

## Setup experiment

### load drug fingerprints

In [30]:
fps = pd.read_csv('../../data/processed/drug_fingerprints.csv', index_col=0)
fps.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
cytarabine,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
epinastine,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
floxuridine,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
valrubicin,1,0,0,0,0,1,0,0,1,0,...,0,1,0,1,1,1,0,1,0,0
adapalene,1,1,1,0,0,1,0,0,0,0,...,0,0,1,1,1,1,0,0,0,0


### get average sim of drug fps

In [82]:
fpsSim = cosine_similarity(fps)
fpsSims = fpsSim[np.triu_indices(len(fpsSim),1)]
print(f"Average similarity for drug fingerprints: {str(np.mean(fpsSims))[:7]}")

Average similarity for drug fingerprints: 0.30363


### load drug encoder

In [48]:
drugEncoderPath = '../../models/drugEncoders/DrugFewShot_Layers1_Hidden64_DO0-1_AFrelu_LR0-001_DR0-99_DS1000'
drugEncoder = load_model(drugEncoderPath).get_layer('model')
drugEncoder._name = 'drugEncoder'
drugEncoder.summary()

Model: "drugEncoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256)]             0         
                                                                 
 dense (Dense)               (None, 64)                16448     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 64)                4160      
                                                                 
Total params: 20,608
Trainable params: 20,608
Non-trainable params: 0
_________________________________________________________________


### Encoder drug fingerprints

In [50]:
encodedDrugs = pd.DataFrame(drugEncoder(fps.to_numpy()), index=fps.index)
encodedDrugs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
cytarabine,0.0,0.0,4.14897,1.674288,0.0,0.0,5.396765,0.0,2.078526,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.178461,0.0,2.545449
epinastine,0.0,0.0,0.0,0.695803,0.0,2.58031,1.251452,0.086429,0.0,0.0,...,0.0,0.0,0.0,0.150529,0.0,0.0,0.0,0.0,0.0,0.0
floxuridine,0.0,0.0,4.986944,2.379904,1.527822,0.0,3.699862,0.0,2.491894,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.457049,0.0,1.083706
valrubicin,0.0,2.237368,0.0,0.0,0.0,0.0,0.0,1.321786,1.31388,0.0,...,0.0,0.0,3.946135,3.768987,0.0,0.0,2.814171,0.697466,0.0,1.543817
adapalene,0.0,0.0,0.0,0.0,0.0,0.0,2.499742,0.600349,0.0,0.0,...,0.0,1.016979,4.459757,0.0,3.530415,0.0,0.0,4.399436,0.0,0.0


### Get average similarity

In [81]:
embedSim = cosine_similarity(encodedDrugs)
allSims = embedSim[np.triu_indices(len(embedSim),1)]
print(f"Average similarity for drug embedings: {str(np.mean(allSims))[:7]}")

Average similarity for drug embedings: 0.35779


### Get MOA DF

In [105]:
moa = pd.read_csv('../../data/processed/drugCellLinePairsData.csv')
moa = moa.loc[:, ['name', 'moa']].drop_duplicates(keep='first').set_index('name')
getMOA = lambda x: moa.loc[x, 'moa']

### Make blank dataframe

In [98]:
cols = ['drug1', 'drug2', 'drug3', 'moa1_2', 'moa1_3', 'moa2_3', 'sim1_2', 'sim1_3', 'sim2_3']
blankRecSim = pd.DataFrame(columns=cols, index=fsPreds.cell_line.unique())
blankRecSim.head()

Unnamed: 0,drug1,drug2,drug3,moa1_2,moa1_3,moa2_3,sim1_2,sim1_3,sim2_3
ACH-000823,,,,,,,,,
ACH-000788,,,,,,,,,
ACH-000222,,,,,,,,,
ACH-000961,,,,,,,,,
ACH-000467,,,,,,,,,


## fsCDR

In [110]:
fsRecSim = blankRecSim.copy()
for cellLine, subdf in fsPreds.groupby(by='cell_line'):
    subdf.sort_values(by='pred', ascending=False, inplace=True)
    subdf.reset_index(inplace=True, drop=True)
    topDrugs = list(subdf.drug)[:3]
    moas = [getMOA(topDrugs[0]), getMOA(topDrugs[1]), getMOA(topDrugs[2])]
    moaSame = [int(moas[0]==moas[1]), int(moas[0]==moas[2]), int(moas[1]==moas[2])]
    d1 = encodedDrugs.loc[topDrugs[0], :]
    d2 = encodedDrugs.loc[topDrugs[1], :]
    d3 = encodedDrugs.loc[topDrugs[2], :]
    drugs = np.array([d1,d2,d3])
    sims = cosine_similarity(drugs)
    sims = [sims[0][1], sims[0][2], sims[1][2]]
    row = topDrugs + moaSame + sims
    fsRecSim.loc[cellLine, :] = row
    if cellLine == 'ACH-000834':
        print(moas)

fsRecSim

['HDAC inhibitor', 'topoisomerase inhibitor', 'topoisomerase inhibitor']


Unnamed: 0,drug1,drug2,drug3,moa1_2,moa1_3,moa2_3,sim1_2,sim1_3,sim2_3
ACH-000823,maytansinol-isobutyrate,dolastatin-10,romidepsin,0,0,0,0.391024,0.302599,0.438205
ACH-000788,maytansinol-isobutyrate,dolastatin-10,romidepsin,0,0,0,0.391024,0.302599,0.438205
ACH-000222,maytansinol-isobutyrate,triptolide,dolastatin-10,0,0,0,0.668389,0.391024,0.311363
ACH-000961,maytansinol-isobutyrate,triptolide,romidepsin,0,0,0,0.668389,0.302599,0.602038
ACH-000467,maytansinol-isobutyrate,dolastatin-10,YM-155,0,0,0,0.391024,0.461327,0.236087
ACH-000721,maytansinol-isobutyrate,dolastatin-10,YM-155,0,0,0,0.391024,0.461327,0.236087
ACH-000665,triptolide,dolastatin-10,romidepsin,0,0,0,0.311363,0.602038,0.438205
ACH-000715,triptolide,dolastatin-10,romidepsin,0,0,0,0.311363,0.602038,0.438205
ACH-000860,triptolide,romidepsin,YM-155,0,0,0,0.602038,0.546889,0.521528
ACH-000421,triptolide,dolastatin-10,10-hydroxycamptothecin,0,0,0,0.311363,0.344171,0.569532


In [108]:
moas

['RNA polymerase inhibitor', 'HDAC inhibitor', 'survivin inhibitor']

### Compare to average

In [90]:
fsAvgSim = fsRecSim.iloc[:, -3:].mean().mean()
pcntChngFS = (fsAvgSim- np.mean(fpsSims)) / np.mean(fpsSims)
print(pcntChngFS)

0.4702559104035975


### Check to see if changes due to different predictions or drugs not being tested

In [93]:
fsPreds[fsPreds.cell_line == 'ACH-000823']

Unnamed: 0,cell_line,cancer_type,drug,true,pred
4321,ACH-000823,Esophageal Cancer,maytansinol-isobutyrate,1,0.971395
4835,ACH-000823,Esophageal Cancer,dolastatin-10,0,0.950393
57,ACH-000823,Esophageal Cancer,romidepsin,1,0.901624
3063,ACH-000823,Esophageal Cancer,YM-155,1,0.890601
6312,ACH-000823,Esophageal Cancer,echinomycin,0,0.867241
...,...,...,...,...,...
1899,ACH-000823,Esophageal Cancer,mycophenolic-acid,0,0.000745
2775,ACH-000823,Esophageal Cancer,pevonedistat,0,0.000605
7173,ACH-000823,Esophageal Cancer,disulfiram,0,0.000538
7361,ACH-000823,Esophageal Cancer,napabucasin,0,0.000376


In [95]:
fsPreds[fsPreds.cell_line == 'ACH-000665']

Unnamed: 0,cell_line,cancer_type,drug,true,pred
825,ACH-000665,Lung Cancer,triptolide,1,0.969353
4862,ACH-000665,Lung Cancer,dolastatin-10,1,0.950108
78,ACH-000665,Lung Cancer,romidepsin,1,0.901772
3093,ACH-000665,Lung Cancer,YM-155,1,0.890497
6342,ACH-000665,Lung Cancer,echinomycin,1,0.867382
...,...,...,...,...,...
2783,ACH-000665,Lung Cancer,pevonedistat,0,0.000604
7198,ACH-000665,Lung Cancer,disulfiram,0,0.000542
2148,ACH-000665,Lung Cancer,AVN-944,0,0.000450
7370,ACH-000665,Lung Cancer,napabucasin,0,0.000378


In [96]:
# check to see if triptolide in 823
'triptolide' in list(fsPreds[fsPreds.cell_line == 'ACH-000823'].drug)

False

## DeepDSC

In [111]:
dscRecSim = blankRecSim.copy()
for cellLine, subdf in dscPreds.groupby(by='cell_line'):
    subdf.sort_values(by='pred', ascending=False, inplace=True)
    subdf.reset_index(inplace=True, drop=True)
    topDrugs = list(subdf.drug)[:3]
    moas = [getMOA(topDrugs[0]), getMOA(topDrugs[1]), getMOA(topDrugs[2])]
    moaSame = [int(moas[0]==moas[1]), int(moas[0]==moas[2]), int(moas[1]==moas[2])]
    d1 = fps.loc[topDrugs[0], :]
    d2 = fps.loc[topDrugs[1], :]
    d3 = fps.loc[topDrugs[2], :]
    drugs = np.array([d1,d2,d3])
    sims = cosine_similarity(drugs)
    sims = [sims[0][1], sims[0][2], sims[1][2]]
    row = topDrugs + moaSame + sims
    dscRecSim.loc[cellLine, :] = row

dscRecSim

Unnamed: 0,drug1,drug2,drug3,moa1_2,moa1_3,moa2_3,sim1_2,sim1_3,sim2_3
ACH-000823,dolastatin-10,BGT226,romidepsin,0,0,0,0.397142,0.396412,0.39358
ACH-000788,dolastatin-10,BGT226,romidepsin,0,0,0,0.397142,0.396412,0.39358
ACH-000222,dolastatin-10,alvespimycin,BGT226,0,0,0,0.344804,0.397142,0.435415
ACH-000961,alvespimycin,BGT226,romidepsin,0,0,0,0.435415,0.384426,0.39358
ACH-000467,dolastatin-10,alvespimycin,BGT226,0,0,0,0.344804,0.397142,0.435415
ACH-000721,dolastatin-10,alvespimycin,tanespimycin,0,0,1,0.344804,0.328415,0.926687
ACH-000665,dolastatin-10,alvespimycin,BGT226,0,0,0,0.344804,0.397142,0.435415
ACH-000715,dolastatin-10,alvespimycin,romidepsin,0,0,0,0.344804,0.396412,0.384426
ACH-000860,BGT226,romidepsin,tanespimycin,0,0,0,0.39358,0.404956,0.403162
ACH-000421,dolastatin-10,BGT226,genz-644282,0,0,0,0.397142,0.408248,0.432353


### Compare to average

In [91]:
dscAvgSim = dscRecSim.iloc[:, -3:].mean().mean()
pcntChngDSC = (dscAvgSim- np.mean(fpsSims)) / np.mean(fpsSims)
print(pcntChngDSC)

0.40571561183223337


In [97]:
print(pcntChngFS)

0.4702559104035975


Drugs recommended by our model are more similar to each other than DSC

## Compare overlap MOA

In [112]:
dscRecSim.loc[:, ['moa1_2', 'moa1_3', 'moa2_3']].sum().sum()

11

In [113]:
fsRecSim.loc[:, ['moa1_2', 'moa1_3', 'moa2_3']].sum().sum()

3

DeepDSC has more instances where recs have overlapping MOA, but on further examination, only single pair: 
alvespimycin and tanespimycin

fsCDR also has single pair: 10-hydroxycamptothecin and genz-644282

In [130]:
getMOA('TAS-103')

'topoisomerase inhibitor'

In [132]:
getMOA('topotecan')

'topoisomerase inhibitor'

In [133]:
getMOA('nemorubicin')

'topoisomerase inhibitor'

In [134]:
getMOA('YM-155')

'survivin inhibitor'

In [135]:
getMOA('genz-644282')

'topoisomerase inhibitor'

In [141]:
getMOA('emetine')

'protein synthesis inhibitor'

In [140]:
trainCDR[trainCDR.cancer_type == 'Brain Cancer'].sort_values(by='effectiveCont', ascending=False).head(25)

Unnamed: 0,DepMap_ID,cancer_type,name,moa,target,indication,phase,r2,ic50,auc,lower_limit,effectiveCont,effective
39227,ACH-000776,Brain Cancer,OTS167,maternal embryonic leucine zipper kinase inhib...,MELK,,Phase 1,0.904347,0.011514,0.317396,1e-06,17.62058,1
1823,ACH-000776,Brain Cancer,doxorubicin,topoisomerase inhibitor,TOP2A,"acute lymphoblastic leukemia (ALL), acute myel...",Launched,0.86321,0.126905,0.55,1.6e-05,12.608156,1
1709,ACH-000368,Brain Cancer,doxorubicin,topoisomerase inhibitor,TOP2A,"acute lymphoblastic leukemia (ALL), acute myel...",Launched,0.755299,0.104669,0.530173,7e-05,11.313405,1
603,ACH-000776,Brain Cancer,romidepsin,HDAC inhibitor,"HDAC1, HDAC2, HDAC3, HDAC4, HDAC5, HDAC6, HDAC...",cutaneous T-cell lymphoma (CTCL),Launched,0.956009,0.003249,0.17426,0.002442,11.083333,1
439,ACH-000368,Brain Cancer,romidepsin,HDAC inhibitor,"HDAC1, HDAC2, HDAC3, HDAC4, HDAC5, HDAC6, HDAC...",cutaneous T-cell lymphoma (CTCL),Launched,0.860328,0.003634,0.186467,0.003386,10.649303,1
51344,ACH-000570,Brain Cancer,ixazomib-citrate,proteasome inhibitor,,multiple myeloma,Launched,0.763558,0.200292,0.597009,8.2e-05,10.616456,1
23266,ACH-000570,Brain Cancer,epothilone-b,"microtubule stabilizing agent, tubulin polymer...","TUBA1A, TUBA1B, TUBA1C, TUBA3C, TUBA4A, TUBA8,...",,Phase 3,0.951694,0.000734,0.134542,0.033888,10.137268,1
29321,ACH-000776,Brain Cancer,alvespimycin,HSP inhibitor,HSP90AA1,,Phase 2,0.712988,0.004811,0.27473,0.004402,10.102319,1
29244,ACH-000570,Brain Cancer,alvespimycin,HSP inhibitor,HSP90AA1,,Phase 2,0.944493,0.000959,0.120133,0.02737,10.066925,1
37737,ACH-000368,Brain Cancer,dolastatin-10,tubulin polymerization inhibitor,TUBB,,Phase 2,0.818389,0.001068,0.098231,0.042026,9.6821,1
