# Load packages

In [2]:
import os
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [1]:
from scripts.evalModel import evalLogisticModels, getPredDist

# Define

# Data

## Load cell lines

In [4]:
# Read the RNA feature tables for the three splits (train, test, new-cancer).
# The first CSV column becomes the index; its labels are used below to select
# rows from the DepMap_ID-keyed pair table.
trainRNA = pd.read_csv('../../data/processed/RNA_train_cancergenes.csv', index_col=0)
testRNA = pd.read_csv('../../data/processed/RNA_test_cancergenes.csv', index_col=0)
newRNA = pd.read_csv('../../data/processed/RNA_newcancer_cancergenes.csv', index_col=0)

# Keep each split's index labels as a plain Python list.
trainCellLines, testCellLines, newCellLines = (
    list(rnaTable.index) for rnaTable in (trainRNA, testRNA, newRNA)
)

## Cancer drug response (CDR)

In [5]:
# Drug/cell-line pair table, keyed by DepMap_ID so that each split can be
# selected with the cell-line lists taken from the RNA tables.
cdr = pd.read_csv('../../data/processed/drugCellLinePairsData.csv', index_col='DepMap_ID')

# reset_index() turns DepMap_ID back into an ordinary column for later use.
trainCDR, testCDR, newCDR = (
    cdr.loc[cellLines, :].reset_index()
    for cellLines in (trainCellLines, testCellLines, newCellLines)
)

In [6]:
# Template result tables for the test and new-cancer splits: the identifying
# columns plus the label, renamed to cell_line / drug / true.
testTemp, newTemp = (
    pairs[['DepMap_ID', 'cancer_type', 'name', 'effective']].rename(
        columns={'DepMap_ID': 'cell_line', 'name': 'drug', 'effective': 'true'}
    )
    for pairs in (testCDR, newCDR)
)

## Load drugs

In [7]:
# Drug fingerprint table; the first CSV column (the drug name) is the index.
drugs = pd.read_csv('../../data/processed/drug_fingerprints.csv', index_col=0)

# Preview the first rows as the cell's displayed output.
drugs.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
cytarabine,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,...,0,0,1,0,0,0,1,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0
epinastine,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1
floxuridine,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,...,0,1,0,0,0,1,1,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,1,1,1,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0
valrubicin,1,0,0,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,1,1,0,0,1,1,0,1,0,1,1,...,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,1,0,0,0,1,1,1,0,0,0,1,0,0,1,0,1,1,1,0,1,0,0
adapalene,1,1,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,1,...,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0


In [8]:
# Expand the per-drug fingerprints to one row per drug/cell-line pair,
# following the row order of each split's pair table.
trainDrugs, testDrugs, newDrugs = (
    drugs.loc[list(pairs['name'].values), :].to_numpy()
    for pairs in (trainCDR, testCDR, newCDR)
)

# Same expansion for the RNA features, looked up by DepMap_ID.
# NOTE: this rebinds trainRNA/testRNA/newRNA from DataFrames to NumPy arrays,
# so the cell cannot be re-run without reloading the RNA tables first.
trainRNA, testRNA, newRNA = (
    rnaTable.loc[list(pairs['DepMap_ID'].values), :].to_numpy()
    for rnaTable, pairs in zip((trainRNA, testRNA, newRNA), (trainCDR, testCDR, newCDR))
)

In [9]:
# Each split's model input is a [drug features, RNA features] list.
trainData, testData, newData = (
    [trainDrugs, trainRNA],
    [testDrugs, testRNA],
    [newDrugs, newRNA],
)

# Labels for the training pairs.
trainEff = trainCDR['effective'].to_numpy()

In [10]:
# Remove the intermediate names from the namespace. The feature arrays stay
# alive because trainData/testData/newData still reference them.
del cdr, drugs
del trainDrugs, testDrugs, newDrugs
del trainRNA, testRNA, newRNA

# Fusion performance

In [23]:
# Location of the drug few-shot model that is passed as drugPath to
# evalLogisticModels in the cells below.
drugModel = 'DrugFewShot_Layers2_Hidden16_DO0-1_AFrelu_LR0-01_DR0-99_DS1000'
drugPath = os.path.join(
    '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot/drugs/siameseV1/models/',
    drugModel,
)

In [30]:
# Root of the embedDrug_rawRNA fusion experiment and its saved-model directory.
fusionPath = '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot/fusion/embedDrug_rawRNA/'
modelPath = os.path.join(fusionPath, 'models')

# os.listdir returns entries in arbitrary order; sort them so the model search
# below visits the candidates (and breaks ties) the same way on every run.
models = sorted(os.listdir(modelPath))

In [27]:
# Search the saved fusion models for the one with the highest precision@1.
# thresh1 starts as the minimum score to beat and then tracks the best score
# returned so far.
thresh1 = 0.2
bestModel1 = ''
for m in models:
    currentFusion = os.path.join(modelPath, m)
    evalLog = evalLogisticModels(
        trainData.copy(),
        trainEff,
        testData.copy(),
        newData.copy(),
        fusionPath=currentFusion,
        drugPath=drugPath,
        rnaPath=None,
    )

    currentThresh = evalLog.evaluate(
        testDF=testTemp.copy(),
        newDF=newTemp.copy(),
        modelName=m,
        at=1,
        thresh=thresh1,
        returnThresh=True,
    )

    # Strict comparison: on a tie the earlier model is kept.
    if currentThresh > thresh1:
        thresh1, bestModel1 = currentThresh, m

print(f"Best model in terms of precision@1:\n{bestModel1} ({round(thresh1, 4)})")

Model: FusionFewShotEmbedDrugRawCell_NL128_64_32_DO0-1_AFrelu_LR0-001_DR0-99_DS1024_BYrna
	Precision@1: 0.2549
	Precision@2: 0.2745
	Precision@3: 0.3007
	Precision@4: 0.3039
	Precision@5: 0.2784
	Precision@10: 0.3051

Model: FusionFewShotEmbedDrugRawCell_NL32_32_32_DO0-1_AFrelu_LR0-001_DR0-99_DS1024_BYrna
	Precision@1: 0.3137
	Precision@2: 0.3039
	Precision@3: 0.3399
	Precision@4: 0.348
	Precision@5: 0.3333
	Precision@10: 0.3359

Model: FusionFewShotEmbedDrugRawCell_NL64_64_64_DO0-1_AFrelu_LR0-001_DR0-99_DS1024_BYrna
	Precision@1: 0.4706
	Precision@2: 0.3627
	Precision@3: 0.366
	Precision@4: 0.3382
	Precision@5: 0.3176
	Precision@10: 0.3231

Best model in terms of precision@1:
FusionFewShotEmbedDrugRawCell_NL64_64_64_DO0-1_AFrelu_LR0-001_DR0-99_DS1024_BYrna (0.4706)


In [28]:
# Search the saved fusion models for the one with the highest precision@5.
# thresh5 starts as the minimum score to beat and then tracks the best score
# returned so far.
thresh5 = 0.3
bestModel5 = ''
for m in models:
    currentFusion = os.path.join(modelPath, m)
    # rnaPath is never assigned in this notebook, so passing the bare name
    # raises NameError on a fresh kernel; pass None explicitly, exactly as the
    # precision@1 search does.
    evalLog = evalLogisticModels(trainData.copy(), trainEff, testData.copy(), newData.copy(),
                                fusionPath=currentFusion, drugPath=drugPath, rnaPath=None)

    currentThresh = evalLog.evaluate(testDF=testTemp.copy(), newDF=newTemp.copy(),
                                     modelName=m, at=5, thresh=thresh5, returnThresh=True)

    # Strict comparison: on a tie the earlier model is kept.
    if currentThresh > thresh5:
        thresh5 = currentThresh
        bestModel5 = m

print(f"Best model in terms of precision@5:\n{bestModel5} ({round(thresh5, 4)})")

Model: FusionFewShotEmbedDrugRawCell_NL32_32_32_DO0-1_AFrelu_LR0-001_DR0-99_DS1024_BYrna
	Precision@1: 0.3137
	Precision@2: 0.3039
	Precision@3: 0.3399
	Precision@4: 0.348
	Precision@5: 0.3333
	Precision@10: 0.3359



KeyboardInterrupt: 

In [29]:
# Report whether the precision@1 and precision@5 searches picked the same model.
print("Best model same @ k=1 and k=5: {}".format(bestModel1 == bestModel5))

Best model same @ k=1 and k=5: False


In [31]:
# Pin the model used for the detailed analysis below (the same name the
# precision@1 search reported as best) and build the path to its directory.
# NOTE: this rebinds fusionPath from the experiment root to a single model
# directory; modelPath was derived from the earlier value and is unchanged.
bestModel = 'FusionFewShotEmbedDrugRawCell_NL64_64_64_DO0-1_AFrelu_LR0-001_DR0-99_DS1024_BYrna'
fusionPath = os.path.join(modelPath, bestModel)

In [32]:
# Build the evaluator for the selected fusion model.
# rnaPath is never assigned in this notebook, so passing the bare name raises
# NameError on a fresh kernel; pass None explicitly, as the precision@1 search
# does, instead of relying on a leftover kernel variable.
evalLog = evalLogisticModels(
    trainData.copy(), trainEff, testData.copy(), newData.copy(),
    fusionPath=fusionPath, drugPath=drugPath, rnaPath=None,
)

In [33]:
# Evaluate the selected model on both held-out splits. Without returnThresh the
# call yields six objects: prediction tables, the "wrong" tables and the counts
# for the test and new-cancer splits.
(testDF, newDF,
 testWrong, newWrong,
 testCounts, newCounts) = evalLog.evaluate(
    testDF=testTemp.copy(),
    newDF=newTemp.copy(),
    thresh=0.1,
)


Average Cell Line precision @ k on test set
	Precision@1: 0.4706
	Precision@2: 0.3627
	Precision@3: 0.366
	Precision@4: 0.3382
	Precision@5: 0.3176
	Precision@10: 0.3231

Average Cell Line precision @ k on newcancer set
	Precision@1: 0.2615
	Precision@2: 0.2923
	Precision@3: 0.2974
	Precision@4: 0.3038
	Precision@5: 0.3108
	Precision@10: 0.3314


Test set:
No true effective drugs identified in top 3 for ACH-000316 (top drug: fenbendazole)
No true effective drugs identified in top 3 for ACH-000329 (top drug: paclitaxel)
No true effective drugs identified in top 3 for ACH-000347 (top drug: SNX-2112)
No true effective drugs identified in top 3 for ACH-000510 (top drug: YM-155)
No true effective drugs identified in top 3 for ACH-000563 (top drug: fenbendazole)
No true effective drugs identified in top 3 for ACH-000651 (top drug: cephalomannine)
No true effective drugs identified in top 3 for ACH-000768 (top drug: dichloroacetate)
No true effective drugs identified in top 3 for ACH-000776 (

In [36]:
# Summarise the prediction distribution on the test split; the call prints an
# average per-drug prediction variance (see the cell output).
# NOTE(review): the structure of the returned object is not visible here.
testDist = getPredDist(testDF)

Avg varaince of predictions for each drug: 0.0009


In [38]:
# Summarise the prediction distribution on the new-cancer split; the call
# prints an average per-drug prediction variance (see the cell output).
# NOTE(review): the structure of the returned object is not visible here.
newDist = getPredDist(newDF)

Avg varaince of predictions for each drug: 0.0012


In [39]:
# poor cell line: display the first ten prediction rows for ACH-000161
isTargetLine = testDF['cell_line'] == 'ACH-000161'
testDF.loc[isTargetLine].head(10)

Unnamed: 0,cell_line,cancer_type,drug,true,pred
6274,ACH-000161,Lung Cancer,YM-155,1,0.348495
6305,ACH-000161,Lung Cancer,TAS-103,0,0.348276
6296,ACH-000161,Lung Cancer,10-hydroxycamptothecin,1,0.347305
6307,ACH-000161,Lung Cancer,beta-lapachone,0,0.333575
6237,ACH-000161,Lung Cancer,genz-644282,0,0.272117
6310,ACH-000161,Lung Cancer,rubitecan,0,0.249295
6229,ACH-000161,Lung Cancer,doxorubicin,0,0.247823
6301,ACH-000161,Lung Cancer,echinomycin,0,0.242263
6262,ACH-000161,Lung Cancer,JNJ-26481585,0,0.2374
6270,ACH-000161,Lung Cancer,entinostat,0,0.237


In [22]:
# Display the first ten prediction rows for ACH-000527.
# NOTE(review): this cell was originally labelled "poor cell line", but in the
# recorded output the three highest-scored drugs are all truly effective, so it
# looks like a well-ranked example shown for contrast. Confirm the intent.
testDF[testDF.cell_line == 'ACH-000527'].head(10)

Unnamed: 0,cell_line,cancer_type,drug,true,pred
774,ACH-000527,Ovarian Cancer,JNJ-26481585,1,0.614604
793,ACH-000527,Ovarian Cancer,echinomycin,1,0.614604
783,ACH-000527,Ovarian Cancer,YM-155,1,0.59782
773,ACH-000527,Ovarian Cancer,epothilone-d,0,0.569773
781,ACH-000527,Ovarian Cancer,epothilone-b,0,0.560731
771,ACH-000527,Ovarian Cancer,sangivamycin,0,0.53547
785,ACH-000527,Ovarian Cancer,alvespimycin,1,0.534145
801,ACH-000527,Ovarian Cancer,verubulin,0,0.525908
802,ACH-000527,Ovarian Cancer,panobinostat,0,0.52477
768,ACH-000527,Ovarian Cancer,romidepsin,1,0.471187


## Cancer precision

In [40]:
# Aggregate precision by cancer type for the test and new-cancer predictions.
# The displayed results have one row per cancer type and columns p1..p5.
# Copies are passed in, so testDF/newDF themselves are not handed to the helper.
cancerTest, cancerNew = evalLog.getCancerPerformance(testDF.copy(), newDF.copy())

In [41]:
# Column means of the per-cancer precision table (test split), rounded to four
# decimals, followed by the full table as the cell's displayed output.
print(cancerTest.mean().round(4))
cancerTest

p1    0.4496
p2    0.3415
p3    0.3442
p4    0.3218
p5    0.3106
dtype: float64


Unnamed: 0,p1,p2,p3,p4,p5
Ovarian Cancer,1.0,0.5,0.416667,0.375,0.4
Endometrial/Uterine Cancer,0.666667,0.5,0.555556,0.5,0.533333
Bladder Cancer,0.666667,0.5,0.444444,0.333333,0.333333
Esophageal Cancer,0.666667,0.5,0.333333,0.333333,0.333333
Skin Cancer,0.6,0.3,0.333333,0.25,0.24
Colon/Colorectal Cancer,0.5,0.5,0.583333,0.5625,0.45
Brain Cancer,0.5,0.25,0.166667,0.1875,0.2
Lung Cancer,0.461538,0.423077,0.435897,0.403846,0.353846
Breast Cancer,0.333333,0.333333,0.222222,0.25,0.2
Head and Neck Cancer,0.0,0.166667,0.222222,0.166667,0.133333


In [42]:
# Column means of the per-cancer precision table (new-cancer split), rounded to
# four decimals, followed by the full table as the cell's displayed output.
print(cancerNew.mean().round(4))
cancerNew

p1    0.2161
p2    0.2691
p3    0.2795
p4    0.2847
p5    0.2963
dtype: float64


Unnamed: 0,p1,p2,p3,p4,p5
Rhabdoid,0.5,0.5,0.5,0.5,0.5
Bile Duct Cancer,0.4,0.3,0.266667,0.3,0.24
Gastric Cancer,0.357143,0.285714,0.285714,0.303571,0.3
Bone Cancer,0.333333,0.333333,0.333333,0.388889,0.377778
Thyroid Cancer,0.25,0.375,0.375,0.3125,0.35
Sarcoma,0.166667,0.25,0.222222,0.25,0.266667
Kidney Cancer,0.153846,0.230769,0.25641,0.25,0.261538
Prostate Cancer,0.0,0.25,0.333333,0.375,0.4
Neuroblastoma,0.0,0.166667,0.222222,0.166667,0.266667
Gallbladder Cancer,0.0,0.0,0.0,0.0,0.0
