# Load packages

In [1]:
import os
import numpy as np
import pandas as pd

import warnings
# NOTE(review): this installs a blanket 'ignore' filter with no category or
# module restriction, so every warning raised after this cell (including
# deprecation and numerical warnings from pandas/numpy) is hidden for the
# rest of the session. Consider narrowing it to the specific warning that
# motivated it.
warnings.filterwarnings('ignore')

In [2]:
from scripts.evalModel import evalLogisticModels, getPredDist

# Data

## Load cell lines

In [3]:
# Read the three RNA tables (train / test / new-cancer cohorts); the first CSV
# column becomes the row index.
trainRNA = pd.read_csv('../../data/processed/RNA_train_cancergenes.csv', index_col=0)
testRNA = pd.read_csv('../../data/processed/RNA_test_cancergenes.csv', index_col=0)
newRNA = pd.read_csv('../../data/processed/RNA_newcancer_cancergenes.csv', index_col=0)

# Row labels of each table, in file order; used below to select the matching
# rows of the drug/cell-line pair table.
trainCellLines, testCellLines, newCellLines = (
    list(rnaFrame.index) for rnaFrame in (trainRNA, testRNA, newRNA)
)

## Cancer drug response (CDR)

In [4]:
# Drug/cell-line pair table, indexed by DepMap_ID so rows can be picked per cohort.
cdr = pd.read_csv('../../data/processed/drugCellLinePairsData.csv', index_col='DepMap_ID')

# For each cohort keep the rows whose DepMap_ID is in that cohort's cell-line
# list, then move DepMap_ID back into a regular column.
trainCDR, testCDR, newCDR = (
    cdr.loc[cohortCellLines].reset_index()
    for cohortCellLines in (trainCellLines, testCellLines, newCellLines)
)

In [5]:
# Columns kept for evaluation and the names they are given in the result frames.
pairColumns = ['DepMap_ID', 'cancer_type', 'name', 'effective']
pairRenames = {'DepMap_ID': 'cell_line', 'name': 'drug', 'effective': 'true'}

# Per-pair frames (cell_line, cancer_type, drug, true) for the test and
# new-cancer cohorts; copies of these are handed to evalLog.evaluate later on.
testTemp = testCDR[pairColumns].rename(columns=pairRenames)
newTemp = newCDR[pairColumns].rename(columns=pairRenames)

## Load drug fingerprints

In [6]:
# Drug fingerprint table; the first CSV column (drug names in the preview
# below) becomes the row index.
drugs = pd.read_csv('../../data/processed/drug_fingerprints.csv', index_col=0)

# Preview the first rows; the displayed columns are labelled 0 through 255.
drugs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
cytarabine,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
epinastine,0,0,0,1,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
floxuridine,0,0,0,0,1,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,0
valrubicin,1,0,0,0,0,1,0,0,1,0,...,0,1,0,1,1,1,0,1,0,0
adapalene,1,1,1,0,0,1,0,0,0,0,...,0,0,1,1,1,1,0,0,0,0


In [7]:
# Build one feature row per drug/cell-line pair: row i of each array below
# corresponds to row i of the matching *CDR frame.

# Drug features, looked up by the drug name of every pair.
trainDrugs = drugs.loc[trainCDR['name'].tolist()].to_numpy()
testDrugs = drugs.loc[testCDR['name'].tolist()].to_numpy()
newDrugs = drugs.loc[newCDR['name'].tolist()].to_numpy()

# RNA features, looked up by the cell line (DepMap_ID) of every pair.
# NOTE: trainRNA/testRNA/newRNA are rebound from DataFrames to NumPy arrays
# here, so this cell cannot be re-run without first re-running the loading cell.
trainRNA = trainRNA.loc[trainCDR['DepMap_ID'].tolist()].to_numpy()
testRNA = testRNA.loc[testCDR['DepMap_ID'].tolist()].to_numpy()
newRNA = newRNA.loc[newCDR['DepMap_ID'].tolist()].to_numpy()

In [8]:
# Training labels: the 'effective' column of the training pairs.
trainEff = trainCDR['effective'].to_numpy()

# Model inputs per cohort, always ordered [drug features, RNA features].
trainData, testData, newData = (
    [trainDrugs, trainRNA],
    [testDrugs, testRNA],
    [newDrugs, newRNA],
)

In [9]:
# Drop the names of the intermediate tables/arrays; the arrays themselves stay
# reachable through trainData/testData/newData. Earlier cells that read these
# names need the loading cells re-run before they can be executed again.
del cdr, drugs
del trainDrugs, testDrugs, newDrugs
del trainRNA, testRNA, newRNA

# Fusion performance

In [13]:
# Root of the few-shot experiment outputs. The cluster location used for the
# original run remains the default, so behaviour is unchanged there; set the
# CDR_FEWSHOT_ROOT environment variable to run the notebook from another
# machine or checkout without editing this cell. An empty value falls back to
# the default as well.
fewShotRoot = (
    os.environ.get('CDR_FEWSHOT_ROOT')
    or '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot'
)

# Name of the saved drug model and the directory it lives in.
drugModel = 'DrugFewShot_Layers2_Hidden16_DO0-1_AFrelu_LR0-01_DR0-99_DS1000'
drugPath = os.path.join(fewShotRoot, 'drugs', 'siameseV1', 'models', drugModel)

In [18]:
# Root of the few-shot experiment outputs. The cluster location used for the
# original run remains the default, so behaviour is unchanged there; set the
# CDR_FEWSHOT_ROOT environment variable to run the notebook from another
# machine or checkout without editing this cell. An empty value falls back to
# the default as well.
fewShotRoot = (
    os.environ.get('CDR_FEWSHOT_ROOT')
    or '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot'
)

# Name of the selected fusion model and the directory it lives in.
bestModel = 'FusionFewShotEmbedDrugRawCell_NL64_64_64_DO0-1_AFrelu_LR0-001_DR0-99_DS1024_BYrna'
fusionPath = os.path.join(fewShotRoot, 'fusion', 'embedDrug_rawRNA', 'models', bestModel)

In [19]:
# Saved-model locations handed to the evaluator; no RNA model path is supplied.
modelPaths = dict(fusionPath=fusionPath, drugPath=drugPath, rnaPath=None)

# The .copy() calls are shallow list copies: the evaluator gets its own list
# objects, but the feature arrays inside them are shared with this notebook.
evalLog = evalLogisticModels(
    trainData.copy(),
    trainEff,
    testData.copy(),
    newData.copy(),
    alt='xgb',
    **modelPaths,
)

In [20]:
# Run the evaluation on copies of the pair frames so testTemp/newTemp are not
# modified by the call; it prints the precision@k summaries shown below.
evalResults = evalLog.evaluate(
    testDF=testTemp.copy(),
    newDF=newTemp.copy(),
    thresh=0.1,
)
testDF, newDF, testWrong, newWrong, testCounts, newCounts = evalResults


Average Cell Line precision @ k on test set
	Precision@1: 0.4118
	Precision@2: 0.3725
	Precision@3: 0.3399
	Precision@4: 0.3235
	Precision@5: 0.3137
	Precision@10: 0.3385

Average Cell Line precision @ k on newcancer set
	Precision@1: 0.3692
	Precision@2: 0.3308
	Precision@3: 0.3282
	Precision@4: 0.3308
	Precision@5: 0.3169
	Precision@10: 0.3314


Test set:
No true effective drugs identified in top 3 for ACH-000012 (top drug: TAS-103)
No true effective drugs identified in top 3 for ACH-000329 (top drug: ixazomib)
No true effective drugs identified in top 3 for ACH-000347 (top drug: PFI-1)
No true effective drugs identified in top 3 for ACH-000368 (top drug: TAS-103)
No true effective drugs identified in top 3 for ACH-000486 (top drug: SNX-2112)
No true effective drugs identified in top 3 for ACH-000510 (top drug: beta-lapachone)
No true effective drugs identified in top 3 for ACH-000650 (top drug: TAS-103)
No true effective drugs identified in top 3 for ACH-000663 (top drug: ispinesib)

In [21]:
# Prediction-distribution summary for the test cohort; the call also prints
# the average per-drug prediction variance shown in the output below.
testDist = getPredDist(testDF)

Avg varaince of predictions for each drug: 0.00039999998989515007


In [22]:
# Prediction-distribution summary for the new-cancer cohort; the call also
# prints the average per-drug prediction variance shown in the output below.
newDist = getPredDist(newDF)

Avg varaince of predictions for each drug: 0.0005000000237487257


In [23]:
# Preview of the per-drug count table returned by evalLog.evaluate (columns
# 1, 2, 3 and total). NOTE(review): presumably how often each drug was ranked
# 1st/2nd/3rd across test cell lines -- confirm in scripts/evalModel.
testCounts.head()

Unnamed: 0,1,2,3,total
YM-155,9,6,3,18
beta-lapachone,5,4,1,10
SNX-2112,4,0,1,5
10-hydroxycamptothecin,3,4,6,13
CUDC-907,3,4,3,10


In [24]:
# First 10 result rows for ACH-000161, an example of a poorly predicted cell
# line: the first drug listed in the output below has true == 0.
testDF.loc[testDF['cell_line'] == 'ACH-000161'].head(10)

Unnamed: 0,cell_line,cancer_type,drug,true,pred
6307,ACH-000161,Lung Cancer,beta-lapachone,0,0.432734
6274,ACH-000161,Lung Cancer,YM-155,1,0.399203
6305,ACH-000161,Lung Cancer,TAS-103,0,0.399203
6296,ACH-000161,Lung Cancer,10-hydroxycamptothecin,1,0.399203
6237,ACH-000161,Lung Cancer,genz-644282,0,0.362997
6288,ACH-000161,Lung Cancer,dolastatin-10,1,0.337896
6281,ACH-000161,Lung Cancer,panobinostat,0,0.337896
6270,ACH-000161,Lung Cancer,entinostat,0,0.337896
6301,ACH-000161,Lung Cancer,echinomycin,0,0.337896
6248,ACH-000161,Lung Cancer,mitoxantrone,0,0.337896


In [25]:
# First 10 result rows for ACH-000899, an example of a poorly predicted cell
# line: the first drug listed in the output below has true == 0.
testDF.loc[testDF['cell_line'] == 'ACH-000899'].head(10)

Unnamed: 0,cell_line,cancer_type,drug,true,pred
4526,ACH-000899,Skin Cancer,torasemide,0,0.440176
4545,ACH-000899,Skin Cancer,10-hydroxycamptothecin,1,0.432734
4496,ACH-000899,Skin Cancer,genz-644282,0,0.399203
4495,ACH-000899,Skin Cancer,vinorelbine,0,0.387348
4531,ACH-000899,Skin Cancer,CUDC-907,0,0.387348
4521,ACH-000899,Skin Cancer,epothilone-b,0,0.366272
4511,ACH-000899,Skin Cancer,mebendazole,0,0.362997
4557,ACH-000899,Skin Cancer,LY2606368,0,0.362997
4560,ACH-000899,Skin Cancer,verubulin,0,0.362997
4558,ACH-000899,Skin Cancer,vindesine,0,0.362997


## Precision by cancer type

In [26]:
# Per-cancer-type performance tables for the test and new-cancer cohorts,
# computed from copies of the result frames so testDF/newDF stay untouched.
cancerTest, cancerNew = evalLog.getCancerPerformance(testDF.copy(), newDF.copy())

In [27]:
# Column-wise mean of the precision columns over all test-cohort cancer types
# (4 decimals), followed by the full per-cancer table.
print(cancerTest.mean().round(4))
cancerTest

p1    0.4024
p2    0.3979
p3    0.3530
p4    0.3264
p5    0.3126
dtype: float64


Unnamed: 0,p1,p2,p3,p4,p5
Colon/Colorectal Cancer,0.75,0.875,0.666667,0.5625,0.5
Endometrial/Uterine Cancer,0.666667,0.666667,0.444444,0.416667,0.4
Bladder Cancer,0.666667,0.5,0.333333,0.25,0.333333
Liver Cancer,0.5,0.75,0.666667,0.625,0.5
Ovarian Cancer,0.5,0.375,0.333333,0.375,0.45
Pancreatic Cancer,0.5,0.375,0.333333,0.25,0.25
Lung Cancer,0.461538,0.307692,0.307692,0.346154,0.338462
Esophageal Cancer,0.333333,0.166667,0.333333,0.333333,0.266667
Brain Cancer,0.25,0.125,0.083333,0.125,0.1
Skin Cancer,0.2,0.3,0.4,0.3,0.28


In [28]:
# Column-wise mean of the precision columns over all new-cohort cancer types
# (4 decimals), followed by the full per-cancer table.
print(cancerNew.mean().round(4))
cancerNew

p1    0.3980
p2    0.3406
p3    0.3148
p4    0.3189
p5    0.2978
dtype: float64


Unnamed: 0,p1,p2,p3,p4,p5
Prostate Cancer,1.0,0.75,0.5,0.5,0.4
Thyroid Cancer,0.625,0.4375,0.458333,0.40625,0.375
Bile Duct Cancer,0.6,0.3,0.266667,0.2,0.2
Rhabdoid,0.5,0.5,0.5,0.4375,0.45
Gastric Cancer,0.357143,0.392857,0.333333,0.339286,0.328571
Bone Cancer,0.333333,0.333333,0.333333,0.388889,0.377778
Neuroblastoma,0.333333,0.333333,0.222222,0.333333,0.266667
Kidney Cancer,0.230769,0.192308,0.25641,0.25,0.246154
Sarcoma,0.0,0.166667,0.277778,0.333333,0.333333
Gallbladder Cancer,0.0,0.0,0.0,0.0,0.0
