# Load packages

In [22]:
import os
import numpy as np
import pandas as pd

import warnings
# NOTE(review): with no category/module argument this silences every warning
# for the rest of the session, including deprecation warnings raised by the
# libraries used below. Consider narrowing the filter to the specific
# category that is known to be noisy.
warnings.filterwarnings('ignore')

In [23]:
from scripts.evalModel import evalLogisticModels, getPredDist

# Define

# Data

## Load cell lines

In [25]:
# Read the three cancer-gene RNA tables (train / test / new-cancer splits).
# index_col=0 turns the first CSV column into the row index.
trainRNA = pd.read_csv('../../data/processed/RNA_train_cancergenes.csv', index_col=0)
testRNA = pd.read_csv('../../data/processed/RNA_test_cancergenes.csv', index_col=0)
newRNA = pd.read_csv('../../data/processed/RNA_newcancer_cancergenes.csv', index_col=0)

# Row labels of each split as plain lists; they are used later to pull the
# matching rows out of the drug/cell-line pair table.
trainCellLines = trainRNA.index.tolist()
testCellLines = testRNA.index.tolist()
newCellLines = newRNA.index.tolist()

## $\mathrm{SiamCDR_{LR}}$

In [26]:
# Pair table indexed by DepMap_ID so each split can be selected by label.
cdr = pd.read_csv('../../data/processed/drugCellLinePairsData.csv', index_col='DepMap_ID')

# Select the rows belonging to each split's cell lines; reset_index() moves
# DepMap_ID back into an ordinary column and leaves a fresh positional index.
trainCDR = cdr.loc[trainCellLines].reset_index()
testCDR = cdr.loc[testCellLines].reset_index()
newCDR = cdr.loc[newCellLines].reset_index()

In [27]:
# Columns carried into the evaluation frames, and the names they get there.
_pairColumns = ['DepMap_ID', 'cancer_type', 'name', 'effective']
_pairRenames = {'DepMap_ID': 'cell_line', 'name': 'drug', 'effective': 'true'}

# Template frames for the two held-out splits: same four columns, with the
# `effective` label exposed under the name `true`.
testTemp = testCDR[_pairColumns].rename(columns=_pairRenames)
newTemp = newCDR[_pairColumns].rename(columns=_pairRenames)

## Load drugs

In [28]:
# Drug fingerprint table; the first CSV column becomes the row index, which
# the next cell looks up by the pair table's `name` column.
drugs = pd.read_csv(
    '../../data/processed/drug_fingerprints.csv',
    index_col=0,
)

In [29]:
# Fingerprint rows aligned with the pair tables: one row per entry of the
# `name` column, in the same order as the pairs.
trainDrugs = drugs.loc[trainCDR['name'].tolist()].to_numpy()
testDrugs = drugs.loc[testCDR['name'].tolist()].to_numpy()
newDrugs = drugs.loc[newCDR['name'].tolist()].to_numpy()

# RNA rows aligned the same way through the DepMap_ID column.
# NOTE: these assignments rebind trainRNA/testRNA/newRNA from DataFrames to
# NumPy arrays, so this cell cannot be re-run on its own; the cell that loads
# the RNA tables has to be executed again first.
trainRNA = trainRNA.loc[trainCDR['DepMap_ID'].tolist()].to_numpy()
testRNA = testRNA.loc[testCDR['DepMap_ID'].tolist()].to_numpy()
newRNA = newRNA.loc[newCDR['DepMap_ID'].tolist()].to_numpy()

In [30]:
# Model inputs per split, always ordered [drug fingerprints, RNA features].
trainData = [trainDrugs, trainRNA]
testData = [testDrugs, testRNA]
newData = [newDrugs, newRNA]

# Training labels: the `effective` column of the training pairs.
trainEff = trainCDR['effective'].to_numpy()

In [31]:
# Remove the standalone names of the intermediate tables and arrays. The
# arrays themselves stay alive through trainData / testData / newData.
del cdr, drugs
del trainDrugs, testDrugs, newDrugs
del trainRNA, testRNA, newRNA

# Model performance

In [34]:
# Location handed to the evaluator as `rnaPath` (presumably a saved cell-line
# encoder, judging by the directory name; confirm against evalLogisticModels).
rnaPath = '../../models/cellEncoders/CellLineFewShot_Layers2_Hidden64_DO0-1_AFsigmoid_LR0-001_DR0-99_DS1000'

# Each input list is passed as a shallow copy, so the evaluator receives its
# own list objects while sharing the underlying arrays. No fusion or drug
# model path is supplied.
evalLog = evalLogisticModels(
    list(trainData),
    trainEff,
    list(testData),
    list(newData),
    fusionPath=None,
    drugPath=None,
    rnaPath=rnaPath,
)

In [35]:
# Run the evaluation on copies of the template frames so the templates stay
# untouched; the call prints the precision@k summaries shown below.
_evalResults = evalLog.evaluate(testDF=testTemp.copy(), newDF=newTemp.copy())

# The call yields six objects: the two result frames, then the "wrong" and
# "counts" objects for the test and new-cancer splits.
testDF, newDF, testWrong, newWrong, testCounts, newCounts = _evalResults

Average Cell Line precision @ k on test set
	Precision@1: 0.9412
	Precision@2: 0.9118
	Precision@3: 0.8693
	Precision@4: 0.8529
	Precision@5: 0.8275
	Precision@10: 0.8077

Average Cell Line precision @ k on newcancer set
	Precision@1: 0.9538
	Precision@2: 0.9077
	Precision@3: 0.9077
	Precision@4: 0.8731
	Precision@5: 0.8585
	Precision@10: 0.802


Test set:

	# of cell lines without effective drug among top-3 recs: 0
	# of unique drugs among top-3 predictions: 16

New cancer set
No true effective drugs identified in top 3 for ACH-000268 (top drug: sangivamycin)

	# of cell lines without effective drug among top-3 recs: 1
	# of unique drugs among top-3 predictions: 14


In [36]:
# Summarise the prediction spread on the test split; the helper prints the
# average per-drug standard deviation of predictions (see the cell output).
# NOTE(review): the structure of the returned object is not visible here.
testDist = getPredDist(testDF)

Avg STD of predictions for each drug: 0.0162


In [37]:
# Same prediction-spread summary for the new-cancer split; the helper prints
# the average per-drug standard deviation of predictions (see the cell output).
# NOTE(review): the structure of the returned object is not visible here.
newDist = getPredDist(newDF)

Avg STD of predictions for each drug: 0.027


In [15]:
# Display the per-drug counts returned by evalLog.evaluate for the test split
# (presumably how often each drug is recommended at ranks 1-3; confirm in evalModel).
testCounts

Unnamed: 0,1,2,3,total
dolastatin-10,35,0,0,35
romidepsin,10,21,1,32
echinomycin,4,14,19,37
10-hydroxycamptothecin,2,5,12,19
nemorubicin,1,0,0,1
YM-155,0,4,5,9
maytansinol-isobutyrate,0,3,3,6
genz-644282,0,3,2,5
sangivamycin,0,1,0,1
UK-383367,0,1,0,1


## Precision by cancer type

In [18]:
# Per-cancer-type performance tables for both held-out splits. Copies of the
# result frames are passed so testDF / newDF themselves are not handed over.
cancerTest, cancerNew = evalLog.getCancerPerformance(
    testDF.copy(),
    newDF.copy(),
)

In [19]:
# Display the per-cancer-type table for the test split (columns p1..p5).
cancerTest

Unnamed: 0,p1,p2,p3,p4,p5
Bladder Cancer,1.0,1.0,1.0,1.0,1.0
Liver Cancer,1.0,1.0,1.0,1.0,0.9
Endometrial/Uterine Cancer,1.0,1.0,1.0,0.916667,0.866667
Head and Neck Cancer,1.0,1.0,1.0,0.916667,0.8
Colon/Colorectal Cancer,1.0,1.0,0.916667,0.9375,0.9
Breast Cancer,1.0,1.0,0.777778,0.833333,0.8
Skin Cancer,1.0,0.9,0.8,0.8,0.84
Ovarian Cancer,1.0,0.875,0.833333,0.8125,0.85
Brain Cancer,1.0,0.75,0.666667,0.6875,0.7
Lung Cancer,0.923077,0.884615,0.871795,0.884615,0.846154


In [20]:
# Column-wise mean over the cancer types in the test table (each cancer type
# counts equally), rounded to 4 decimals.
cancerTest.mean().round(4)

p1    0.9450
p2    0.9265
p3    0.8824
p4    0.8574
p5    0.8280
dtype: float64

In [21]:
# Display the per-cancer-type table for the new-cancer split (columns p1..p5).
cancerNew

Unnamed: 0,p1,p2,p3,p4,p5
Rhabdoid,1.0,1.0,1.0,1.0,0.95
Sarcoma,1.0,1.0,1.0,1.0,0.933333
Prostate Cancer,1.0,1.0,1.0,1.0,0.9
Gastric Cancer,1.0,1.0,1.0,0.946429,0.942857
Gallbladder Cancer,1.0,1.0,1.0,0.75,0.6
Bone Cancer,1.0,0.944444,0.888889,0.861111,0.844444
Thyroid Cancer,1.0,0.875,0.875,0.875,0.9
Neuroblastoma,1.0,0.833333,0.888889,0.916667,0.866667
Kidney Cancer,0.846154,0.769231,0.769231,0.711538,0.707692
Bile Duct Cancer,0.8,0.8,0.866667,0.8,0.84


In [22]:
# Column-wise mean over the cancer types in the new-cancer table (each cancer
# type counts equally), rounded to 4 decimals.
cancerNew.mean().round(4)

p1    0.9646
p2    0.9222
p3    0.9289
p4    0.8861
p5    0.8485
dtype: float64