# Imports

In [1]:
import os
import numpy as np
import pandas as pd

import warnings
# Suppress every warning for the rest of the session. This keeps the cell
# outputs clean, but it also hides deprecation and numerical warnings raised
# by later cells -- comment this line out when debugging.
warnings.filterwarnings('ignore')

In [None]:
from scripts.evalModel import evalLogisticModels, getPredDist

# Load data

## Cell lines

In [3]:
# RNA feature tables for the three splits (train / test / new-cancer).
# The first CSV column becomes the index; later cells look rows up in these
# tables by DepMap_ID, so the index labels identify the cell lines.
trainRNA = pd.read_csv('../../data/processed/RNA_train_cancergenes.csv', index_col=0)
testRNA = pd.read_csv('../../data/processed/RNA_test_cancergenes.csv', index_col=0)
newRNA = pd.read_csv('../../data/processed/RNA_newcancer_cancergenes.csv', index_col=0)

# Cell-line labels belonging to each split, in file order.
trainCellLines = trainRNA.index.tolist()
testCellLines = testRNA.index.tolist()
newCellLines = newRNA.index.tolist()

## CDR (cancer drug response)

In [4]:
# Drug / cell-line pair table, indexed by DepMap_ID so that each split can be
# selected with its list of cell-line labels.
cdr = pd.read_csv('../../data/processed/drugCellLinePairsData.csv', index_col='DepMap_ID')

# Select the rows of every split, then move DepMap_ID back into a regular
# column (reset_index) so it can be used alongside the other pair columns.
trainCDR, testCDR, newCDR = (
    cdr.loc[cellLines].reset_index()
    for cellLines in (trainCellLines, testCellLines, newCellLines)
)

In [5]:
# Slim copies of the test / new-cancer pair tables that are later handed to
# `evalLog.evaluate`: keep the identifying columns plus the label and rename
# them to cell_line / drug / true.
pairColumns = ['DepMap_ID', 'cancer_type', 'name', 'effective']
pairRenames = {'DepMap_ID': 'cell_line', 'name': 'drug', 'effective': 'true'}

testTemp = testCDR[pairColumns].rename(columns=pairRenames)
newTemp = newCDR[pairColumns].rename(columns=pairRenames)

## Drugs

In [6]:
# Drug fingerprint table. The first CSV column becomes the index; the next
# cell looks rows up in it using the drug names from the CDR tables.
drugs = pd.read_csv('../../data/processed/drug_fingerprints.csv', index_col=0)

In [7]:
# Expand the feature tables to one row per (cell line, drug) pair: for every
# row of a CDR split, fetch the matching drug-fingerprint row, so that row i
# of each array lines up with row i of the corresponding CDR table.
trainDrugs = drugs.loc[trainCDR['name'].tolist()].to_numpy()
testDrugs = drugs.loc[testCDR['name'].tolist()].to_numpy()
newDrugs = drugs.loc[newCDR['name'].tolist()].to_numpy()

# Same alignment for the RNA features, keyed by DepMap_ID.
# NOTE: the *RNA names are rebound from DataFrames to NumPy arrays here, so
# this cell cannot be re-run without first re-running the RNA loading cell.
trainRNA = trainRNA.loc[trainCDR['DepMap_ID'].tolist()].to_numpy()
testRNA = testRNA.loc[testCDR['DepMap_ID'].tolist()].to_numpy()
newRNA = newRNA.loc[newCDR['DepMap_ID'].tolist()].to_numpy()

## Format Pairs

In [8]:
# Each split is packed as a two-element list: [drug features, RNA features].
trainData, testData, newData = (
    [trainDrugs, trainRNA],
    [testDrugs, testRNA],
    [newDrugs, newRNA],
)

# Training labels, taken from the `effective` column of the training pairs.
trainEff = trainCDR['effective'].to_numpy()

In [9]:
# Drop names that are no longer needed. The feature arrays themselves remain
# reachable through trainData / testData / newData.
del cdr, drugs
del trainDrugs, testDrugs, newDrugs
del trainRNA, testRNA, newRNA

# Model

In [10]:
# Directory that holds the trained drug few-shot models. It defaults to the
# original cluster location; set the DRUG_MODEL_DIR environment variable to
# run the notebook on a machine where the models live elsewhere.
drugModelDir = os.environ.get(
    'DRUG_MODEL_DIR',
    '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot/drugs/siameseV1/models/',
)
# Folder name of the specific drug model to load (the trailing slash is kept
# so the final path is identical to the previously hard-coded one).
drugModel = 'DrugFewShot_Layers1_Hidden64_DO0-1_AFrelu_LR0-001_DR0-99_DS1000/'
drugPath = os.path.join(drugModelDir, drugModel)

In [11]:
# Directory that holds the trained cell-line (RNA) few-shot models. It
# defaults to the original cluster location; set the RNA_MODEL_DIR environment
# variable to run the notebook on a machine where the models live elsewhere.
rnaModelDir = os.environ.get(
    'RNA_MODEL_DIR',
    '/fs/scratch/PCON0041/PatrickLawrence/cancer-drug-response/fewShot/cellLines/siameseV1/models/',
)
# Folder name of the specific cell-line model to load (the trailing slash is
# kept so the final path is identical to the previously hard-coded one).
rnaModel = 'CellLineFewShot_Layers2_Hidden64_DO0-1_AFrelu_LR0-001_DR0-99_DS1000/'
rnaPath = os.path.join(rnaModelDir, rnaModel)

In [12]:
# Build the evaluator from shallow copies of the split lists, so the
# evaluator receives its own list objects (the arrays inside are shared).
# No fusion model path is supplied; the drug and cell-line model paths are.
# NOTE(review): alt='rf' presumably selects a random-forest alternative --
# confirm against scripts.evalModel.
evalLog = evalLogisticModels(
    list(trainData),
    trainEff,
    list(testData),
    list(newData),
    alt='rf',
    fusionPath=None,
    drugPath=drugPath,
    rnaPath=rnaPath,
)

## Cell line performance

In [13]:
# Evaluate on copies of the slim pair tables so testTemp / newTemp themselves
# are not handed to the evaluator. The call prints the precision@k summary
# shown in this cell's output and returns six objects.
evalResults = evalLog.evaluate(testDF=testTemp.copy(), newDF=newTemp.copy())
testDF, newDF, testWrong, newWrong, testCounts, newCounts = evalResults

Average Cell Line precision @ k on test set
	Precision@1: 0.9608
	Precision@2: 0.9412
	Precision@3: 0.9085
	Precision@4: 0.8775
	Precision@5: 0.8549
	Precision@10: 0.8026

Average Cell Line precision @ k on newcancer set
	Precision@1: 0.9538
	Precision@2: 0.9538
	Precision@3: 0.8923
	Precision@4: 0.85
	Precision@5: 0.8277
	Precision@10: 0.7941


Test set:

	# of cell lines without effective drug among top-3 recs: 0
	# of unique drugs among top-3 predictions: 19

New cancer set
No true effective drugs identified in top 3 for ACH-000250 (top drug: dolastatin-10)
No true effective drugs identified in top 3 for ACH-000268 (top drug: epothilone-d)

	# of cell lines without effective drug among top-3 recs: 2
	# of unique drugs among top-3 predictions: 23


## Prediction distribution

In [14]:
# Per-drug summary of the test-set predictions; the displayed table has the
# columns drug, predCount, predRange, avg and variance. getPredDist also
# prints the average per-drug variance.
testDist = getPredDist(testDF)
# Bare expression so the notebook renders the table as the cell output.
testDist

Avg varaince of predictions for each drug: 0.0018


Unnamed: 0,drug,predCount,predRange,avg,variance
0,alvocidib,46,0.000833,0.000018,1.509662e-08
1,bortezomib,44,0.504806,0.214958,1.384082e-02
2,selinexor,43,0.082605,0.008783,2.288177e-04
3,napabucasin,42,0.002000,0.000127,2.219641e-07
4,FK-866,42,0.354943,0.090333,6.563520e-03
...,...,...,...,...,...
888,isoflupredone-acetate,1,0.000000,0.017576,
889,VU0361737,1,0.000000,0.010667,
890,rabeprazole,1,0.000000,0.077916,
891,raclopride,1,0.000000,0.000714,


In [15]:
# Per-drug summary of the new-cancer-set predictions; the displayed table has
# the columns drug, predCount, predRange, avg and variance. getPredDist also
# prints the average per-drug variance.
newDist = getPredDist(newDF)
# Bare expression so the notebook renders the table as the cell output.
newDist

Avg varaince of predictions for each drug: 0.0018


Unnamed: 0,drug,predCount,predRange,avg,variance
0,ganetespib,60,0.086947,0.004024,0.000189
1,echinomycin,58,0.490563,0.842952,0.019307
2,genz-644282,57,0.471479,0.726997,0.012857
3,bortezomib,57,0.513834,0.203139,0.014413
4,verubulin,52,0.542650,0.389681,0.018493
...,...,...,...,...,...
905,dioscin,1,0.000000,0.002222,
906,dihydromyricetin,1,0.000000,0.000000,
907,dihydroartemisinin,1,0.000000,0.044118,
908,dichloroacetate,1,0.000000,0.002500,


## Cancer performance

In [16]:
# Aggregate the evaluated predictions by cancer type. The tables displayed in
# the following cells have one row per cancer type and columns p1..p5.
testCancer, newCancer = evalLog.getCancerPerformance(testDF, newDF)

In [17]:
# Column means (p1..p5) over the cancer types of the test set, rounded to
# four decimals, followed by the full per-cancer table.
print(testCancer.mean().round(4))
testCancer

p1    0.9728
p2    0.9591
p3    0.9178
p4    0.8850
p5    0.8571
dtype: float64


Unnamed: 0,p1,p2,p3,p4,p5
Liver Cancer,1.0,1.0,1.0,1.0,1.0
Colon/Colorectal Cancer,1.0,1.0,1.0,1.0,0.95
Bladder Cancer,1.0,1.0,1.0,1.0,0.866667
Head and Neck Cancer,1.0,1.0,1.0,1.0,0.866667
Endometrial/Uterine Cancer,1.0,1.0,0.888889,0.916667,0.866667
Breast Cancer,1.0,1.0,0.888889,0.833333,0.8
Esophageal Cancer,1.0,1.0,0.888889,0.666667,0.733333
Skin Cancer,1.0,1.0,0.866667,0.9,0.84
Ovarian Cancer,1.0,0.875,0.916667,0.875,0.9
Brain Cancer,1.0,0.875,0.833333,0.75,0.8


In [18]:
# Column means (p1..p5) over the cancer types of the new-cancer set, rounded
# to four decimals, followed by the full per-cancer table.
print(newCancer.mean().round(4))
newCancer

p1    0.9769
p2    0.9641
p3    0.8820
p4    0.8556
p5    0.8187
dtype: float64


Unnamed: 0,p1,p2,p3,p4,p5
Prostate Cancer,1.0,1.0,1.0,1.0,1.0
Rhabdoid,1.0,1.0,1.0,0.9375,0.85
Gastric Cancer,1.0,1.0,0.952381,0.928571,0.914286
Sarcoma,1.0,1.0,0.944444,0.875,0.833333
Bone Cancer,1.0,1.0,0.925926,0.916667,0.888889
Thyroid Cancer,1.0,1.0,0.916667,0.875,0.85
Bile Duct Cancer,1.0,1.0,0.866667,0.85,0.84
Gallbladder Cancer,1.0,1.0,0.666667,0.75,0.6
Neuroblastoma,1.0,0.833333,0.777778,0.75,0.733333
Kidney Cancer,0.769231,0.807692,0.769231,0.673077,0.676923
