# Import

In [1]:
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Input
from tensorflow.keras.models import load_model, Model

In [2]:
current = os.getcwd()
os.chdir('../modelEval')

In [3]:
from scripts.evalModel import precision, getPredDist, countDrugsK

In [4]:
os.chdir('../../src/models/')

In [5]:
from fsCDR import fsCDR

In [6]:
os.chdir(current)

# Load Data 

## RNA

In [7]:
trainRNA = pd.read_csv('../../data/processed/RNA_train_cancergenes.csv', index_col=0)
valRNA = pd.read_csv('../../data/processed/RNA_val_cancergenes.csv', index_col=0)
testRNA = pd.read_csv('../../data/processed/RNA_test_cancergenes.csv', index_col=0)
newRNA = pd.read_csv('../../data/processed/RNA_newcancer_cancergenes.csv', index_col=0)

## Drugs

In [8]:
drugs = pd.read_csv('../../data/processed/drug_fingerprints.csv', index_col=0)

## CDR

In [9]:
cdr = pd.read_csv('../../data/processed/drugCellLinePairsData.csv')
cdrFilt = cdr.loc[:, ['DepMap_ID', 'cancer_type', 'name', 'ic50', 'auc', 'lower_limit', 'effectiveCont', 'effective']]
cdrFilt.rename(columns={'DepMap_ID': 'cell_line', 'name': 'drug', 'effective': 'true'}, inplace=True)
cdrFilt.head()

Unnamed: 0,cell_line,cancer_type,drug,ic50,auc,lower_limit,effectiveCont,true
0,ACH-000320,Pancreatic Cancer,floxuridine,0.007879,0.336463,0.10514,6.692422,0
1,ACH-001145,Ovarian Cancer,floxuridine,0.041552,0.485618,0.099077,5.053845,0
2,ACH-000873,Esophageal Cancer,floxuridine,0.097745,0.548483,0.060055,4.697597,0
3,ACH-000855,Esophageal Cancer,floxuridine,0.037349,0.631609,0.390028,4.052632,0
4,ACH-000488,Esophageal Cancer,floxuridine,0.071628,0.598061,0.236216,3.801377,0


# Process data

## Combine RNA

In [10]:
allRNA = pd.concat([trainRNA, valRNA])
allRNA.shape

(300, 463)

# Make result dfs

In [11]:
rows = ['fold0', 'fold1', 'fold2', 'fold3', 'fold4']
cols = ['p1', 'p2', 'p3', 'p4', 'p5', 'p10', 'nWrong', 'nUnique', 'variance']

In [12]:
clTest = pd.DataFrame(index=rows, columns=cols)
clNew = pd.DataFrame(index=rows, columns=cols)

ctTest = pd.DataFrame(index=rows, columns=cols[:5])
ctNew = pd.DataFrame(index=rows, columns=cols[:5])

# Define

## paths

In [None]:
drugPath = None

In [14]:
rnaPath = '../../models/cellEncoders/'
rnaModel = 'CellLineFewShot_Layers2_Hidden64_DO0-1_AFrelu_LR0-001_DR0-99_DS1000'
rnaPath = os.path.join(rnaPath, rnaModel)

## Func to init and train model

In [15]:
basePath = '../../models/SiamCDR/DNN'

In [16]:
def getDNN(trainCDR, valCDR, fold):
    # define model path
    modelName = f'RawDrug-EmbedCell-DNN_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_CV{fold}'
    modelPath = os.path.join(basePath, modelName)
    
    # init DNN
    np.random.seed(5)
    dnn = fsCDR(cellLineModelPath=rnaPath, 
                drugModelPath='None', 
                fusionModelPath='None',
                nodeList=[64, 64], 
                activation='relu', 
                dropout=0.1)
    
    trainData = (tf.convert_to_tensor(drugs.loc[list(trainCDR.drug), :], dtype=tf.float32), 
                 tf.convert_to_tensor(allRNA.loc[list(trainCDR.cell_line), :], dtype=tf.float32),
                 tf.convert_to_tensor(trainCDR.true, dtype=tf.float32))
    
    valData = (tf.convert_to_tensor(drugs.loc[list(valCDR.drug), :], dtype=tf.float32), 
               tf.convert_to_tensor(allRNA.loc[list(valCDR.cell_line), :], dtype=tf.float32),
               tf.convert_to_tensor(valCDR.true, dtype=tf.float32))
    
    # fit DNN
    history = dnn.fit(train=trainData, 
                      val=valData, 
                      learningRate=0.01, 
                      decayRate=0.99, 
                      decaySteps=500,
                      earlyStopping=True, 
                      patience=10,
                      minDelta=0.001,
                      batchSize=256, 
                      epochs=250, 
                      saveModel=True, 
                      modelPath=modelPath)
    
    return dnn

# 5-fold CV

## encoder newcancer data

In [17]:
testCDR = cdrFilt[cdrFilt.cell_line.isin(testRNA.index)]

testData = [tf.convert_to_tensor(drugs.loc[list(testCDR.drug), :], dtype=tf.float32), 
            tf.convert_to_tensor(testRNA.loc[list(testCDR.cell_line), :], dtype=tf.float32)]

newCDR = cdrFilt[cdrFilt.cell_line.isin(newRNA.index)]

newData = [tf.convert_to_tensor(drugs.loc[list(newCDR.drug), :], dtype=tf.float32), 
            tf.convert_to_tensor(newRNA.loc[list(newCDR.cell_line), :], dtype=tf.float32)]

## Load folds

In [18]:
foldDF = pd.read_csv('../../data/processed/cellLinesForCV.csv')
foldDF.head()

Unnamed: 0,fold0,fold1,fold2,fold3,fold4
0,ACH-000741,ACH-000026,ACH-000720,ACH-000566,ACH-000890
1,ACH-000396,ACH-000753,ACH-000018,ACH-000011,ACH-000845
2,ACH-000839,ACH-000973,ACH-000905,ACH-000142,ACH-000875
3,ACH-000273,ACH-000231,ACH-000558,ACH-000756,ACH-000869
4,ACH-000323,ACH-000137,ACH-000469,ACH-000232,ACH-000572


## run

In [27]:
models = {}
np.random.seed(1738)
for f in foldDF.columns:
    print(f)
    # get cell lines to use in test fold
    testCellLines = list(foldDF[f])
    trainCellLines = list(foldDF.drop(f, axis=1).to_numpy().reshape(-1))
    
    # split data into test and train sets
    trainCDR = cdrFilt[cdrFilt.cell_line.isin(trainCellLines)].copy()
    valCDR = cdrFilt[cdrFilt.cell_line.isin(testCellLines)].copy()
    
    # get trained model
    dnn = getDNN(trainCDR, valCDR, f)
    
    # Test results (cell line)
    testPreds = testCDR.copy()
    testPreds['pred'] = dnn.predict(testData)
    testPreds.sort_values(by='pred', ascending=False, inplace=True)
    testCount, testWrong = countDrugsK(testPreds)
    clTest.loc[f, :] = precision(testPreds, thresh=0.1, at=1, by='cellLine', getResults=True) +\
                        [len(testWrong), len(testCount), round(testPreds.groupby('drug').pred.var().mean(),4)]
    
    # Test results (cancer)
    ctTest.loc[f, :] = list(precision(testPreds.copy(), thresh=0.1, by='cancer').mean())
    
    # New cancer results
    newPreds = newCDR.copy()
    newPreds['pred'] = dnn.predict(newData)
    newPreds.sort_values(by='pred', ascending=False, inplace=True)
    newCount, newWrong = countDrugsK(newPreds)
    newPrecision = precision(newPreds, thresh=0.1, at=1, by='cellLine', getResults=True)
    clNew.loc[f, :] =  newPrecision +\
                        [len(newWrong), len(newCount), round(newPreds.groupby('drug').pred.var().mean(),4)]
    
    # New cancer results (cancer)
    ctNew.loc[f, :] = list(precision(newPreds.copy(), thresh=0.1, by='cancer').mean())
    
    # Keep model
    models[f] = dnn


# cell line test
clTest.loc['mean', :] = clTest.iloc[:5,:].mean()
clTest.loc['std', :] = clTest.iloc[:5,:].std()
clTest.iloc[:, :6] = clTest.iloc[:, :6].astype(float).round(4)

# cancer test
ctTest.loc['mean', :] = ctTest.iloc[:5,:].mean()
ctTest.loc['std', :] = ctTest.iloc[:5,:].std()
ctTest = ctTest.astype(float).round(4)

# cell line new
clNew.loc['mean', :] = clNew.iloc[:5,:].mean()
clNew.loc['std', :] = clNew.iloc[:5,:].std()
clNew.iloc[:, :6] = clNew.iloc[:, :6].astype(float).round(4)

# cancer new
ctNew.loc['mean', :] = ctNew.iloc[:5,:].mean()
ctNew.loc['std', :] = ctNew.iloc[:5,:].std()
ctNew = ctNew.astype(float).round(4)

fold0
[INFO] loading cell line feature extractor...


  function = cls._parse_function_from_config(config, custom_objects,


[INFO] drug feature extractor not loaded. Using raw features...
[INFO] fusion extractor not loaded. Concatenating drug and cell line features...
[INFO] building CDR model...
[INFO] compiling model...
[INFO] training model...
Epoch 1/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold0/assets
142/142 - 3s - loss: 0.2503 - val_loss: 0.2054 - 3s/epoch - 25ms/step
Epoch 2/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold0/assets
142/142 - 3s - loss: 0.2047 - val_loss: 0.1856 - 3s/epoch - 23ms/step
Epoch 3/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold0/assets
142/142 - 3s - loss: 0.1980 - val_loss: 0.1821 - 3s/epoch - 23ms/step
Epoch 4/250
INFO:tensorflow:Assets writt

  function = cls._parse_function_from_config(config, custom_objects,


[INFO] drug feature extractor not loaded. Using raw features...
[INFO] fusion extractor not loaded. Concatenating drug and cell line features...
[INFO] building CDR model...
[INFO] compiling model...
[INFO] training model...
Epoch 1/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold1/assets
146/146 - 4s - loss: 0.2372 - val_loss: 0.1960 - 4s/epoch - 27ms/step
Epoch 2/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold1/assets
146/146 - 3s - loss: 0.1993 - val_loss: 0.1879 - 3s/epoch - 23ms/step
Epoch 3/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold1/assets
146/146 - 3s - loss: 0.1930 - val_loss: 0.1821 - 3s/epoch - 23ms/step
Epoch 4/250
146/146 - 2s - loss: 0.1884 

  function = cls._parse_function_from_config(config, custom_objects,


[INFO] drug feature extractor not loaded. Using raw features...
[INFO] fusion extractor not loaded. Concatenating drug and cell line features...
[INFO] building CDR model...
[INFO] compiling model...
[INFO] training model...
Epoch 1/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold2/assets
149/149 - 4s - loss: 0.2413 - val_loss: 0.2002 - 4s/epoch - 24ms/step
Epoch 2/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold2/assets
149/149 - 4s - loss: 0.1997 - val_loss: 0.1921 - 4s/epoch - 25ms/step
Epoch 3/250
149/149 - 3s - loss: 0.1958 - val_loss: 0.2059 - 3s/epoch - 17ms/step
Epoch 4/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold2/assets
149/149 - 4s - loss: 0.1973 

  function = cls._parse_function_from_config(config, custom_objects,


[INFO] drug feature extractor not loaded. Using raw features...
[INFO] fusion extractor not loaded. Concatenating drug and cell line features...
[INFO] building CDR model...
[INFO] compiling model...
[INFO] training model...
Epoch 1/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold3/assets
148/148 - 4s - loss: 0.2428 - val_loss: 0.2015 - 4s/epoch - 27ms/step
Epoch 2/250
148/148 - 3s - loss: 0.1980 - val_loss: 0.2017 - 3s/epoch - 18ms/step
Epoch 3/250
148/148 - 3s - loss: 0.1925 - val_loss: 0.2044 - 3s/epoch - 18ms/step
Epoch 4/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold3/assets
148/148 - 4s - loss: 0.1876 - val_loss: 0.1952 - 4s/epoch - 26ms/step
Epoch 5/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_

  function = cls._parse_function_from_config(config, custom_objects,


[INFO] drug feature extractor not loaded. Using raw features...
[INFO] fusion extractor not loaded. Concatenating drug and cell line features...
[INFO] building CDR model...
[INFO] compiling model...
[INFO] training model...
Epoch 1/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold4/assets
145/145 - 4s - loss: 0.2429 - val_loss: 0.2066 - 4s/epoch - 25ms/step
Epoch 2/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold4/assets
145/145 - 3s - loss: 0.1968 - val_loss: 0.2008 - 3s/epoch - 24ms/step
Epoch 3/250
INFO:tensorflow:Assets written to: ../../models/fsCDR/DNN/crossValidation/Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold4/assets
145/145 - 4s - loss: 0.1917 - val_loss: 0.1981 - 4s/epoch - 26ms/step
Epoch 4/250
INFO:tensorflow:Assets writt

# Results

## Test set 

In [28]:
print("Trained-on cancer test set: cell line-wise precision@k")
clTest

Unnamed: 0,p1,p2,p3,p4,p5,p10,nWrong,nUnique,variance
fold0,0.9412,0.9118,0.8954,0.8529,0.8353,0.8359,0.0,15.0,0.0001
fold1,0.9412,0.9118,0.8954,0.8775,0.8431,0.8282,0.0,14.0,0.0
fold2,0.9412,0.902,0.8758,0.848,0.8314,0.8462,0.0,14.0,0.0
fold3,0.9608,0.9216,0.8824,0.8431,0.8314,0.8487,1.0,16.0,0.0
fold4,0.9608,0.9216,0.9216,0.8775,0.8353,0.8436,0.0,15.0,0.0
mean,0.949,0.9137,0.8941,0.8598,0.8353,0.8405,0.2,14.8,2e-05
std,0.0107,0.0082,0.0175,0.0165,0.0048,0.0084,0.447214,0.83666,4.5e-05


In [30]:
print("Trained-on cancer test set: cancer-wise precision@k")
ctTest

Unnamed: 0,p1,p2,p3,p4,p5
fold0,0.9306,0.9158,0.9051,0.8618,0.8391
fold1,0.945,0.9158,0.9051,0.8834,0.8541
fold2,0.9306,0.9126,0.8752,0.8542,0.8395
fold3,0.9728,0.9369,0.8877,0.8507,0.8387
fold4,0.9728,0.9314,0.9327,0.8869,0.8463
mean,0.9503,0.9225,0.9012,0.8674,0.8436
std,0.0213,0.0109,0.0216,0.0167,0.0067


## New cancer set 

In [33]:
print("Novel cancer test set: cell line-wise precision@k")
clNew

Unnamed: 0,p1,p2,p3,p4,p5,p10,nWrong,nUnique,variance
fold0,0.9692,0.9308,0.9026,0.8808,0.8585,0.8275,1.0,16.0,0.0001
fold1,0.9846,0.9385,0.9026,0.8731,0.8646,0.8235,1.0,15.0,0.0
fold2,0.9692,0.9385,0.9026,0.8731,0.8462,0.8196,1.0,14.0,0.0001
fold3,0.9692,0.9308,0.9077,0.8769,0.8646,0.8294,2.0,16.0,0.0001
fold4,0.9692,0.9385,0.9026,0.8923,0.8615,0.8196,1.0,14.0,0.0001
mean,0.9723,0.9354,0.9036,0.8792,0.8591,0.8239,1.2,15.0,8e-05
std,0.0069,0.0042,0.0023,0.008,0.0077,0.0045,0.447214,1.0,4.5e-05


In [32]:
print("Novel cancer test set: cancer-wise precision@k")
ctNew

Unnamed: 0,p1,p2,p3,p4,p5
fold0,0.9723,0.9497,0.9236,0.8801,0.8583
fold1,0.9923,0.9597,0.9236,0.869,0.8628
fold2,0.9756,0.9545,0.9238,0.8886,0.8428
fold3,0.9846,0.9507,0.9233,0.8791,0.8733
fold4,0.9723,0.959,0.9241,0.8918,0.8581
mean,0.9794,0.9547,0.9237,0.8817,0.8591
std,0.0088,0.0046,0.0003,0.009,0.011


# Save CV results

In [34]:
clTest.to_csv('../../data/results/trainedOnCellLine_RawDrug-EmbedCell-DNN.csv', index=False)
ctTest.to_csv('../../data/results/trainedOnCancer_RawDrug-EmbedCell-DNN.csv', index=False)
clNew.to_csv('../../data/results/novelCellLine_RawDrug-EmbedCell-DNN.csv', index=False)
ctNew.to_csv('../../data/results/novelCancer_RawDrug-EmbedCell-DNN.csv', index=False)

# Examine preds

# Load model

In [19]:
modelName = f'Unfused-FewShotCDRRawDrugEmbedCell_NL64_64_DO0-1_AFrelu_LR0-01_DR0-99_DS500_fold1'
modelPath = os.path.join(basePath, modelName)
dnn = load_model(modelPath)

In [20]:
dnn.summary()

Model: "FS-CDR"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_9 (InputLayer)           [(None, 256)]        0           []                               
                                                                                                  
 input_10 (InputLayer)          [(None, 463)]        0           []                               
                                                                                                  
 pairEncoder (Functional)       (None, 320)          38016       ['input_9[0][0]',                
                                                                  'input_10[0][0]']               
                                                                                                  
 dense_12 (Dense)               (None, 64)           20544       ['pairEncoder[0][0]']       

## Split into encoder and DNN to enable calc of gradient wrt embedding layer

### Encoder

In [39]:
encoder = dnn.get_layer('pairEncoder')

### DNN

In [40]:
subDNN = Sequential()
subDNN.add(Input(shape=(encoder.outputs[0].shape[1])))
for layer in dnn.layers[3:]:
    subDNN.add(layer)

In [41]:
subDNN.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 64)                20544     
                                                                 
 dropout_8 (Dropout)         (None, 64)                0         
                                                                 
 dense_13 (Dense)            (None, 64)                4160      
                                                                 
 dropout_9 (Dropout)         (None, 64)                0         
                                                                 
 dense_14 (Dense)            (None, 1)                 65        
                                                                 
Total params: 24,769
Trainable params: 24,769
Non-trainable params: 0
_________________________________________________________________


# Get test and training data for best fold

In [42]:
f = 'fold1'
testCellLines = list(foldDF[f])
trainCellLines = list(foldDF.drop(f, axis=1).to_numpy().reshape(-1))   

# split data into test and train sets
testCDR = cdrFilt[cdrFilt.cell_line.isin(testCellLines)].copy()
trainCDR = cdrFilt[cdrFilt.cell_line.isin(trainCellLines)].copy()

# get raw RNA + Drug data
testData = [tf.convert_to_tensor(drugs.loc[list(testCDR.drug), :], dtype=tf.float32), 
            tf.convert_to_tensor(allRNA.loc[list(testCDR.cell_line), :], dtype=tf.float32)]

trainData = [tf.convert_to_tensor(drugs.loc[list(trainCDR.drug), :], dtype=tf.float32), 
            tf.convert_to_tensor(allRNA.loc[list(trainCDR.cell_line), :], dtype=tf.float32)]

# Get preds on trained-on cancer test set

In [21]:
testCDR['pred'] = dnn(testData)
testCDR.sort_values(by='pred', ascending=False, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testCDR['pred'] = dnn(testData)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  testCDR.sort_values(by='pred', ascending=False, inplace=True)


In [24]:
testCancerResults = precision(testCDR, thresh=0.1, by='cancer')
testCancerResults.loc['Overall', :] = testCancerResults.mean(axis=0)
round(testCancerResults, 4)

Unnamed: 0,p1,p2,p3,p4,p5
Bladder Cancer,1.0,1.0,1.0,1.0,0.9333
Brain Cancer,1.0,0.875,0.8333,0.8125,0.75
Breast Cancer,1.0,0.8333,0.8889,0.8333,0.8667
Colon/Colorectal Cancer,1.0,1.0,0.9167,0.9375,0.85
Endometrial/Uterine Cancer,1.0,1.0,0.8889,0.9167,0.8667
Esophageal Cancer,1.0,0.8333,0.8889,0.8333,0.7333
Head and Neck Cancer,1.0,1.0,1.0,0.9167,0.9333
Liver Cancer,1.0,1.0,1.0,1.0,1.0
Lung Cancer,0.9231,0.8462,0.8974,0.8654,0.8769
Ovarian Cancer,1.0,1.0,0.9167,0.9375,0.8


In [44]:
testCount, testWrong = countDrugsK(testCDR, getPcnt=True)
print(testCount.pcntCorrect.mean())
testCount.sort_values(by='total', ascending=False)

0.4607295975873051


Unnamed: 0,1,2,3,total,pcntCorrect
dolastatin-10,35,5,0,78,0.487179
romidepsin,5,22,8,69,0.492754
10-hydroxycamptothecin,6,6,24,66,0.454545
echinomycin,1,14,12,52,0.480769
triptolide,5,6,1,24,0.5
alvespimycin,0,2,6,16,0.5
maytansinol-isobutyrate,6,0,0,12,0.5
YM-155,0,0,5,9,0.444444
exatecan-mesylate,1,4,0,8,0.375
epothilone-b,0,0,2,3,0.333333


# Get preds on novel cancer set

In [22]:
newCDR['pred'] = dnn(newData)
newCDR.sort_values(by='pred', ascending=False, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newCDR['pred'] = dnn(newData)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newCDR.sort_values(by='pred', ascending=False, inplace=True)


In [23]:
newCancerResults = precision(newCDR, thresh=0.1, by='cancer')
newCancerResults.loc['Overall', :] = newCancerResults.mean(axis=0)
round(newCancerResults, 4)

Unnamed: 0,p1,p2,p3,p4,p5
Bile Duct Cancer,1.0,0.9,0.8667,0.85,0.88
Bone Cancer,1.0,0.9444,0.9259,0.8889,0.9111
Gallbladder Cancer,1.0,1.0,1.0,0.75,0.8
Gastric Cancer,1.0,1.0,0.9524,0.9464,0.9143
Kidney Cancer,0.9231,0.8077,0.7949,0.75,0.7231
Neuroblastoma,1.0,1.0,1.0,0.8333,0.8
Prostate Cancer,1.0,1.0,1.0,1.0,0.9
Rhabdoid,1.0,1.0,0.9167,0.9375,0.9
Sarcoma,0.8333,0.9167,0.9444,0.9167,0.8667
Thyroid Cancer,1.0,1.0,0.9583,0.9375,0.85


In [None]:
newCount, newWrong = countDrugsK(newCDR, getPcnt=True)
print(newCount.pcntCorrect.mean())
newCount.sort_values(by='total', ascending=False)