In [1]:
import sksurv
import numpy as np

from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.linear_model import CoxnetSurvivalAnalysis
from lifelines.utils import concordance_index

from utils import DataLoader
import seaborn as sns

%matplotlib inline
from matplotlib import pyplot as plt

Using TensorFlow backend.


In [3]:
# TODO: move get_structured_array into the utils module so it can be imported instead of redefined here

def get_structured_array(data_bool, data_value):
    """Pack two parallel sequences into a single numpy structured array.

    Both arguments are converted with ``.cpu().detach().numpy()``, so they
    must expose that call chain. Record ``i`` of the result is the pair
    ``(data_bool[i], data_value[i])``; the number of records is taken from
    ``data_bool``.

    Returns:
        np.ndarray with dtype ``'bool, i8'`` (numpy's default field names
        ``f0`` and ``f1``).
    """
    flags = data_bool.cpu().detach().numpy()
    values = data_value.cpu().detach().numpy()

    # NOTE(review): the second field is a 64-bit integer. If the values can be
    # fractional they will not be stored exactly -- confirm they are integral.
    records = [(flags[i], values[i]) for i in range(len(flags))]
    return np.array(records, dtype='bool, i8')

In [6]:
# Settings shared by every model fitted in this cell.
n_alphas = 100
l1_ratio = 0.3
num_genes = 500
n_folds = 5

# Single-modality feature blocks, in the order their scores are recorded.
single_modalities = ('gen', 'pyrad', 'dense')

# Modality combinations. For each one, two scores are recorded, in this order:
#   1. early fusion (ef): one model fitted on the column-wise concatenation;
#   2. late fusion (lf): concordance of the summed single-modality predictions.
fusion_groups = (
    ('gen', 'pyrad'),
    ('gen', 'dense'),
    ('gen', 'pyrad', 'dense'),
)


def _to_numpy(tensor):
    """Return ``tensor`` as a numpy array via ``.cpu().detach().numpy()``."""
    return tensor.cpu().detach().numpy()


def _fit_and_score(x_train, y_train, x_test, y_test, l1_ratio, n_alphas):
    """Fit a fresh CoxnetSurvivalAnalysis model and evaluate it on the test split.

    Returns:
        ``(predictions, score)`` where ``predictions`` is
        ``model.predict(x_test)`` and ``score`` is
        ``model.score(x_test, y_test)``.
    """
    model = CoxnetSurvivalAnalysis(l1_ratio=l1_ratio, n_alphas=n_alphas)
    model.fit(x_train, y_train)
    # NOTE(review): no explicit alpha is passed to predict/score, so the
    # library default is used -- confirm which point of the path that is.
    return model.predict(x_test), model.score(x_test, y_test)


def _late_fusion_cindex(predictions, test_times, test_events):
    """Concordance index of the negated sum of several prediction vectors.

    ``predictions`` is a non-empty sequence of arrays; they are combined as
    ``-p0 - p1 - ...`` and passed to ``concordance_index`` together with the
    test times and event flags.
    """
    # NOTE(review): the sign flip is presumably there because the lifelines
    # helper expects larger scores for longer survival -- confirm in its docs.
    fused = -predictions[0]
    for extra in predictions[1:]:
        fused = fused - extra
    return concordance_index(test_times, fused, test_events)


# One row per fold; each row holds nine scores in this order:
# gen, pyrad, dense, gen-pyrad (ef, lf), gen-dense (ef, lf), gen-pyrad-dense (ef, lf).
all_indices = []

for fold_num in range(n_folds):

    data = DataLoader(fold=fold_num, num_genes=num_genes)
    y_train = get_structured_array(data.y_train_bool, data.y_train_value)
    print('Training size', y_train.shape)
    # NOTE(review): the validation split is only reported here; nothing below
    # uses it (for example to choose a penalty strength).
    y_valid = get_structured_array(data.y_valid_bool, data.y_valid_value)
    print('Validation set', y_valid.shape)
    y_test = get_structured_array(data.y_test_bool, data.y_test_value)
    print('Test set', y_test.shape)

    # Convert every tensor once per fold instead of at each use.
    test_times = _to_numpy(data.y_test_value)
    test_events = _to_numpy(data.y_test_bool)
    train_feats = {
        'gen': _to_numpy(data.gen_train),
        'pyrad': _to_numpy(data.pyrad_train),
        'dense': _to_numpy(data.dense_train),
    }
    test_feats = {
        'gen': _to_numpy(data.gen_test),
        'pyrad': _to_numpy(data.pyrad_test),
        'dense': _to_numpy(data.dense_test),
    }

    curr_indices = []
    predictions_by_modality = {}

    ##### Single modalities: Genomics, Pyradiomics, Densenet

    for name in single_modalities:
        predictions_by_modality[name], single_score = _fit_and_score(
            train_feats[name], y_train, test_feats[name], y_test,
            l1_ratio=l1_ratio, n_alphas=n_alphas)
        curr_indices.append(single_score)

    ##### Fused modalities: early fusion first, then late fusion

    for group in fusion_groups:
        fused_train = np.concatenate([train_feats[name] for name in group], axis=1)
        fused_test = np.concatenate([test_feats[name] for name in group], axis=1)
        _, early_score = _fit_and_score(
            fused_train, y_train, fused_test, y_test,
            l1_ratio=l1_ratio, n_alphas=n_alphas)
        curr_indices.append(early_score)

        curr_indices.append(_late_fusion_cindex(
            [predictions_by_modality[name] for name in group],
            test_times, test_events))

    all_indices.append(curr_indices)

mode valid
location data/stanford/labels/recurrence/
mode test
location data/stanford/labels/recurrence/
Training size (74,)
Validation set (11,)
Test set (22,)
mode valid
location data/stanford/labels/recurrence/
mode test
location data/stanford/labels/recurrence/
Training size (74,)
Validation set (11,)
Test set (22,)
mode valid
location data/stanford/labels/recurrence/
mode test
location data/stanford/labels/recurrence/
Training size (75,)
Validation set (11,)
Test set (21,)
mode valid
location data/stanford/labels/recurrence/
mode test
location data/stanford/labels/recurrence/
Training size (75,)
Validation set (11,)
Test set (21,)
mode valid
location data/stanford/labels/recurrence/
mode test
location data/stanford/labels/recurrence/
Training size (75,)
Validation set (11,)
Test set (21,)


In [27]:
# Row labels for the results table, in the same order as the scores stored in
# each fold's row of `all_indices`. The trailing tabs/spaces are part of the
# labels: they pad the first column when the table is printed.
# (ef) = early fusion, (lf) = late fusion.
single_modality_labels = ['genomics \t ', 'pyradiomics \t', 'densenet \t']
pairwise_fusion_labels = [
    'gen-pyrad (ef) \t',
    'gen-pyrad (lf) \t',
    'gen-dense (ef) \t',
    'gen-dense (lf) \t',
]
triple_fusion_labels = ['gen-pyrad-dense (ef)', 'gen-pyrad-dense (lf)']

methods = single_modality_labels + pairwise_fusion_labels + triple_fusion_labels

In [33]:
# Print one table row per method: the method label followed by that method's
# score in every fold, formatted to two decimals.
print('method \t\t \t|fd 1|fd 2|fd 3|fd 4|fd 5')
print('-------------------------------------------------')
n_methods = len(all_indices[0])
for row in range(n_methods):
    cells = []
    for fold_scores in all_indices:
        cells.append("{0:.2f}".format(round(fold_scores[row], 2)))
    print(methods[row] + '\t|' + '|'.join(cells))

method 		 	|fd 1|fd 2|fd 3|fd 4|fd 5
-------------------------------------------------
genomics 	 	|0.52|0.45|0.60|0.72|0.75
pyradiomics 		|0.46|0.39|0.30|0.37|0.80
densenet 		|0.53|0.49|0.44|0.65|0.52
gen-pyrad (ef) 		|0.41|0.36|0.46|0.57|0.81
gen-pyrad (lf) 		|0.41|0.42|0.49|0.60|0.81
gen-dense (ef) 		|0.53|0.46|0.60|0.80|0.77
gen-dense (lf) 		|0.49|0.50|0.55|0.82|0.59
gen-pyrad-dense (ef)	|0.43|0.37|0.47|0.61|0.82
gen-pyrad-dense (lf)	|0.46|0.46|0.51|0.81|0.60
