In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import MinMaxScaler
import torch
from sklearn.metrics import accuracy_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import math
from sklearn.model_selection import train_test_split,cross_val_score,KFold,StratifiedKFold
import torchtuples as tt

In [2]:
from sksurv.linear_model import CoxPHSurvivalAnalysis, CoxnetSurvivalAnalysis
from sksurv.preprocessing import OneHotEncoder, encode_categorical

from sklearn import set_config
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [5]:
datas = pd.read_csv('rats.csv')
datas

Unnamed: 0,litter,rx,time,status,sex
0,1,1,101,0,f
1,1,0,49,1,f
2,1,0,104,0,f
3,2,1,91,0,m
4,2,0,104,0,m
...,...,...,...,...,...
295,99,0,104,0,f
296,99,0,79,1,f
297,100,1,92,0,m
298,100,0,104,0,m


In [6]:
data_y = datas.loc[:, ['time', 'status']].copy()
data_y = data_y.rename(columns={'status': 'cens'})
data_x = datas.drop(['time', 'status'], axis=1)

data_x['sex'] = data_x['sex'].astype('category')
data_x = OneHotEncoder().fit_transform(data_x)
X_train, X_test, y_train, y_test = train_test_split(data_x, data_y, test_size=0.3, random_state=1)
list_of_tuples = list(y_train.itertuples(index=False, name=None))
swapped_list = [(t[1], t[0]) for t in list_of_tuples]
y_train = np.array(swapped_list, dtype=[('cens', bool), ('time', float)])
list_of_tuples = list(y_test.itertuples(index=False, name=None))
swapped_list = [(t[1], t[0]) for t in list_of_tuples]
y_test = np.array(swapped_list, dtype=[('cens', bool), ('time', float)])

In [7]:
for df in [X_train, X_test]:
    numeric_columns = df.select_dtypes(include=['float64', 'int64']).columns
    df[numeric_columns] = (df[numeric_columns] - df[numeric_columns].min()) / (df[numeric_columns].max() - df[numeric_columns].min())

In [9]:
y_train

array([(False, 104.), (False,  97.), (False, 104.), (False,  72.),
       ( True,  79.), ( True,  86.), (False, 104.), (False,  89.),
       (False, 104.), (False, 104.), (False,  98.), (False,  99.),
       (False,  69.), (False,  79.), (False,  87.), ( True,  73.),
       (False, 104.), (False,  92.), (False,  95.), (False, 104.),
       (False, 102.), (False,  74.), (False, 104.), ( True,  96.),
       (False,  91.), (False, 104.), (False, 104.), (False,  76.),
       ( True,  55.), (False,  82.), (False,  91.), (False, 104.),
       (False, 104.), (False,  96.), (False, 104.), (False, 104.),
       (False,  91.), ( True,  45.), (False, 104.), ( True, 101.),
       (False, 104.), (False,  91.), (False, 104.), ( True,  84.),
       (False, 104.), (False, 104.), (False,  85.), (False, 104.),
       (False, 104.), (False, 104.), (False,  91.), (False,  95.),
       (False, 104.), (False, 102.), (False,  78.), (False,  89.),
       (False, 103.), ( True,  40.), (False,  83.), ( True,  6

# MODELS

In [10]:
cox_lasso = CoxnetSurvivalAnalysis(l1_ratio=1, alpha_min_ratio=0.01)
cox_lasso.fit(X_train, y_train)
cox_lasso.score(X_test, y_test)

0.7519181585677749

In [11]:
cox_ridge = CoxnetSurvivalAnalysis(l1_ratio=0.00001, alpha_min_ratio=0.01)
cox_ridge.fit(X_train, y_train)
cox_ridge.score(X_test, y_test)

0.731457800511509

In [12]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import seaborn as sns

sns.set_style("white")

import torch # For building the networks
import torchtuples as tt # Some useful functions
from torch import nn
import torch.nn.functional as F

from pycox.datasets import support
from pycox.models import DeepHitSingle
from pycox.models import CoxPH
from pycox.evaluation import EvalSurv
from pycox.preprocessing.feature_transforms import OrderedCategoricalLong
from pycox.models.loss import rank_loss_deephit_single

In [13]:
data_y_transform_train = pd.DataFrame.from_records(y_train, columns=['event', 'time'])
data_y_transform_test = pd.DataFrame.from_records(y_test, columns=['event', 'time'])
data_y_transform_train['cens'] = data_y_transform_train['cens'].replace({True: 1, False: 0})
data_y_transform_test['cens'] = data_y_transform_test['cens'].replace({True: 1, False: 0})
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train_values= X_train.values
X_test_values= X_test.values

In [14]:
num_durations = 10

labtrans = DeepHitSingle.label_transform(num_durations)
get_target = lambda df: (df['time'].values, df['cens'].values)
y_train_deephit = labtrans.fit_transform(*get_target(data_y_transform_train))
durations_test, events_test = get_target(data_y_transform_test)

In [15]:
in_features = X_train.shape[1]
out_features = labtrans.out_features
num_nodes = [32]
batch_norm = True
dropout = 0.2

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)
optimizer = tt.optim.AdamWR(decoupled_weight_decay=0.01, cycle_eta_multiplier=0.8,
                            cycle_multiplier=2)
model_deep_hit = DeepHitSingle(net, optimizer, alpha = 0.2, sigma = 0.1, duration_index=labtrans.cuts)
epochs = 30
batch_size=8
model_deep_hit.fit(X_train_values, y_train_deephit, batch_size, epochs)
surv = model_deep_hit.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:1025.)
  p.data = p.data.add(-weight_decay * eta, p.data)


0:	[0s / 0s],		train_loss: 0.5115
1:	[0s / 0s],		train_loss: 0.5065
2:	[0s / 0s],		train_loss: 0.4840
3:	[0s / 0s],		train_loss: 0.4533
4:	[0s / 0s],		train_loss: 0.4573
5:	[0s / 1s],		train_loss: 0.4328
6:	[0s / 1s],		train_loss: 0.4453
7:	[0s / 1s],		train_loss: 0.4381
8:	[0s / 1s],		train_loss: 0.4174
9:	[0s / 1s],		train_loss: 0.4189
10:	[0s / 1s],		train_loss: 0.4057
11:	[0s / 1s],		train_loss: 0.4162
12:	[0s / 1s],		train_loss: 0.4242
13:	[0s / 1s],		train_loss: 0.4115
14:	[0s / 1s],		train_loss: 0.4116
15:	[0s / 1s],		train_loss: 0.4154
16:	[0s / 2s],		train_loss: 0.4016
17:	[0s / 2s],		train_loss: 0.4122
18:	[0s / 2s],		train_loss: 0.4092
19:	[0s / 2s],		train_loss: 0.3880
20:	[0s / 2s],		train_loss: 0.4154
21:	[0s / 2s],		train_loss: 0.3799
22:	[0s / 2s],		train_loss: 0.3963
23:	[0s / 2s],		train_loss: 0.3760
24:	[0s / 2s],		train_loss: 0.3961
25:	[0s / 2s],		train_loss: 0.3870
26:	[0s / 2s],		train_loss: 0.3741
27:	[0s / 3s],		train_loss: 0.3754
28:	[0s / 3s],		train_loss: 0.

0.717391304347826

In [16]:
from sksurv.ensemble import RandomSurvivalForest
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           n_jobs=-1,
                           random_state=42)
rsf.fit(X_train,y_train)
rsf.score(X_test, y_test)

0.7621483375959079

In [17]:
from sksurv.linear_model import CoxPHSurvivalAnalysis
estimator = CoxPHSurvivalAnalysis().fit(X_train, y_train)
estimator.score(X_test,y_test)

0.7519181585677749

In [18]:
from sksurv.linear_model import CoxnetSurvivalAnalysis
estimator = CoxnetSurvivalAnalysis(l1_ratio=0.99, fit_baseline_model=True)
estimator.fit(X_train, y_train)
estimator.score(X_test, y_test)

0.7519181585677749

In [None]:
from sksurv.ensemble import GradientBoostingSurvivalAnalysis
est_cph_tree = GradientBoostingSurvivalAnalysis(
    n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0
)
est_cph_tree.fit(X_train, y_train)
cindex = est_cph_tree.score(X_test, y_test)
cindex

0.7477876106194691

In [19]:
import torch
import torchtuples as tt

from pycox.datasets import metabric
from pycox.models import CoxPH
from pycox.evaluation import EvalSurv

In [20]:
get_target = lambda df: (df['time'].values, df['cens'].values)
y_train_deepsurv = get_target(data_y_transform_train)
durations_test, events_test = get_target(data_y_transform_test)

In [21]:
in_features = X_train.shape[1]
num_nodes = [64, 64]
out_features = 1
batch_norm = True
dropout = 0.2
output_bias = False

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm,
                              dropout, output_bias=output_bias)

In [22]:
optimizer = tt.optim.AdamWR(decoupled_weight_decay=0.01, cycle_eta_multiplier=0.8,
                            cycle_multiplier=2)
model_deepsurv = CoxPH(net, optimizer)
batch_size= 4
epochs= 100
verbose=1
callbacks = [tt.callbacks.EarlyStopping()]
log = model_deepsurv.fit(X_train_values, y_train_deepsurv, batch_size, epochs, callbacks, verbose)
model_deepsurv.compute_baseline_hazards()
surv = model_deepsurv.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td()

0:	[0s / 0s],	
1:	[0s / 0s],	
2:	[0s / 0s],	
3:	[0s / 0s],	
4:	[0s / 1s],	
5:	[0s / 1s],	
6:	[0s / 1s],	
7:	[0s / 1s],	
8:	[0s / 1s],	
9:	[0s / 1s],	


0.6820480404551201

In [23]:
from pycox.models import LogisticHazard
# from pycox.models import PMF
# from pycox.models import DeepHitSingle
from pycox.evaluation import EvalSurv

In [24]:
num_durations = 10

labtrans = LogisticHazard.label_transform(num_durations)
# labtrans = PMF.label_transform(num_durations)
# labtrans = DeepHitSingle.label_transform(num_durations)

get_target = lambda df: (df['time'].values, df['cens'].values)
y_train_loghazard = labtrans.fit_transform(*get_target(data_y_transform_train))
y_test_loghazard = labtrans.transform(*get_target(data_y_transform_test))


# # We don't need to transform the test labels
durations_test, events_test = get_target(data_y_transform_test)

In [25]:
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train_values= X_train.values
X_test_values= X_test.values

In [26]:
in_features = X_train.shape[1]
num_nodes = [32, 32]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.1
net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)
model = LogisticHazard(net, tt.optim.Adam(0.01), duration_index=labtrans.cuts)
batch_size = 32
epochs = 20
log= model.fit(X_train_values, y_train_loghazard, batch_size, epochs)
surv = model.predict_surv_df(X_test_values)
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

0:	[0s / 0s],		train_loss: 5.7757
1:	[0s / 0s],		train_loss: 5.0907
2:	[0s / 0s],		train_loss: 4.0231
3:	[0s / 0s],		train_loss: 2.6277
4:	[0s / 0s],		train_loss: 1.5295
5:	[0s / 0s],		train_loss: 0.8301
6:	[0s / 0s],		train_loss: 0.6154
7:	[0s / 0s],		train_loss: 0.5652
8:	[0s / 0s],		train_loss: 0.5368
9:	[0s / 0s],		train_loss: 0.4975
10:	[0s / 0s],		train_loss: 0.5137
11:	[0s / 0s],		train_loss: 0.4775
12:	[0s / 0s],		train_loss: 0.4842
13:	[0s / 0s],		train_loss: 0.5114
14:	[0s / 0s],		train_loss: 0.5214
15:	[0s / 0s],		train_loss: 0.5024
16:	[0s / 0s],		train_loss: 0.4960
17:	[0s / 0s],		train_loss: 0.4970
18:	[0s / 0s],		train_loss: 0.4446
19:	[0s / 0s],		train_loss: 0.4918


0.7583120204603581

In [27]:
cox= CoxnetSurvivalAnalysis()
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           random_state=42)
cox.fit(X_train_values, y_train)
rsf.fit(X_train_values, y_train)
cox_pred = cox.predict(X_test_values)
rsf_pred = rsf.predict(X_test_values)
ensemble_pred= (cox_pred+rsf_pred)/2
from sksurv.metrics import concordance_index_censored
cindex = concordance_index_censored(y_test["cens"], y_test["time"], ensemble_pred)
print("Concordance Index:", cindex[0])

Concordance Index: 0.7468030690537084


In [28]:
cox= CoxPHSurvivalAnalysis()
rsf = RandomSurvivalForest(n_estimators=1000,
                           min_samples_split=10,
                           min_samples_leaf=15,
                           random_state=42)
cox_lasso = CoxnetSurvivalAnalysis(l1_ratio=1, alpha_min_ratio=0.01)
cox_lasso.fit(X_train, y_train)
cox.fit(X_train_values, y_train)
rsf.fit(X_train_values, y_train)
lasso_predict = cox_lasso.predict(X_test_values)
cox_pred = cox.predict(X_test_values)
rsf_pred = rsf.predict(X_test_values)
ensemble_pred = np.maximum(cox_pred, rsf_pred, lasso_predict)
from sksurv.metrics import concordance_index_censored
cindex = concordance_index_censored(y_test["cens"], y_test["time"], ensemble_pred)
print("Concordance Index:", cindex[0])

Concordance Index: 0.7621483375959079


