In [87]:
# Import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import re
import pickle

import os
path_dir = os.path.dirname(os.getcwd())

import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
pio.templates.default = "plotly_white"

import torch # For building the networks 
import torchtuples as tt # Some useful functions

from pycox.datasets import metabric
from pycox.models import LogisticHazard, PMF, DeepHitSingle, CoxPH
from pycox.evaluation import EvalSurv

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [88]:
cd ../src/

/Users/linafaik/Documents/survival_analysis/src


In [89]:
from train import *
from train_survival_ml import *
from train_survival_deep import *

In [90]:
# We also set some seeds to make this reproducable.
# Note that on gpu, there is still some randomness.
np.random.seed(1234)
_ = torch.manual_seed(123)

In [91]:
# Parameters

scaler_name = "StandardScaler" #MinMaxScaler
random_state = 123
test_size = 0.3

In [92]:
#df = pd.read_csv(os.path.join(path_dir, "outputs/hdhi_clean.csv"))

In [93]:
#from sksurv import datasets
#df, y = datasets.load_flchain()

In [94]:
from pycox.datasets import support
support.read_df()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,duration,event
0,82.709961,1.0,2.0,1.0,0.0,0.0,0.0,160.0,55.0,16.0,38.195309,142.0,19.000000,1.099854,30.0,1
1,79.660950,1.0,0.0,1.0,0.0,0.0,1.0,54.0,67.0,16.0,38.000000,142.0,10.000000,0.899902,1527.0,0
2,23.399990,1.0,2.0,3.0,0.0,0.0,1.0,87.0,144.0,45.0,37.296879,130.0,5.199219,1.199951,96.0,1
3,53.075989,1.0,4.0,3.0,0.0,0.0,0.0,55.0,100.0,18.0,36.000000,135.0,8.699219,0.799927,892.0,0
4,71.794983,0.0,1.0,1.0,0.0,0.0,0.0,65.0,135.0,40.0,38.593750,146.0,0.099991,0.399963,7.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8868,81.064941,0.0,4.0,1.0,0.0,0.0,1.0,111.0,110.0,34.0,39.593750,135.0,13.000000,1.500000,36.0,1
8869,72.560966,0.0,2.0,1.0,0.0,0.0,1.0,53.0,74.0,28.0,34.695309,139.0,7.899414,1.899902,49.0,1
8870,63.228001,0.0,1.0,1.0,0.0,0.0,2.0,95.0,110.0,22.0,38.695309,132.0,7.799805,1.500000,6.0,1
8871,75.405937,0.0,2.0,1.0,1.0,0.0,2.0,109.0,110.0,30.0,36.195309,140.0,15.398438,0.899902,10.0,1


In [95]:
from pycox.datasets import flchain
df = flchain.read_df()
df

Unnamed: 0,age,sex,sample.yr,kappa,lambda,flc.grp,creatinine,mgus,futime,death
0,97.0,0.0,1997,5.700,4.860,10,1.7,0.0,85.0,1.0
1,92.0,0.0,2000,0.870,0.683,1,0.9,0.0,1281.0,1.0
2,94.0,0.0,1997,4.360,3.850,10,1.4,0.0,69.0,1.0
3,92.0,0.0,1996,2.420,2.220,9,1.0,0.0,115.0,1.0
4,93.0,0.0,1996,1.320,1.690,6,1.1,0.0,1039.0,1.0
...,...,...,...,...,...,...,...,...,...,...
6519,53.0,0.0,1997,0.705,1.250,2,0.8,0.0,4547.0,0.0
6520,52.0,0.0,1996,0.786,1.030,2,0.7,0.0,4788.0,0.0
6521,52.0,0.0,1995,1.210,1.610,6,1.0,0.0,4997.0,0.0
6522,52.0,0.0,1999,0.858,0.581,1,0.8,0.0,3652.0,0.0


In [96]:
df['censored'] = (df.death==0).astype(int)

In [97]:
#from sksurv.preprocessing import OneHotEncoder

In [98]:
#df = OneHotEncoder().fit_transform(df)

In [99]:
#df.fillna(0, inplace=True)

In [100]:
#with open(os.path.join(path_dir, "outputs/cox_ph.pkl"), "rb") as f:
    #estimator_cox = pickle.load(f)

In [101]:
df = pd.get_dummies(df, drop_first=True, columns=['flc.grp'])

# 1. Train / test split

In [102]:
# covariate columns (used when possible)

cols_x = [
    'age', 'gender', 'rural',
    'duration_of_stay', 'duration_of_intensive_unit_stay', 
    'smoking_','alcohol', 'dm', 'htn', 'cad', 'prior_cmp', 'ckd', 'hb', 'tlc',
    'platelets', 'glucose', 'urea', 'creatinine', 'raised_cardiac_enzymes',
    'severe_anaemia', 'anaemia', 'stable_angina', 'acs', 'stemi',
    'atypical_chest_pain', 'heart_failure', 'hfref', 'hfnef', 'valvular',
    'chb', 'sss', 'aki', 'cva_infract', 'cva_bleed', 'af', 'vt', 'psvt',
    'congenital', 'uti', 'neuro_cardiogenic_syncope', 'orthostatic',
    'infective_endocarditis', 'dvt', 'cardiogenic_shock', 'shock',
    'pulmonary_embolism', 'chest_infection',
    'type_adm'
]

col_target = "time_before_readm"

In [103]:
df.columns

Index(['age', 'sex', 'sample.yr', 'kappa', 'lambda', 'creatinine', 'mgus',
       'futime', 'death', 'censored', 'flc.grp_2', 'flc.grp_3', 'flc.grp_4',
       'flc.grp_5', 'flc.grp_6', 'flc.grp_7', 'flc.grp_8', 'flc.grp_9',
       'flc.grp_10'],
      dtype='object')

In [104]:
cols_x = ['age', 'sex', 'kappa', 'lambda', 'creatinine','mgus', 
          #'flc.grp', 
          'flc.grp_2', 'flc.grp_3', 'flc.grp_4',
          'flc.grp_5', 'flc.grp_6', 'flc.grp_7', 'flc.grp_8', 'flc.grp_9',
          'flc.grp_10']

col_target = "futime"

In [105]:
Xy_train, Xy_test, y_train, y_test = split_train_test(df, cols_x, col_target, 
                                                      test_size=0.2, random_state=random_state)

Xy_train, Xy_val, y_train, y_val = split_train_test(Xy_train, cols_x, col_target, 
                                                      test_size=0.3, random_state=random_state)

In [106]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# rescale
scaler = eval(scaler_name)()

Xy_train[cols_x] = scaler.fit_transform(Xy_train[cols_x])
Xy_test[cols_x] = scaler.transform(Xy_test[cols_x])

In [107]:
from sksurv.linear_model import CoxPHSurvivalAnalysis

# train an estimator
estimator = CoxPHSurvivalAnalysis(alpha=0.5)
estimator = estimator.fit(Xy_train[cols_x], y_train)

In [108]:
estimator.score(Xy_test[cols_x], y_test)

0.5902042347685535

In [109]:
feat_importance, fig = plot_feat_imp(cols_x, estimator.coef_)
fig

# 2. DeepSurv

Source: https://nbviewer.org/github/havakv/pycox/blob/master/examples/cox-ph.ipynb

In [110]:
get_target = lambda df: (df[col_target].values, df['censored'].values)

y_train = get_target(Xy_train)
y_val = get_target(Xy_val)
y_test = get_target(Xy_test)

train = (np.array(Xy_train[cols_x]).astype(np.float32), y_train)
val = (np.array(Xy_val[cols_x]).astype(np.float32), y_val)
test = (np.array(Xy_test[cols_x]).astype(np.float32), y_test)

In [111]:
params = {
    'n_nodes': 32,
    'n_layers': 4,
    'dropout': 0.4, 
    'output_bias': False, 
    'epochs': 512, 
    'batch_size': 12
}

logs_df, model, score = train_deep_surv(
    train, val, test, CoxPH, out_features = 1, tolerance=10, 
    print_lr=True, print_logs=True, verbose = True,
    **params
)

print('score', score)

concordance: 0.5125192042373361
0:	[0s / 0s],		train_loss: 1.4696,	val_loss: 1.4399
1:	[0s / 1s],		train_loss: 1.4147,	val_loss: 1.4158
2:	[0s / 2s],		train_loss: 1.3968,	val_loss: 1.4160
3:	[0s / 2s],		train_loss: 1.3898,	val_loss: 1.4171
4:	[0s / 3s],		train_loss: 1.3881,	val_loss: 1.4201
concordance: 0.5455243790448585
5:	[0s / 4s],		train_loss: 1.3905,	val_loss: 1.4131
6:	[0s / 4s],		train_loss: 1.3809,	val_loss: 1.4114
7:	[0s / 5s],		train_loss: 1.3845,	val_loss: 1.4177
8:	[0s / 6s],		train_loss: 1.3884,	val_loss: 1.4111
9:	[0s / 7s],		train_loss: 1.3885,	val_loss: 1.4065
concordance: 0.570963193147841
10:	[0s / 7s],		train_loss: 1.3879,	val_loss: 1.4223
11:	[0s / 8s],		train_loss: 1.3809,	val_loss: 1.4104
12:	[0s / 9s],		train_loss: 1.3753,	val_loss: 1.4210
13:	[0s / 9s],		train_loss: 1.3837,	val_loss: 1.4134
14:	[0s / 10s],		train_loss: 1.3849,	val_loss: 1.4216
Epoch 00016: reducing learning rate of group 0 to 8.0000e-04.
concordance: 0.5795218634529763
15:	[0s / 11s],		train_lo

score 0.5931204219927143


In [112]:
grid_params = {
    "n_nodes" :[16, 32, 64],
    "n_layers" :[4],
    "dropout" :[0.4],
    "output_bias" :[False],
    "epochs" :[512],
    "batch_size" :[16]
}

best_model, table = grid_search_deep(train, val, test, 1, grid_params, CoxPH)

3 total scenario to run
1/3: params: {'n_nodes': 16, 'n_layers': 4, 'dropout': 0.4, 'output_bias': False, 'epochs': 512, 'batch_size': 16}
concordance: 0.5311532987765867
0:	[0s / 0s],		train_loss: 1.8319,	val_loss: 1.7896
1:	[0s / 1s],		train_loss: 1.6666,	val_loss: 1.6999
2:	[0s / 1s],		train_loss: 1.6429,	val_loss: 1.6760
3:	[0s / 2s],		train_loss: 1.6391,	val_loss: 1.6655
4:	[0s / 2s],		train_loss: 1.6250,	val_loss: 1.6657
concordance: 0.5302415055251625
5:	[0s / 3s],		train_loss: 1.6260,	val_loss: 1.6656
6:	[0s / 3s],		train_loss: 1.6302,	val_loss: 1.6616
7:	[0s / 4s],		train_loss: 1.6268,	val_loss: 1.6642
8:	[0s / 4s],		train_loss: 1.6281,	val_loss: 1.6639
9:	[0s / 5s],		train_loss: 1.6279,	val_loss: 1.6632
concordance: 0.5049539315915689
10:	[0s / 5s],		train_loss: 1.6248,	val_loss: 1.6614
11:	[0s / 6s],		train_loss: 1.6258,	val_loss: 1.6613
12:	[0s / 6s],		train_loss: 1.6235,	val_loss: 1.6611
13:	[0s / 7s],		train_loss: 1.6276,	val_loss: 1.6536
14:	[0s / 7s],		train_loss: 1.623

In [27]:
table

Unnamed: 0,n_nodes,n_layers,dropout,output_bias,epochs,batch_size,lr,score
6,32,2,0.4,True,512,16,0.001,0.600487
16,64,4,0.4,True,512,16,0.001,0.598106
5,16,4,0.4,False,512,16,0.001,0.598034
1,16,2,0.4,False,512,16,0.001,0.597936
15,64,3,0.4,False,512,16,0.001,0.596874
10,32,4,0.4,True,512,16,0.001,0.596413
17,64,4,0.4,False,512,16,0.001,0.595542
9,32,3,0.4,False,512,16,0.001,0.594644
3,16,3,0.4,False,512,16,0.001,0.594574
8,32,3,0.4,True,512,16,0.001,0.593632


In [29]:
table.sort_values(by="score", ascending=False, inplace=True)
best_params = table.drop('score', axis=1).iloc[0].to_dict()
best_score = table.score.iloc[0]

print('Best score: ', best_score)
print('Best params: ')
print(best_params)

Best score:  0.600486579883099
Best params: 
{'n_nodes': 32, 'n_layers': 2, 'dropout': 0.4, 'output_bias': True, 'epochs': 512, 'batch_size': 16, 'lr': 0.001}


In [31]:
best_params = {k:v for k,v in best_params.items() if k!='lr'}

In [33]:
best_params

{'n_nodes': 32,
 'n_layers': 2,
 'dropout': 0.4,
 'output_bias': True,
 'epochs': 512,
 'batch_size': 16}

In [35]:
params = {
    'n_nodes': 32,
    'n_layers': 2,
    'dropout': 0.4, 
    'output_bias': False, 
    'epochs': 512, 
    'batch_size': 16
}

logs_df, model, score = train_deep_surv(
    train, val, test, CoxPH, out_features = 1, tolerance=10, 
    print_lr=True, print_logs=True, verbose = True,
    **params
)

print('score', score)

0:	[0s / 0s],		train_loss: 1.7313,	val_loss: 1.6788
1:	[0s / 0s],		train_loss: 1.6503,	val_loss: 1.6891
2:	[0s / 0s],		train_loss: 1.6329,	val_loss: 1.6473
3:	[0s / 1s],		train_loss: 1.6202,	val_loss: 1.6489
4:	[0s / 1s],		train_loss: 1.6130,	val_loss: 1.7390
5:	[0s / 2s],		train_loss: 1.6199,	val_loss: 1.6872
6:	[0s / 2s],		train_loss: 1.6152,	val_loss: 1.6797
7:	[0s / 2s],		train_loss: 1.6138,	val_loss: 1.6823
Epoch 00009: reducing learning rate of group 0 to 2.0000e-04.
8:	[0s / 2s],		train_loss: 1.6162,	val_loss: 1.6942
9:	[0s / 3s],		train_loss: 1.6088,	val_loss: 1.6521
10:	[0s / 3s],		train_loss: 1.6084,	val_loss: 1.6501
11:	[0s / 3s],		train_loss: 1.6104,	val_loss: 1.6508
12:	[0s / 4s],		train_loss: 1.6110,	val_loss: 1.6583
13:	[0s / 4s],		train_loss: 1.5990,	val_loss: 1.6517
Epoch 00015: reducing learning rate of group 0 to 4.0000e-05.
14:	[0s / 4s],		train_loss: 1.6041,	val_loss: 1.6651
15:	[0s / 5s],		train_loss: 1.6120,	val_loss: 1.6529
16:	[0s / 5s],		train_loss: 1.6066,	va

score 0.5913055399600465


# 3. DeepHit

In [145]:
from pycox.models import DeepHitSingle

In [146]:
num_durations = int(df[col_target].max())
labtrans = DeepHitSingle.label_transform(num_durations)

In [147]:
get_target = lambda df: (df[col_target].values, df['censored'].values)

y_train = labtrans.fit_transform(*get_target(Xy_train))
y_val = labtrans.transform(*get_target(Xy_val))

train = (np.array(Xy_train[cols_x]).astype(np.float32), y_train)
val = (np.array(Xy_val[cols_x]).astype(np.float32), y_val)
test = (np.array(Xy_test[cols_x]).astype(np.float32), y_test)

In [152]:
params = {
    'n_nodes': 32,
    'out_features': num_durations,
    'dropout': 0.1,
    'model_params': {'alpha': 0.2, 'sigma': 0.1, 'duration_index': labtrans.cuts},
    'epochs': 512,
    'batch_size': 16,
    'discrete': True,
    'output_bias': True,
}

In [153]:
logs_df, model, score = train_deep_surv(
    train, val, test, DeepHitSingle, tolerance=10, 
    print_lr=True, print_logs=True, verbose = True,
    **params
)

Best learning rate:  0.001484968262254472
0:	[1s / 1s],		train_loss: 1.4185,	val_loss: 17.8841
1:	[1s / 2s],		train_loss: 1.3381,	val_loss: 21.0084
2:	[1s / 3s],		train_loss: 1.2589,	val_loss: 33.6124
3:	[1s / 5s],		train_loss: 1.1873,	val_loss: 30.1431
4:	[1s / 6s],		train_loss: 1.1383,	val_loss: 29.6380
5:	[1s / 7s],		train_loss: 1.1061,	val_loss: 29.9999
6:	[1s / 8s],		train_loss: 1.0855,	val_loss: 22.6818
7:	[1s / 10s],		train_loss: 1.0713,	val_loss: 20.6712
8:	[1s / 11s],		train_loss: 1.0595,	val_loss: 16.4748
9:	[1s / 12s],		train_loss: 1.0508,	val_loss: 18.2265
10:	[1s / 13s],		train_loss: 1.0403,	val_loss: 16.7687
11:	[1s / 14s],		train_loss: 1.0338,	val_loss: 18.2929
12:	[1s / 16s],		train_loss: 1.0253,	val_loss: 15.7813
13:	[1s / 17s],		train_loss: 1.0151,	val_loss: 14.7449
14:	[1s / 18s],		train_loss: 1.0125,	val_loss: 17.5678
15:	[1s / 19s],		train_loss: 1.0051,	val_loss: 14.7083
16:	[1s / 21s],		train_loss: 1.0007,	val_loss: 15.4418
17:	[1s / 22s],		train_loss: 0.9945,	val

In [154]:
score

0.5673976245500877

In [43]:
grid_params = {
    'n_nodes': [32, 64],
    'dropout': [0.1],
    'model_params': [{'alpha': 0.2, 'sigma': 0.1, 'duration_index': labtrans.cuts}],
    'epochs': [512],
    'batch_size': [16],
    'discrete': [True],
    'output_bias': [True],
}

best_model, table = grid_search_deep(train, val, test, labtrans.out_features, grid_params, DeepHitSingle)

2 total scenario to run
1/2: params: {'n_nodes': 32, 'dropout': 0.1, 'model_params': {'alpha': 0.2, 'sigma': 0.1}, 'epochs': 512, 'batch_size': 16, 'discrete': True, 'output_bias': True}
Best learning rate:  0.0001
0:	[1s / 1s],		train_loss: 1.4401,	val_loss: 309.6760
1:	[1s / 2s],		train_loss: 1.4326,	val_loss: 356.1917
2:	[1s / 3s],		train_loss: 1.4295,	val_loss: 376.0471
3:	[1s / 4s],		train_loss: 1.4240,	val_loss: 303.4081
4:	[1s / 5s],		train_loss: 1.4209,	val_loss: 344.3560
5:	[1s / 6s],		train_loss: 1.4164,	val_loss: 299.7307
6:	[1s / 7s],		train_loss: 1.4124,	val_loss: 325.4799
7:	[1s / 8s],		train_loss: 1.4074,	val_loss: 344.2605
8:	[1s / 9s],		train_loss: 1.4049,	val_loss: 287.5052
9:	[1s / 11s],		train_loss: 1.3992,	val_loss: 340.7303
10:	[1s / 12s],		train_loss: 1.3950,	val_loss: 407.2778
11:	[1s / 13s],		train_loss: 1.3929,	val_loss: 345.2958
12:	[1s / 14s],		train_loss: 1.3916,	val_loss: 303.9655
13:	[1s / 15s],		train_loss: 1.3905,	val_loss: 366.0233
14:	[1s / 17s],		tra

27:	[1s / 38s],		train_loss: 1.0368,	val_loss: 64.0916
28:	[1s / 40s],		train_loss: 1.0373,	val_loss: 78.5223
29:	[1s / 41s],		train_loss: 1.0180,	val_loss: 84.9396
30:	[1s / 42s],		train_loss: 1.0266,	val_loss: 67.3732
31:	[1s / 43s],		train_loss: 1.0235,	val_loss: 64.5239
32:	[1s / 45s],		train_loss: 1.0357,	val_loss: 70.4244
33:	[1s / 46s],		train_loss: 1.0306,	val_loss: 63.7743
34:	[1s / 47s],		train_loss: 1.0040,	val_loss: 68.0986
35:	[1s / 48s],		train_loss: 1.0148,	val_loss: 100.2245
36:	[1s / 49s],		train_loss: 1.0087,	val_loss: 76.6970
Current score: 0.6439156444003863 vs. best score: 0.6372563236307355


In [269]:
table

Unnamed: 0,n_nodes,dropout,model_params,epochs,batch_size,discrete,output_bias,lr,score
0,32,0.1,"{'alpha': 0.2, 'sigma': 0.1, 'duration_index':...",512,16,True,True,0.007925,0.630384
1,64,0.1,"{'alpha': 0.2, 'sigma': 0.1, 'duration_index':...",512,16,True,True,0.0001,0.577057


# Draft

In [206]:
# finding the best learning rate from this model
lrfinder = model.lr_finder(train[0], train[1], batch_size, tolerance=10)
lr = lrfinder.get_best_lr()
model.optimizer.set_lr(lr)

lrfinder_df = lrfinder.to_pandas()
fig = px.line(x=lrfinder_df.index, y=lrfinder_df.train_loss, 
              log_x=True, width=700, height=400)

fig.update_layout(dict(xaxis={'title':'lr'}, yaxis={'title':'batch_loss'}))
fig.show()

print("Best learning rate: ", lr)

Best learning rate:  0.0001


In [208]:
callbacks = [tt.callbacks.EarlyStopping(patience=15)]

log = model.fit(train[0], train[1], batch_size, 100, callbacks, verbose,
            val_data=val, val_batch_size=batch_size)

0:	[0s / 0s],		train_loss: 1.4597,	val_loss: 271.5473
1:	[0s / 0s],		train_loss: 1.4582,	val_loss: 280.5531
2:	[0s / 0s],		train_loss: 1.4579,	val_loss: 271.2865
3:	[0s / 1s],		train_loss: 1.4580,	val_loss: 300.3719
4:	[0s / 1s],		train_loss: 1.4543,	val_loss: 305.8623
5:	[0s / 1s],		train_loss: 1.4541,	val_loss: 296.1074
6:	[0s / 2s],		train_loss: 1.4494,	val_loss: 305.4858
7:	[0s / 2s],		train_loss: 1.4490,	val_loss: 322.0967
8:	[0s / 2s],		train_loss: 1.4457,	val_loss: 292.0412
9:	[0s / 2s],		train_loss: 1.4483,	val_loss: 282.3502
10:	[0s / 3s],		train_loss: 1.4455,	val_loss: 316.9798
11:	[0s / 3s],		train_loss: 1.4452,	val_loss: 304.8284
12:	[0s / 3s],		train_loss: 1.4434,	val_loss: 319.9052
13:	[0s / 3s],		train_loss: 1.4415,	val_loss: 336.0021
14:	[0s / 4s],		train_loss: 1.4418,	val_loss: 331.8771
15:	[0s / 4s],		train_loss: 1.4382,	val_loss: 320.0784
16:	[0s / 4s],		train_loss: 1.4340,	val_loss: 358.6517
17:	[0s / 5s],		train_loss: 1.4378,	val_loss: 335.6406


In [212]:
logs_df = log.to_pandas().reset_index().melt(
    id_vars="index", value_name="loss", var_name="dataset").reset_index()

fig = px.line(logs_df, y="loss", x="index", color="dataset", width=800, height = 400)
fig.show()

# scoring the model
surv = model.interpolate(10).predict_surv_df(test[0])
ev = EvalSurv(surv, test[1][0], test[1][1], censor_surv='km')
score = ev.concordance_td()
score

0.5252427912727381

In [143]:

best_score

0.6584983169424773

In [84]:
# finding the best learning rate from this model
lrfinder = model.lr_finder(train[0], y_train, params['batch_size'], tolerance=10, lr_min=0.001, lr_max=1.0)

lrfinder_df = lrfinder.to_pandas()
fig = px.line(x=lrfinder_df.index, y=lrfinder_df.train_loss, 
              log_x=True, width=700, height=400)
fig.update_layout(dict(xaxis={'title':'lr'}, yaxis={'title':'batch_loss'}))


print("Best LR: ", lrfinder.get_best_lr())
fig

Best LR:  0.0001


In [83]:
help(model.lr_finder)

Help on method lr_finder in module torchtuples.base:

lr_finder(input, target, batch_size=256, lr_min=0.0001, lr_max=1.0, lr_range=(1e-07, 10.0), n_steps=100, tolerance=inf, callbacks=None, verbose=False, num_workers=0, shuffle=True, **kwargs) method of pycox.models.cox.CoxPH instance



In [85]:
# setting the new number
model.optimizer.set_lr(lrfinder.get_best_lr())
model.optimizer.param_groups[0]['lr']

0.0001

In [88]:
callbacks = [tt.callbacks.EarlyStopping()]
verbose = True

log = model.fit(train[0], train[1], params['batch_size'], 2, callbacks, verbose,
                val_data=val, val_batch_size=params['batch_size'])

0:	[0s / 0s],		train_loss: 2.5088,	val_loss: 11.4820
1:	[0s / 0s],		train_loss: 2.4612,	val_loss: 12.4909


In [89]:
logs_df = log.to_pandas().reset_index().melt(
    id_vars="index", value_name="loss", var_name="dataset").reset_index()

px.line(logs_df, y="loss", x="index", color="dataset", width=800, height = 400)

In [91]:
model.partial_log_likelihood(*val).mean()

-25.007416

In [92]:
_ = model.compute_baseline_hazards()
surv = model.predict_surv_df(test[0])

In [93]:
N = 3
surv_df = surv[np.random.choice(surv.columns, N)]\
    .reset_index().melt(id_vars="duration", var_name="patient_id", value_name="S")
px.line(surv_df, x="duration", y="S", color="patient_id", width=800, height = 400)

In [95]:
ev = EvalSurv(surv, test[1][0], test[1][1], censor_surv='km')

In [97]:
test[1][0].shape[0]

2800

In [100]:
test[0][0].shape

(48,)

In [98]:
surv.shape[1]

4000

In [96]:
ev.concordance_td()

AssertionError: 

In [157]:
time_grid = np.linspace(durations_test.min(), durations_test.max(), 100)
brier_scores = ev.brier_score(time_grid)

In [158]:
brier_scores_df = pd.DataFrame(brier_scores).reset_index().rename(columns={"index":"duration"})
px.line(brier_scores_df, x="duration", y="brier_score", width=800, height = 400)

In [159]:
time_grid = np.arange(1, 91)

In [160]:
ev.integrated_brier_score(time_grid)

0.06015295918635839

In [161]:
ev.integrated_nbll(time_grid)

0.22598241274956873

In [162]:
help(ev.integrated_nbll)

Help on method integrated_nbll in module pycox.evaluation.eval_surv:

integrated_nbll(time_grid, max_weight=inf) method of pycox.evaluation.eval_surv.EvalSurv instance
    Integrated negative binomial log-likelihood weighted by the inverse censoring distribution.
    Essentially an integral over values obtained from `nbll(time_grid, max_weight)`.
    
    Arguments:
        time_grid {np.array} -- Durations where the brier score should be calculated.
    
    Keyword Arguments:
        max_weight {float} -- Max weight value (max number of individuals an individual
            can represent (default {np.inf}).

