In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

data = pd.read_csv("/kaggle/input/breast-test/res_data1.csv")
train, test = train_test_split(data, test_size=0.2)


train["flag"] = 1
test["flag"] = 0


data = pd.concat([train, test])


features_cat = ["hualiao","xiluoda","fangliao","neifenmi","baxiang","fufa","yuanchuzhuanyi","zuzhixue_leixing",
                "T_fenqi","fenhua_xuhao","linbajie_zhuangtai","N_fenqi","TNM_fenqi","012/3","HR","HER-2",
                "Ki67","CK56","EGFR","fenxing_xuhao","shoushu_leibie","OP4"]
features_con = ["mm","LN","CCI_score","age_score","age-CCI_score"]

df_dummy = pd.get_dummies(data[features_cat])
data = pd.concat([data, df_dummy], axis = 1)

train = data[data["flag"] == 1]
test = data[data["flag"] == 0]


features = df_dummy.columns.to_list() + features_con
train_sel = train[["OS_month", "siwang"] + features]
test_sel = test[["OS_month", "siwang"] + features]
train_sel.to_csv("/kaggle/working/data_train.csv", index = False)
test_sel.to_csv("/kaggle/working/data_test.csv", index = False)

In [2]:
pip install pysurvival

Collecting pysurvival
  Downloading pysurvival-0.1.2.tar.gz (4.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting progressbar
  Downloading progressbar-2.5.tar.gz (10 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: pysurvival, progressbar
  Building wheel for pysurvival (setup.py) ... [?25ldone
[?25h  Created wheel for pysurvival: filename=pysurvival-0.1.2-cp37-cp37m-linux_x86_64.whl size=5331553 sha256=5e4bb44a37a46f03eed95f7f234da40d233b6fde5d3288ef1d558820a0b99995
  Stored in directory: /root/.cache/pip/wheels/1a/63/e2/32273d765a4e2f4ccac69c8adf97425ca80bab5d0c8447f120
  Building wheel for progressbar (setup.py) ... [?25ldone
[?25h  Created wheel for progressbar: filename=progressbar-2.5-py3-none-any.whl size=12082 sha256=d8ee1cf280c5fb64acc6c5d3dc63a2022b3e92056608c5e33a860d82f971c6

In [6]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from pysurvival.models.simulations import SimulationModel
from pysurvival.models.semi_parametric import NonLinearCoxPHModel
from pysurvival.utils.metrics import concordance_index
from pysurvival.utils.metrics import brier_score
# from pysurvival.utils.display import integrated_brier_score
from pysurvival.utils.display import display_loss_values



train = pd.read_csv("/kaggle/working/data_train.csv")
test = pd.read_csv("/kaggle/working/data_test.csv")
train["flag"] = 1
test["flag"] = 0


data = pd.concat([train, test])
data["OS_month"] = (data["OS_month"] == 4).astype(int)


features_cat = ["hualiao", "xiluoda", "fangliao", "zuzhixue_leixing", "TNM_fenqi", "baxiang", "CCI_score", "OP4", "neifenmi"]
features_con = ["age_score", "linbajie_zhuangtai"]


df_dummy = pd.get_dummies(data[features_cat])
data = pd.concat([data, df_dummy], axis = 1)



train = data[data["flag"] == 1]
test = data[data["flag"] == 0]


features = df_dummy.columns.to_list() + features_con

# Creating the X, T and E input
X_train, X_test = train[features].values, test[features].values
T_train, T_test = train['siwang'].values, test['siwang'].values
E_train, E_test = train['OS_month'].values, test['OS_month'].values


list_structure = [[{'activation': 'Sigmoid', 'num_units': 24}, 
                   {'activation': 'Sigmoid', 'num_units': 16}, 
                   {'activation': 'Sigmoid', 'num_units': 8},],
                 [{'activation': 'Sigmoid', 'num_units': 10}, 
                   {'activation': 'Sigmoid', 'num_units': 8}, 
                   {'activation': 'Sigmoid', 'num_units': 6},],
                 [{'activation': 'Sigmoid', 'num_units': 8}, 
                   {'activation': 'Sigmoid', 'num_units': 4}, 
                   {'activation': 'Sigmoid', 'num_units': 2},],
                 [{'activation': 'Sigmoid', 'num_units': 36}, 
                   {'activation': 'Sigmoid', 'num_units': 6},],
                 [{'activation': 'Sigmoid', 'num_units': 12}, 
                   {'activation': 'Sigmoid', 'num_units': 6},],
                 [{'activation': 'Sigmoid', 'num_units': 8}, 
                   {'activation': 'Sigmoid', 'num_units': 4},],
                 [{'activation': 'Sigmoid', 'num_units': 12}, 
                   {'activation': 'Sigmoid', 'num_units': 4},],]


#### 4 - Creating an instance of the NonLinear CoxPH model and fitting the data.
list_lr = [0.1, 0.01, 0.001, 0.0001]
list_num_epochs = [500, 1000, 1500]
list_optimizer = ["adadelta", "adagrad", "adam", "adamax", "rmsprop", "sgd"]



parameters = []
for structure in list_structure:
    for lr in list_lr:
        for num_epochs in list_num_epochs:
            for optimizer in list_optimizer:
                parameters.append([structure, lr, num_epochs, optimizer])


deepsurv_cv_results = pd.DataFrame(parameters)
list_cindex = []
kf = KFold(n_splits = 5)



for parameter in parameters:
    structure = parameter[0]
    lr = parameter[1]
    num_epochs = parameter[2]
    optimizer = parameter[3]
    
    cindexes = []
    for train_index, test_index in kf.split(train):
        X_tr, X_val = X_train[train_index], X_train[test_index]
        T_tr, T_val = T_train[train_index], T_train[test_index]
        E_tr, E_val = E_train[train_index], E_train[test_index]
        
        # Building the model
        nonlinear_coxph = NonLinearCoxPHModel(structure = structure)
        nonlinear_coxph.fit(X_tr, T_tr, E_tr, l2_reg = 0, batch_normalization = False,
                            verbose = True, 
                            lr = lr, num_epochs = num_epochs, optimizer = optimizer,
                            dropout = 0.)
        
        #### 5 - Cross Validation / Model Performances
        c_index = concordance_index(nonlinear_coxph, X_val, T_val, E_val)

        cindexes.append(c_index)
    list_cindex.append(np.mean(cindexes))
    print(parameter, np.mean(cindexes))


deepsurv_cv_results["cindex"] = list_cindex
deepsurv_cv_results.to_csv("/kaggle/working/deepsurv_cv_results.csv", index = False)



def integrated_brier_score(model, X, T, E, t_max=None, use_mean_point=True):
    """ The Integrated Brier Score (IBS) provides an overall calculation of 
        the model performance at all available times.
    """

    # Computing the brier scores
    times, brier_scores = brier_score(model, X, T, E, t_max, use_mean_point)

    # Getting the proper value of t_max
    if t_max is None:
        t_max = max(times)
    else:
        t_max = min(t_max, max(times))

    # Computing the IBS
    ibs_value = np.trapz(brier_scores, times)/t_max 

    return ibs_value


deepsurv_cv_results = pd.read_csv("/kaggle/working/deepsurv_cv_results.csv")
print(deepsurv_cv_results["cindex"].values.max())
ind_best = deepsurv_cv_results["cindex"].values.argmax()
structure = deepsurv_cv_results.iloc[ind_best, 0]
lr = deepsurv_cv_results.iloc[ind_best, 1]
num_epochs = deepsurv_cv_results.iloc[ind_best, 2]
optimizer = deepsurv_cv_results.iloc[ind_best, 3]


# Building the model
nonlinear_coxph = NonLinearCoxPHModel(structure = eval(structure))
nonlinear_coxph.fit(X_train, T_train, E_train, l2_reg = 0, batch_normalization = False,
                    verbose = True, 
                    lr = lr, num_epochs = num_epochs, optimizer = optimizer,
                    dropout = 0.)

#### 5 - Cross Validation / Model Performances
c_index = concordance_index(nonlinear_coxph, X_test, T_test, E_test)
print('C-index: {:.4f}'.format(c_index))

ibs = integrated_brier_score(nonlinear_coxph, X_test, T_test, E_test)
print('IBS: {:.4f}'.format(ibs))


def bootstrap_replicate_1d(data):
    bs_sample = np.random.choice(data,len(data))
    return bs_sample


bootstrap_R = 100
c_indexes = []
ibss = []


for i in range(bootstrap_R):
    print(i)
    train_bs_idx = bootstrap_replicate_1d(np.array(range(train.shape[0])))
    train_bs = train.iloc[train_bs_idx, ]
    # Creating the X, T and E input
    X_train = train_bs[features].values
    T_train = train_bs['siwang'].values
    E_train = train_bs['OS_month'].values
    
    # Building the model
    nonlinear_coxph = NonLinearCoxPHModel(structure = eval(structure))
    nonlinear_coxph.fit(X_train, T_train, E_train, l2_reg = 0, batch_normalization = False,
                        verbose = True, 
                        lr = lr, num_epochs = num_epochs, optimizer = optimizer,
                        dropout = 0.)

    #### 5 - Cross Validation / Model Performances
    c_index = concordance_index(nonlinear_coxph, X_test, T_test, E_test)
    c_indexes.append(np.round(c_index, 4))

    ibs = integrated_brier_score(nonlinear_coxph, X_test, T_test, E_test)
    ibss.append(np.round(ibs, 4))


pd.DataFrame(data = {"cindex": c_indexes, "ibs": ibss}).to_csv("/kaggle/working/results.ci.deepsurv.csv", index=False)

# Compute the 95% confidence interval: conf_int
mean_cindex = np.mean(c_indexes)
mean_ibs = np.mean(ibss)

# Print the mean
print('mean cindex =', mean_cindex)
print('mean ibs =', mean_ibs)


ci_cindex = np.percentile(c_indexes, [2.5, 97.5])
ci_ibs = np.percentile(ibss, [2.5, 97.5])
 
# Print the confidence interval
print('confidence interval =', ci_cindex)
print('confidence interval =', ci_ibs)

AttributeError: The time axis needs to be created before using the method get_time_buckets.