In [1]:
import numpy as np
import pandas as pd
import os
import pickle
import matplotlib.pyplot as plt
# import seaborn as sns
from scipy import stats
from tqdm import tqdm
import datetime
from datetime import timedelta

from scipy.integrate import trapezoid

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [3]:
from sksurv.metrics import cumulative_dynamic_auc

## Real data: train RSF on the real cohort, evaluate on the held-out real test set

In [4]:
# Load the real cohort; later cells read its 'DEAD' and 'DIFF' columns as the
# survival outcome and treat every other column as a feature.
final = pd.read_csv(filepath_or_buffer='./final_real_paper.csv')

In [5]:
# Rebind rather than mutate in place so that re-running this cell is idempotent.
final = final.reset_index(drop=True)

# 70/30 train/test split, stratified on the 'DEAD' column so both partitions
# keep the same event proportion. `train_test_split` is already imported in the
# imports cell near the top of the notebook, so it is not re-imported here.
# (Original trailing note was "0.3, 1000" - presumably alternative settings
# that were tried; confirm before relying on it.)
data_train, data_test = train_test_split(
    final,
    test_size=0.3,
    random_state=42,
    stratify=final['DEAD'],
)

In [6]:
# Feature matrices: every column except the survival outcome ('DIFF', 'DEAD').
X_train = data_train.drop(columns=['DIFF', 'DEAD'])
X_test = data_test.drop(columns=['DIFF', 'DEAD'])

In [7]:
def _to_survival_array(frame):
    """Convert the outcome columns of ``frame`` into a structured array.

    The result is what this notebook passes to the sksurv estimator and
    metrics as the survival outcome.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain a 'DEAD' column and a 'DIFF' column.

    Returns
    -------
    numpy.ndarray
        One record per row of ``frame`` (same order) with the fields
        ``('DEAD', bool)`` and ``('DIFF', int)``.

    Raises
    ------
    ValueError
        If 'DEAD' or 'DIFF' contains missing values. Without this guard a
        missing 'DEAD' would be silently coerced to ``True``.
    """
    outcome = frame[['DEAD', 'DIFF']]
    if outcome.isna().any().any():
        raise ValueError("'DEAD' and 'DIFF' must not contain missing values")

    # NOTE(review): the int field truncates fractional values - this assumes
    # 'DIFF' holds whole numbers; confirm against the data.
    records = np.empty(len(outcome), dtype=[('DEAD', bool), ('DIFF', int)])
    records['DEAD'] = outcome['DEAD'].to_numpy()
    records['DIFF'] = outcome['DIFF'].to_numpy()
    return records


Y_train = _to_survival_array(data_train)
Y_test = _to_survival_array(data_test)

# Keep separate copies of the test features/outcomes; later cells evaluate
# against these *_baseline names.
X_test_baseline, Y_test_baseline = X_test.copy(), Y_test.copy()

In [8]:
from sksurv.ensemble import RandomSurvivalForest

# Random survival forest with a fixed seed so the fit is repeatable.
rsf = RandomSurvivalForest(
    n_estimators=120,
    max_depth=8,
    min_samples_leaf=4,
    max_features=3,
    random_state=42,
)
rsf.fit(X_train, Y_train)

# Concordance index (c-index) on the held-out test set; displayed as cell output.
rsf.score(X_test_baseline, Y_test_baseline)

0.7765405472448269

In [9]:
# Same test-set c-index as above, rounded to four decimals for reporting.
np.round(rsf.score(X_test_baseline, Y_test_baseline), decimals=4)

0.7765

### Brier score

In [10]:
from sksurv.metrics import brier_score, integrated_brier_score

# Restrict the test set to subjects whose observed time lies strictly between
# the earliest and the latest event time seen in the test set, so the Brier
# score can be evaluated on them.
event_times = Y_test_baseline["DIFF"][Y_test_baseline["DEAD"] == 1]
before_last_event = Y_test_baseline["DIFF"] < event_times.max()
after_first_event = Y_test_baseline["DIFF"] > event_times.min()
mask = before_last_event & after_first_event

# Evaluation grid: the distinct values among 100 evenly spaced percentiles
# (1st to 99th) of the retained observed times.
# (Original note listed ".1, 99.9, 101" as an alternative grid.)
percentile_grid = np.linspace(1, 99, 100)
times = np.unique(np.percentile(Y_test_baseline[mask]["DIFF"], percentile_grid))

# Predicted survival curves for the retained subjects, sampled on the grid.
survs_rsf = rsf.predict_survival_function(X_test_baseline[mask])
preds_rsf = [surv_fn(times) for surv_fn in survs_rsf]

# brier_rsf is the pair returned by brier_score; index 1 is read downstream.
brier_rsf = brier_score(Y_train, Y_test_baseline[mask], preds_rsf, times)

In [11]:
# Tabulate the per-time Brier scores of the real-data model, one row per grid time.
brier_real = pd.DataFrame(
    dict(time=times, brier_score=brier_rsf[1], label="RSF")
)

In [12]:
# Brier score: unweighted mean over the evaluation grid, rounded to four decimals.
np.round(brier_real["brier_score"].mean(), decimals=4)

0.0975

In [13]:
# IBS: integrated Brier score over the same grid and the same masked test subjects.
integrated_brier_score(
    survival_train=Y_train,
    survival_test=Y_test_baseline[mask],
    estimate=preds_rsf,
    times=times,
)

0.13767781677067814

In [14]:
# Risk scores for the masked test subjects, then the cumulative/dynamic AUC on
# the evaluation grid; the call returns the per-time AUC values and their mean.
rsf_risk_scores = rsf.predict(X_test_baseline[mask])
rsf_auc_real, rsf_mean_auc = cumulative_dynamic_auc(
    survival_train=Y_train,
    survival_test=Y_test_baseline[mask],
    estimate=rsf_risk_scores,
    times=times,
)

## Synthetic data: train RSF on the synthetic cohort, evaluate on the same real test set

In [16]:
# Load the synthetic cohort. NOTE: this rebinds `final`, replacing the
# real-data frame that was loaded earlier in the notebook.
final = pd.read_csv(filepath_or_buffer='./final_syn_paper_55000.csv')

In [17]:
# Every synthetic row is used for training (no split); this rebinds X_train.
X_train = final.drop(['DIFF', 'DEAD'], axis='columns')

In [18]:
# Structured outcome array for the synthetic cohort, with the fields
# ('DEAD', bool) and ('DIFF', int). Built column-wise instead of looping over
# every row in Python. This rebinds Y_train.
if final[['DEAD', 'DIFF']].isna().any().any():
    # Without this guard a missing 'DEAD' would be silently coerced to True.
    raise ValueError("'DEAD' and 'DIFF' must not contain missing values")

# NOTE(review): the int field truncates fractional values - this assumes
# 'DIFF' holds whole numbers; confirm against the data.
Y_train = np.empty(len(final), dtype=[('DEAD', bool), ('DIFF', int)])
Y_train['DEAD'] = final['DEAD'].to_numpy()
Y_train['DIFF'] = final['DIFF'].to_numpy()




In [19]:
from sksurv.ensemble import RandomSurvivalForest

# Same forest configuration and seed as the real-data model, now fitted on the
# synthetic training data. This rebinds `rsf`.
rsf = RandomSurvivalForest(
    n_estimators=120,
    max_depth=8,
    min_samples_leaf=4,
    max_features=3,
    random_state=42,
)
rsf.fit(X_train, Y_train)

# c-index of the synthetic-trained model on the real held-out test set.
rsf.score(X_test_baseline, Y_test_baseline)

0.7415704865393759

In [20]:
# Test-set c-index of the synthetic-trained model, rounded to four decimals.
np.round(rsf.score(X_test_baseline, Y_test_baseline), decimals=4)

0.7416

### Brier score

In [21]:
from sksurv.metrics import brier_score, integrated_brier_score

# Keep only test subjects observed strictly after the first and strictly
# before the last event time in the test set, so the Brier score can be
# evaluated on them.
test_times = Y_test_baseline["DIFF"]
test_event_times = test_times[Y_test_baseline["DEAD"] == 1]
mask = (test_times < test_event_times.max()) & (test_times > test_event_times.min())

# Evaluation grid: distinct values among 100 evenly spaced percentiles
# (1st to 99th) of the retained observed times.
grid_percentiles = np.linspace(1, 99, 100)
times = np.unique(np.percentile(Y_test_baseline[mask]["DIFF"], grid_percentiles))

# Survival curves predicted by the current `rsf`, sampled on the grid.
survs_rsf = rsf.predict_survival_function(X_test_baseline[mask])
preds_rsf = [curve(times) for curve in survs_rsf]

# NOTE(review): Y_train is whatever the preceding cells left in scope - at this
# point the synthetic outcome array - so the training-side argument of
# brier_score comes from synthetic data; confirm that is intended.
brier_rsf = brier_score(Y_train, Y_test_baseline[mask], preds_rsf, times)

In [22]:
# Tabulate the per-time Brier scores of the synthetic-trained model.
brier_syn = pd.DataFrame(
    dict(time=times, brier_score=brier_rsf[1], label="RSF")
)

In [23]:
# Brier score: unweighted mean over the evaluation grid, rounded to four decimals.
np.round(brier_syn["brier_score"].mean(), decimals=4)

0.0749

In [24]:
# IBS: integrated Brier score of the synthetic-trained model over the same grid.
integrated_brier_score(
    survival_train=Y_train,
    survival_test=Y_test_baseline[mask],
    estimate=preds_rsf,
    times=times,
)

0.10654755985451095