In [48]:
import numpy as np
import tensorflow as tf
import random
import pandas as pd

from sklearn.model_selection import train_test_split
from sksurv.metrics import concordance_index_censored, concordance_index_ipcw
from sksurv.metrics import integrated_brier_score
from utility.survival import convert_to_structured
from utility.training import get_data_loader, scale_data, make_time_event_split
from tools.model_builder import make_cox_model, make_coxnet_model, make_rsf_model
from utility.risk import _make_riskset
from utility.loss import CoxPHLoss
from pathlib import Path
import paths as pt
import joblib
import os
from time import time
from utility.config import load_config
from tools.preprocessor import Preprocessor

# Dataset names (presumably the values accepted by get_data_loader — TODO confirm):
# "SUPPORT", "SEER", "GBSG2", "WHAS500", "FLCHAIN", "METABRIC"

In [49]:
# Build the loader for the "FLCHAIN" dataset, then let it load its data.
dl = get_data_loader("FLCHAIN")
dl = dl.load_data()

# Unpack the two values returned by get_data() and the two feature groups
# returned by get_features().
(X, y) = dl.get_data()
(num_features, cat_features) = dl.get_features()

In [50]:
# Summarise how many records have the event flag set (uncensored) versus
# not set (censored), as counts and as percentages.
obs_arr = np.array(y['event'])
n_samples = obs_arr.shape[0]
n_uncensored = obs_arr.sum()
n_censored = n_samples - n_uncensored
print(f"Number of samples: {len(y)}")
print(f"Number of uncensored/censored: {n_uncensored}/{n_censored}")

# Fraction of censored records, rounded to two decimals (kept as in the
# original so any later use of `pct_censored` sees the same value).
pct_censored = round(n_censored / n_samples, 2)

# Print with a fixed one-decimal format: multiplying the rounded fraction by
# 100 (or subtracting it from 1 first) can expose binary floating-point error,
# e.g. 28.000000000000004 instead of 28.0.
print(f"{pct_censored * 100:.1f} % of records are censored")
print(f"{(1 - pct_censored) * 100:.1f} % of records are uncensored\n")

Number of samples: 7871
Number of uncensored/censored: 2166/5705
72.0 % of records are censored
28.000000000000004 % of records are uncensored



In [51]:
# Show the size of each feature group, one count per line
# (num_features first, then cat_features).
for feature_group in (num_features, cat_features):
    print(len(feature_group))

4
5


In [52]:
# Times of the records whose 'event' flag equals True; the selection is done
# once and reused for all three summary statistics.
event_times = y[(y['event'] == True)]['time']
print(event_times.min())
print(event_times.max())
print(event_times.mean())

1.0
4998.0
2177.5480147737767


In [53]:
# Times of the records whose 'event' flag equals False; print the minimum,
# maximum and mean in that order.
censored_times = y[(y['event'] == False)]['time']
for stat_name in ("min", "max", "mean"):
    print(getattr(censored_times, stat_name)())

1.0
5215.0
4226.201226993865
