In [7]:
import numpy as np
import config as cfg
from data_loader import get_data_loader
from utility.config import load_config
import random
import torch

# Seed every random number generator used in this notebook with the same
# value so that repeated runs start from identical RNG state.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)

# Single-event synthetic dataset: load it through the project data loader,
# then print how often each distinct value occurs in y_e.
dataset_name = "synthetic_se"
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"synthetic.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_tau=0.25)
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# One output line per distinct value: the value, its count, and its share of
# all entries in y_e as a percentage with two decimals.
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
for idx, value in enumerate(unique_values):
    count = counts[idx]
    percentage = (count / total_count)*100
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 5747, Percentage: 57.47%
Value: 1, Count: 4253, Percentage: 42.53%


In [8]:
# Competing-risks synthetic dataset: load it through the project data loader,
# then print the frequency of each distinct value in y_e.
dataset_name = "synthetic_cr"
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"synthetic.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_tau=0.25)
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

unique_values, counts = np.unique(y_e, return_counts=True)
total_count = len(y_e)
# Compute all shares in one vectorised step; each element equals the
# per-value (count / total_count)*100.
percentages = (counts / total_count)*100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 1700, Percentage: 17.00%
Value: 1, Count: 2791, Percentage: 27.91%
Value: 2, Count: 5509, Percentage: 55.09%


In [13]:
# Multi-event synthetic dataset: load it through the project data loader,
# then print the value frequencies of the first three columns of y_e.
dataset_name = "synthetic_me"
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"synthetic.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_taus=[0.25, 0.25, 0.25])
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

for event_idx in range(3):
    # Work on one column of y_e at a time.
    event_column = y_e[:,event_idx]
    total_count = len(event_column)
    unique_values, counts = np.unique(event_column, return_counts=True)
    for position, value in enumerate(unique_values):
        count = counts[position]
        percentage = (count / total_count)*100
        print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")
    # Blank line between the summaries of consecutive columns.
    print()

Value: 0, Count: 4166, Percentage: 41.66%
Value: 1, Count: 5834, Percentage: 58.34%

Value: 0, Count: 2758, Percentage: 27.58%
Value: 1, Count: 7242, Percentage: 72.42%

Value: 0, Count: 1311, Percentage: 13.11%
Value: 1, Count: 8689, Percentage: 86.89%



In [15]:
# PRO-ACT dataset ("als" loader): load it with the loader's default
# arguments, then print the value frequencies of the first four columns of y_e.
dataset_name = "als"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

for column_idx in range(4):
    column_values = y_e[:,column_idx]
    unique_values, counts = np.unique(column_values, return_counts=True)
    total_count = len(column_values)
    # Shares for every distinct value of this column, computed in one step.
    percentages = (counts / total_count)*100
    for value, count, percentage in zip(unique_values, counts, percentages):
        print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")
    # Blank line separates one column's summary from the next.
    print()

Value: False, Count: 462, Percentage: 47.09%
Value: True, Count: 519, Percentage: 52.91%

Value: False, Count: 527, Percentage: 53.72%
Value: True, Count: 454, Percentage: 46.28%

Value: False, Count: 384, Percentage: 39.14%
Value: True, Count: 597, Percentage: 60.86%

Value: False, Count: 225, Percentage: 22.94%
Value: True, Count: 756, Percentage: 77.06%



In [4]:
# Summarise censoring in `y`: number of samples, uncensored/censored counts,
# and the rounded share of each.
# NOTE(review): `y` is not assigned in any visible cell (the recorded run of
# this cell failed with NameError) — confirm which cell is meant to define it.
# It is indexed with 'event' here, so presumably it is a structured array or a
# DataFrame with an 'event' field.
obs_arr = np.array(y['event'])
# The sum of the 'event' entries is reported as the uncensored count below;
# the remainder is the censored count.
n_censored = obs_arr.shape[0] - obs_arr.sum()
print(f"Number of samples: {len(y)}")
print(f"Number of uncensored/censored: {obs_arr.shape[0]-n_censored}/{n_censored}")
pct_censored = round(n_censored / obs_arr.shape[0], 2)
# Use an explicit one-decimal format: multiplying the rounded fraction by 100
# and printing the raw float can show artefacts such as 56.99999999999999 or
# 43.00000000000001 instead of 57.0 / 43.0.
print(f"{pct_censored * 100:.1f} % of records are censored")
print(f"{(1 - pct_censored) * 100:.1f} % of records are uncensored\n")

NameError: name 'y' is not defined

In [None]:
# Number of numerical features, then number of categorical features, one per line.
print(len(num_features), len(cat_features), sep="\n")

14
0


In [None]:
# Minimum, maximum and mean of 'time' over the rows of y whose 'event' is True.
event_times = y[(y['event'] == True)]['time']
print(event_times.min())
print(event_times.max())
print(event_times.mean())

3
1944
205.4536116633532


In [None]:
# Minimum, maximum and mean of 'time' over the rows of y whose 'event' is False.
no_event_mask = (y['event'] == False)
no_event_times = y[no_event_mask]['time']
for summarise in (no_event_times.min, no_event_times.max, no_event_times.mean):
    print(summarise())

344
2029
1059.856891082129


In [None]:
# Show `df` as this cell's output (bare last expression, so it is rendered by
# the notebook rather than printed).
# NOTE(review): `df` is not assigned in any visible cell — confirm which cell
# defines it before relying on Restart & Run All.
df

Unnamed: 0,SOFA,abs_lymphocytes_max,abs_lymphocytes_min,age,aniongap_max,aniongap_min,aniongap_range,bicarbonate_max,bicarbonate_min,bicarbonate_range,...,temperature_max,temperature_mean,temperature_min,temperature_range,wbc_max,wbc_min,wbc_range,weight,time,event
0,0,1.2800,1.1570,55,16.0,15.0,1.0,23.0,22.0,1.0,...,38.22,37.255000,36.72,1.50,19.0,15.7,3.3,71.20,1,False
1,1,0.7310,0.7310,46,16.0,14.0,2.0,24.0,24.0,0.0,...,36.83,36.556000,36.39,0.44,20.1,17.0,3.1,72.35,1,False
2,8,0.5500,0.5500,77,11.0,11.0,0.0,33.0,33.0,0.0,...,37.20,36.573684,35.10,2.10,18.4,12.0,6.4,65.00,9,True
3,4,1.2800,1.1570,57,12.0,12.0,0.0,23.0,23.0,0.0,...,37.70,37.242353,36.22,1.48,20.2,17.9,2.3,100.05,1,False
4,5,0.3580,0.3580,82,19.0,13.0,6.0,23.0,22.0,1.0,...,37.33,36.452857,36.06,1.27,17.9,9.5,8.4,48.00,167,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38515,1,1.6362,1.6362,77,20.0,20.0,0.0,22.0,22.0,0.0,...,38.28,36.935000,36.33,1.95,20.2,20.2,0.0,93.60,38,True
38516,9,0.7566,0.7566,42,12.0,10.0,2.0,25.0,24.0,1.0,...,36.50,35.985714,35.56,0.94,8.8,5.5,3.3,68.80,7,True
38517,5,1.2800,1.1570,43,15.0,14.0,1.0,24.0,23.0,1.0,...,37.61,37.266000,36.83,0.78,12.1,9.5,2.6,107.50,7,False
38518,1,1.2800,1.1570,48,14.0,11.0,3.0,17.0,14.0,3.0,...,37.11,36.848571,36.39,0.72,13.1,13.1,0.0,67.90,2,False
