In [2]:
import numpy as np
import config as cfg
from data_loader import get_data_loader
from utility.config import load_config
import random
import torch

# Reproducibility: seed the stdlib, NumPy and PyTorch generators with 0.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

# Precision: use float64 as torch's default floating-point dtype.
dtype = torch.float64
torch.set_default_dtype(dtype)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load single event synthetic
dataset_name = "synthetic_se"
# Derive the config filename from dataset_name rather than repeating the
# literal inside an f-string that has no placeholder (same resulting string).
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"{dataset_name}.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_tau=0.25, linear=False)
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Print each distinct value in y_e with its count and its percentage of len(y_e).
unique_values, counts = np.unique(y_e, return_counts=True)
total_count = len(y_e)
for value, count in zip(unique_values, counts):
    percentage = (count / total_count)*100
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 6671, Percentage: 33.36%
Value: 1, Count: 13329, Percentage: 66.64%


In [3]:
# Load competing risks synthetic
# NOTE(review): the saved output of this cell is `KeyError: 'bl_e1'`. Which call
# performs that lookup is not visible from this notebook; check the
# synthetic_cr config and loader before relying on this cell.
dataset_name = "synthetic_cr"
# Derive the config filename from dataset_name rather than repeating the
# literal inside an f-string that has no placeholder (same resulting string).
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"{dataset_name}.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_tau=0.25, linear=True)
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Print each distinct value in y_e with its count and its percentage of len(y_e).
unique_values, counts = np.unique(y_e, return_counts=True)
total_count = len(y_e)
for value, count in zip(unique_values, counts):
    percentage = (count / total_count)*100
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

KeyError: 'bl_e1'

In [2]:
# Load multi event synthetic
dataset_name = "synthetic_me"
# Derive the config filename from dataset_name rather than repeating the
# literal inside an f-string that has no placeholder (same resulting string).
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"{dataset_name}.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_taus=[0.25, 0.25, 0.25])
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Summarise the first three columns of y_e, one column per printed group.
# NOTE(review): presumably y_e has exactly three columns (one per k_taus entry) - confirm.
for i in range(3):
    unique_values, counts = np.unique(y_e[:,i], return_counts=True)
    total_count = len(y_e[:,i])
    for value, count in zip(unique_values, counts):
        percentage = (count / total_count)*100
        print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")
    print()  # blank line between columns

Value: 0, Count: 2291, Percentage: 11.46%
Value: 1, Count: 17709, Percentage: 88.55%

Value: 0, Count: 2838, Percentage: 14.19%
Value: 1, Count: 17162, Percentage: 85.81%

Value: 0, Count: 1707, Percentage: 8.54%
Value: 1, Count: 18293, Percentage: 91.47%



In [7]:
# Load PRO-ACT dataset
dataset_name = "als_me"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# For each of the first four columns of y_e, print every distinct value with
# its count and its percentage of the column length.
for i in range(4):
    event_column = y_e[:,i]
    total_count = len(event_column)
    unique_values, counts = np.unique(event_column, return_counts=True)
    percentages = (counts / total_count)*100
    for value, count, percentage in zip(unique_values, counts, percentages):
        print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")
    print()  # blank line between columns

Value: False, Count: 462, Percentage: 47.09%
Value: True, Count: 519, Percentage: 52.91%

Value: False, Count: 527, Percentage: 53.72%
Value: True, Count: 454, Percentage: 46.28%

Value: False, Count: 384, Percentage: 39.14%
Value: True, Count: 597, Percentage: 60.86%

Value: False, Count: 225, Percentage: 22.94%
Value: True, Count: 756, Percentage: 77.06%



In [2]:
# Load SEER (SE) dataset
dataset_name = "seer_se"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value found in y_e and print its count and percentage of len(y_e).
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count)*100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 10492, Percentage: 54.52%
Value: 1, Count: 8754, Percentage: 45.48%


In [3]:
# Load SEER (CR) dataset
dataset_name = "seer_cr"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value found in y_e and print its count and percentage of len(y_e).
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count)*100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 8103, Percentage: 42.10%
Value: 1, Count: 2389, Percentage: 12.41%
Value: 2, Count: 8754, Percentage: 45.48%


In [4]:
# Load Rotterdam (CR) dataset
dataset_name = "rotterdam_cr"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value found in y_e and print its count and percentage of len(y_e).
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count)*100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 1269, Percentage: 42.56%
Value: 1, Count: 195, Percentage: 6.54%
Value: 2, Count: 1518, Percentage: 50.91%


In [2]:
# Load MIMIC-IV (SE) dataset
dataset_name = "mimic_se"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value found in y_e and print its count and percentage of len(y_e).
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count)*100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: False, Count: 15647, Percentage: 59.64%
Value: True, Count: 10589, Percentage: 40.36%


In [3]:
# Load MIMIC-IV (ME) dataset
dataset_name = "mimic_me"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# For each of the first three columns of y_e, print every distinct value with
# its count and its percentage of the column length.
for i in range(3):
    event_column = y_e[:,i]
    total_count = len(event_column)
    unique_values, counts = np.unique(event_column, return_counts=True)
    percentages = (counts / total_count)*100
    for value, count, percentage in zip(unique_values, counts, percentages):
        print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")
    print()  # blank line between columns

Value: 0.0, Count: 19718, Percentage: 75.16%
Value: 1.0, Count: 6518, Percentage: 24.84%

Value: 0.0, Count: 20091, Percentage: 76.58%
Value: 1.0, Count: 6145, Percentage: 23.42%

Value: 0.0, Count: 15647, Percentage: 59.64%
Value: 1.0, Count: 10589, Percentage: 40.36%

