In [1]:
import numpy as np
import config as cfg
from data_loader import get_data_loader
from utility.config import load_config
import random
import torch

# Setup precision: use float64 as torch's default floating-point dtype
dtype = torch.float64
torch.set_default_dtype(dtype)

# Reproducibility: seed every RNG used here from one constant so the three
# generators cannot accidentally end up with different seeds.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)
random.seed(SEED)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load single event synthetic
dataset_name = "synthetic_se"
# Build the config filename from dataset_name so the two cannot drift apart
# (the original used an f-string with no placeholder and repeated the name).
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"{dataset_name}.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_tau=0.25, linear=False)
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Report how often each distinct value of y_e occurs, as a count and a percentage
unique_values, counts = np.unique(y_e, return_counts=True)
total_count = len(y_e)
for value, count in zip(unique_values, counts):
    percentage = (count / total_count)*100
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

In [None]:
# Load competing risks synthetic
dataset_name = "synthetic_cr"
# Build the config filename from dataset_name so the two cannot drift apart
# (the original used an f-string with no placeholder and repeated the name).
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"{dataset_name}.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_tau=0.25, linear=False)
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Report how often each distinct value of y_e occurs, as a count and a percentage
unique_values, counts = np.unique(y_e, return_counts=True)
total_count = len(y_e)
for value, count in zip(unique_values, counts):
    percentage = (count / total_count)*100
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

In [None]:
# Load multi event synthetic
dataset_name = "synthetic_me"
# Build the config filename from dataset_name so the two cannot drift apart
# (the original used an f-string with no placeholder and repeated the name).
data_config = load_config(cfg.DGP_CONFIGS_DIR, f"{dataset_name}.yaml")
dl = get_data_loader(dataset_name).load_data(data_config, k_taus=[0.25, 0.25, 0.25], linear=False)
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# For each of the first three columns of y_e, report how often each distinct
# value occurs (count and percentage), with a blank line between columns.
# NOTE(review): 3 matches the number of k_taus entries; confirm it equals y_e.shape[1].
for i in range(3):
    event_column = y_e[:, i]
    unique_values, counts = np.unique(event_column, return_counts=True)
    total_count = len(event_column)
    for value, count in zip(unique_values, counts):
        percentage = (count / total_count)*100
        print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")
    print()

In [2]:
# Load PRO-ACT dataset
dataset_name = "als_me"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Walk over the first four columns of y_e and print, per column, the count
# and percentage of every distinct value, followed by a blank separator line.
for i in range(4):
    event_column = y_e[:, i]
    unique_values, counts = np.unique(event_column, return_counts=True)
    total_count = len(event_column)
    percentages = (counts / total_count) * 100
    for value, count, percentage in zip(unique_values, counts, percentages):
        print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")
    print()

Value: False, Count: 1754, Percentage: 45.38%
Value: True, Count: 2111, Percentage: 54.62%

Value: False, Count: 1963, Percentage: 50.79%
Value: True, Count: 1902, Percentage: 49.21%

Value: False, Count: 1535, Percentage: 39.72%
Value: True, Count: 2330, Percentage: 60.28%

Value: False, Count: 965, Percentage: 24.97%
Value: True, Count: 2900, Percentage: 75.03%



In [3]:
# Display the feature table X as the cell's output.
# NOTE(review): X is whatever the most recently executed load cell assigned;
# the saved output's columns (SOO, FVC_*, *_Strength) look like the PRO-ACT
# load, but execution counts here are non-sequential — confirm after
# Restart & Run All.
X

Unnamed: 0,SOO,Diagnosis_Delta,Subject_used_Riluzole,El_escorial,FVC_Min,FVC_Max,FVC_Mean,Handgrip_Strength,ANKLE_Strength,ELBOW_Strength,FIRST_DORSAL_INTEROSSEOUS_OF_THE_HAND_Strength,HIP_FLEXOR_Strength,KNEE_Strength,SHOULDER_Strength,WRIST_Strength
3,Limb,507.0,No,Probable Laboratory Supported,0.84,1.10,0.950000,,,,,,,,
6,Other,49.0,Yes,Probable Laboratory Supported,2.36,2.54,2.446667,,,,,,,,
8,Limb,,Yes,,4.00,4.00,4.000000,,,,,,,,
11,Limb,511.0,,Definite,2.17,3.62,3.110000,20.0,10.725000,19.3625,0.300000,21.750,25.875,5.75,7.500
13,Limb,633.0,No,,0.22,0.89,0.630000,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7678,Limb,67.0,Yes,Definite,,,,48.5,7.460000,40.1750,6.950000,37.800,66.040,,31.060
7679,Limb,125.0,,Probable,,,,23.6,53.866667,31.6200,2.866667,40.450,59.575,,27.725
7680,Limb,351.0,,Probable,,,,8.9,0.000000,3.5000,0.000000,0.000,2.750,,4.375
7681,Limb,35.0,,Definite,,,,117.9,20.525000,34.7000,6.425000,42.775,46.725,,34.280


In [3]:
# Load SEER (SE) dataset
dataset_name = "seer_se"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value in y_e and print its count and share of all rows
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count) * 100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 10492, Percentage: 54.52%
Value: 1, Count: 8754, Percentage: 45.48%


In [4]:
# Load SEER (CR) dataset
dataset_name = "seer_cr"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value in y_e and print its count and share of all rows
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count) * 100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 8103, Percentage: 42.10%
Value: 1, Count: 2389, Percentage: 12.41%
Value: 2, Count: 8754, Percentage: 45.48%


In [5]:
# Load Rotterdam (CR) dataset
dataset_name = "rotterdam_cr"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value in y_e and print its count and share of all rows
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count) * 100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 1269, Percentage: 42.56%
Value: 1, Count: 195, Percentage: 6.54%
Value: 2, Count: 1518, Percentage: 50.91%


In [2]:
# Load MIMIC-IV (SE) dataset
dataset_name = "mimic_se"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value in y_e and print its count and share of all rows
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count) * 100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: False, Count: 15390, Percentage: 62.78%
Value: True, Count: 9126, Percentage: 37.22%


In [None]:
# Load MIMIC-IV (ME) dataset
dataset_name = "mimic_me"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Walk over the first three columns of y_e and print, per column, the count
# and percentage of every distinct value, followed by a blank separator line.
for i in range(3):
    event_column = y_e[:, i]
    unique_values, counts = np.unique(event_column, return_counts=True)
    total_count = len(event_column)
    percentages = (counts / total_count) * 100
    for value, count, percentage in zip(unique_values, counts, percentages):
        print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")
    print()

Value: 0.0, Count: 19718, Percentage: 75.16%
Value: 1.0, Count: 6518, Percentage: 24.84%

Value: 0.0, Count: 20091, Percentage: 76.58%
Value: 1.0, Count: 6145, Percentage: 23.42%

Value: 0.0, Count: 15647, Percentage: 59.64%
Value: 1.0, Count: 10589, Percentage: 40.36%



In [3]:
# Load MIMIC-IV (CR) dataset
dataset_name = "mimic_cr"
dl = get_data_loader(dataset_name).load_data()
num_features, cat_features = dl.get_features()
X, y_t, y_e = dl.get_data()

# Tally each distinct value in y_e and print its count and share of all rows
total_count = len(y_e)
unique_values, counts = np.unique(y_e, return_counts=True)
percentages = (counts / total_count) * 100
for value, count, percentage in zip(unique_values, counts, percentages):
    print(f"Value: {value}, Count: {count}, Percentage: {percentage:.2f}%")

Value: 0, Count: 22076, Percentage: 90.05%
Value: 1, Count: 924, Percentage: 3.77%
Value: 2, Count: 1516, Percentage: 6.18%
