### 1. Import libraries

In [186]:
import gc
import json
import os
import math
import multiprocessing
import numpy as np
import pandas as pd
import torch
import importlib
import logging
from pathlib import Path
from sklearn.model_selection import GroupKFold, GroupShuffleSplit

# Pycox and PyTorch tuples for survival analysis
import torchtuples as tt
import pycox
from pycox.preprocessing.label_transforms import LabTransDiscreteTime
from pycox.models import CoxPH, DeepHit
from pycox.evaluation import EvalSurv

# Ray for hyperparameter tuning and distributed processing
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.search.optuna import OptunaSearch
from ray.tune.search import ConcurrencyLimiter
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining
from ray.air import session
import ray.cloudpickle as pickle

# Custom modules for data handling, balancing, training, evaluation, and model architectures
import dataloader2
import databalancer2
import datatrainer2
import modeleval
import netweaver2

# Reload custom modules to ensure latest changes are available
importlib.reload(dataloader2)
importlib.reload(databalancer2)
importlib.reload(datatrainer2)
importlib.reload(modeleval)
importlib.reload(netweaver2)

# Import specific functions from custom modules to keep code clean and readable
from netweaver2 import (
    lstm_net_init, DHANNWrapper, LSTMWrapper, generalized_ann_net_init
)
from dataloader2 import (
    load_and_transform_data, preprocess_data #stack_sequences, dh_dataset_loader
)
from databalancer2 import (
    define_medoid_general, df_event_focus, rebalance_data, underbalance_data_general, medoid_cluster, 
    dh_rebalance_data
)
from datatrainer2 import (
    recursive_clustering, prepare_training_data, 
    prepare_validation_data, lstm_training
)
from modeleval import (
    dh_test_model, nam_dagostino_chi2, get_baseline_hazard_at_timepoints, combined_test_model
)

import psutil
torch.cuda.empty_cache()
gc.collect()

80

### 2. Define constants, load datasets

In [187]:
# Define Constants and Load Datasets
RANDOM_SEED = 12345
N_SPLIT = 2
FEATURE_COLS = ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
DURATION_COL = 'date_from_sub_60'
EVENT_COL = 'endpoint'
CLUSTER_COL = 'key'
TIME_GRID = np.array([i * 365 for i in range(6)])

# Define Feature Groups
CAT_FEATURES = ['gender', 'dm', 'ht', 'sprint']
LOG_FEATURES = ['a1c', 'po4', 'UACR_mg_g', 'Cr']
STANDARD_FEATURES = ['age', 'alb', 'ca', 'hb', 'hco3']
PASSTHROUGH_FEATURES = ['key', 'date_from_sub_60', 'endpoint']

# Load and Transform Data
BASE_FILENAME = '/mnt/d/pydatascience/g3_regress/data/X/X_20240628'
X_train_transformed, X_test_transformed = load_and_transform_data(
    BASE_FILENAME, CAT_FEATURES, LOG_FEATURES, STANDARD_FEATURES, PASSTHROUGH_FEATURES
)

2024-11-21 07:14:59,126 - INFO - Transforming training data...
2024-11-21 07:15:11,857 - INFO - Transforming test data...


### 3. Train and save models
- model naming: {deepsurv/deephit}\_{nn}\_{resample method}_{outcome}
- for deepsurv model, only the result in time_grid will be retrieved so the result of deepsurv and deephit models are compatible

In [188]:
def create_neural_network(config, num_risk = len(X_train_transformed[EVENT_COL].unique()) - 1, num_time_bins=len(TIME_GRID)):
    """
    Function to create a neural network based on the given configuration.

    Args:
        config (dict): Configuration dictionary containing model type, network type, and hyperparameters.

    Returns:
        torch.nn.Module: Created neural network model.
    """
    gc.collect()
    torch.cuda.empty_cache()
    if config['model'] == 'deepsurv':
        num_risk = None
        num_time_bins=None
    elif config['model'] == 'deephit':
        num_risk = num_risk
        num_time_bins = num_time_bins
    # Create the Neural Network
    if config['net'] == 'ann':
        net = generalized_ann_net_init(
            input_size=len(config['features']),
            num_nodes=config["num_nodes"],
            batch_norm=config["batch_norm"],
            dropout=config["dropout"],
            output_size=1, # Default output size for DeepSurv
            num_risks = num_risk,
            num_time_bins = num_time_bins
        )
    elif config['net'] == 'lstm':
        net = lstm_net_init(
            input_size=len(config['features']),
            num_nodes=config["num_nodes"],
            batch_norm=config["batch_norm"],
            dropout=config["dropout"],
            num_risks = num_risk,
            num_time_bins = num_time_bins
        )
    else:
        raise ValueError("Unknown network type: {}".format(config['net']))

    optimizer = tt.optim.AdamWR(decoupled_weight_decay=1e-6, cycle_eta_multiplier=0.8)
    if config['model'] == 'deepsurv':
        model = CoxPH(net, optimizer)
    elif config['model'] == 'deephit':
        model = DeepHit(net, optimizer)
    model.optimizer.set_lr(config["lr"])
    
    return model

def train_neural_network(model, config, X_train, X_val, duration_col, event_col, cluster_col, callbacks, time_grid=None):
    """
    Function to train a given neural network using the provided datasets.

    Args:
        net (torch.nn.Module): Neural network to be trained.
        config (dict): Configuration dictionary containing model hyperparameters.
        X_train (pd.DataFrame): Training dataset with features.
        X_val (pd.DataFrame): Validation dataset with features.
        duration_col (str): Column representing event durations.
        event_col (str): Column representing event occurrences.
        cluster_col (str): Column for grouping during cross-validation.
        callbacks (list): List of callbacks for training.
        time_grid (np.array, optional): Time grid for evaluation if required. Defaults to None.

    Returns:
        model: Trained PyCox model.
        logs: Training logs.
    """
    gc.collect()
    torch.cuda.empty_cache()
    # Train the model
    if config['model'] == 'deepsurv':
        print('Initiate training of deepsurv neural network')
        X_val = df_event_focus(X_val, event_col, config['endpoint'])
        X_val_processed, y_val = preprocess_data(X_val, config['features'], duration_col, event_col)
        val_data = (X_val_processed, y_val)
        if config['net'] == 'ann':
            print('model structure: ANN')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = recursive_clustering(model, X_train, duration_col, event_col, config, val_data, callbacks, max_repeats=30)
            elif config['balance_method'] == 'enn':
                print('data balancing method: smoteenn')
                X_train = rebalance_data(X_train, event_col, config['endpoint'], CAT_FEATURES, config, RANDOM_SEED, method='ENN')
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data, num_workers=10)
            elif config['balance_method'] == 'tomek':
                print('data balancing method: smotetomek')
                X_train = rebalance_data(X_train, event_col, config['endpoint'], CAT_FEATURES, config, RANDOM_SEED, method='Tomek')
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data, num_workers=10)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
    elif config['model'] == 'deephit':
        print('Initiate training of deephit neural network')
        X_val_processed, y_val = preprocess_data(X_val, config['features'], duration_col, event_col, TIME_GRID, discretize=True)
        val_data = (X_val_processed, y_val)
        if config['net'] == 'ann':
            print('model structure: ANN')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = recursive_clustering(model, X_train, duration_col, event_col, config, val_data, callbacks, max_repeats=30, time_grid=TIME_GRID)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                X_train = underbalance_data_general(X_train, EVENT_COL, CLUSTER_COL, config, version=config['version'])
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col, TIME_GRID, discretize=True)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)        

    # Free memory after training
    gc.collect()
    torch.cuda.empty_cache()

    return model, logs

def save_model(params, model, model_path, baseline_hazard_path):
    """
    Save model weights and baseline hazard data.

    Parameters:
    - model: The trained model to save.
    - model_path: Path to save the model weights (.pt file).
    - baseline_hazard_path: Path to save the baseline hazards (.pkl file).
    """
    # Compute baseline hazards and save
    if params['model'] == 'deepsurv':
        baseline_hazard = model.compute_baseline_hazards()
        baseline_hazard.to_pickle(baseline_hazard_path)
    
    # Save model weights
    model.save_model_weights(model_path)
    print(f"Model and baseline hazards saved to {model_path} and {baseline_hazard_path}.")

def training_wrapper(df, config, spliter, model_path, hazard_path, feature_col=FEATURE_COLS, duration_col=DURATION_COL, event_col=EVENT_COL, cluster_col=CLUSTER_COL, time_grid=TIME_GRID):
    """
    Train and save a survival analysis model with grouped cross-validation splits.

    This function performs training on grouped cross-validation splits of the input DataFrame and saves each trained model
    along with its baseline hazards. Memory management is handled to ensure efficient GPU usage.

    Parameters:
    - df (pd.DataFrame): DataFrame containing training data.
    - config (dict): Configuration dictionary for initializing the neural network.
    - spliter (object): Splitter object (e.g., GroupShuffleSplit or StratifiedKFold) used for creating train-validation splits.
    - model_path (str): File path to save the trained model weights (.pt file).
    - hazard_path (str): File path to save the baseline hazards (.pkl file).
    - feature_col (list): List of feature column names in `df` used for model training.
    - duration_col (str): Name of the column representing duration/time-to-event.
    - event_col (str): Name of the column representing the event indicator (0 = censored, 1 = event).
    - cluster_col (str): Name of the column used for grouping (clusters for cross-validation).
    - time_grid (list): List or array defining the time grid for training.

    Returns:
    - None: Saves the model weights and baseline hazard data for each cross-validation split.
    """
    for train_idx, val_idx in spliter.split(X=df[feature_col], y=df[event_col], groups=df[cluster_col]):
        # Clear GPU memory for each split
        gc.collect()
        torch.cuda.empty_cache()
        
        # Define early stopping callback
        callbacks = [tt.cb.EarlyStopping()]
        
        # Create training and validation sets
        train_df = df.iloc[train_idx]
        val_df = df.iloc[val_idx]
        
        # Initialize and train the model
        model = create_neural_network(config)
        model, logs = train_neural_network(
            model, config,
            X_train=train_df, X_val=val_df,
            duration_col=duration_col, event_col=event_col,
            cluster_col=cluster_col, callbacks=callbacks, time_grid=time_grid
        )
        
        # Save the trained model and its baseline hazards
        save_model(config, model, model_path, hazard_path)
        
        # Free memory for the next iteration
        del model, logs
        gc.collect()
        torch.cuda.empty_cache()

    print("Training and saving completed for all cross-validation splits.")

    print("All models have been trained and saved successfully.")

#### 3.1 deepsurv_ann_clustering_1
- features: ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 8 and 4 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.1144793446270997
- learning rate: 0.1
- max epochs: 9
- batch size: 512

In [189]:
gc.collect()
torch.cuda.empty_cache()

deepsurv_ann_clustering_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [8, 4],
    'batch_norm': False,
    'dropout': 0.1144793446270997,
    'lr': 0.1,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.2 deepsurv_ann_smoteenn_1
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.3
- 4 hidden layers with 64, 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.09555033386059111
- learning rate: 0.1
- max epochs: 16
- batch size: 512

In [190]:
deepsurv_ann_smoteenn_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'enn',
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [64, 32, 16, 8],
    'batch_norm': True,
    'dropout': 0.09555033386059111,
    'lr': 0.1,
    'max_epochs': 16,
    'batch_size': 512,
    'sampling_strategy': 0.3,
    'seq_length': 1,
}

#### 3.3 deepsurv_ann_smotetomek_1
- features:  ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.2
- 3 hidden layers with 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.23872991564684112
- learning rate: 0.1
- max epochs: 14
- batch size: 512

In [191]:
deepsurv_ann_smotetomek_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'tomek',
    'features': ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [32, 16, 8],
    'batch_norm': True,
    'dropout': 0.23872991564684112,
    'lr': 0.1,
    'max_epochs': 14,
    'batch_size': 512,
    'sampling_strategy': 0.2,
    'seq_length': 1,
}

#### 3.4 deepsurv_ann_clustering_2
- features: ["gender", "a1c", "po4", "UACR_mg_g", "Cr"]
- sampling_strategy: 0.05
- 3 hidden layers with 32, 16, 8 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3058921011568742
- learning rate: 0.1
- max epochs: 14
- batch size: 512

In [192]:
deepsurv_ann_clustering_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ["gender", "a1c", "po4", "UACR_mg_g", "Cr"],
    'endpoint': 2,
    'num_nodes': [32, 16, 8],
    'batch_norm': False,
    'dropout': 0.3058921011568742,
    'lr': 0.1,
    'max_epochs': 14,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.5 deepsurv_ann_smoteenn_2
- features: ["gender", "dm", "ht", "sprint", "a1c", "po4", "UACR_mg_g", "Cr", "age", "alb", "ca", "hb", "hco3"]
- sampling_strategy: 0.1, 
- 2 hidden layers with 8, 4 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.38878203553667456
- learning rate: 0.01
- max epochs: 10
- batch size: 512

In [193]:
deepsurv_ann_smoteenn_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'enn',
    'features': ["gender", "dm", "ht", "sprint", "a1c", "po4", "UACR_mg_g", "Cr", "age", "alb", "ca", "hb", "hco3"],
    'endpoint': 2,
    'num_nodes': [8, 4],
    'batch_norm': False,
    'dropout': 0.38878203553667456,
    'lr': 0.01,
    'max_epochs': 10,
    'batch_size': 512,
    'sampling_strategy': 0.1,
    'seq_length': 1,
}

#### 3.6 deepsurv_ann_smotetomek_2
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling_strategy: 0.05
- 2 hidden layers with 64, 32 nodes
- batch normalization in each hidden layer 
- dropout ratio in each layer: 0.3162398297390827
- learning rate: 0.1
- max epochs: 11
- batch size: 512

In [194]:
deepsurv_ann_smotetomek_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'tomek',
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 2,
    'num_nodes': [64, 32],
    'batch_norm': True,
    'dropout': 0.3162398297390827,
    'lr': 0.1,
    'max_epochs': 11,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.7 deepsurv_lstm_clustering_1
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- sequence length 7
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.2772567071863989
- learning rate: 0.1
- max epochs: 13
- batch size: 512

In [195]:
deepsurv_lstm_clustering_1_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'clustering',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 1,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.2772567071863989,
    'lr': 0.1,
    'max_epochs': 13,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 7,
}

#### 3.8 deepsurv_lstm_nearmiss_1
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- seq_length': 8
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3397308077824205
- learning rate: 0.001
- max epochs: 9
- batch size: 512

In [196]:
deepsurv_lstm_nearmiss_1_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 1,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.3397308077824205,
    'lr': 0.001,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 8,
}

#### 3.9 deepsurv_lstm_clustering_2
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- seq_length': 8
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3397308077824205
- learning rate: 0.001
- max epochs: 9
- batch size: 512

In [197]:
deepsurv_lstm_clustering_2_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'clustering',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 2,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.3397308077824205,
    'lr': 0.001,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 8,
}

#### 3.10 deepsurv_lstm_nearmiss_2
- features: ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling_strategy: 0.05
- 2 hidden layers with 32, 16 nodes
- seq_length': 2
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.35763396978044143
- learning rate: 0.1
- max epochs: 10
- batch size: 512

In [198]:
deepsurv_lstm_nearmiss_2_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'features': ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 2,
    'num_nodes': [32, 16],
    'batch_norm': False,
    'dropout': 0.35763396978044143,
    'lr': 0.1,
    'max_epochs': 10,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 2,
}

#### 3.11 deephit_ann_clustering_all
- features: ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 64 and 32 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.26400151710698067
- learning rate: 0.1
- max epochs: 8
- batch size: 512

In [199]:
deephit_ann_clustering_all_config = {
    'model': 'deephit',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [64, 32],
    'batch_norm': True,
    'dropout': 0.26400151710698067,
    'lr': 0.1,
    'max_epochs': 8,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.12 deephit_ann_nearmiss2_all
- features: ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 8, 4 and 2 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.7346754269827496
- learning rate: 0.01
- max epochs: 7
- batch size: 512

In [200]:
deephit_ann_nearmiss2_all_config = {
    'model': 'deephit',
    'net': 'ann',
    'balance_method': 'NearMiss',
    'version': 2,
    'features': ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [8, 4, 2],
    'batch_norm': True,
    'dropout': 0.7346754269827496,
    'lr': 0.01,
    'max_epochs': 7,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.13 deephit_lstm_clustering_all
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- seq_length: 6
- 3 hidden layers with 64, 32 and 16 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.46132889488306583
- learning rate: 0.1
- max epochs: 5
- batch size: 512

In [201]:
deephit_lstm_clustering_all_config = {
    'model': 'deephit',
    'net': 'lstm',
    'balance_method': 'clustering',
    'version': 2,
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [64, 32, 16],
    'batch_norm': True,
    'dropout': 0.46132889488306583,
    'lr': 0.1,
    'max_epochs': 5,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 6,
}

#### 3.14 deephit_lstm_nearmiss1_all
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling strategy: 0.05
- seq_length: 9
- 3 hidden layers with 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.18001924589390816
- learning rate: 0.1
- max epochs: 9
- batch size: 512

In [202]:
deephit_lstm_nearmiss1_all_config = {
    'model': 'deephit',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'version': 1,
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 'all',
    'num_nodes': [32, 16, 8],
    'batch_norm': True,
    'dropout': 0.18001924589390816,
    'lr': 0.1,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 9,
}

In [222]:
model_ls = ['deepsurv_ann_clustering_1', 'deepsurv_ann_smoteenn_1', 'deepsurv_ann_smotetomek_1',
            'deepsurv_ann_clustering_2', 'deepsurv_ann_smoteenn_2', 'deepsurv_ann_smotetomek_2',
            'deepsurv_lstm_clustering_1', 'deepsurv_lstm_nearmiss_1', 'deepsurv_lstm_clustering_2', 'deepsurv_lstm_nearmiss_2',
            'deephit_ann_clustering_all', 'deephit_ann_nearmiss2_all', 'deephit_lstm_clustering_all', 'deephit_lstm_nearmiss1_all']
model_path = '/mnt/d/PYDataScience/g3_regress/code/models/'
# Retrieve configurations dynamically from globals
all_configs = {}
for model in model_ls:
    config_var_name = f"{model}_config"
    config = globals().get(config_var_name)
    if config:
        all_configs[model] = config
    else:
        print(f"Configuration for {config_var_name} not found.")

# Save all configurations to a JSON file
output_file = model_path + "all_model_configs.json"
with open(output_file, 'w') as json_file:
    json.dump(all_configs, json_file, indent=4)

print(f"All model configurations saved to {output_file}")

All model configurations saved to /mnt/d/PYDataScience/g3_regress/code/models/all_model_configs.json


In [204]:
gss1 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_SEED)
gss2 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_SEED)
for train_idx_1, fin_val_idx in gss1.split(X=X_train_transformed[FEATURE_COLS], y=X_train_transformed[EVENT_COL], groups=X_train_transformed[CLUSTER_COL]):
    X_train_transformed_2, X_fin_val = X_train_transformed.iloc[train_idx_1, :], X_train_transformed.iloc[fin_val_idx, :]
    gc.collect()
    torch.cuda.empty_cache()
    for model in model_ls:
        config_var_name = model + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue

        model_weights_path = f'{model_path}{model}.pt'
        model_hazard_path = f'{model_path}{model}_hazard.pkl'
        
        training_wrapper(X_train_transformed_2, model_config, gss2, model_weights_path, 
                        model_hazard_path, 
                        feature_col=FEATURE_COLS, duration_col=DURATION_COL, event_col=EVENT_COL, cluster_col=CLUSTER_COL, time_grid=TIME_GRID)
        gc.collect()
        torch.cuda.empty_cache()

2024-11-21 07:15:16,843 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:16,848 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:16,856 - INFO - Performing clustering iteration 1 / 20
2024-11-21 07:15:16,856 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:16,859 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: clustering


2024-11-21 07:15:17,464 - INFO - Defined medoid for deepsurv model with 1207 clusters.


0:	[0s / 0s],		train_loss: 5.1261,	val_loss: 7.8751
1:	[0s / 0s],		train_loss: 5.0471,	val_loss: 7.6602
2:	[0s / 0s],		train_loss: 4.9526,	val_loss: 7.5625
3:	[0s / 0s],		train_loss: 4.9109,	val_loss: 7.1527
4:	[0s / 0s],		train_loss: 4.8635,	val_loss: 6.9761
5:	[0s / 0s],		train_loss: 4.8398,	val_loss: 6.9163
6:	[0s / 0s],		train_loss: 4.8417,	val_loss: 6.9144
7:	[0s / 0s],		train_loss: 4.8399,	val_loss: 6.7745
8:	[0s / 0s],		train_loss: 4.8130,	val_loss: 6.8014


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:18,509 - INFO - Performing clustering iteration 2 / 20
2024-11-21 07:15:18,510 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:18,514 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:18,920 - INFO - Defined medoid for deepsurv model with 1207 clusters.


9:	[0s / 0s],		train_loss: 4.7669,	val_loss: 6.8175
10:	[0s / 0s],		train_loss: 4.7986,	val_loss: 6.8202
11:	[0s / 0s],		train_loss: 4.7645,	val_loss: 6.8363
12:	[0s / 0s],		train_loss: 4.7694,	val_loss: 6.7860
13:	[0s / 0s],		train_loss: 4.7765,	val_loss: 6.7771
14:	[0s / 0s],		train_loss: 4.7583,	val_loss: 6.8182
15:	[0s / 0s],		train_loss: 4.7811,	val_loss: 6.7846
16:	[0s / 0s],		train_loss: 4.7801,	val_loss: 6.7791
17:	[0s / 0s],		train_loss: 4.7613,	val_loss: 6.7746


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:19,687 - INFO - Performing clustering iteration 3 / 20
2024-11-21 07:15:19,688 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:19,693 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:20,118 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:20,409 - INFO - Performing clustering iteration 4 / 20
2024-11-21 07:15:20,410 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:20,412 - INFO - Event column 'endpoint' updated with focus on event value 1.


18:	[0s / 0s],		train_loss: 4.7769,	val_loss: 6.8447


2024-11-21 07:15:20,813 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:21,100 - INFO - Performing clustering iteration 5 / 20
2024-11-21 07:15:21,101 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:21,103 - INFO - Event column 'endpoint' updated with focus on event value 1.


19:	[0s / 0s],		train_loss: 4.7844,	val_loss: 6.8214


2024-11-21 07:15:21,499 - INFO - Defined medoid for deepsurv model with 1207 clusters.


20:	[0s / 0s],		train_loss: 4.7827,	val_loss: 6.7636
21:	[0s / 0s],		train_loss: 4.7608,	val_loss: 6.7695
22:	[0s / 0s],		train_loss: 4.7600,	val_loss: 6.7485
23:	[0s / 0s],		train_loss: 4.7720,	val_loss: 6.7816
24:	[0s / 0s],		train_loss: 4.7808,	val_loss: 6.7897
25:	[0s / 0s],		train_loss: 4.7605,	val_loss: 6.7722
26:	[0s / 0s],		train_loss: 4.7439,	val_loss: 6.7515
27:	[0s / 0s],		train_loss: 4.7430,	val_loss: 6.7445
28:	[0s / 0s],		train_loss: 4.7576,	val_loss: 6.7426


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:22,416 - INFO - Performing clustering iteration 6 / 20
2024-11-21 07:15:22,416 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:22,423 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:22,835 - INFO - Defined medoid for deepsurv model with 1207 clusters.


29:	[0s / 0s],		train_loss: 4.7665,	val_loss: 6.7421
30:	[0s / 0s],		train_loss: 4.7587,	val_loss: 6.6434
31:	[0s / 0s],		train_loss: 4.7528,	val_loss: 6.6015
32:	[0s / 0s],		train_loss: 4.7665,	val_loss: 6.7106
33:	[0s / 0s],		train_loss: 4.7547,	val_loss: 6.7139
34:	[0s / 0s],		train_loss: 4.7491,	val_loss: 6.6707
35:	[0s / 0s],		train_loss: 4.7560,	val_loss: 6.6521
36:	[0s / 0s],		train_loss: 4.7532,	val_loss: 6.6634
37:	[0s / 0s],		train_loss: 4.7816,	val_loss: 6.7348


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:23,652 - INFO - Performing clustering iteration 7 / 20
2024-11-21 07:15:23,652 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:23,656 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:24,053 - INFO - Defined medoid for deepsurv model with 1207 clusters.


38:	[0s / 0s],		train_loss: 4.7548,	val_loss: 6.7172
39:	[0s / 0s],		train_loss: 4.7767,	val_loss: 6.7546
40:	[0s / 0s],		train_loss: 4.7785,	val_loss: 6.7386
41:	[0s / 0s],		train_loss: 4.7612,	val_loss: 6.7324


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:24,502 - INFO - Performing clustering iteration 8 / 20
2024-11-21 07:15:24,502 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:24,506 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:24,894 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:25,162 - INFO - Performing clustering iteration 9 / 20
2024-11-21 07:15:25,162 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:25,165 - INFO - Event column 'endpoint' updated with focus on event value 1.


42:	[0s / 0s],		train_loss: 4.7530,	val_loss: 6.6424


2024-11-21 07:15:25,548 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


43:	[0s / 0s],		train_loss: 4.7619,	val_loss: 6.6517


2024-11-21 07:15:25,852 - INFO - Performing clustering iteration 10 / 20
2024-11-21 07:15:25,853 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:25,856 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:26,255 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:26,541 - INFO - Performing clustering iteration 11 / 20
2024-11-21 07:15:26,542 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:26,545 - INFO - Event column 'endpoint' updated with focus on event value 1.


44:	[0s / 0s],		train_loss: 4.7489,	val_loss: 6.7018


2024-11-21 07:15:26,926 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:27,194 - INFO - Performing clustering iteration 12 / 20
2024-11-21 07:15:27,195 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:27,198 - INFO - Event column 'endpoint' updated with focus on event value 1.


45:	[0s / 0s],		train_loss: 4.7466,	val_loss: 6.6353


2024-11-21 07:15:27,593 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:27,920 - INFO - Performing clustering iteration 13 / 20
2024-11-21 07:15:27,921 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:27,924 - INFO - Event column 'endpoint' updated with focus on event value 1.


46:	[0s / 0s],		train_loss: 4.7516,	val_loss: 6.6632


2024-11-21 07:15:28,336 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:28,601 - INFO - Performing clustering iteration 14 / 20
2024-11-21 07:15:28,602 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:28,605 - INFO - Event column 'endpoint' updated with focus on event value 1.


47:	[0s / 0s],		train_loss: 4.7788,	val_loss: 6.6325


2024-11-21 07:15:28,968 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:29,290 - INFO - Performing clustering iteration 15 / 20
2024-11-21 07:15:29,290 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:29,293 - INFO - Event column 'endpoint' updated with focus on event value 1.


48:	[0s / 0s],		train_loss: 4.7669,	val_loss: 6.6485


2024-11-21 07:15:29,663 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:29,946 - INFO - Performing clustering iteration 16 / 20
2024-11-21 07:15:29,946 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:29,949 - INFO - Event column 'endpoint' updated with focus on event value 1.


49:	[0s / 0s],		train_loss: 4.7675,	val_loss: 6.6691


2024-11-21 07:15:30,322 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:30,607 - INFO - Performing clustering iteration 17 / 20
2024-11-21 07:15:30,608 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:30,610 - INFO - Event column 'endpoint' updated with focus on event value 1.


50:	[0s / 0s],		train_loss: 4.7774,	val_loss: 6.6255


2024-11-21 07:15:30,981 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:31,266 - INFO - Performing clustering iteration 18 / 20
2024-11-21 07:15:31,267 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:31,270 - INFO - Event column 'endpoint' updated with focus on event value 1.


51:	[0s / 0s],		train_loss: 4.7919,	val_loss: 6.6365


2024-11-21 07:15:31,625 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:31,896 - INFO - Performing clustering iteration 19 / 20
2024-11-21 07:15:31,897 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:31,901 - INFO - Event column 'endpoint' updated with focus on event value 1.


52:	[0s / 0s],		train_loss: 4.7648,	val_loss: 6.6145


2024-11-21 07:15:32,265 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:15:32,550 - INFO - Performing clustering iteration 20 / 20
2024-11-21 07:15:32,550 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:15:32,553 - INFO - Event column 'endpoint' updated with focus on event value 1.


53:	[0s / 0s],		train_loss: 4.7684,	val_loss: 6.6113


2024-11-21 07:15:32,897 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


54:	[0s / 0s],		train_loss: 4.7946,	val_loss: 6.6120
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:15:34,308 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:15:34,312 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smoteenn


2024-11-21 07:15:38,270 - INFO - Missing values imputed using IterativeImputer.
2024-11-21 07:15:38,274 - INFO - Dataframe rebalanced with SMOTE and ENN.


0:	[4s / 4s],		train_loss: 3.6937,	val_loss: 5.0549
1:	[4s / 8s],		train_loss: 3.6709,	val_loss: 5.0572
2:	[7s / 16s],		train_loss: 3.6393,	val_loss: 5.0221
3:	[4s / 20s],		train_loss: 3.6521,	val_loss: 5.0700
4:	[4s / 24s],		train_loss: 3.6409,	val_loss: 5.0327
5:	[4s / 28s],		train_loss: 3.6310,	val_loss: 5.0508
6:	[4s / 32s],		train_loss: 3.6230,	val_loss: 5.0177
7:	[4s / 37s],		train_loss: 3.6388,	val_loss: 5.1196
8:	[4s / 41s],		train_loss: 3.6336,	val_loss: 5.1525
9:	[4s / 45s],		train_loss: 3.6276,	val_loss: 5.0265
10:	[7s / 53s],		train_loss: 3.6211,	val_loss: 5.0507
11:	[4s / 57s],		train_loss: 3.6150,	val_loss: 5.0887
12:	[4s / 1m:1s],		train_loss: 3.6100,	val_loss: 5.0697
13:	[4s / 1m:5s],		train_loss: 3.6051,	val_loss: 5.0419
14:	[4s / 1m:9s],		train_loss: 3.6046,	val_loss: 5.0584
15:	[4s / 1m:13s],		train_loss: 3.6214,	val_loss: 5.0595


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:16:53,571 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:16:53,577 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smotetomek


2024-11-21 07:16:57,472 - INFO - Missing values imputed using IterativeImputer.
2024-11-21 07:16:57,477 - INFO - Dataframe rebalanced with SMOTE and Tomek.


0:	[7s / 7s],	
1:	[4s / 12s],	
2:	[4s / 16s],	
3:	[4s / 21s],	
4:	[4s / 25s],	
5:	[4s / 29s],	
6:	[4s / 34s],	
7:	[4s / 38s],	
8:	[7s / 46s],	
9:	[4s / 50s],	


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:17:49,121 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 07:17:49,125 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 07:17:49,131 - INFO - Performing clustering iteration 1 / 20
2024-11-21 07:17:49,132 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:49,135 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: clustering


2024-11-21 07:17:49,725 - INFO - Defined medoid for deepsurv model with 3725 clusters.


0:	[0s / 0s],		train_loss: 4.8491,	val_loss: 7.7885
1:	[0s / 0s],		train_loss: 4.7782,	val_loss: 7.7687
2:	[0s / 0s],		train_loss: 4.7601,	val_loss: 7.7889
3:	[0s / 0s],		train_loss: 4.7745,	val_loss: 7.7888
4:	[0s / 0s],		train_loss: 4.7578,	val_loss: 7.7798
5:	[0s / 0s],		train_loss: 4.7615,	val_loss: 7.7837
6:	[0s / 0s],		train_loss: 4.7549,	val_loss: 7.7865
7:	[0s / 0s],		train_loss: 4.7594,	val_loss: 7.7859
8:	[0s / 0s],		train_loss: 4.7591,	val_loss: 7.7944
9:	[0s / 0s],		train_loss: 4.7502,	val_loss: 7.7828
10:	[0s / 0s],		train_loss: 4.7515,	val_loss: 7.7940


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:50,948 - INFO - Performing clustering iteration 2 / 20
2024-11-21 07:17:50,949 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:50,952 - INFO - Event column 'endpoint' updated with focus on event value 1.


11:	[0s / 1s],		train_loss: 4.7466,	val_loss: 7.7883


2024-11-21 07:17:51,328 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:51,623 - INFO - Performing clustering iteration 3 / 20
2024-11-21 07:17:51,623 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:51,626 - INFO - Event column 'endpoint' updated with focus on event value 1.


12:	[0s / 0s],		train_loss: 4.8200,	val_loss: 7.7737


2024-11-21 07:17:51,997 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:52,276 - INFO - Performing clustering iteration 4 / 20
2024-11-21 07:17:52,276 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:52,280 - INFO - Event column 'endpoint' updated with focus on event value 1.


13:	[0s / 0s],		train_loss: 4.8953,	val_loss: 7.7692


2024-11-21 07:17:52,635 - INFO - Defined medoid for deepsurv model with 3725 clusters.


14:	[0s / 0s],		train_loss: 5.0552,	val_loss: 7.7432
15:	[0s / 0s],		train_loss: 5.0348,	val_loss: 7.7373
16:	[0s / 0s],		train_loss: 5.0344,	val_loss: 7.7413
17:	[0s / 0s],		train_loss: 5.0290,	val_loss: 7.7437
18:	[0s / 0s],		train_loss: 5.0251,	val_loss: 7.7457
19:	[0s / 0s],		train_loss: 5.0157,	val_loss: 7.7448
20:	[0s / 0s],		train_loss: 5.0263,	val_loss: 7.7410
21:	[0s / 0s],		train_loss: 5.0199,	val_loss: 7.7420
22:	[0s / 0s],		train_loss: 5.0201,	val_loss: 7.7426
23:	[0s / 0s],		train_loss: 5.0199,	val_loss: 7.7430
24:	[0s / 0s],		train_loss: 5.0217,	val_loss: 7.7435
25:	[0s / 0s],		train_loss: 5.0140,	val_loss: 7.7435


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:53,790 - INFO - Performing clustering iteration 5 / 20
2024-11-21 07:17:53,791 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:53,795 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:17:54,156 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:54,423 - INFO - Performing clustering iteration 6 / 20
2024-11-21 07:17:54,424 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:54,427 - INFO - Event column 'endpoint' updated with focus on event value 1.


26:	[0s / 0s],		train_loss: 5.0522,	val_loss: 7.7378


2024-11-21 07:17:54,764 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:55,026 - INFO - Performing clustering iteration 7 / 20
2024-11-21 07:17:55,026 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:55,030 - INFO - Event column 'endpoint' updated with focus on event value 1.


27:	[0s / 0s],		train_loss: 5.0547,	val_loss: 7.7385


2024-11-21 07:17:55,376 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:55,640 - INFO - Performing clustering iteration 8 / 20
2024-11-21 07:17:55,641 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:55,644 - INFO - Event column 'endpoint' updated with focus on event value 1.


28:	[0s / 0s],		train_loss: 5.0684,	val_loss: 7.7389


2024-11-21 07:17:55,968 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:56,245 - INFO - Performing clustering iteration 9 / 20
2024-11-21 07:17:56,245 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:56,248 - INFO - Event column 'endpoint' updated with focus on event value 1.


29:	[0s / 0s],		train_loss: 5.0666,	val_loss: 7.7377


2024-11-21 07:17:56,568 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:56,847 - INFO - Performing clustering iteration 10 / 20
2024-11-21 07:17:56,847 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:56,851 - INFO - Event column 'endpoint' updated with focus on event value 1.


30:	[0s / 0s],		train_loss: 5.0814,	val_loss: 7.7513


2024-11-21 07:17:57,154 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:57,423 - INFO - Performing clustering iteration 11 / 20
2024-11-21 07:17:57,424 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:57,426 - INFO - Event column 'endpoint' updated with focus on event value 1.


31:	[0s / 0s],		train_loss: 5.0834,	val_loss: 7.7463


2024-11-21 07:17:57,732 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:58,039 - INFO - Performing clustering iteration 12 / 20
2024-11-21 07:17:58,040 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:58,042 - INFO - Event column 'endpoint' updated with focus on event value 1.


32:	[0s / 0s],		train_loss: 5.0866,	val_loss: 7.7461


2024-11-21 07:17:58,332 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:58,603 - INFO - Performing clustering iteration 13 / 20
2024-11-21 07:17:58,604 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:58,606 - INFO - Event column 'endpoint' updated with focus on event value 1.


33:	[0s / 0s],		train_loss: 5.0876,	val_loss: 7.7497


2024-11-21 07:17:58,900 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:59,162 - INFO - Performing clustering iteration 14 / 20
2024-11-21 07:17:59,163 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:59,166 - INFO - Event column 'endpoint' updated with focus on event value 1.


34:	[0s / 0s],		train_loss: 5.1140,	val_loss: 7.7488


2024-11-21 07:17:59,443 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:17:59,724 - INFO - Performing clustering iteration 15 / 20
2024-11-21 07:17:59,724 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:17:59,727 - INFO - Event column 'endpoint' updated with focus on event value 1.


35:	[0s / 0s],		train_loss: 5.1067,	val_loss: 7.7484


2024-11-21 07:18:00,001 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:18:00,266 - INFO - Performing clustering iteration 16 / 20
2024-11-21 07:18:00,267 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:18:00,271 - INFO - Event column 'endpoint' updated with focus on event value 1.


36:	[0s / 0s],		train_loss: 5.0861,	val_loss: 7.7503


2024-11-21 07:18:00,527 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:18:00,798 - INFO - Performing clustering iteration 17 / 20
2024-11-21 07:18:00,799 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:18:00,801 - INFO - Event column 'endpoint' updated with focus on event value 1.


37:	[0s / 0s],		train_loss: 5.1113,	val_loss: 7.7491


2024-11-21 07:18:01,069 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


38:	[0s / 0s],		train_loss: 5.1017,	val_loss: 7.7498


2024-11-21 07:18:01,405 - INFO - Performing clustering iteration 18 / 20
2024-11-21 07:18:01,406 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:18:01,409 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:18:01,655 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:18:01,943 - INFO - Performing clustering iteration 19 / 20
2024-11-21 07:18:01,943 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:18:01,947 - INFO - Event column 'endpoint' updated with focus on event value 1.


39:	[0s / 0s],		train_loss: 5.1034,	val_loss: 7.7457


2024-11-21 07:18:02,189 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:18:02,455 - INFO - Performing clustering iteration 20 / 20
2024-11-21 07:18:02,455 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:18:02,458 - INFO - Event column 'endpoint' updated with focus on event value 1.


40:	[0s / 0s],		train_loss: 5.0856,	val_loss: 7.7435


2024-11-21 07:18:02,689 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


41:	[0s / 0s],		train_loss: 5.1035,	val_loss: 7.7486
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:18:04,098 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 07:18:04,103 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smoteenn


2024-11-21 07:18:07,721 - INFO - Missing values imputed using IterativeImputer.
2024-11-21 07:18:07,725 - INFO - Dataframe rebalanced with SMOTE and ENN.


0:	[3s / 3s],		train_loss: 4.8912,	val_loss: 7.6535
1:	[7s / 10s],		train_loss: 4.7604,	val_loss: 7.4641
2:	[3s / 14s],		train_loss: 4.6971,	val_loss: 7.4596
3:	[3s / 17s],		train_loss: 4.7232,	val_loss: 7.5025
4:	[3s / 21s],		train_loss: 4.6880,	val_loss: 7.4566
5:	[3s / 24s],		train_loss: 4.6840,	val_loss: 7.4608
6:	[3s / 28s],		train_loss: 4.6813,	val_loss: 7.4559
7:	[3s / 32s],		train_loss: 4.6968,	val_loss: 7.4582
8:	[3s / 35s],		train_loss: 4.6937,	val_loss: 7.4718
9:	[3s / 39s],		train_loss: 4.7053,	val_loss: 7.4401


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:18:48,384 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 07:18:48,390 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smotetomek


2024-11-21 07:18:55,636 - INFO - Missing values imputed using IterativeImputer.
2024-11-21 07:18:55,643 - INFO - Dataframe rebalanced with SMOTE and Tomek.


0:	[3s / 3s],		train_loss: 4.6940,	val_loss: 7.4088
1:	[4s / 7s],		train_loss: 4.6857,	val_loss: 7.3693
2:	[4s / 11s],		train_loss: 4.6101,	val_loss: 7.4064
3:	[3s / 15s],		train_loss: 4.6571,	val_loss: 7.5830
4:	[4s / 19s],		train_loss: 4.6257,	val_loss: 7.4060
5:	[3s / 23s],		train_loss: 4.5966,	val_loss: 7.4291
6:	[3s / 27s],		train_loss: 4.5714,	val_loss: 7.4163
7:	[7s / 35s],		train_loss: 4.6199,	val_loss: 7.4361
8:	[3s / 39s],		train_loss: 4.6154,	val_loss: 7.4609
9:	[3s / 43s],		train_loss: 4.6048,	val_loss: 7.4003
10:	[3s / 46s],		train_loss: 4.5941,	val_loss: 7.4191


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:19:43,954 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:19:44,138 - INFO - Performing clustering iteration 1 / 20
2024-11-21 07:19:44,138 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:19:44,142 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: clustering


2024-11-21 07:19:44,754 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-21 07:19:44,755 - INFO - Performing clustering iteration 2 / 20
2024-11-21 07:19:44,756 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:19:44,759 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:19:45,149 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-21 07:19:45,150 - INFO - Performing clustering iteration 3 / 20
2024-11-21 07:19:45,150 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:19:45,153 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:19:45,543 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-21 07:19:45,544 - INFO - Performing clustering iteration 4 / 20
2024-11-21 07:19:45,544 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:19:45,547 - INFO - Event column 'endpoint' updated with focus on event value 1

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[5s / 5s],		train_loss: 4.9715,	val_loss: 7.8747
1:	[2s / 8s],		train_loss: 3.2833,	val_loss: 5.4501
2:	[2s / 10s],		train_loss: 2.6288,	val_loss: 5.5585
3:	[2s / 12s],		train_loss: 2.5349,	val_loss: 6.4079
4:	[2s / 15s],		train_loss: 2.4789,	val_loss: 5.6070
5:	[2s / 17s],		train_loss: 2.4512,	val_loss: 5.2319
6:	[2s / 19s],		train_loss: 2.3713,	val_loss: 5.2538
7:	[2s / 22s],		train_loss: 2.3792,	val_loss: 5.9345
8:	[2s / 24s],		train_loss: 2.2310,	val_loss: 5.4938
9:	[2s / 26s],		train_loss: 2.1501,	val_loss: 5.2844
10:	[2s / 29s],		train_loss: 2.1776,	val_loss: 4.9481
11:	[2s / 31s],		train_loss: 2.1259,	val_loss: 5.5123
12:	[2s / 33s],		train_loss: 2.0918,	val_loss: 5.1554


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:21:12,326 - INFO - Event column 'endpoint' updated with focus on event value 1.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-21 07:21:12,519 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: NearMiss


2024-11-21 07:21:12,818 - INFO - Dataset for deepsurv model undersampled using method 'NearMiss' with sampling strategy 0.05.
2024-11-21 07:21:21,728 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:21:46,528 - INFO - Validation data retrieved


64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[2s / 2s],		train_loss: 5.1432,	val_loss: 7.9110
1:	[5s / 7s],		train_loss: 5.1453,	val_loss: 7.9107
2:	[2s / 9s],		train_loss: 5.1459,	val_loss: 7.9107
3:	[2s / 11s],		train_loss: 5.1488,	val_loss: 7.9104
4:	[2s / 14s],		train_loss: 5.1538,	val_loss: 7.9101
5:	[2s / 16s],		train_loss: 5.1572,	val_loss: 7.9099
6:	[2s / 18s],		train_loss: 5.1406,	val_loss: 7.9099
7:	[2s / 20s],		train_loss: 5.1441,	val_loss: 7.9094
8:	[2s / 22s],		train_loss: 5.1469,	val_loss: 7.9089


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:22:10,408 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 07:22:10,593 - INFO - Performing clustering iteration 1 / 20
2024-11-21 07:22:10,594 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:22:10,597 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: clustering


2024-11-21 07:22:11,217 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-21 07:22:11,218 - INFO - Performing clustering iteration 2 / 20
2024-11-21 07:22:11,219 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:22:11,221 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 07:22:11,611 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-21 07:22:11,612 - INFO - Performing clustering iteration 3 / 20
2024-11-21 07:22:11,612 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:22:11,616 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 07:22:11,997 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-21 07:22:11,998 - INFO - Performing clustering iteration 4 / 20
2024-11-21 07:22:11,999 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:22:12,001 - INFO - Event column 'endpoint' updated with focus on event value 2

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[2s / 2s],		train_loss: 4.9768
1:	[2s / 5s],		train_loss: 4.9595
2:	[2s / 8s],		train_loss: 4.9157
3:	[2s / 11s],		train_loss: 4.8961
4:	[2s / 14s],		train_loss: 4.8531
5:	[2s / 17s],		train_loss: 4.8309
6:	[2s / 19s],		train_loss: 4.8291
7:	[2s / 22s],		train_loss: 4.8137
8:	[2s / 25s],		train_loss: 4.8082


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:24:10,206 - INFO - Event column 'endpoint' updated with focus on event value 2.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-21 07:24:10,387 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: NearMiss


2024-11-21 07:24:10,695 - INFO - Dataset for deepsurv model undersampled using method 'NearMiss' with sampling strategy 0.05.
2024-11-21 07:24:39,470 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 07:25:13,616 - INFO - Validation data retrieved


64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[2s / 2s],		train_loss: 5.1199
1:	[2s / 4s],		train_loss: 5.0552
2:	[2s / 6s],		train_loss: 4.9886
3:	[2s / 8s],		train_loss: 4.9948
4:	[2s / 10s],		train_loss: 4.9788
5:	[2s / 12s],		train_loss: 4.9737
6:	[5s / 18s],		train_loss: 4.9611
7:	[2s / 20s],		train_loss: 4.9899
8:	[2s / 22s],		train_loss: 4.9590
9:	[2s / 24s],		train_loss: 4.9670


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:25:39,878 - INFO - Performing clustering iteration 1 / 20
2024-11-21 07:25:39,878 - INFO - CUDA environment set up and GPU memory cleared.


Initiate training of deephit neural network
model structure: ANN
data balancing method: clustering


2024-11-21 07:25:40,459 - INFO - Defined medoid for deephit model with 4932 clusters.


0:	[0s / 0s],		train_loss: 0.5257,	val_loss: 0.0665
1:	[0s / 1s],		train_loss: 0.3995,	val_loss: 0.0774
2:	[0s / 1s],		train_loss: 0.3632,	val_loss: 0.0787
3:	[0s / 2s],		train_loss: 0.3490,	val_loss: 0.0698
4:	[0s / 2s],		train_loss: 0.3351,	val_loss: 0.0703
5:	[0s / 3s],		train_loss: 0.3256,	val_loss: 0.0765
6:	[0s / 3s],		train_loss: 0.3259,	val_loss: 0.0746
7:	[0s / 4s],		train_loss: 0.3260,	val_loss: 0.0758


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:25:44,805 - INFO - Performing clustering iteration 2 / 20
2024-11-21 07:25:44,806 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:25:45,186 - INFO - Defined medoid for deephit model with 4932 clusters.


8:	[0s / 0s],		train_loss: 0.4304,	val_loss: 0.0664
9:	[0s / 1s],		train_loss: 0.3586,	val_loss: 0.0608
10:	[0s / 1s],		train_loss: 0.3419,	val_loss: 0.0611
11:	[0s / 2s],		train_loss: 0.3369,	val_loss: 0.0630
12:	[0s / 2s],		train_loss: 0.3297,	val_loss: 0.0631
13:	[0s / 3s],		train_loss: 0.3283,	val_loss: 0.0633
14:	[0s / 3s],		train_loss: 0.3274,	val_loss: 0.0638
15:	[0s / 4s],		train_loss: 0.3311,	val_loss: 0.0591


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:25:49,572 - INFO - Performing clustering iteration 3 / 20
2024-11-21 07:25:49,573 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:25:49,934 - INFO - Defined medoid for deephit model with 4932 clusters.


16:	[0s / 0s],		train_loss: 0.3331,	val_loss: 0.0598
17:	[0s / 1s],		train_loss: 0.3417,	val_loss: 0.0733
18:	[0s / 1s],		train_loss: 0.3351,	val_loss: 0.0607
19:	[0s / 1s],		train_loss: 0.3304,	val_loss: 0.0686
20:	[0s / 2s],		train_loss: 0.3260,	val_loss: 0.0614
21:	[0s / 3s],		train_loss: 0.3224,	val_loss: 0.0642
22:	[0s / 3s],		train_loss: 0.3242,	val_loss: 0.0670
23:	[0s / 4s],		train_loss: 0.3206,	val_loss: 0.0621


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:25:54,180 - INFO - Performing clustering iteration 4 / 20
2024-11-21 07:25:54,181 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:25:54,537 - INFO - Defined medoid for deephit model with 4932 clusters.


24:	[0s / 0s],		train_loss: 0.3304,	val_loss: 0.0596
25:	[0s / 1s],		train_loss: 0.3257,	val_loss: 0.0594


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:25:55,873 - INFO - Performing clustering iteration 5 / 20
2024-11-21 07:25:55,873 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:25:56,213 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:25:56,932 - INFO - Performing clustering iteration 6 / 20
2024-11-21 07:25:56,932 - INFO - CUDA environment set up and GPU memory cleared.


26:	[0s / 0s],		train_loss: 0.3325,	val_loss: 0.0612


2024-11-21 07:25:57,282 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:25:57,957 - INFO - Performing clustering iteration 7 / 20
2024-11-21 07:25:57,958 - INFO - CUDA environment set up and GPU memory cleared.


27:	[0s / 0s],		train_loss: 0.3367,	val_loss: 0.0619


2024-11-21 07:25:58,274 - INFO - Defined medoid for deephit model with 4932 clusters.


28:	[0s / 0s],		train_loss: 0.3379,	val_loss: 0.0616


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:25:59,010 - INFO - Performing clustering iteration 8 / 20
2024-11-21 07:25:59,010 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:25:59,334 - INFO - Defined medoid for deephit model with 4932 clusters.


29:	[0s / 0s],		train_loss: 0.3408,	val_loss: 0.0571
30:	[0s / 1s],		train_loss: 0.3427,	val_loss: 0.0501
31:	[0s / 1s],		train_loss: 0.3472,	val_loss: 0.0531
32:	[0s / 2s],		train_loss: 0.3371,	val_loss: 0.0699
33:	[0s / 2s],		train_loss: 0.3330,	val_loss: 0.0627
34:	[0s / 3s],		train_loss: 0.3278,	val_loss: 0.0610
35:	[0s / 3s],		train_loss: 0.3319,	val_loss: 0.0591
36:	[4s / 7s],		train_loss: 0.3301,	val_loss: 0.0587


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:07,429 - INFO - Performing clustering iteration 9 / 20
2024-11-21 07:26:07,429 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:26:07,724 - INFO - Defined medoid for deephit model with 4932 clusters.


37:	[0s / 0s],		train_loss: 0.3427,	val_loss: 0.0578
38:	[0s / 1s],		train_loss: 0.3330,	val_loss: 0.0611
39:	[0s / 1s],		train_loss: 0.3306,	val_loss: 0.0580


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:10,018 - INFO - Performing clustering iteration 10 / 20
2024-11-21 07:26:10,019 - INFO - CUDA environment set up and GPU memory cleared.


40:	[0s / 2s],		train_loss: 0.3309,	val_loss: 0.0573


2024-11-21 07:26:10,303 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:11,026 - INFO - Performing clustering iteration 11 / 20
2024-11-21 07:26:11,026 - INFO - CUDA environment set up and GPU memory cleared.


41:	[0s / 0s],		train_loss: 0.3477,	val_loss: 0.0574


2024-11-21 07:26:11,310 - INFO - Defined medoid for deephit model with 4932 clusters.


42:	[0s / 0s],		train_loss: 0.3408,	val_loss: 0.0624


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:12,096 - INFO - Performing clustering iteration 12 / 20
2024-11-21 07:26:12,097 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:26:12,367 - INFO - Defined medoid for deephit model with 4932 clusters.


43:	[0s / 0s],		train_loss: 0.3660,	val_loss: 0.0449
44:	[0s / 1s],		train_loss: 0.3468,	val_loss: 0.0526
45:	[0s / 1s],		train_loss: 0.3363,	val_loss: 0.0551
46:	[0s / 2s],		train_loss: 0.3315,	val_loss: 0.0580
47:	[0s / 2s],		train_loss: 0.3314,	val_loss: 0.0586
48:	[0s / 3s],		train_loss: 0.3290,	val_loss: 0.0574
49:	[0s / 3s],		train_loss: 0.3283,	val_loss: 0.0589


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:16,684 - INFO - Performing clustering iteration 13 / 20
2024-11-21 07:26:16,685 - INFO - CUDA environment set up and GPU memory cleared.


50:	[0s / 4s],		train_loss: 0.3300,	val_loss: 0.0565


2024-11-21 07:26:16,940 - INFO - Defined medoid for deephit model with 4932 clusters.


51:	[0s / 0s],		train_loss: 0.3492,	val_loss: 0.0600
52:	[0s / 1s],		train_loss: 0.3342,	val_loss: 0.0582


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:18,661 - INFO - Performing clustering iteration 14 / 20
2024-11-21 07:26:18,662 - INFO - CUDA environment set up and GPU memory cleared.


53:	[0s / 1s],		train_loss: 0.3333,	val_loss: 0.0585


2024-11-21 07:26:18,905 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:19,661 - INFO - Performing clustering iteration 15 / 20
2024-11-21 07:26:19,661 - INFO - CUDA environment set up and GPU memory cleared.


54:	[0s / 0s],		train_loss: 0.3488,	val_loss: 0.0585


2024-11-21 07:26:19,906 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:20,615 - INFO - Performing clustering iteration 16 / 20
2024-11-21 07:26:20,615 - INFO - CUDA environment set up and GPU memory cleared.


55:	[0s / 0s],		train_loss: 0.3503,	val_loss: 0.0572


2024-11-21 07:26:20,871 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:21,554 - INFO - Performing clustering iteration 17 / 20
2024-11-21 07:26:21,554 - INFO - CUDA environment set up and GPU memory cleared.


56:	[0s / 0s],		train_loss: 0.3534,	val_loss: 0.0564


2024-11-21 07:26:21,777 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:22,519 - INFO - Performing clustering iteration 18 / 20
2024-11-21 07:26:22,520 - INFO - CUDA environment set up and GPU memory cleared.


57:	[0s / 0s],		train_loss: 0.3523,	val_loss: 0.0542


2024-11-21 07:26:22,730 - INFO - Defined medoid for deephit model with 4932 clusters.


58:	[0s / 0s],		train_loss: 0.3554,	val_loss: 0.0513


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:23,497 - INFO - Performing clustering iteration 19 / 20
2024-11-21 07:26:23,497 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:26:23,702 - INFO - Defined medoid for deephit model with 4932 clusters.


59:	[0s / 0s],		train_loss: 0.3582,	val_loss: 0.0491


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-21 07:26:24,415 - INFO - Performing clustering iteration 20 / 20
2024-11-21 07:26:24,415 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:26:24,602 - INFO - Defined medoid for deephit model with 4932 clusters.


60:	[0s / 0s],		train_loss: 0.3605,	val_loss: 0.0462


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.
Initiate training of deephit neural network
model structure: ANN
data balancing method: NearMiss


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-21 07:26:38,082 - INFO - Dataset for deephit model undersampled using method 'NearMiss' with sampling strategy 0.05.


0:	[7s / 7s],		train_loss: 0.0978,	val_loss: 0.0567
1:	[3s / 10s],		train_loss: 0.0674,	val_loss: 0.0337
2:	[3s / 13s],		train_loss: 0.0593,	val_loss: 0.0316
3:	[2s / 16s],		train_loss: 0.0565,	val_loss: 0.0275
4:	[3s / 20s],		train_loss: 0.0550,	val_loss: 0.0272
5:	[2s / 22s],		train_loss: 0.0548,	val_loss: 0.0269
6:	[2s / 25s],		train_loss: 0.0547,	val_loss: 0.0269
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all_hazard.pkl.


  self.net.load_state_dict(torch.load(path, **kwargs))


Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-21 07:27:04,975 - INFO - Performing clustering iteration 1 / 20
2024-11-21 07:27:04,975 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:27:04,978 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deephit neural network
model structure: LSTM
data balancing method: clustering


2024-11-21 07:27:05,391 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-21 07:27:05,392 - INFO - Performing clustering iteration 2 / 20
2024-11-21 07:27:05,393 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:27:05,396 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:27:05,816 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-21 07:27:05,818 - INFO - Performing clustering iteration 3 / 20
2024-11-21 07:27:05,819 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:27:05,826 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 07:27:06,248 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-21 07:27:06,250 - INFO - Performing clustering iteration 4 / 20
2024-11-21 07:27:06,250 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-21 07:27:06,254 - INFO - Event column 'endpoint' updated with focus on event value 1

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[3s / 3s],		train_loss: 0.0585,	val_loss: 0.0412
1:	[2s / 6s],		train_loss: 0.0493,	val_loss: 0.0432
2:	[3s / 9s],		train_loss: 0.0494,	val_loss: 0.0417
3:	[2s / 12s],		train_loss: 0.0485,	val_loss: 0.0412
4:	[2s / 15s],		train_loss: 0.0493,	val_loss: 0.0420


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.
Initiate training of deephit neural network
model structure: LSTM
data balancing method: NearMiss


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-21 07:28:55,901 - INFO - Dataset for deephit model undersampled using method 'NearMiss' with sampling strategy 0.05.
2024-11-21 07:29:29,809 - INFO - Validation data retrieved


64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[3s / 3s],		train_loss: 0.4252,	val_loss: 0.0750
1:	[2s / 6s],		train_loss: 0.3917,	val_loss: 0.0722
2:	[2s / 9s],		train_loss: 0.3814,	val_loss: 0.0722
3:	[6s / 15s],		train_loss: 0.3792,	val_loss: 0.0669
4:	[2s / 18s],		train_loss: 0.3730,	val_loss: 0.0693
5:	[2s / 21s],		train_loss: 0.3558,	val_loss: 0.0645
6:	[2s / 24s],		train_loss: 0.3377,	val_loss: 0.0655
7:	[3s / 27s],		train_loss: 0.3722,	val_loss: 0.0827
8:	[2s / 30s],		train_loss: 0.3641,	val_loss: 0.0678


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


### 4. Load models and hazards

In [205]:
def load_model(model, model_config, model_path, baseline_hazard_path):
    """
    Load model weights and baseline hazard data.

    Parameters:
    - create_model_func: Function to create the model architecture (e.g., create_neural_network).
    - model_path: Path to load the model weights (.pt file).
    - baseline_hazard_path: Path to load the baseline hazards (.pkl file).

    Returns:
    - model: The loaded model with weights and baseline hazards.
    """
    
    # Load model weights
    model.load_model_weights(model_path)
    
    # Load baseline hazards and assign to model
    if model_config['model'] == 'deepsurv':
        baseline_hazard = pd.read_pickle(baseline_hazard_path)
        model.baseline_hazards_ = baseline_hazard
        model.baseline_cumulative_hazards_ = baseline_hazard.cumsum()
    
    print(f"Model and baseline hazards loaded from {model_path} and {baseline_hazard_path}.")
    return model

In [206]:
# Dictionary to store loaded models
loaded_models = {}

for model_name in model_ls:
    # Retrieve configuration by dynamically constructing the variable name
    config_var_name = model_name + "_config"
    model_config = globals().get(config_var_name)
    
    if model_config is None:
        print(f"Configuration for {config_var_name} not found.")
        continue

    model_weights_path = f'{model_path}{model_name}.pt'
    model_hazard_path = f'{model_path}{model_name}_hazard.pkl'
    
    # Define the model creation function as a lambda to pass the config
    create_model_func = lambda: create_neural_network(
        config=model_config,
        num_risk=len(X_train_transformed[EVENT_COL].unique()) - 1,
        num_time_bins=len(TIME_GRID)
    )
    model = create_model_func()
    
    # Load the model and store it in the dictionary
    loaded_models[model_name] = load_model(model, model_config, model_weights_path, model_hazard_path)
    print(f'Loaded model {model_name}')

Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1_hazard.pkl.
Loaded model deepsurv_ann_clustering_1


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1_hazard.pkl.
Loaded model deepsurv_ann_smoteenn_1
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1_hazard.pkl.
Loaded model deepsurv_ann_smotetomek_1


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2_hazard.pkl.
Loaded model deepsurv_ann_clustering_2


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2_hazard.pkl.
Loaded model deepsurv_ann_smoteenn_2


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2_hazard.pkl.
Loaded model deepsurv_ann_smotetomek_2
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1_hazard.pkl.
Loaded model deepsurv_lstm_clustering_1


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_1_hazard.pkl.
Loaded model deepsurv_lstm_nearmiss_1
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2_hazard.pkl.
Loaded model deepsurv_lstm_clustering_2


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2_hazard.pkl.
Loaded model deepsurv_lstm_nearmiss_2
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all_hazard.pkl.
Loaded model deephit_ann_clustering_all
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all_hazard.pkl.
Loaded model deephit_ann_nearmiss2_all
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all_hazard.pkl.
Loaded model deephit_lstm_clustering_all
Model and baseline hazards loaded from /mnt/d/

### 5. Prepare and make prediction on the training set, combine the prediction arrays for training of super learner

In [207]:
def predict_neural_network(model, config, X_test, duration_col, event_col, cluster_col, time_grid=None):
    """
    Function to train a given neural network using the provided datasets.

    Args:
        net (torch.nn.Module): Neural network to be trained.
        config (dict): Configuration dictionary containing model hyperparameters.
        X_train (pd.DataFrame): Training dataset with features.
        X_val (pd.DataFrame): Validation dataset with features.
        duration_col (str): Column representing event durations.
        event_col (str): Column representing event occurrences.
        cluster_col (str): Column for grouping during cross-validation.
        callbacks (list): List of callbacks for training.
        time_grid (np.array, optional): Time grid for evaluation if required. Defaults to None.

    Returns:
        model: Trained PyCox model.
        logs: Training logs.
    """
    gc.collect()
    torch.cuda.empty_cache()
    # Train the model
    if config['model'] == 'deepsurv':
        print('Initiate testing of deepsurv neural network')
        X_test = df_event_focus(X_test, event_col, config['endpoint'])
        if config['net'] == 'ann':
            print('model structure: ANN')
            X_test_processed, y_test = preprocess_data(X_test, config['features'], duration_col, event_col)
            surv = model.predict_surv_df(X_test_processed, batch_size=512)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            X_test_processed, y_test = prepare_validation_data(X_test, config['features'], duration_col, event_col, config, cluster_col, config['model'], time_grid)
            X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32)
            y_test_tensor = (torch.tensor(y_test[0], dtype=torch.float32), torch.tensor(y_test[1], dtype=torch.float32))
            surv = model.predict_surv_df(X_test_tensor, batch_size=512)
    elif config['model'] == 'deephit':
        print('Initiate testing of deephit neural network')
        if config['net'] == 'ann':
            print('model structure: ANN')
            X_test_processed, y_test = preprocess_data(X_test, config['features'], duration_col, event_col, time_grid, discretize=True)
            surv = model.predict_cif(X_test_processed, batch_size=512)
            print('prediction complete, please note that prediction of deephit models are CIF.')
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            X_test_processed, y_test = prepare_validation_data(X_test, config['features'], duration_col, event_col, config, cluster_col, config['model'], time_grid)
            surv = model.predict_cif(X_test_processed, batch_size=512)
            print('prediction complete, please note that prediction of deephit models are CIF.')

    # Free memory after training
    gc.collect()
    torch.cuda.empty_cache()

    return surv, y_test

In [208]:

def align_to_time_grid(surv, time_grid):
    """
    Align the survival DataFrame to the closest indices of the time grid.

    Parameters:
        surv (pd.DataFrame): Survival probabilities DataFrame.
        time_grid (np.array): Array of target time points to align.

    Returns:
        aligned_surv (pd.DataFrame): Aligned survival probabilities.
    """
    # Convert the DataFrame's index to a NumPy array for fast computation
    surv_times = np.array(surv.index)
    
    # Find the closest time in the survival DataFrame for each time in the grid
    closest_indices = [np.argmin(np.abs(surv_times - t)) for t in time_grid]
    
    # Extract the rows corresponding to the closest times
    aligned_surv = surv.iloc[closest_indices].copy()
    
    # Reindex the DataFrame to match the time grid
    aligned_surv.index = range(len(time_grid))  # Standardize indices to 0, 1, 2, ...
    return aligned_surv

#### 5.1 Use lifelines and CoxPH Fitter to get the CIF of both outcomes as the 'ground truth' of the training dataset

In [209]:
from lifelines import CoxPHFitter
from lifelines import AalenJohansenFitter
from sklearn.linear_model import LinearRegression
from joblib import Parallel, delayed

# Convert all non-target events to 0 (censored)
X_train_transformed["event1"] = X_train_transformed[EVENT_COL].apply(lambda x: 1 if x == 1 else 0)
X_train_transformed["event2"] = X_train_transformed[EVENT_COL].apply(lambda x: 1 if x == 2 else 0)

class_counts = X_train_transformed[EVENT_COL].value_counts()
X_train_transformed['weights'] = X_train_transformed[EVENT_COL].map(lambda e: 1 / class_counts[e]).values

# Step 1: Fit a Cox model for each event type
cox_model_event_1 = CoxPHFitter()
cox_model_event_1.fit(X_train_transformed[FEATURE_COLS + [DURATION_COL, 'event1', CLUSTER_COL, 'weights']], duration_col=DURATION_COL, event_col="event1", cluster_col=CLUSTER_COL, weights_col="weights", robust=True)

cox_model_event_2 = CoxPHFitter()
cox_model_event_2.fit(X_train_transformed[FEATURE_COLS + [DURATION_COL, 'event2', CLUSTER_COL, 'weights']], duration_col=DURATION_COL, event_col="event2", cluster_col=CLUSTER_COL, weights_col="weights", robust=True)

# Step 2: Predict individual cumulative hazards for each event type
cumulative_hazard_event_1 = cox_model_event_1.predict_cumulative_hazard(X_train_transformed)
cumulative_hazard_event_2 = cox_model_event_2.predict_cumulative_hazard(X_train_transformed)

# Step 3: Compute overall survival for each individual
# Overall survival: S(t) = exp(- (H1(t) + H2(t)))
overall_survival = np.exp(-(cumulative_hazard_event_1 + cumulative_hazard_event_2))

# Step 4: Calculate CIF for each event type
# CIF_k(t) = ∫ h_k(u) * S(u) du (approximated as cumulative sum)
cif_event_1 = (cumulative_hazard_event_1 * overall_survival).cumsum(axis=0)
cif_event_2 = (cumulative_hazard_event_2 * overall_survival).cumsum(axis=0)

# Step 5: Format and display the CIF predictions
cif_event_1_normalized = cif_event_1.div(cif_event_1.iloc[-1].max(), axis=1)
cif_event_2_normalized = cif_event_2.div(cif_event_2.iloc[-1].max(), axis=1)

# Compute the CIF ground truth
cif_ground_truth = np.zeros((2,6,396424))
cif_ground_truth[0] = align_to_time_grid(cif_event_1_normalized, TIME_GRID).values
cif_ground_truth[1] = align_to_time_grid(cif_event_2_normalized, TIME_GRID).values




In [220]:
_, y_ground_truth = preprocess_data(X_train_transformed, FEATURE_COLS, DURATION_COL, EVENT_COL, TIME_GRID, discretize=True)
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(cif_ground_truth[i], [0, 1, 2, 3, 4, 5])
    ev = EvalSurv(1-cif, y_ground_truth[0], y_ground_truth[1] == event_interest, censor_surv='km')
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # # Nam and D'Agostino Chi2 statistic for calibration
    # for time in time_grid:
    #     chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
    #         df=df_test, 
    #         duration_col=duration_col, 
    #         event_col=event_col,
    #         surv=(1-cif), 
    #         time=time, 
    #         event_focus=event_interest
    #     )
    #     nam_dagostino_results.append({
    #         'Event': event_interest,
    #         'Year': round(time / 365),
    #         'Chi2_Stat': chi2_stat,
    #         'P_Value': p_value,
    #         'Observed_Events': observed_events.tolist(),
    #         'Expected_Events': expected_events.tolist(),
    #         'Sample_Size': n.tolist()
    #     })
display(concordance_indices)
display(brier_series)
display(integrated_brier_scores)
display(neg_log_likelihoods)


{'Event_1': 0.9818982092210652, 'Event_2': 0.7033707118427875}

{'Event_1': 0       0.000000
 365     0.038203
 730     0.038203
 1095    0.038203
 1460    0.038203
 1825    0.038203
 Name: brier_score, dtype: float64,
 'Event_2': 0       0.000000
 365     0.085103
 730     0.085103
 1095    0.085103
 1460    0.085103
 1825    0.085103
 Name: brier_score, dtype: float64}

{'Event_1': 0.03565589439044669, 'Event_2': 0.07942951892202992}

{'Event_1': 0.17732477425591783, 'Event_2': 0.27434970836708145}

#### 5.2 Get the prediction for each model

In [211]:
gc.collect()
torch.cuda.empty_cache()

# Initialize dictionary to store combined predictions and y_test
combined_predictions = []
combined_y_test = []
cif_ground_truth_test = []

# Get unique keys and split them into 10 groups
unique_keys = X_train_transformed_2['key'].unique()
np.random.shuffle(unique_keys)  # Shuffle keys to ensure randomness
key_folds = np.array_split(unique_keys, 10)

for fold_idx, test_keys in enumerate(key_folds):
    print(f"Processing fold {fold_idx + 1}...")
    # Split the dataset into train and test based on keys
    X_test_fold = X_train_transformed_2[X_train_transformed_2['key'].isin(test_keys)]
    
    # Get the indices of X_test_fold relative to the original dataset
    test_indices = X_test_fold.index.to_numpy()

    # Extract CIF ground truth for these indices
    fold_cif_ground_truth = cif_ground_truth[:, :, test_indices]

    # Stack this fold's CIF ground truth
    cif_ground_truth_test.append(fold_cif_ground_truth)
    
    # Store predictions for this fold
    fold_predictions = {}
    
    X_test, y_test = preprocess_data(X_test_fold, FEATURE_COLS, DURATION_COL, EVENT_COL, TIME_GRID, discretize=True)
    combined_y_test.append(y_test)
    
    for model_name in model_ls:
        # Retrieve configuration by dynamically constructing the variable name
        config_var_name = model_name + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue
        try:
            print(f"Initiating prediction for model: {model_name}")
            
            # Retrieve the loaded model
            model = loaded_models.get(model_name)
            if model is None:
                print(f"Model {model_name} is not loaded.")
                continue
            
            # Predict using the loaded model and configuration
            surv, _ = predict_neural_network(
                model=model,
                config=model_config,
                X_test=X_test_fold,
                duration_col=DURATION_COL,
                event_col=EVENT_COL,
                cluster_col=CLUSTER_COL,
                time_grid=TIME_GRID
            )
            
            # Align survival probabilities (if DeepSurv)
            if model_config['model'] == 'deepsurv':
                surv = align_to_time_grid(surv, TIME_GRID).values  # 2D array
                
                # Structure key dynamically
                key = f"deepsurv_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(np.expand_dims(1 - surv, axis=0))
            
            # Handle DeepHit predictions
            elif model_config['model'] == 'deephit':
                surv = np.array(surv)  # Convert to numpy array
                
                # Structure key dynamically
                key = f"deephit_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(surv)
            
            print(f"Prediction completed for {model_name} on fold {fold_idx + 1}.")
        
        except Exception as e:
            print(f"Error during prediction for {model_name} on fold {fold_idx + 1}: {e}")
            
    combined_predictions.append(fold_predictions)

meta_learner_X_train = {}

for fold_predictions in combined_predictions:
    for model_key, fold_data in fold_predictions.items():
        if model_key not in meta_learner_X_train:
            meta_learner_X_train[model_key] = []
        
        # Align DeepSurv predictions to match DeepHit format
        if "deepsurv" in model_key:
            # Stack competing outcomes and remove the redundant dimension
            meta_learner_X_train[model_key].extend(
                [np.squeeze(np.stack(fold_data, axis=0), axis=1)]  # Squeeze out extra axis
            )
        else:
            # Keep DeepHit predictions as-is
            meta_learner_X_train[model_key].extend(fold_data)

# Combine y_test
meta_learner_y_train = (
    np.concatenate([fold[0] for fold in combined_y_test]),  # Concatenate all first elements
    np.concatenate([fold[1] for fold in combined_y_test])   # Concatenate all second elements
)

# Combine CIF ground truth for all folds
cif_ground_truth_test_stacked = np.concatenate(cif_ground_truth_test, axis=2)

print("Final predictions and y_test combined.")

2024-11-21 09:05:26,695 - INFO - Event column 'endpoint' updated with focus on event value 1.


Processing fold 1...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:05:27,340 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:05:36,046 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:05:45,899 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 1.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:05:46,603 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:05:48,259 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:05:50,237 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:05:50,246 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:06:05,425 - INFO - Validation data retrieved
2024-11-21 09:06:06,029 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:06:06,030 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:06:16,036 - INFO - Validation data retrieved
2024-11-21 09:06:16,577 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:06:16,578 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:06:26,632 - INFO - Validation data retrieved
2024-11-21 09:06:27,219 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:06:27,220 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:06:42,499 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 1.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 1.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 1.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:06:57,027 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 1.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:07:07,519 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 1.
Processing fold 2...
Initiating prediction for model: deepsurv_ann_clustering_1


2024-11-21 09:07:08,068 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:07:08,789 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:07:20,564 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:07:29,765 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 2.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:07:30,508 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:07:32,160 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:07:34,253 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:07:34,254 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:07:45,664 - INFO - Validation data retrieved
2024-11-21 09:07:50,048 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:07:50,050 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:08:01,090 - INFO - Validation data retrieved
2024-11-21 09:08:01,729 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:08:01,730 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:08:12,580 - INFO - Validation data retrieved
2024-11-21 09:08:13,219 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:08:13,220 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:08:28,603 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 2.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 2.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 2.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:08:43,157 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 2.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:08:53,719 - INFO - Validation data retrieved
2024-11-21 09:08:54,183 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 2.
Processing fold 3...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:08:54,820 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:09:06,640 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:09:15,959 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 3.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:09:16,739 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:09:18,562 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:09:20,643 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:09:20,644 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:09:32,088 - INFO - Validation data retrieved
2024-11-21 09:09:32,854 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:09:32,856 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:09:47,366 - INFO - Validation data retrieved
2024-11-21 09:09:47,959 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:09:47,960 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:09:57,866 - INFO - Validation data retrieved
2024-11-21 09:09:58,446 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:09:58,447 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:10:10,285 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 3.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 3.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 3.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:10:28,324 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 3.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:10:39,024 - INFO - Validation data retrieved
2024-11-21 09:10:39,445 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 3.
Processing fold 4...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:10:40,096 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:10:52,095 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:11:02,224 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 4.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:11:02,939 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:11:04,667 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:11:06,768 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:11:06,770 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:11:18,523 - INFO - Validation data retrieved
2024-11-21 09:11:19,275 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:11:19,276 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:11:34,059 - INFO - Validation data retrieved
2024-11-21 09:11:34,779 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:11:34,780 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:11:45,457 - INFO - Validation data retrieved
2024-11-21 09:11:46,110 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:11:46,111 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:11:57,901 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 4.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 4.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 4.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:12:16,418 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 4.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:12:27,234 - INFO - Validation data retrieved
2024-11-21 09:12:27,669 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 4.
Processing fold 5...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:12:28,298 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:12:40,139 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:12:49,757 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 5.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:12:50,455 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:12:52,158 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:12:54,182 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:12:54,184 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:13:05,830 - INFO - Validation data retrieved
2024-11-21 09:13:06,530 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:13:06,532 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:13:21,261 - INFO - Validation data retrieved
2024-11-21 09:13:21,876 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:13:21,877 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:13:32,105 - INFO - Validation data retrieved
2024-11-21 09:13:32,682 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:13:32,683 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:13:44,466 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 5.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 5.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 5.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:14:02,439 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 5.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:14:12,930 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 5.
Processing fold 6...
Initiating prediction for model: deepsurv_ann_clustering_1


2024-11-21 09:14:13,409 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:14:14,186 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:14:22,865 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:14:36,033 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 6.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:14:36,734 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:14:39,020 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:14:41,103 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:14:41,104 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:14:52,784 - INFO - Validation data retrieved
2024-11-21 09:14:53,485 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:14:53,486 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:15:07,747 - INFO - Validation data retrieved
2024-11-21 09:15:08,501 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:15:08,502 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:15:18,909 - INFO - Validation data retrieved
2024-11-21 09:15:19,577 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:15:19,578 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:15:31,710 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 6.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 6.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 6.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:15:50,217 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 6.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:16:00,905 - INFO - Validation data retrieved
2024-11-21 09:16:01,366 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 6.
Processing fold 7...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:16:01,995 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:16:10,303 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:16:23,795 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 7.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:16:24,552 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:16:26,413 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:16:28,413 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:16:28,415 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:16:39,692 - INFO - Validation data retrieved
2024-11-21 09:16:40,411 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:16:40,412 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:16:54,783 - INFO - Validation data retrieved
2024-11-21 09:16:55,451 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:16:55,452 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:17:05,978 - INFO - Validation data retrieved
2024-11-21 09:17:06,556 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:17:06,558 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:17:18,167 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 7.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 7.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 7.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:17:36,328 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 7.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:17:46,976 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 7.
Processing fold 8...
Initiating prediction for model: deepsurv_ann_clustering_1


2024-11-21 09:17:47,439 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:17:48,087 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:17:56,188 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:18:08,162 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 8.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:18:09,032 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:18:10,901 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:18:13,052 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:18:13,053 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:18:23,902 - INFO - Validation data retrieved
2024-11-21 09:18:24,693 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:18:24,694 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:18:34,955 - INFO - Validation data retrieved
2024-11-21 09:18:35,557 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:18:35,558 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:18:48,992 - INFO - Validation data retrieved
2024-11-21 09:18:49,613 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:18:49,614 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:19:00,778 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 8.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 8.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 8.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:19:18,341 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 8.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:19:28,399 - INFO - Validation data retrieved
2024-11-21 09:19:28,838 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 8.
Processing fold 9...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:19:29,510 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:19:37,934 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:19:47,339 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 9.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:19:48,041 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:19:53,372 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:19:55,430 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:19:55,432 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:20:06,880 - INFO - Validation data retrieved
2024-11-21 09:20:07,541 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:20:07,542 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:20:17,821 - INFO - Validation data retrieved
2024-11-21 09:20:18,417 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:20:18,418 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:20:32,135 - INFO - Validation data retrieved
2024-11-21 09:20:32,728 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:20:32,729 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:20:44,244 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 9.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 9.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 9.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:20:58,856 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 9.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:21:13,145 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 9.
Processing fold 10...
Initiating prediction for model: deepsurv_ann_clustering_1


2024-11-21 09:21:13,692 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:21:14,321 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:21:22,331 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:21:30,950 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 10.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:21:31,637 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:21:33,311 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:21:35,470 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:21:35,473 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:21:49,736 - INFO - Validation data retrieved
2024-11-21 09:21:50,426 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:21:50,427 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:22:00,446 - INFO - Validation data retrieved
2024-11-21 09:22:01,128 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:22:01,129 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:22:11,119 - INFO - Validation data retrieved
2024-11-21 09:22:11,753 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:22:11,754 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:22:26,261 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 10.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 10.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 10.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:22:40,206 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 10.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:22:53,764 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 10.
Final predictions and y_test combined.


In [212]:
# Combine all 10 items for each model's predictions
final_meta_learner_X_train = {}

for key, predictions in meta_learner_X_train.items():
    # Concatenate the predictions along the last axis
    final_meta_learner_X_train[key] = np.concatenate(predictions, axis=2)  # Combine along the feature axis

# Validate the shapes
for key, combined_prediction in final_meta_learner_X_train.items():
    print(f"{key}: final shape {combined_prediction.shape}")


deepsurv_ann_clustering: final shape (2, 6, 316242)
deepsurv_ann_enn: final shape (2, 6, 316242)
deepsurv_ann_tomek: final shape (2, 6, 316242)
deepsurv_lstm_clustering: final shape (2, 6, 316242)
deepsurv_lstm_NearMiss: final shape (2, 6, 316242)
deephit_ann_clustering: final shape (2, 6, 316242)
deephit_ann_NearMiss: final shape (2, 6, 316242)
deephit_lstm_clustering: final shape (2, 6, 316242)
deephit_lstm_NearMiss: final shape (2, 6, 316242)


In [213]:
display(np.shape(cif_ground_truth_test_stacked))

# Use the duration and event data as targets
durations = meta_learner_y_train[0]  # Time to event or censoring
events = meta_learner_y_train[1]  # Event type (competing risks)

print(durations.shape)
print(events.shape)
print(np.unique(events, return_counts=True))

(2, 6, 316242)

(316242,)
(316242,)
(array([0, 1, 2]), array([310074,   1509,   4659]))


### 6. Ensemble methods

#### 6.1 Prepare each model's prediction on validation set


In [214]:
gc.collect()
torch.cuda.empty_cache()

# Initialize dictionary to store combined predictions and y_test
combined_predictions = []
combined_y_val = []
cif_ground_truth_val = []

# Get unique keys and split them into 10 groups
unique_keys = X_fin_val['key'].unique()
np.random.shuffle(unique_keys)  # Shuffle keys to ensure randomness
key_folds = np.array_split(unique_keys, 10)

for fold_idx, test_keys in enumerate(key_folds):
    print(f"Processing fold {fold_idx + 1}...")
    # Split the dataset into train and test based on keys
    X_val_fold = X_fin_val[X_fin_val['key'].isin(test_keys)]
    
    # Get the indices of X_test_fold relative to the original dataset
    test_indices = X_val_fold.index.to_numpy()

    # Extract CIF ground truth for these indices
    cif_ground_truth_val_fold = cif_ground_truth[:, :, test_indices]

    # Stack this fold's CIF ground truth
    cif_ground_truth_val.append(cif_ground_truth_val_fold)
    
    # Store predictions for this fold
    fold_predictions = {}
    
    X_val, y_val = preprocess_data(X_val_fold, FEATURE_COLS, DURATION_COL, EVENT_COL, TIME_GRID, discretize=True)
    combined_y_val.append(y_val)
    
    for model_name in model_ls:
        # Retrieve configuration by dynamically constructing the variable name
        config_var_name = model_name + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue
        try:
            print(f"Initiating prediction for model: {model_name}")
            
            # Retrieve the loaded model
            model = loaded_models.get(model_name)
            if model is None:
                print(f"Model {model_name} is not loaded.")
                continue
            
            # Predict using the loaded model and configuration
            surv, _ = predict_neural_network(
                model=model,
                config=model_config,
                X_test=X_val_fold,
                duration_col=DURATION_COL,
                event_col=EVENT_COL,
                cluster_col=CLUSTER_COL,
                time_grid=TIME_GRID
            )
            
            # Align survival probabilities (if DeepSurv)
            if model_config['model'] == 'deepsurv':
                surv = align_to_time_grid(surv, TIME_GRID).values  # 2D array
                
                # Structure key dynamically
                key = f"deepsurv_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(np.expand_dims(1 - surv, axis=0))
            
            # Handle DeepHit predictions
            elif model_config['model'] == 'deephit':
                surv = np.array(surv)  # Convert to numpy array
                
                # Structure key dynamically
                key = f"deephit_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(surv)
            
            print(f"Prediction completed for {model_name} on fold {fold_idx + 1}.")
        
        except Exception as e:
            print(f"Error during prediction for {model_name} on fold {fold_idx + 1}: {e}")
            
    combined_predictions.append(fold_predictions)

meta_learner_X_val = {}

for fold_predictions in combined_predictions:
    for model_key, fold_data in fold_predictions.items():
        if model_key not in meta_learner_X_val:
            meta_learner_X_val[model_key] = []
        
        # Align DeepSurv predictions to match DeepHit format
        if "deepsurv" in model_key:
            # Stack competing outcomes and remove the redundant dimension
            meta_learner_X_val[model_key].extend(
                [np.squeeze(np.stack(fold_data, axis=0), axis=1)]  # Squeeze out extra axis
            )
        else:
            # Keep DeepHit predictions as-is
            meta_learner_X_val[model_key].extend(fold_data)

# Combine y_test
meta_learner_y_val = (
    np.concatenate([fold[0] for fold in combined_y_val]),  # Concatenate all first elements
    np.concatenate([fold[1] for fold in combined_y_val])   # Concatenate all second elements
)

# Combine CIF ground truth for all folds
cif_ground_truth_val_stacked = np.concatenate(cif_ground_truth_val, axis=2)

print("Validation predictions and y_val combined.")

Processing fold 1...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network


2024-11-21 09:22:54,543 - INFO - Event column 'endpoint' updated with focus on event value 1.


model structure: ANN


2024-11-21 09:22:55,018 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:22:57,418 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:22:59,719 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 1.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:00,173 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:00,963 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:01,759 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:23:01,760 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:04,373 - INFO - Validation data retrieved
2024-11-21 09:23:04,863 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:23:04,864 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:07,447 - INFO - Validation data retrieved
2024-11-21 09:23:07,903 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:23:07,904 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:10,422 - INFO - Validation data retrieved
2024-11-21 09:23:10,877 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:23:10,878 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:13,453 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 1.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 1.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 1.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:23:17,512 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 1.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:23:20,229 - INFO - Validation data retrieved
2024-11-21 09:23:20,588 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 1.
Processing fold 2...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:21,036 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:23,395 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:29,255 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 2.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:29,695 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:30,358 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:31,117 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:23:31,118 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:33,721 - INFO - Validation data retrieved
2024-11-21 09:23:34,167 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:23:34,168 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:36,730 - INFO - Validation data retrieved
2024-11-21 09:23:37,216 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:23:37,217 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:39,737 - INFO - Validation data retrieved
2024-11-21 09:23:40,210 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:23:40,211 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:42,687 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 2.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 2.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 2.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:23:46,659 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 2.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:23:49,331 - INFO - Validation data retrieved
2024-11-21 09:23:49,711 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 2.
Processing fold 3...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:50,121 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:52,463 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:54,633 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 3.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:55,079 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:55,741 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:23:56,496 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:23:56,497 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:23:59,069 - INFO - Validation data retrieved
2024-11-21 09:23:59,555 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:23:59,556 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:05,592 - INFO - Validation data retrieved
2024-11-21 09:24:06,087 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:24:06,088 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:08,515 - INFO - Validation data retrieved
2024-11-21 09:24:09,000 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:24:09,001 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:11,581 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 3.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 3.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 3.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:24:15,625 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 3.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:24:18,276 - INFO - Validation data retrieved
2024-11-21 09:24:18,643 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 3.
Processing fold 4...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:19,054 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:21,303 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:23,414 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 4.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:23,852 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:24,563 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:25,347 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:24:25,348 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:27,684 - INFO - Validation data retrieved
2024-11-21 09:24:28,135 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:24:28,136 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:30,436 - INFO - Validation data retrieved
2024-11-21 09:24:30,897 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:24:30,898 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:33,198 - INFO - Validation data retrieved
2024-11-21 09:24:33,655 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:24:33,656 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:35,972 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 4.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 4.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 4.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:24:43,440 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 4.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:24:46,000 - INFO - Validation data retrieved
2024-11-21 09:24:46,387 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 4.
Processing fold 5...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:46,847 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:49,189 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:51,357 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 5.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:51,792 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:52,520 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:24:53,259 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:24:53,260 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:55,726 - INFO - Validation data retrieved
2024-11-21 09:24:56,178 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:24:56,178 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:24:58,615 - INFO - Validation data retrieved
2024-11-21 09:24:59,091 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:24:59,091 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:01,503 - INFO - Validation data retrieved
2024-11-21 09:25:01,962 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:25:01,963 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:04,331 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 5.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 5.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 5.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:25:08,286 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 5.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:25:11,006 - INFO - Validation data retrieved
2024-11-21 09:25:11,396 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 5.
Processing fold 6...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:11,834 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:17,702 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:20,051 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 6.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:20,499 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:21,192 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:21,934 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:25:21,935 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:24,364 - INFO - Validation data retrieved
2024-11-21 09:25:24,828 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:25:24,829 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:27,208 - INFO - Validation data retrieved
2024-11-21 09:25:27,683 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:25:27,684 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:30,047 - INFO - Validation data retrieved
2024-11-21 09:25:30,531 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:25:30,532 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:33,053 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 6.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 6.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 6.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:25:37,120 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 6.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:25:39,782 - INFO - Validation data retrieved
2024-11-21 09:25:40,200 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 6.
Processing fold 7...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:40,822 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:43,190 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:45,436 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 7.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:45,876 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:46,653 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:25:47,397 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:25:47,398 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:53,465 - INFO - Validation data retrieved
2024-11-21 09:25:53,947 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:25:53,948 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:56,353 - INFO - Validation data retrieved
2024-11-21 09:25:56,842 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:25:56,843 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:25:59,214 - INFO - Validation data retrieved
2024-11-21 09:25:59,727 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:25:59,728 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:02,138 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 7.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 7.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 7.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:26:06,004 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 7.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:26:08,592 - INFO - Validation data retrieved
2024-11-21 09:26:08,968 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 7.
Processing fold 8...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:09,380 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:11,754 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:14,459 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 8.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:14,960 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:15,652 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:16,481 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:26:16,482 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:18,861 - INFO - Validation data retrieved
2024-11-21 09:26:19,325 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:26:19,326 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:21,633 - INFO - Validation data retrieved
2024-11-21 09:26:22,072 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:26:22,073 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:24,395 - INFO - Validation data retrieved
2024-11-21 09:26:24,874 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:26:24,875 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:30,924 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 8.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 8.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 8.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:26:34,978 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 8.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:26:37,489 - INFO - Validation data retrieved
2024-11-21 09:26:37,853 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 8.
Processing fold 9...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:38,276 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:40,655 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:42,929 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 9.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:43,413 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:44,310 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:26:45,070 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:26:45,071 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:47,500 - INFO - Validation data retrieved
2024-11-21 09:26:47,972 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:26:47,973 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:50,440 - INFO - Validation data retrieved
2024-11-21 09:26:50,890 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:26:50,891 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:53,301 - INFO - Validation data retrieved
2024-11-21 09:26:53,791 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:26:53,792 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:26:56,364 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 9.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 9.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 9.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:27:00,374 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 9.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:27:06,676 - INFO - Validation data retrieved
2024-11-21 09:27:07,043 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 9.
Processing fold 10...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:07,469 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:09,689 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:11,784 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 10.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:12,231 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:12,923 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:13,759 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:27:13,760 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:16,105 - INFO - Validation data retrieved
2024-11-21 09:27:16,519 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:27:16,520 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:18,689 - INFO - Validation data retrieved
2024-11-21 09:27:19,073 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:27:19,074 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:21,222 - INFO - Validation data retrieved
2024-11-21 09:27:21,631 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:27:21,631 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:23,891 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 10.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 10.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 10.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:27:27,644 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 10.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:27:30,129 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 10.
Validation predictions and y_val combined.


In [215]:
# Combine all 10 items for each model's predictions
final_meta_learner_X_val = {}

for key, predictions in meta_learner_X_val.items():
    # Concatenate the predictions along the last axis
    final_meta_learner_X_val[key] = np.concatenate(predictions, axis=2)  # Combine along the feature axis

# Validate the shapes
for key, combined_prediction in final_meta_learner_X_val.items():
    print(f"{key}: final shape {combined_prediction.shape}")

deepsurv_ann_clustering: final shape (2, 6, 80182)
deepsurv_ann_enn: final shape (2, 6, 80182)
deepsurv_ann_tomek: final shape (2, 6, 80182)
deepsurv_lstm_clustering: final shape (2, 6, 80182)
deepsurv_lstm_NearMiss: final shape (2, 6, 80182)
deephit_ann_clustering: final shape (2, 6, 80182)
deephit_ann_NearMiss: final shape (2, 6, 80182)
deephit_lstm_clustering: final shape (2, 6, 80182)
deephit_lstm_NearMiss: final shape (2, 6, 80182)


In [216]:
# Use the duration and event data as targets
durations_val = meta_learner_y_val[0]  # Time to event or censoring
events_val = meta_learner_y_val[1]  # Event type (competing risks)

print(durations_val.shape)
print(events_val.shape)
print(np.unique(events_val, return_counts=True))

(80182,)
(80182,)
(array([0, 1, 2]), array([78593,   416,  1173]))


#### 6.2 Prepare each model's prediction on test set

In [217]:
gc.collect()
torch.cuda.empty_cache()

# Initialize dictionary to store combined predictions and y_test
combined_predictions = []
combined_y_fin_test = []
cif_ground_truth_fin_test = []

# Get unique keys and split them into 10 groups
unique_keys = X_test_transformed['key'].unique()
np.random.shuffle(unique_keys)  # Shuffle keys to ensure randomness
key_folds = np.array_split(unique_keys, 10)

for fold_idx, test_keys in enumerate(key_folds):
    print(f"Processing fold {fold_idx + 1}...")
    # Split the dataset into train and test based on keys
    X_test_fold = X_test_transformed[X_test_transformed['key'].isin(test_keys)]
    
    # Get the indices of X_test_fold relative to the original dataset
    test_indices = X_test_transformed.index.to_numpy()
    
    # Store predictions for this fold
    fold_predictions = {}
    
    X_fin_test, y_fin_test = preprocess_data(X_test_fold, FEATURE_COLS, DURATION_COL, EVENT_COL, TIME_GRID, discretize=True)
    combined_y_fin_test.append(y_fin_test)
    
    for model_name in model_ls:
        # Retrieve configuration by dynamically constructing the variable name
        config_var_name = model_name + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue
        try:
            print(f"Initiating prediction for model: {model_name}")
            
            # Retrieve the loaded model
            model = loaded_models.get(model_name)
            if model is None:
                print(f"Model {model_name} is not loaded.")
                continue
            
            # Predict using the loaded model and configuration
            surv, _ = predict_neural_network(
                model=model,
                config=model_config,
                X_test=X_test_fold,
                duration_col=DURATION_COL,
                event_col=EVENT_COL,
                cluster_col=CLUSTER_COL,
                time_grid=TIME_GRID
            )
            
            # Align survival probabilities (if DeepSurv)
            if model_config['model'] == 'deepsurv':
                surv = align_to_time_grid(surv, TIME_GRID).values  # 2D array
                
                # Structure key dynamically
                key = f"deepsurv_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(np.expand_dims(1 - surv, axis=0))
            
            # Handle DeepHit predictions
            elif model_config['model'] == 'deephit':
                surv = np.array(surv)  # Convert to numpy array
                
                # Structure key dynamically
                key = f"deephit_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(surv)
            
            print(f"Prediction completed for {model_name} on fold {fold_idx + 1}.")
        
        except Exception as e:
            print(f"Error during prediction for {model_name} on fold {fold_idx + 1}: {e}")
            
    combined_predictions.append(fold_predictions)

meta_learner_X_fin_test = {}

for fold_predictions in combined_predictions:
    for model_key, fold_data in fold_predictions.items():
        if model_key not in meta_learner_X_fin_test:
            meta_learner_X_fin_test[model_key] = []
        
        # Align DeepSurv predictions to match DeepHit format
        if "deepsurv" in model_key:
            # Stack competing outcomes and remove the redundant dimension
            meta_learner_X_fin_test[model_key].extend(
                [np.squeeze(np.stack(fold_data, axis=0), axis=1)]  # Squeeze out extra axis
            )
        else:
            # Keep DeepHit predictions as-is
            meta_learner_X_fin_test[model_key].extend(fold_data)

# Combine y_test
meta_learner_y_fin_test = (
    np.concatenate([fold[0] for fold in combined_y_fin_test]),  # Concatenate all first elements
    np.concatenate([fold[1] for fold in combined_y_fin_test])   # Concatenate all second elements
)

print("Final test predictions and y_fin_test combined.")

Processing fold 1...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network


2024-11-21 09:27:30,742 - INFO - Event column 'endpoint' updated with focus on event value 1.


model structure: ANN


2024-11-21 09:27:31,161 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:32,589 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:33,923 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 1.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:34,359 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:34,908 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:35,464 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:27:35,465 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:36,654 - INFO - Validation data retrieved
2024-11-21 09:27:37,083 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:27:37,084 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:41,945 - INFO - Validation data retrieved
2024-11-21 09:27:42,383 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:27:42,384 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:43,645 - INFO - Validation data retrieved
2024-11-21 09:27:44,077 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:27:44,078 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:45,387 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 1.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 1.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 1.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:27:48,010 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 1.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:27:49,488 - INFO - Validation data retrieved
2024-11-21 09:27:49,849 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 1.
Processing fold 2...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:50,250 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:51,673 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:53,056 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 2.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:53,468 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:53,989 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:27:54,561 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:27:54,562 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:55,796 - INFO - Validation data retrieved
2024-11-21 09:27:56,200 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:27:56,201 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:57,431 - INFO - Validation data retrieved
2024-11-21 09:27:57,829 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:27:57,830 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:27:59,066 - INFO - Validation data retrieved
2024-11-21 09:27:59,479 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:27:59,480 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:00,744 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 2.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 2.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 2.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:28:03,323 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 2.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:28:04,880 - INFO - Validation data retrieved
2024-11-21 09:28:05,261 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 2.
Processing fold 3...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:05,649 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:06,984 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:08,326 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 3.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:08,760 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:09,308 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:09,934 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:28:09,935 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:11,124 - INFO - Validation data retrieved
2024-11-21 09:28:11,573 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:28:11,574 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:12,767 - INFO - Validation data retrieved
2024-11-21 09:28:13,200 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:28:13,201 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:17,965 - INFO - Validation data retrieved
2024-11-21 09:28:18,357 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:28:18,357 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:19,501 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 3.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 3.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 3.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:28:21,936 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 3.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:28:23,412 - INFO - Validation data retrieved
2024-11-21 09:28:23,783 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 3.
Processing fold 4...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:24,162 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:25,476 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:26,702 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 4.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:27,151 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:27,680 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:28,239 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:28:28,240 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:29,319 - INFO - Validation data retrieved
2024-11-21 09:28:29,733 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:28:29,734 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:30,800 - INFO - Validation data retrieved
2024-11-21 09:28:31,220 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:28:31,220 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:32,271 - INFO - Validation data retrieved
2024-11-21 09:28:32,660 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:28:32,660 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:33,775 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 4.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 4.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 4.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:28:36,132 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 4.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:28:37,535 - INFO - Validation data retrieved
2024-11-21 09:28:37,909 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 4.
Processing fold 5...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:38,291 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:39,595 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:40,989 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 5.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:41,425 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:41,982 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:42,630 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:28:42,631 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:43,790 - INFO - Validation data retrieved
2024-11-21 09:28:44,280 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:28:44,281 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:45,399 - INFO - Validation data retrieved
2024-11-21 09:28:45,814 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:28:45,814 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:46,903 - INFO - Validation data retrieved
2024-11-21 09:28:47,296 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:28:47,297 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:28:48,419 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 5.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 5.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 5.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:28:54,418 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 5.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:28:55,835 - INFO - Validation data retrieved
2024-11-21 09:28:56,211 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 5.
Processing fold 6...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:56,599 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:57,982 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:59,312 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 6.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:28:59,742 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:00,303 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:00,905 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:29:00,906 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:02,114 - INFO - Validation data retrieved
2024-11-21 09:29:02,531 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:29:02,532 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:03,685 - INFO - Validation data retrieved
2024-11-21 09:29:04,090 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:29:04,091 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:05,274 - INFO - Validation data retrieved
2024-11-21 09:29:05,675 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:29:05,676 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:06,880 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 6.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 6.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 6.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:29:09,386 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 6.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:29:10,891 - INFO - Validation data retrieved
2024-11-21 09:29:11,249 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 6.
Processing fold 7...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:11,650 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:13,004 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:14,234 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 7.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:14,771 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:15,331 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:15,936 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:29:15,937 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:17,083 - INFO - Validation data retrieved
2024-11-21 09:29:17,501 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:29:17,502 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:18,589 - INFO - Validation data retrieved
2024-11-21 09:29:19,016 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:29:19,017 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:20,076 - INFO - Validation data retrieved
2024-11-21 09:29:20,453 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:29:20,454 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:21,569 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 7.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 7.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 7.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:29:23,947 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 7.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:29:25,382 - INFO - Validation data retrieved
2024-11-21 09:29:25,742 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 7.
Processing fold 8...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:29,762 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:31,119 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:32,404 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 8.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:32,859 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:33,390 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:33,947 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:29:33,948 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:35,096 - INFO - Validation data retrieved
2024-11-21 09:29:35,489 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:29:35,490 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:36,645 - INFO - Validation data retrieved
2024-11-21 09:29:37,060 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:29:37,060 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:38,197 - INFO - Validation data retrieved
2024-11-21 09:29:38,592 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:29:38,593 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:39,783 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 8.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 8.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 8.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:29:42,232 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 8.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:29:43,713 - INFO - Validation data retrieved
2024-11-21 09:29:44,062 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 8.
Processing fold 9...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:44,465 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:45,863 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:47,196 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 9.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:47,669 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:48,259 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:48,824 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:29:48,825 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:50,049 - INFO - Validation data retrieved
2024-11-21 09:29:50,449 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:29:50,450 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:51,614 - INFO - Validation data retrieved
2024-11-21 09:29:52,020 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:29:52,021 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:53,177 - INFO - Validation data retrieved
2024-11-21 09:29:53,589 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:29:53,589 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:29:54,796 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 9.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 9.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 9.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:29:57,309 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 9.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:29:58,809 - INFO - Validation data retrieved
2024-11-21 09:29:59,165 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 9.
Processing fold 10...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:29:59,574 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:30:00,844 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:30:02,031 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 10.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:30:06,087 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:30:06,605 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-21 09:30:07,162 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:30:07,163 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:30:08,188 - INFO - Validation data retrieved
2024-11-21 09:30:08,565 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-21 09:30:08,566 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:30:09,586 - INFO - Validation data retrieved
2024-11-21 09:30:09,977 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:30:09,978 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:30:10,995 - INFO - Validation data retrieved
2024-11-21 09:30:11,409 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-21 09:30:11,410 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-21 09:30:12,465 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 10.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 10.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 10.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:30:14,746 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 10.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-21 09:30:16,095 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 10.
Final test predictions and y_fin_test combined.


In [218]:
# Combine all 10 items for each model's predictions
final_meta_learner_X_fin_test = {}

for key, predictions in meta_learner_X_fin_test.items():
    # Concatenate the predictions along the last axis
    final_meta_learner_X_fin_test[key] = np.concatenate(predictions, axis=2)  # Combine along the feature axis

# Validate the shapes
for key, combined_prediction in final_meta_learner_X_fin_test.items():
    print(f"{key}: final shape {combined_prediction.shape}")
    
# Use the duration and event data as targets
durations_fin_test = meta_learner_y_fin_test[0]  # Time to event or censoring
events_fin_test = meta_learner_y_fin_test[1]  # Event type (competing risks)

print(durations_fin_test.shape)
print(events_fin_test.shape)
print(np.unique(events_fin_test, return_counts=True))

deepsurv_ann_clustering: final shape (2, 6, 40513)
deepsurv_ann_enn: final shape (2, 6, 40513)
deepsurv_ann_tomek: final shape (2, 6, 40513)
deepsurv_lstm_clustering: final shape (2, 6, 40513)
deepsurv_lstm_NearMiss: final shape (2, 6, 40513)
deephit_ann_clustering: final shape (2, 6, 40513)
deephit_ann_NearMiss: final shape (2, 6, 40513)
deephit_lstm_clustering: final shape (2, 6, 40513)
deephit_lstm_NearMiss: final shape (2, 6, 40513)
(40513,)
(40513,)
(array([0, 1, 2]), array([39793,   200,   520]))


#### 6.2 Averaging

In [219]:
# Predictions from each model
predictions = [final_meta_learner_X_fin_test[key] for key in final_meta_learner_X_fin_test.keys()]

# Aggregate by majority voting for each time point and event type
average_predictions = np.mean(predictions, axis=0)

print("Average Voting Predictions shape:", average_predictions.shape)


Average Voting Predictions shape: (2, 6, 40513)


In [None]:
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
nam_dagostino_results = []  # Initialize as a list

for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(average_predictions[i], index=[0, 1, 2, 3, 4, 5])  # Use index correctly
    ev = EvalSurv(1 - cif, durations_fin_test, events_fin_test == event_interest, censor_surv='km')

    # Concordance index
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Brier score series
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)

    # Integrated Brier score
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)

    # Negative log-likelihood
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # Nam and D'Agostino Chi2 statistic for calibration
    for time_idx, time in enumerate(TIME_GRID):
        # Filter data for current time point
        mask = durations_fin_test <= time
        durations_filtered = durations_fin_test[mask]
        events_filtered = events_fin_test[mask]
        
        if len(durations_filtered) == 0:
            print(f"Skipping time {time} for event {event_interest} due to empty data.")
            continue
        
        try:
            # Calculate Nam-D'Agostino Chi² statistic
            chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
                df=pd.DataFrame({"durations": durations_filtered, "events": events_filtered}),
                duration_col="durations",
                event_col="events",
                surv=(1 - cif),  # Survival function
                time=time_idx,
                event_focus=event_interest
            )

            # Append results
            nam_dagostino_results.append({
                'Event': event_interest,
                'Year': round(time / 365),
                'Chi2_Stat': chi2_stat,
                'P_Value': p_value,
                'Observed_Events': observed_events.tolist() if isinstance(observed_events, np.ndarray) else observed_events,
                'Expected_Events': expected_events.tolist() if isinstance(expected_events, np.ndarray) else expected_events,
                'Sample_Size': n if isinstance(n, int) else n.tolist()
            })
        except ValueError as e:
            print(f"Error calculating Nam-D'Agostino Chi² at time {time} for event {event_interest}: {e}")

# Display results
print("Concordance Indices:", concordance_indices)
print("Brier Score Series:", brier_series)
print("Integrated Brier Scores:", integrated_brier_scores)
print("Negative Log-Likelihoods:", neg_log_likelihoods)
print("Nam-D'Agostino Results:", nam_dagostino_results)



Error calculating Nam-D'Agostino Chi² at time 0 for event 1: Length of values (40513) does not match length of index (19588)
Error calculating Nam-D'Agostino Chi² at time 0 for event 2: Length of values (40513) does not match length of index (19588)


                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.


Concordance Indices: {'Event_1': 0.9861141445942802, 'Event_2': 0.8047894453585371}
Brier Score Series: {'Event_1': 0       3.783331e-07
365     4.689445e-02
730     4.689445e-02
1095    4.689445e-02
1460    4.689445e-02
1825    4.689445e-02
Name: brier_score, dtype: float64, 'Event_2': 0       6.123067e-07
365     2.107838e-01
730     2.107838e-01
1095    2.107838e-01
1460    2.107838e-01
1825    2.107838e-01
Name: brier_score, dtype: float64}
Integrated Brier Scores: {'Event_1': 0.04376818205145606, 'Event_2': 0.19673160328047123}
Negative Log-Likelihoods: {'Event_1': 0.22729299286338747, 'Event_2': 0.5733443844258161}
Nam-D'Agostino Results: [{'Event': 1, 'Year': 1, 'Chi2_Stat': 0.12214437522851679, 'P_Value': 0.9982093122377609, 'Observed_Events': quantile
0    0.000000
1    0.000000
2    0.000000
3    0.000000
4    0.022289
Name: observed_probs, dtype: float64, 'Expected_Events': quantile
0    0.016495
1    0.018913
2    0.023356
3    0.026458
4    0.074863
Name: predicted_probs, 

                To resolve ties, data is randomly jittered.


In [None]:
np.unique(durations_fin_test)

array([0, 1, 2, 3, 4, 5])

#### 6.3 Stacking

##### 6.3.1 XGBoost

In [None]:
def train_and_predict_with_xgboost(outcome_idx, time_idx):
    """
    Train an XGBoost model using CIF targets for a specific outcome and time point.
    """
    # Training data
    stacking_inputs_train = np.array([
        final_meta_learner_X_train[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_train.keys()
    ]).T  # Shape: (num_train_samples, num_models)
    
    target_train = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Validation data
    stacking_inputs_val = np.array([
        final_meta_learner_X_val[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_val.keys()
    ]).T  # Shape: (num_val_samples, num_models)

    target_val = cif_ground_truth_val_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Test data
    stacking_inputs_test = np.array([
        final_meta_learner_X_fin_test[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_fin_test.keys()
    ]).T  # Shape: (num_test_samples, num_models)

    # Prepare DMatrix for XGBoost
    dtrain = xgb.DMatrix(stacking_inputs_train, label=target_train)
    dval = xgb.DMatrix(stacking_inputs_val, label=target_val)
    dtest = xgb.DMatrix(stacking_inputs_test)

    # Configure GPU training
    params = {'objective': 'reg:squarederror',
        'tree_method': 'gpu_hist',
        'max_depth': 3,
        'learning_rate': 0.014837295326564928,
        'subsample': 0.8168005866659258,
        'colsample_bytree': 0.8290129403377126,
        'lambda': 6.7539372305286465,
        'alpha': 0.5018353832953043}

    # Train the model with early stopping
    booster = xgb.train(
        params, dtrain, num_boost_round=500,
        evals=[(dtrain, "train"), (dval, "validation")],
        early_stopping_rounds=20,  # Stop if no improvement for 20 rounds
        verbose_eval=False
    )

    # Predict CIF for test patients
    cif_predictions_test = booster.predict(dtest)
    
    return outcome_idx, time_idx, cif_predictions_test

# Parallel processing for each outcome and time point
results = Parallel(n_jobs=-1)(
    delayed(train_and_predict_with_xgboost)(outcome_idx, time_idx)
    for outcome_idx in range(2)
    for time_idx in range(6)
)

# Assign results to the xgboost_predictions array
xgboost_predictions = np.zeros((2, 6, X_test_transformed.shape[0]))
for outcome_idx, time_idx, cif_predictions in results:
    xgboost_predictions[outcome_idx, time_idx] = cif_predictions

# Verify the shape of the stacked predictions
print("XGBoost Stacking Predictions Shape:", xgboost_predictions.shape)
# Expected output: (2, 6, 316242)



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_metho

XGBoost Stacking Predictions Shape: (2, 6, 40513)


In [160]:
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
nam_dagostino_results = []  # Initialize as a list

for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(xgboost_predictions[i], index=[0, 1, 2, 3, 4, 5])  # Use index correctly
    ev = EvalSurv(1 - cif, durations_fin_test, events_fin_test == event_interest, censor_surv='km')

    # Concordance index
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Brier score series
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)

    # Integrated Brier score
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)

    # Negative log-likelihood
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # Nam and D'Agostino Chi2 statistic for calibration
    for time_idx, time in enumerate(TIME_GRID):
        # Filter data for current time point
        mask = durations_fin_test <= time
        durations_filtered = durations_fin_test[mask]
        events_filtered = events_fin_test[mask]
        
        if len(durations_filtered) == 0:
            print(f"Skipping time {time} for event {event_interest} due to empty data.")
            continue
        
        try:
            # Calculate Nam-D'Agostino Chi² statistic
            chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
                df=pd.DataFrame({"durations": durations_filtered, "events": events_filtered}),
                duration_col="durations",
                event_col="events",
                surv=(1 - cif),  # Survival function
                time=time_idx,
                event_focus=event_interest
            )

            # Append results
            nam_dagostino_results.append({
                'Event': event_interest,
                'Year': round(time / 365),
                'Chi2_Stat': chi2_stat,
                'P_Value': p_value,
                'Observed_Events': observed_events.tolist() if isinstance(observed_events, np.ndarray) else observed_events,
                'Expected_Events': expected_events.tolist() if isinstance(expected_events, np.ndarray) else expected_events,
                'Sample_Size': n if isinstance(n, int) else n.tolist()
            })
        except ValueError as e:
            print(f"Error calculating Nam-D'Agostino Chi² at time {time} for event {event_interest}: {e}")

# Display results
print("Concordance Indices:", concordance_indices)
print("Brier Score Series:", brier_series)
print("Integrated Brier Scores:", integrated_brier_scores)
print("Negative Log-Likelihoods:", neg_log_likelihoods)
print("Nam-D'Agostino Results:", nam_dagostino_results)



Error calculating Nam-D'Agostino Chi² at time 0 for event 1: Length of values (40513) does not match length of index (19588)
Error calculating Nam-D'Agostino Chi² at time 0 for event 2: Length of values (40513) does not match length of index (19588)


                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.


Concordance Indices: {'Event_1': 0.9792338086166885, 'Event_2': 0.7275737517718853}
Brier Score Series: {'Event_1': 0       0.000000
365     0.034227
730     0.034227
1095    0.034227
1460    0.034227
1825    0.034227
Name: brier_score, dtype: float64, 'Event_2': 0       0.000000
365     0.127521
730     0.127521
1095    0.127521
1460    0.127521
1825    0.127521
Name: brier_score, dtype: float64}
Integrated Brier Scores: {'Event_1': 0.03194480400702308, 'Event_2': 0.11901920846077037}
Negative Log-Likelihoods: {'Event_1': 0.1744737147244062, 'Event_2': 0.37606509292105755}
Nam-D'Agostino Results: [{'Event': 1, 'Year': 1, 'Chi2_Stat': 0.02168567931458386, 'P_Value': 0.9999416396119769, 'Observed_Events': quantile
0    0.00000
1    0.00000
2    0.00000
3    0.00000
4    0.01914
Name: observed_probs, dtype: float64, 'Expected_Events': quantile
0    0.001888
1    0.002650
2    0.003851
3    0.007186
4    0.033434
Name: predicted_probs, dtype: float64, 'Sample_Size': [8104, 8101, 8103, 810

                To resolve ties, data is randomly jittered.
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [78]:
import numpy as np
import ray.train
import xgboost as xgb
import ray
from ray import tune
from sklearn.model_selection import train_test_split
from pycox.evaluation import EvalSurv
import pandas as pd

# Random seed for reproducibility
RANDOM_SEED = 42

def xgboost_training_wrapper(config, data):
    """
    A Ray Tune-compatible wrapper to train XGBoost models for all outcomes and time points,
    evaluate predictions using concordance index, and report mean concordance index.

    Args:
        config: Dictionary of hyperparameters provided by Ray Tune.
        data: Dictionary containing final_meta_learner_X_train, cif_ground_truth_test_stacked,
              durations, and events.
    """
    final_meta_learner_X_train = data["final_meta_learner_X_train"]
    cif_ground_truth_test_stacked = data["cif_ground_truth_test_stacked"]
    durations = data["durations"]
    events = data["events"]

    # Initialize predictions array (2 outcomes, 6 time points, 316242 patients)
    xgboost_predictions = np.zeros((2, 6, len(durations)))

    # Train models and make predictions for each outcome and time point
    for outcome_idx in range(2):
        for time_idx in range(6):
            # Prepare data
            stacking_inputs = np.array([
                final_meta_learner_X_train[key][outcome_idx, time_idx]
                for key in final_meta_learner_X_train.keys()
            ]).T  # Shape: (316242, 9)

            target = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Shape: (316242,)

            # Split data into training and validation sets
            X_train, X_val, y_train, y_val = train_test_split(
                stacking_inputs, target, test_size=0.2, random_state=RANDOM_SEED
            )

            # Prepare DMatrix for XGBoost
            dtrain = xgb.DMatrix(X_train, label=y_train)
            dval = xgb.DMatrix(X_val, label=y_val)

            # Train XGBoost model
            booster = xgb.train(
                config,
                dtrain,
                num_boost_round=500,
                evals=[(dtrain, "train"), (dval, "validation")],
                early_stopping_rounds=20,
                verbose_eval=False
            )

            # Predict CIF for all patients
            dtest = xgb.DMatrix(stacking_inputs)
            xgboost_predictions[outcome_idx, time_idx] = booster.predict(dtest)

    # Compute concordance index for each event
    concordance_indices = {}
    for i in range(2):
        event_interest = i + 1
        cif = pd.DataFrame(xgboost_predictions[i], index=[0, 1, 2, 3, 4, 5])
        ev = EvalSurv(1 - cif, durations, events == event_interest, censor_surv="km")
        concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Calculate mean concordance index
    mean_concordance_index = np.mean(list(concordance_indices.values()))

    # Report mean concordance index to Ray Tune
    ray.train.report({'c-index stat': mean_concordance_index})

# Define the search space
search_space = {
    "objective": "reg:squarederror",
    "tree_method": "gpu_hist",  # Use GPU acceleration
    "max_depth": tune.randint(3, 10),  # Maximum depth of trees
    "learning_rate": tune.loguniform(0.01, 0.3),  # Learning rate
    "subsample": tune.uniform(0.5, 1.0),  # Subsampling rate
    "colsample_bytree": tune.uniform(0.5, 1.0),  # Feature subsampling rate
    "lambda": tune.loguniform(1e-4, 10.0),  # L2 regularization term
    "alpha": tune.loguniform(1e-4, 10.0),  # L1 regularization term
}

# Prepare data
data = {
    "final_meta_learner_X_train": final_meta_learner_X_train,
    "cif_ground_truth_test_stacked": cif_ground_truth_test_stacked,
    "durations": durations,
    "events": events,
}

# Run Ray Tune
analysis = tune.run(
    tune.with_parameters(xgboost_training_wrapper, data=data),
    config=search_space,
    resources_per_trial={"cpu": 2, "gpu": 1},  # Adjust based on your resources
    num_samples=50,  # Number of hyperparameter configurations to try
    metric='c-index stat',  # Metric to optimize
    mode="max",  # Maximize the concordance index
    storage_path="/mnt/d/PYDataScience/g3_regress/data/results",  # Directory to store results
)

# Get the best configuration
best_config = analysis.get_best_config(metric="mean_concordance_index", mode="max")
print("Best hyperparameters:", best_config)


2024-11-17 18:31:06,984	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-11-17 20:15:33
Running for:,01:44:26.88
Memory:,100.8/117.9 GiB

Trial name,status,loc,alpha,colsample_bytree,lambda,learning_rate,max_depth,subsample,iter,total time (s),c-index stat
xgboost_training_wrapper_0df1a_00000,TERMINATED,192.168.236.234:1003253,0.0141936,0.872683,0.00338673,0.0666382,4,0.614585,1,114.835,0.85425
xgboost_training_wrapper_0df1a_00001,TERMINATED,192.168.236.234:1003687,0.0063679,0.965151,2.80002,0.0217801,8,0.629694,1,130.689,0.852337
xgboost_training_wrapper_0df1a_00002,TERMINATED,192.168.236.234:1004147,0.0244766,0.600954,0.00310441,0.228248,3,0.803935,1,112.6,0.853303
xgboost_training_wrapper_0df1a_00003,TERMINATED,192.168.236.234:1004568,1.89679,0.520151,7.01459,0.0431147,4,0.88264,1,114.363,0.856744
xgboost_training_wrapper_0df1a_00004,TERMINATED,192.168.236.234:1004988,0.0766019,0.689039,0.0100393,0.0121418,8,0.970722,1,132.607,0.855037
xgboost_training_wrapper_0df1a_00005,TERMINATED,192.168.236.234:1005454,0.000204935,0.638219,0.00213184,0.0719649,8,0.769812,1,126.329,0.848903
xgboost_training_wrapper_0df1a_00006,TERMINATED,192.168.236.234:1005910,0.00897389,0.700659,6.11243,0.0141268,5,0.515068,1,115.955,0.858138
xgboost_training_wrapper_0df1a_00007,TERMINATED,192.168.236.234:1006335,0.261425,0.997296,0.0508392,0.140841,8,0.684653,1,125.169,0.848343
xgboost_training_wrapper_0df1a_00008,TERMINATED,192.168.236.234:1006794,0.00456015,0.958849,0.000103928,0.0159644,5,0.863895,1,119.522,0.857573
xgboost_training_wrapper_0df1a_00009,TERMINATED,192.168.236.234:1007216,0.00350792,0.708339,0.00135998,0.0230098,3,0.719504,1,112.981,0.859644


Trial name,c-index stat
xgboost_training_wrapper_0df1a_00000,0.85425
xgboost_training_wrapper_0df1a_00001,0.852337
xgboost_training_wrapper_0df1a_00002,0.853303
xgboost_training_wrapper_0df1a_00003,0.856744
xgboost_training_wrapper_0df1a_00004,0.855037
xgboost_training_wrapper_0df1a_00005,0.848903
xgboost_training_wrapper_0df1a_00006,0.858138
xgboost_training_wrapper_0df1a_00007,0.848343
xgboost_training_wrapper_0df1a_00008,0.857573
xgboost_training_wrapper_0df1a_00009,0.859644


2024-11-17 18:33:07,651 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:35:20,666 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:37:15,012 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:39:11,679 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:41:26,716 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:43:35,691 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:45:37,053 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:47:44,061 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:49:45,263 - INFO - Summary name ray/tune/c-index s

Best hyperparameters: None


In [122]:
import os
import json

# Define the base directory containing the trial folders
base_dir = "/mnt/d/PYDataScience/g3_regress/data/results/xgboost_training_wrapper_2024-11-17_18-31-06"

# Initialize variables to store the max c-index and corresponding config
max_c_index = float("-inf")
best_config = None

# Iterate through all folders and parse the result.json files
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file == "result.json":
            file_path = os.path.join(root, file)
            with open(file_path, "r") as f:
                data = json.load(f)
                if "c-index stat" in data:
                    c_index = data["c-index stat"]
                    # Update max c-index and config if a new max is found
                    if c_index > max_c_index:
                        max_c_index = c_index
                        best_config = data.get("config", None)

# Display the results
max_c_index, best_config


(0.8614533246252081,
 {'objective': 'reg:squarederror',
  'tree_method': 'gpu_hist',
  'max_depth': 3,
  'learning_rate': 0.014837295326564928,
  'subsample': 0.8168005866659258,
  'colsample_bytree': 0.8290129403377126,
  'lambda': 6.7539372305286465,
  'alpha': 0.5018353832953043})

##### 6.3.2 Linear Regression

In [142]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np
from joblib import Parallel, delayed

# Initialize the final stacking predictions array
lineregression_predictions = np.zeros((2, 6, X_test_transformed.shape[0]))  # (2 outcomes, 6 time points, 316242 patients)

def train_and_predict_with_linear_regression(outcome_idx, time_idx):
    """
    Train a Linear Regression model using CIF targets for a specific outcome and time point.
    """
    # Prepare stacking inputs and targets
    # Training data
    stacking_inputs_train = np.array([
        final_meta_learner_X_train[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_train.keys()
    ]).T  # Shape: (num_train_samples, num_models)
    
    target_train = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Validation data
    stacking_inputs_val = np.array([
        final_meta_learner_X_val[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_val.keys()
    ]).T  # Shape: (num_val_samples, num_models)

    target_val = cif_ground_truth_val_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Test data
    stacking_inputs_test = np.array([
        final_meta_learner_X_fin_test[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_fin_test.keys()
    ]).T  # Shape: (num_test_samples, num_models)

    # Train the Linear Regression model
    model = LinearRegression()
    model.fit(stacking_inputs_train, target_train)

    # Predict CIF for all patients
    cif_predictions = model.predict(stacking_inputs_test)

    return outcome_idx, time_idx, cif_predictions

# Parallel processing for each outcome and time point
results = Parallel(n_jobs=-1)(
    delayed(train_and_predict_with_linear_regression)(outcome_idx, time_idx)
    for outcome_idx in range(2)
    for time_idx in range(6)
)

# Assign results to the lineregression_predictions array
for outcome_idx, time_idx, cif_predictions in results:
    lineregression_predictions[outcome_idx, time_idx] = cif_predictions

# Verify the shape of the stacked predictions
print("Linear Regression Stacking Predictions Shape:", lineregression_predictions.shape)

Linear Regression Stacking Predictions Shape: (2, 6, 40513)


In [161]:
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
nam_dagostino_results = []  # Initialize as a list

for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(lineregression_predictions[i], index=[0, 1, 2, 3, 4, 5])  # Use index correctly
    ev = EvalSurv(1 - cif, durations_fin_test, events_fin_test == event_interest, censor_surv='km')

    # Concordance index
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Brier score series
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)

    # Integrated Brier score
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)

    # Negative log-likelihood
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # Nam and D'Agostino Chi2 statistic for calibration
    for time_idx, time in enumerate(TIME_GRID):
        # Filter data for current time point
        mask = durations_fin_test <= time
        durations_filtered = durations_fin_test[mask]
        events_filtered = events_fin_test[mask]
        
        if len(durations_filtered) == 0:
            print(f"Skipping time {time} for event {event_interest} due to empty data.")
            continue
        
        try:
            # Calculate Nam-D'Agostino Chi² statistic
            chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
                df=pd.DataFrame({"durations": durations_filtered, "events": events_filtered}),
                duration_col="durations",
                event_col="events",
                surv=(1 - cif),  # Survival function
                time=time_idx,
                event_focus=event_interest
            )

            # Append results
            nam_dagostino_results.append({
                'Event': event_interest,
                'Year': round(time / 365),
                'Chi2_Stat': chi2_stat,
                'P_Value': p_value,
                'Observed_Events': observed_events.tolist() if isinstance(observed_events, np.ndarray) else observed_events,
                'Expected_Events': expected_events.tolist() if isinstance(expected_events, np.ndarray) else expected_events,
                'Sample_Size': n if isinstance(n, int) else n.tolist()
            })
        except ValueError as e:
            print(f"Error calculating Nam-D'Agostino Chi² at time {time} for event {event_interest}: {e}")

# Display results
print("Concordance Indices:", concordance_indices)
print("Brier Score Series:", brier_series)
print("Integrated Brier Scores:", integrated_brier_scores)
print("Negative Log-Likelihoods:", neg_log_likelihoods)
print("Nam-D'Agostino Results:", nam_dagostino_results)



Error calculating Nam-D'Agostino Chi² at time 0 for event 1: Length of values (40513) does not match length of index (19588)
Error calculating Nam-D'Agostino Chi² at time 0 for event 2: Length of values (40513) does not match length of index (19588)


                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.


Concordance Indices: {'Event_1': 0.979147976082038, 'Event_2': 0.7546631563758323}
Brier Score Series: {'Event_1': 0       0.00000
365     0.03744
730     0.03744
1095    0.03744
1460    0.03744
1825    0.03744
Name: brier_score, dtype: float64, 'Event_2': 0       0.000000
365     0.116634
730     0.116634
1095    0.116634
1460    0.116634
1825    0.116634
Name: brier_score, dtype: float64}
Integrated Brier Scores: {'Event_1': 0.034943942825916126, 'Event_2': 0.1088581410271849}
Negative Log-Likelihoods: {'Event_1': 0.18104063587078995, 'Event_2': 0.3927945233629699}
Nam-D'Agostino Results: [{'Event': 1, 'Year': 1, 'Chi2_Stat': 0.020654911205768265, 'P_Value': 0.9999470375752154, 'Observed_Events': quantile
0    0.000000
1    0.000000
2    0.000000
3    0.000000
4    0.019646
Name: observed_probs, dtype: float64, 'Expected_Events': quantile
0    0.001286
1    0.002432
2    0.003715
3    0.006436
4    0.035073
Name: predicted_probs, dtype: float64, 'Sample_Size': [8103, 8102, 8103, 8102

                To resolve ties, data is randomly jittered.


##### 6.3.3 RandomForest regression

In [125]:
from sklearn.ensemble import RandomForestRegressor
from joblib import Parallel, delayed
import numpy as np

def train_and_predict_with_rf(outcome_idx, time_idx):
    """
    Train a Random Forest model using CIF targets for a specific outcome and time point.
    Use pre-prepared validation and test predictions.
    """
    # Prepare training data
    stacking_inputs_train = np.array([
        final_meta_learner_X_train[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_train.keys()
    ]).T  # Shape: (num_train_samples, num_models)
    
    target_train = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Validation data
    stacking_inputs_val = np.array([
        final_meta_learner_X_val[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_val.keys()
    ]).T  # Shape: (num_val_samples, num_models)

    target_val = cif_ground_truth_val_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Test data
    stacking_inputs_test = np.array([
        final_meta_learner_X_fin_test[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_fin_test.keys()
    ]).T  # Shape: (num_test_samples, num_models)

    # Train the Random Forest Regressor
    model = RandomForestRegressor(
        n_estimators=168,
        max_depth=3,
        min_samples_split=3,
        min_samples_leaf=4,
        random_state=RANDOM_SEED,
        max_features='sqrt',
        n_jobs=-1  # Use all available cores,
        
    )
    model.fit(stacking_inputs_train, target_train, )

    # Predict CIF for all patients in the test set
    cif_predictions = model.predict(stacking_inputs_test)

    return outcome_idx, time_idx, cif_predictions

# Parallel processing for each outcome and time point
rf_predictions = np.zeros((2, 6, X_test_transformed.shape[0]))  # Initialize prediction array
results = Parallel(n_jobs=-1)(
    delayed(train_and_predict_with_rf)(outcome_idx, time_idx)
    for outcome_idx in range(2)
    for time_idx in range(6)
)

# Assign results to the rf_predictions array
for outcome_idx, time_idx, cif_predictions in results:
    rf_predictions[outcome_idx, time_idx] = cif_predictions

# Verify the shape of the stacked predictions
print("Random Forest Stacking Predictions Shape:", rf_predictions.shape)
# Expected output: (2, 6, num_test_samples)


Random Forest Stacking Predictions Shape: (2, 6, 40513)


In [162]:
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
nam_dagostino_results = []  # Initialize as a list

for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(rf_predictions[i], index=[0, 1, 2, 3, 4, 5])  # Use index correctly
    ev = EvalSurv(1 - cif, durations_fin_test, events_fin_test == event_interest, censor_surv='km')

    # Concordance index
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Brier score series
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)

    # Integrated Brier score
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)

    # Negative log-likelihood
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # Nam and D'Agostino Chi2 statistic for calibration
    for time_idx, time in enumerate(TIME_GRID):
        # Filter data for current time point
        mask = durations_fin_test <= time
        durations_filtered = durations_fin_test[mask]
        events_filtered = events_fin_test[mask]
        
        if len(durations_filtered) == 0:
            print(f"Skipping time {time} for event {event_interest} due to empty data.")
            continue
        
        try:
            # Calculate Nam-D'Agostino Chi² statistic
            chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
                df=pd.DataFrame({"durations": durations_filtered, "events": events_filtered}),
                duration_col="durations",
                event_col="events",
                surv=(1 - cif),  # Survival function
                time=time_idx,
                event_focus=event_interest
            )

            # Append results
            nam_dagostino_results.append({
                'Event': event_interest,
                'Year': round(time / 365),
                'Chi2_Stat': chi2_stat,
                'P_Value': p_value,
                'Observed_Events': observed_events.tolist() if isinstance(observed_events, np.ndarray) else observed_events,
                'Expected_Events': expected_events.tolist() if isinstance(expected_events, np.ndarray) else expected_events,
                'Sample_Size': n if isinstance(n, int) else n.tolist()
            })
        except ValueError as e:
            print(f"Error calculating Nam-D'Agostino Chi² at time {time} for event {event_interest}: {e}")

# Display results
print("Concordance Indices:", concordance_indices)
print("Brier Score Series:", brier_series)
print("Integrated Brier Scores:", integrated_brier_scores)
print("Negative Log-Likelihoods:", neg_log_likelihoods)
print("Nam-D'Agostino Results:", nam_dagostino_results)



Error calculating Nam-D'Agostino Chi² at time 0 for event 1: Length of values (40513) does not match length of index (19588)
Error calculating Nam-D'Agostino Chi² at time 0 for event 2: Length of values (40513) does not match length of index (19588)


                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.


Concordance Indices: {'Event_1': 0.9796230921738424, 'Event_2': 0.7554581494270163}
Brier Score Series: {'Event_1': 0       0.000000
365     0.053315
730     0.053315
1095    0.053315
1460    0.053315
1825    0.053315
Name: brier_score, dtype: float64, 'Event_2': 0       0.000000
365     0.099329
730     0.099329
1095    0.099329
1460    0.099329
1825    0.099329
Name: brier_score, dtype: float64}
Integrated Brier Scores: {'Event_1': 0.0497605494173283, 'Event_2': 0.0927068468793533}
Negative Log-Likelihoods: {'Event_1': 0.23971583618508627, 'Event_2': 0.3280149880111438}
Nam-D'Agostino Results: [{'Event': 1, 'Year': 1, 'Chi2_Stat': 0.02078836491212447, 'P_Value': 0.9999463533563095, 'Observed_Events': quantile
0    0.000000
1    0.000000
2    0.000000
3    0.000000
4    0.020235
Name: observed_probs, dtype: float64, 'Expected_Events': quantile
0    0.003513
1    0.003532
2    0.003596
3    0.005635
4    0.032308
Name: predicted_probs, dtype: float64, 'Sample_Size': [12648, 3869, 7812,

                To resolve ties, data is randomly jittered.


In [118]:
def random_forest_training_wrapper(config, data):
    """
    A Ray Tune-compatible wrapper to train Random Forest models for all outcomes and time points,
    evaluate predictions using concordance index, and report mean concordance index.

    Args:
        config: Dictionary of hyperparameters provided by Ray Tune.
        data: Dictionary containing final_meta_learner_X_train, cif_ground_truth_test_stacked,
              durations, and events.
    """
    final_meta_learner_X_train = data["final_meta_learner_X_train"]
    final_meta_learner_X_val = data["final_meta_learner_X_val"]
    cif_ground_truth_test_stacked = data["cif_ground_truth_test_stacked"]
    cif_ground_truth_val_stacked = data["cif_ground_truth_val_stacked"]
    durations_val = data["durations_val"]
    events_val = data["events_val"]

    # Initialize predictions array (2 outcomes, 6 time points, number of patients)
    rf_predictions = np.zeros((2, 6, X_fin_val.shape[0]))

    # Train models and make predictions for each outcome and time point
    for outcome_idx in range(2):
        for time_idx in range(6):
            # Prepare training data
            stacking_inputs_train = np.array([
                final_meta_learner_X_train[key][outcome_idx, time_idx]
                for key in final_meta_learner_X_train.keys()
            ]).T  # Shape: (num_train_samples, num_models)
            
            target_train = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Correct indexing

            # Validation data
            stacking_inputs_val = np.array([
                final_meta_learner_X_val[key][outcome_idx, time_idx]
                for key in final_meta_learner_X_val.keys()
            ]).T  # Shape: (num_val_samples, num_models)

            target_val = cif_ground_truth_val_stacked[outcome_idx, time_idx, :]  # Correct indexing

            # Train Random Forest model
            model = RandomForestRegressor(
                n_estimators=config["n_estimators"],
                max_depth=config["max_depth"],
                min_samples_split=config["min_samples_split"],
                min_samples_leaf=config["min_samples_leaf"],
                max_features=config["max_features"],
                random_state=RANDOM_SEED,
                n_jobs=-1
            )
            model.fit(stacking_inputs_train , target_train)

            # Predict CIF for all patients
            rf_predictions[outcome_idx, time_idx] = model.predict(stacking_inputs_val)

    # Compute concordance index for each event
    concordance_indices = {}
    for i in range(2):
        event_interest = i + 1
        cif = pd.DataFrame(rf_predictions[i], index=[0, 1, 2, 3, 4, 5])
        ev = EvalSurv(1 - cif, durations_val, events_val == event_interest, censor_surv="km")
        concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Calculate mean concordance index
    mean_concordance_index = np.mean(list(concordance_indices.values()))

    # Report mean concordance index to Ray Tune
    ray.train.report({'c-index_stat': mean_concordance_index})

# Define the search space
search_space = {
    "n_estimators": tune.randint(50, 500),  # Number of trees in the forest
    "max_depth": tune.randint(3, 20),  # Maximum depth of the tree
    "min_samples_split": tune.randint(2, 10),  # Minimum number of samples required to split an internal node
    "min_samples_leaf": tune.randint(1, 10),  # Minimum number of samples required to be at a leaf node
    "max_features": tune.choice(["sqrt", "log2"])  # Number of features to consider when looking for the best split
}

# Prepare data
data = {
    "final_meta_learner_X_train": final_meta_learner_X_train,
    "final_meta_learner_X_val": final_meta_learner_X_val,
    "cif_ground_truth_test_stacked": cif_ground_truth_test_stacked,
    "cif_ground_truth_val_stacked": cif_ground_truth_val_stacked,
    "durations_val": durations_val,
    "events_val": events_val,
}

ray.shutdown()
ray.init()
# Run Ray Tune
analysis = tune.run(
    tune.with_parameters(random_forest_training_wrapper, data=data),
    config=search_space,
    resources_per_trial={"cpu": 20, "gpu": 0},  # Random Forest does not require GPU
    num_samples=50,  # Number of hyperparameter configurations to try
    metric="c-index_stat",  # Metric to optimize
    mode="max",  # Maximize the concordance index
    storage_path="/mnt/d/PYDataScience/g3_regress/data/results",  # Directory to store results
    verbose=2
)

# Get the best configuration
best_config = analysis.get_best_config(metric="c-index stat", mode="max")
print("Best hyperparameters:", best_config)

ray.shutdown()

2024-11-19 07:06:14,475	INFO worker.py:1807 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
2024-11-19 07:06:15,356	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-11-19 08:39:23
Running for:,01:33:07.76
Memory:,53.2/117.9 GiB

Trial name,status,loc,max_depth,max_features,min_samples_leaf,min_samples_split,n_estimators,iter,total time (s),c-index_stat
random_forest_training_wrapper_b6411_00000,TERMINATED,192.168.236.234:1387505,9,sqrt,5,8,164,1,73.9574,0.857171
random_forest_training_wrapper_b6411_00001,TERMINATED,192.168.236.234:1388561,7,log2,9,2,411,1,139.173,0.859153
random_forest_training_wrapper_b6411_00002,TERMINATED,192.168.236.234:1389744,3,sqrt,3,2,420,1,70.2625,0.861421
random_forest_training_wrapper_b6411_00003,TERMINATED,192.168.236.234:1390733,17,sqrt,3,5,201,1,139.298,0.849436
random_forest_training_wrapper_b6411_00004,TERMINATED,192.168.236.234:1391916,12,sqrt,6,3,368,1,190.577,0.853591
random_forest_training_wrapper_b6411_00005,TERMINATED,192.168.236.234:1393240,5,log2,7,5,97,1,28.2665,0.860848
random_forest_training_wrapper_b6411_00006,TERMINATED,192.168.236.234:1394131,9,log2,8,9,207,1,89.8782,0.857037
random_forest_training_wrapper_b6411_00007,TERMINATED,192.168.236.234:1395194,6,log2,3,9,168,1,57.0302,0.860378
random_forest_training_wrapper_b6411_00008,TERMINATED,192.168.236.234:1396143,3,sqrt,2,8,128,1,24.5496,0.861465
random_forest_training_wrapper_b6411_00009,TERMINATED,192.168.236.234:1397018,9,log2,6,6,429,1,179.317,0.857075




Trial name,c-index_stat
random_forest_training_wrapper_b6411_00000,0.857171
random_forest_training_wrapper_b6411_00001,0.859153
random_forest_training_wrapper_b6411_00002,0.861421
random_forest_training_wrapper_b6411_00003,0.849436
random_forest_training_wrapper_b6411_00004,0.853591
random_forest_training_wrapper_b6411_00005,0.860848
random_forest_training_wrapper_b6411_00006,0.857037
random_forest_training_wrapper_b6411_00007,0.860378
random_forest_training_wrapper_b6411_00008,0.861465
random_forest_training_wrapper_b6411_00009,0.857075


2024-11-19 08:39:23,246	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/mnt/d/PYDataScience/g3_regress/data/results/random_forest_training_wrapper_2024-11-19_07-06-15' in 1.2829s.
2024-11-19 08:39:23,252	INFO tune.py:1041 -- Total run time: 5587.90 seconds (5586.48 seconds for the tuning loop).


Best hyperparameters: None


In [124]:
import os
import json

# Define the base directory containing the trial folders
base_dir = "/mnt/d/PYDataScience/g3_regress/data/results/random_forest_training_wrapper_2024-11-19_07-06-15"

# Initialize variables to store the max c-index and corresponding config
max_c_index = float("-inf")
best_config = None

# Iterate through all folders and parse the result.json files
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file == "result.json":
            file_path = os.path.join(root, file)
            with open(file_path, "r") as f:
                data = json.load(f)
                if "c-index_stat" in data:
                    c_index = data["c-index_stat"]
                    # Update max c-index and config if a new max is found
                    if c_index > max_c_index:
                        max_c_index = c_index
                        best_config = data.get("config", None)

# Display the results
max_c_index, best_config


(0.8617197160068155,
 {'n_estimators': 168,
  'max_depth': 3,
  'min_samples_split': 3,
  'min_samples_leaf': 4,
  'max_features': 'sqrt'})