### 1. Import libraries

In [164]:
import gc
import json
import os
import math
import multiprocessing
import numpy as np
import pandas as pd
import torch
import importlib
import logging
from pathlib import Path
from sklearn.model_selection import GroupKFold, GroupShuffleSplit

# Pycox and PyTorch tuples for survival analysis
import torchtuples as tt
import pycox
from pycox.preprocessing.label_transforms import LabTransDiscreteTime
from pycox.models import CoxPH, DeepHit
from pycox.evaluation import EvalSurv

# Ray for hyperparameter tuning and distributed processing
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.search.optuna import OptunaSearch
from ray.tune.search import ConcurrencyLimiter
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining
from ray.air import session
import ray.cloudpickle as pickle

# Custom modules for data handling, balancing, training, evaluation, and model architectures
import dataloader2
import databalancer2
import datatrainer2
import modeleval
import netweaver2

# Reload custom modules to ensure latest changes are available
importlib.reload(dataloader2)
importlib.reload(databalancer2)
importlib.reload(datatrainer2)
importlib.reload(modeleval)
importlib.reload(netweaver2)

# Import specific functions from custom modules to keep code clean and readable
from netweaver2 import (
    lstm_net_init, DHANNWrapper, LSTMWrapper, generalized_ann_net_init
)
from dataloader2 import (
    load_and_transform_data, preprocess_data #stack_sequences, dh_dataset_loader
)
from databalancer2 import (
    define_medoid_general, df_event_focus, rebalance_data, underbalance_data_general, medoid_cluster, 
    dh_rebalance_data
)
from datatrainer2 import (
    recursive_clustering, prepare_training_data, 
    prepare_validation_data, lstm_training
)
from modeleval import (
    dh_test_model, nam_dagostino_chi2, get_baseline_hazard_at_timepoints, combined_test_model
)

import psutil
torch.cuda.empty_cache()
gc.collect()

80

### 2. Define constants, load datasets

In [165]:
# Define Constants and Load Datasets
RANDOM_SEED = 12345
N_SPLIT = 2
FEATURE_COLS = ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
DURATION_COL = 'date_from_sub_60'
EVENT_COL = 'endpoint'
CLUSTER_COL = 'key'
TIME_GRID = np.array([i * 365 for i in range(6)])

# Define Feature Groups
CAT_FEATURES = ['gender', 'dm', 'ht', 'sprint']
LOG_FEATURES = ['a1c', 'po4', 'UACR_mg_g', 'Cr']
STANDARD_FEATURES = ['age', 'alb', 'ca', 'hb', 'hco3']
PASSTHROUGH_FEATURES = ['key', 'date_from_sub_60', 'endpoint']

# Load and Transform Data
BASE_FILENAME = '/mnt/d/pydatascience/g3_regress/data/X/X_20240628'
X_train_transformed, X_test_transformed = load_and_transform_data(
    BASE_FILENAME, CAT_FEATURES, LOG_FEATURES, STANDARD_FEATURES, PASSTHROUGH_FEATURES
)

2024-11-21 00:04:32,551 - INFO - Transforming training data...
2024-11-21 00:04:45,062 - INFO - Transforming test data...


### 3. Train and save models
- model naming: {deepsurv/deephit}\_{nn}\_{resample method}_{outcome}
- for deepsurv model, only the result in time_grid will be retrieved so the result of deepsurv and deephit models are compatible

In [6]:
def create_neural_network(config, num_risk = len(X_train_transformed[EVENT_COL].unique()) - 1, num_time_bins=len(TIME_GRID)):
    """
    Function to create a neural network based on the given configuration.

    Args:
        config (dict): Configuration dictionary containing model type, network type, and hyperparameters.

    Returns:
        torch.nn.Module: Created neural network model.
    """
    gc.collect()
    torch.cuda.empty_cache()
    if config['model'] == 'deepsurv':
        num_risk = None
        num_time_bins=None
    elif config['model'] == 'deephit':
        num_risk = num_risk
        num_time_bins = num_time_bins
    # Create the Neural Network
    if config['net'] == 'ann':
        net = generalized_ann_net_init(
            input_size=len(config['features']),
            num_nodes=config["num_nodes"],
            batch_norm=config["batch_norm"],
            dropout=config["dropout"],
            output_size=1, # Default output size for DeepSurv
            num_risks = num_risk,
            num_time_bins = num_time_bins
        )
    elif config['net'] == 'lstm':
        net = lstm_net_init(
            input_size=len(config['features']),
            num_nodes=config["num_nodes"],
            batch_norm=config["batch_norm"],
            dropout=config["dropout"],
            num_risks = num_risk,
            num_time_bins = num_time_bins
        )
    else:
        raise ValueError("Unknown network type: {}".format(config['net']))

    optimizer = tt.optim.AdamWR(decoupled_weight_decay=1e-6, cycle_eta_multiplier=0.8)
    if config['model'] == 'deepsurv':
        model = CoxPH(net, optimizer)
    elif config['model'] == 'deephit':
        model = DeepHit(net, optimizer)
    model.optimizer.set_lr(config["lr"])
    
    return model

def train_neural_network(model, config, X_train, X_val, duration_col, event_col, cluster_col, callbacks, time_grid=None):
    """
    Function to train a given neural network using the provided datasets.

    Args:
        net (torch.nn.Module): Neural network to be trained.
        config (dict): Configuration dictionary containing model hyperparameters.
        X_train (pd.DataFrame): Training dataset with features.
        X_val (pd.DataFrame): Validation dataset with features.
        duration_col (str): Column representing event durations.
        event_col (str): Column representing event occurrences.
        cluster_col (str): Column for grouping during cross-validation.
        callbacks (list): List of callbacks for training.
        time_grid (np.array, optional): Time grid for evaluation if required. Defaults to None.

    Returns:
        model: Trained PyCox model.
        logs: Training logs.
    """
    gc.collect()
    torch.cuda.empty_cache()
    # Train the model
    if config['model'] == 'deepsurv':
        print('Initiate training of deepsurv neural network')
        X_val = df_event_focus(X_val, event_col, config['endpoint'])
        X_val_processed, y_val = preprocess_data(X_val, config['features'], duration_col, event_col)
        val_data = (X_val_processed, y_val)
        if config['net'] == 'ann':
            print('model structure: ANN')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = recursive_clustering(model, X_train, duration_col, event_col, config, val_data, callbacks, max_repeats=30)
            elif config['balance_method'] == 'enn':
                print('data balancing method: smoteenn')
                X_train = rebalance_data(X_train, event_col, config['endpoint'], CAT_FEATURES, config, RANDOM_SEED, method='ENN')
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data, num_workers=10)
            elif config['balance_method'] == 'tomek':
                print('data balancing method: smotetomek')
                X_train = rebalance_data(X_train, event_col, config['endpoint'], CAT_FEATURES, config, RANDOM_SEED, method='Tomek')
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data, num_workers=10)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
    elif config['model'] == 'deephit':
        print('Initiate training of deephit neural network')
        X_val_processed, y_val = preprocess_data(X_val, config['features'], duration_col, event_col, TIME_GRID, discretize=True)
        val_data = (X_val_processed, y_val)
        if config['net'] == 'ann':
            print('model structure: ANN')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = recursive_clustering(model, X_train, duration_col, event_col, config, val_data, callbacks, max_repeats=30, time_grid=TIME_GRID)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                X_train = underbalance_data_general(X_train, EVENT_COL, CLUSTER_COL, config, version=config['version'])
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col, TIME_GRID, discretize=True)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)        

    # Free memory after training
    gc.collect()
    torch.cuda.empty_cache()

    return model, logs

def save_model(params, model, model_path, baseline_hazard_path):
    """
    Save model weights and baseline hazard data.

    Parameters:
    - model: The trained model to save.
    - model_path: Path to save the model weights (.pt file).
    - baseline_hazard_path: Path to save the baseline hazards (.pkl file).
    """
    # Compute baseline hazards and save
    if params['model'] == 'deepsurv':
        baseline_hazard = model.compute_baseline_hazards()
        baseline_hazard.to_pickle(baseline_hazard_path)
    
    # Save model weights
    model.save_model_weights(model_path)
    print(f"Model and baseline hazards saved to {model_path} and {baseline_hazard_path}.")

def training_wrapper(df, config, spliter, model_path, hazard_path, feature_col=FEATURE_COLS, duration_col=DURATION_COL, event_col=EVENT_COL, cluster_col=CLUSTER_COL, time_grid=TIME_GRID):
    """
    Train and save a survival analysis model with grouped cross-validation splits.

    This function performs training on grouped cross-validation splits of the input DataFrame and saves each trained model
    along with its baseline hazards. Memory management is handled to ensure efficient GPU usage.

    Parameters:
    - df (pd.DataFrame): DataFrame containing training data.
    - config (dict): Configuration dictionary for initializing the neural network.
    - spliter (object): Splitter object (e.g., GroupShuffleSplit or StratifiedKFold) used for creating train-validation splits.
    - model_path (str): File path to save the trained model weights (.pt file).
    - hazard_path (str): File path to save the baseline hazards (.pkl file).
    - feature_col (list): List of feature column names in `df` used for model training.
    - duration_col (str): Name of the column representing duration/time-to-event.
    - event_col (str): Name of the column representing the event indicator (0 = censored, 1 = event).
    - cluster_col (str): Name of the column used for grouping (clusters for cross-validation).
    - time_grid (list): List or array defining the time grid for training.

    Returns:
    - None: Saves the model weights and baseline hazard data for each cross-validation split.
    """
    for train_idx, val_idx in spliter.split(X=df[feature_col], y=df[event_col], groups=df[cluster_col]):
        # Clear GPU memory for each split
        gc.collect()
        torch.cuda.empty_cache()
        
        # Define early stopping callback
        callbacks = [tt.cb.EarlyStopping()]
        
        # Create training and validation sets
        train_df = df.iloc[train_idx]
        val_df = df.iloc[val_idx]
        
        # Initialize and train the model
        model = create_neural_network(config)
        model, logs = train_neural_network(
            model, config,
            X_train=train_df, X_val=val_df,
            duration_col=duration_col, event_col=event_col,
            cluster_col=cluster_col, callbacks=callbacks, time_grid=time_grid
        )
        
        # Save the trained model and its baseline hazards
        save_model(config, model, model_path, hazard_path)
        
        # Free memory for the next iteration
        del model, logs
        gc.collect()
        torch.cuda.empty_cache()

    print("Training and saving completed for all cross-validation splits.")

    print("All models have been trained and saved successfully.")

#### 3.1 deepsurv_ann_clustering_1
- features: ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 8 and 4 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.1144793446270997
- learning rate: 0.1
- max epochs: 9
- batch size: 512

In [7]:
gc.collect()
torch.cuda.empty_cache()

deepsurv_ann_clustering_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [8, 4],
    'batch_norm': False,
    'dropout': 0.1144793446270997,
    'lr': 0.1,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.2 deepsurv_ann_smoteenn_1
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.3
- 4 hidden layers with 64, 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.09555033386059111
- learning rate: 0.1
- max epochs: 16
- batch size: 512

In [8]:
deepsurv_ann_smoteenn_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'enn',
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [64, 32, 16, 8],
    'batch_norm': True,
    'dropout': 0.09555033386059111,
    'lr': 0.1,
    'max_epochs': 16,
    'batch_size': 512,
    'sampling_strategy': 0.3,
    'seq_length': 1,
}

#### 3.3 deepsurv_ann_smotetomek_1
- features:  ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.2
- 3 hidden layers with 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.23872991564684112
- learning rate: 0.1
- max epochs: 14
- batch size: 512

In [9]:
deepsurv_ann_smotetomek_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'tomek',
    'features': ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [32, 16, 8],
    'batch_norm': True,
    'dropout': 0.23872991564684112,
    'lr': 0.1,
    'max_epochs': 14,
    'batch_size': 512,
    'sampling_strategy': 0.2,
    'seq_length': 1,
}

#### 3.4 deepsurv_ann_clustering_2
- features: ["gender", "a1c", "po4", "UACR_mg_g", "Cr"]
- sampling_strategy: 0.05
- 3 hidden layers with 32, 16, 8 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3058921011568742
- learning rate: 0.1
- max epochs: 14
- batch size: 512

In [10]:
deepsurv_ann_clustering_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ["gender", "a1c", "po4", "UACR_mg_g", "Cr"],
    'endpoint': 2,
    'num_nodes': [32, 16, 8],
    'batch_norm': False,
    'dropout': 0.3058921011568742,
    'lr': 0.1,
    'max_epochs': 14,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.5 deepsurv_ann_smoteenn_2
- features: ["gender", "dm", "ht", "sprint", "a1c", "po4", "UACR_mg_g", "Cr", "age", "alb", "ca", "hb", "hco3"]
- sampling_strategy: 0.1, 
- 2 hidden layers with 8, 4 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.38878203553667456
- learning rate: 0.01
- max epochs: 10
- batch size: 512

In [11]:
deepsurv_ann_smoteenn_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'enn',
    'features': ["gender", "dm", "ht", "sprint", "a1c", "po4", "UACR_mg_g", "Cr", "age", "alb", "ca", "hb", "hco3"],
    'endpoint': 2,
    'num_nodes': [8, 4],
    'batch_norm': False,
    'dropout': 0.38878203553667456,
    'lr': 0.01,
    'max_epochs': 10,
    'batch_size': 512,
    'sampling_strategy': 0.1,
    'seq_length': 1,
}

#### 3.6 deepsurv_ann_smotetomek_2
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling_strategy: 0.05
- 2 hidden layers with 64, 32 nodes
- batch normalization in each hidden layer 
- dropout ratio in each layer: 0.3162398297390827
- learning rate: 0.1
- max epochs: 11
- batch size: 512

In [12]:
deepsurv_ann_smotetomek_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'tomek',
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 2,
    'num_nodes': [64, 32],
    'batch_norm': True,
    'dropout': 0.3162398297390827,
    'lr': 0.1,
    'max_epochs': 11,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.7 deepsurv_lstm_clustering_1
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- sequence length 7
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.2772567071863989
- learning rate: 0.1
- max epochs: 13
- batch size: 512

In [13]:
deepsurv_lstm_clustering_1_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'clustering',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 1,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.2772567071863989,
    'lr': 0.1,
    'max_epochs': 13,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 7,
}

#### 3.8 deepsurv_lstm_nearmiss_1
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- seq_length': 8
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3397308077824205
- learning rate: 0.001
- max epochs: 9
- batch size: 512

In [14]:
deepsurv_lstm_nearmiss_1_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 1,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.3397308077824205,
    'lr': 0.001,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 8,
}

#### 3.9 deepsurv_lstm_clustering_2
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- seq_length': 8
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3397308077824205
- learning rate: 0.001
- max epochs: 9
- batch size: 512

In [15]:
deepsurv_lstm_clustering_2_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'clustering',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 2,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.3397308077824205,
    'lr': 0.001,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 8,
}

#### 3.10 deepsurv_lstm_nearmiss_2
- features: ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling_strategy: 0.05
- 2 hidden layers with 32, 16 nodes
- seq_length': 2
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.35763396978044143
- learning rate: 0.1
- max epochs: 10
- batch size: 512

In [16]:
deepsurv_lstm_nearmiss_2_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'features': ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 2,
    'num_nodes': [32, 16],
    'batch_norm': False,
    'dropout': 0.35763396978044143,
    'lr': 0.1,
    'max_epochs': 10,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 2,
}

#### 3.11 deephit_ann_clustering_all
- features: ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 64 and 32 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.26400151710698067
- learning rate: 0.1
- max epochs: 8
- batch size: 512

In [17]:
deephit_ann_clustering_all_config = {
    'model': 'deephit',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [64, 32],
    'batch_norm': True,
    'dropout': 0.26400151710698067,
    'lr': 0.1,
    'max_epochs': 8,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.12 deephit_ann_nearmiss2_all
- features: ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 8, 4 and 2 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.7346754269827496
- learning rate: 0.01
- max epochs: 7
- batch size: 512

In [18]:
deephit_ann_nearmiss2_all_config = {
    'model': 'deephit',
    'net': 'ann',
    'balance_method': 'NearMiss',
    'version': 2,
    'features': ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [8, 4, 2],
    'batch_norm': True,
    'dropout': 0.7346754269827496,
    'lr': 0.01,
    'max_epochs': 7,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.13 deephit_lstm_clustering_all
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- seq_length: 6
- 3 hidden layers with 64, 32 and 16 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.46132889488306583
- learning rate: 0.1
- max epochs: 5
- batch size: 512

In [19]:
deephit_lstm_clustering_all_config = {
    'model': 'deephit',
    'net': 'lstm',
    'balance_method': 'clustering',
    'version': 2,
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [64, 32, 16],
    'batch_norm': True,
    'dropout': 0.46132889488306583,
    'lr': 0.1,
    'max_epochs': 5,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 6,
}

#### 3.14 deephit_lstm_nearmiss1_all
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling strategy: 0.05
- seq_length: 9
- 3 hidden layers with 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.18001924589390816
- learning rate: 0.1
- max epochs: 9
- batch size: 512

In [20]:
deephit_lstm_nearmiss1_all_config = {
    'model': 'deephit',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'version': 1,
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 'all',
    'num_nodes': [32, 16, 8],
    'batch_norm': True,
    'dropout': 0.18001924589390816,
    'lr': 0.1,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 9,
}

In [21]:
model_ls = ['deepsurv_ann_clustering_1', 'deepsurv_ann_smoteenn_1', 'deepsurv_ann_smotetomek_1',
            'deepsurv_ann_clustering_2', 'deepsurv_ann_smoteenn_2', 'deepsurv_ann_smotetomek_2',
            'deepsurv_lstm_clustering_1', 'deepsurv_lstm_nearmiss_1', 'deepsurv_lstm_clustering_2', 'deepsurv_lstm_nearmiss_2',
            'deephit_ann_clustering_all', 'deephit_ann_nearmiss2_all', 'deephit_lstm_clustering_all', 'deephit_lstm_nearmiss1_all']
model_path = '/mnt/d/PYDataScience/g3_regress/code/models/'

In [22]:
gss1 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_SEED)
gss2 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_SEED)
for train_idx_1, fin_val_idx in gss1.split(X=X_train_transformed[FEATURE_COLS], y=X_train_transformed[EVENT_COL], groups=X_train_transformed[CLUSTER_COL]):
    X_train_transformed_2, X_fin_val = X_train_transformed.iloc[train_idx_1, :], X_train_transformed.iloc[fin_val_idx, :]
    gc.collect()
    torch.cuda.empty_cache()
    for model in model_ls:
        config_var_name = model + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue

        model_weights_path = f'{model_path}{model}.pt'
        model_hazard_path = f'{model_path}{model}_hazard.pkl'
        
        training_wrapper(X_train_transformed_2, model_config, gss2, model_weights_path, 
                        model_hazard_path, 
                        feature_col=FEATURE_COLS, duration_col=DURATION_COL, event_col=EVENT_COL, cluster_col=CLUSTER_COL, time_grid=TIME_GRID)
        gc.collect()
        torch.cuda.empty_cache()

2024-11-15 23:37:59,243 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:37:59,249 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:37:59,260 - INFO - Performing clustering iteration 1 / 20
2024-11-15 23:37:59,260 - INFO - init
2024-11-15 23:37:59,263 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:37:59,267 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: clustering


2024-11-15 23:37:59,934 - INFO - Defined medoid for deepsurv model with 1207 clusters.
	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha = 1) (Triggered internally at ../torch/csrc/utils/python_arg_parser.cpp:1581.)
  p.data = p.data.add(-weight_decay * eta, p.data)


0:	[0s / 0s],		train_loss: 5.1222,	val_loss: 7.8605
1:	[0s / 0s],		train_loss: 5.0338,	val_loss: 7.5699
2:	[0s / 0s],		train_loss: 4.9697,	val_loss: 7.4738
3:	[0s / 0s],		train_loss: 4.9293,	val_loss: 7.3146
4:	[0s / 0s],		train_loss: 4.8518,	val_loss: 6.9994
5:	[0s / 0s],		train_loss: 4.7889,	val_loss: 6.8219
6:	[0s / 0s],		train_loss: 4.8050,	val_loss: 6.8218
7:	[0s / 0s],		train_loss: 4.7501,	val_loss: 6.7547
8:	[0s / 0s],		train_loss: 4.7655,	val_loss: 6.7621


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:00,813 - INFO - Performing clustering iteration 2 / 20
2024-11-15 23:38:00,813 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:00,818 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:38:01,253 - INFO - Defined medoid for deepsurv model with 1207 clusters.


9:	[0s / 0s],		train_loss: 4.6969,	val_loss: 6.7727
10:	[0s / 0s],		train_loss: 4.6655,	val_loss: 6.6474
11:	[0s / 0s],		train_loss: 4.6524,	val_loss: 6.8231
12:	[0s / 0s],		train_loss: 4.6561,	val_loss: 6.7562
13:	[0s / 0s],		train_loss: 4.6518,	val_loss: 6.7023
14:	[0s / 0s],		train_loss: 4.6509,	val_loss: 6.5587
15:	[0s / 0s],		train_loss: 4.6507,	val_loss: 6.6676
16:	[0s / 0s],		train_loss: 4.6268,	val_loss: 6.6052
17:	[0s / 0s],		train_loss: 4.6377,	val_loss: 6.5342


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:01,790 - INFO - Performing clustering iteration 3 / 20
2024-11-15 23:38:01,791 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:01,794 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:38:02,231 - INFO - Defined medoid for deepsurv model with 1207 clusters.


18:	[0s / 0s],		train_loss: 4.6094,	val_loss: 6.6564
19:	[0s / 0s],		train_loss: 4.6289,	val_loss: 6.5130
20:	[0s / 0s],		train_loss: 4.5973,	val_loss: 6.5200
21:	[0s / 0s],		train_loss: 4.6182,	val_loss: 6.4772
22:	[0s / 0s],		train_loss: 4.6057,	val_loss: 6.4865
23:	[0s / 0s],		train_loss: 4.6040,	val_loss: 6.3840
24:	[0s / 0s],		train_loss: 4.5978,	val_loss: 6.3814
25:	[0s / 0s],		train_loss: 4.6132,	val_loss: 6.4298


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:02,824 - INFO - Performing clustering iteration 4 / 20
2024-11-15 23:38:02,824 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:02,827 - INFO - Event column 'endpoint' updated with focus on event value 1.


26:	[0s / 0s],		train_loss: 4.6217,	val_loss: 6.4693


2024-11-15 23:38:03,230 - INFO - Defined medoid for deepsurv model with 1207 clusters.


27:	[0s / 0s],		train_loss: 4.6167,	val_loss: 6.3916
28:	[0s / 0s],		train_loss: 4.6326,	val_loss: 6.3972
29:	[0s / 0s],		train_loss: 4.6202,	val_loss: 6.3943
30:	[0s / 0s],		train_loss: 4.6303,	val_loss: 6.3755
31:	[0s / 0s],		train_loss: 4.6168,	val_loss: 6.4505
32:	[0s / 0s],		train_loss: 4.6078,	val_loss: 6.4495
33:	[0s / 0s],		train_loss: 4.6297,	val_loss: 6.3474
34:	[0s / 0s],		train_loss: 4.6127,	val_loss: 6.3939
35:	[0s / 0s],		train_loss: 4.5996,	val_loss: 6.3698


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:03,761 - INFO - Performing clustering iteration 5 / 20
2024-11-15 23:38:03,762 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:03,764 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:38:04,170 - INFO - Defined medoid for deepsurv model with 1207 clusters.


36:	[0s / 0s],		train_loss: 4.6199,	val_loss: 6.3322
37:	[0s / 0s],		train_loss: 4.6204,	val_loss: 6.4536
38:	[0s / 0s],		train_loss: 4.5920,	val_loss: 6.3989
39:	[0s / 0s],		train_loss: 4.6021,	val_loss: 6.2783
40:	[0s / 0s],		train_loss: 4.5824,	val_loss: 6.3993
41:	[0s / 0s],		train_loss: 4.5764,	val_loss: 6.3986
42:	[0s / 0s],		train_loss: 4.5946,	val_loss: 6.3371
43:	[0s / 0s],		train_loss: 4.6034,	val_loss: 6.4016
44:	[0s / 0s],		train_loss: 4.6128,	val_loss: 6.4057


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:04,682 - INFO - Performing clustering iteration 6 / 20
2024-11-15 23:38:04,682 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:04,686 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:38:05,091 - INFO - Defined medoid for deepsurv model with 1207 clusters.


45:	[0s / 0s],		train_loss: 4.6003,	val_loss: 6.4377
46:	[0s / 0s],		train_loss: 4.5957,	val_loss: 6.4163
47:	[0s / 0s],		train_loss: 4.6103,	val_loss: 6.3416
48:	[0s / 0s],		train_loss: 4.6085,	val_loss: 6.3274
49:	[0s / 0s],		train_loss: 4.6087,	val_loss: 6.3596


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:05,402 - INFO - Performing clustering iteration 7 / 20
2024-11-15 23:38:05,403 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:05,407 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:38:05,792 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:05,943 - INFO - Performing clustering iteration 8 / 20
2024-11-15 23:38:05,943 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:05,947 - INFO - Event column 'endpoint' updated with focus on event value 1.


50:	[0s / 0s],		train_loss: 4.6048,	val_loss: 6.4077


2024-11-15 23:38:06,331 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:06,490 - INFO - Performing clustering iteration 9 / 20
2024-11-15 23:38:06,491 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:06,494 - INFO - Event column 'endpoint' updated with focus on event value 1.


51:	[0s / 0s],		train_loss: 4.6086,	val_loss: 6.4236


2024-11-15 23:38:06,871 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:07,018 - INFO - Performing clustering iteration 10 / 20
2024-11-15 23:38:07,019 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:07,021 - INFO - Event column 'endpoint' updated with focus on event value 1.


52:	[0s / 0s],		train_loss: 4.6170,	val_loss: 6.3584


2024-11-15 23:38:07,418 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:07,568 - INFO - Performing clustering iteration 11 / 20
2024-11-15 23:38:07,569 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:07,572 - INFO - Event column 'endpoint' updated with focus on event value 1.


53:	[0s / 0s],		train_loss: 4.5973,	val_loss: 6.3334


2024-11-15 23:38:07,959 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:08,123 - INFO - Performing clustering iteration 12 / 20
2024-11-15 23:38:08,124 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:08,127 - INFO - Event column 'endpoint' updated with focus on event value 1.


54:	[0s / 0s],		train_loss: 4.6139,	val_loss: 6.2936


2024-11-15 23:38:08,506 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:08,667 - INFO - Performing clustering iteration 13 / 20
2024-11-15 23:38:08,668 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:08,671 - INFO - Event column 'endpoint' updated with focus on event value 1.


55:	[0s / 0s],		train_loss: 4.6199,	val_loss: 6.3020


2024-11-15 23:38:09,041 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:09,190 - INFO - Performing clustering iteration 14 / 20
2024-11-15 23:38:09,190 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:09,193 - INFO - Event column 'endpoint' updated with focus on event value 1.


56:	[0s / 0s],		train_loss: 4.6315,	val_loss: 6.2808


2024-11-15 23:38:09,570 - INFO - Defined medoid for deepsurv model with 1207 clusters.


57:	[3s / 3s],		train_loss: 4.6205,	val_loss: 6.2778
58:	[0s / 3s],		train_loss: 4.6115,	val_loss: 6.2782
59:	[0s / 3s],		train_loss: 4.6213,	val_loss: 6.2788
60:	[0s / 3s],		train_loss: 4.6268,	val_loss: 6.5559
61:	[0s / 3s],		train_loss: 4.6183,	val_loss: 6.2105
62:	[0s / 3s],		train_loss: 4.6344,	val_loss: 6.2469
63:	[0s / 3s],		train_loss: 4.6127,	val_loss: 6.4725
64:	[0s / 3s],		train_loss: 4.6021,	val_loss: 6.2031
65:	[0s / 3s],		train_loss: 4.6055,	val_loss: 6.1676


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:13,678 - INFO - Performing clustering iteration 15 / 20
2024-11-15 23:38:13,679 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:13,683 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:38:14,087 - INFO - Defined medoid for deepsurv model with 1207 clusters.


66:	[0s / 0s],		train_loss: 4.6355,	val_loss: 6.2681
67:	[0s / 0s],		train_loss: 4.6207,	val_loss: 6.3029
68:	[0s / 0s],		train_loss: 4.5872,	val_loss: 6.1525
69:	[0s / 0s],		train_loss: 4.6243,	val_loss: 6.1809
70:	[0s / 0s],		train_loss: 4.6310,	val_loss: 6.1024


71:	[0s / 0s],		train_loss: 4.5923,	val_loss: 6.2492
72:	[0s / 0s],		train_loss: 4.5884,	val_loss: 6.1203
73:	[0s / 0s],		train_loss: 4.6063,	val_loss: 6.1447
74:	[0s / 0s],		train_loss: 4.6177,	val_loss: 6.2629


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:14,660 - INFO - Performing clustering iteration 16 / 20
2024-11-15 23:38:14,661 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:14,664 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:38:15,081 - INFO - Defined medoid for deepsurv model with 1207 clusters.


75:	[0s / 0s],		train_loss: 4.6029,	val_loss: 6.0245
76:	[0s / 0s],		train_loss: 4.5860,	val_loss: 6.1040
77:	[0s / 0s],		train_loss: 4.6219,	val_loss: 6.2977
78:	[0s / 0s],		train_loss: 4.6078,	val_loss: 6.1980
79:	[0s / 0s],		train_loss: 4.6001,	val_loss: 6.2019
80:	[0s / 0s],		train_loss: 4.6007,	val_loss: 6.1669
81:	[0s / 0s],		train_loss: 4.6201,	val_loss: 6.0048
82:	[0s / 0s],		train_loss: 4.6083,	val_loss: 6.0783


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:15,724 - INFO - Performing clustering iteration 17 / 20
2024-11-15 23:38:15,724 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:15,727 - INFO - Event column 'endpoint' updated with focus on event value 1.


83:	[0s / 0s],		train_loss: 4.6184,	val_loss: 6.3530


2024-11-15 23:38:16,143 - INFO - Defined medoid for deepsurv model with 1207 clusters.


84:	[0s / 0s],		train_loss: 4.6088,	val_loss: 6.3463
85:	[0s / 0s],		train_loss: 4.6013,	val_loss: 6.1670
86:	[0s / 0s],		train_loss: 4.6261,	val_loss: 6.0214
87:	[0s / 0s],		train_loss: 4.6163,	val_loss: 6.1141
88:	[0s / 0s],		train_loss: 4.6053,	val_loss: 6.2017
89:	[0s / 0s],		train_loss: 4.6292,	val_loss: 6.1950


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:16,542 - INFO - Performing clustering iteration 18 / 20
2024-11-15 23:38:16,542 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:16,545 - INFO - Event column 'endpoint' updated with focus on event value 1.


90:	[0s / 0s],		train_loss: 4.6066,	val_loss: 6.1575
91:	[0s / 0s],		train_loss: 4.6072,	val_loss: 6.1915


2024-11-15 23:38:16,912 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:17,067 - INFO - Performing clustering iteration 19 / 20
2024-11-15 23:38:17,068 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:17,070 - INFO - Event column 'endpoint' updated with focus on event value 1.


92:	[0s / 0s],		train_loss: 4.5918,	val_loss: 6.1420


2024-11-15 23:38:17,428 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:38:17,584 - INFO - Performing clustering iteration 20 / 20
2024-11-15 23:38:17,585 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:38:17,588 - INFO - Event column 'endpoint' updated with focus on event value 1.


93:	[0s / 0s],		train_loss: 4.6236,	val_loss: 6.1294


2024-11-15 23:38:17,958 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


94:	[0s / 0s],		train_loss: 4.6207,	val_loss: 6.1573
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:38:18,782 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:38:18,788 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smoteenn


2024-11-15 23:38:22,924 - INFO - Missing values imputed using IterativeImputer.
2024-11-15 23:38:22,934 - INFO - Dataframe rebalanced with SMOTE and ENN.


0:	[2s / 2s],		train_loss: 3.7049,	val_loss: 5.0127
1:	[2s / 5s],		train_loss: 3.6708,	val_loss: 5.0493
2:	[2s / 8s],		train_loss: 3.6362,	val_loss: 5.0243
3:	[2s / 10s],		train_loss: 3.6525,	val_loss: 5.2003
4:	[2s / 13s],		train_loss: 3.6420,	val_loss: 5.0970
5:	[2s / 16s],		train_loss: 3.6308,	val_loss: 5.0213
6:	[2s / 18s],		train_loss: 3.6192,	val_loss: 5.0222
7:	[2s / 21s],		train_loss: 3.6382,	val_loss: 5.0518
8:	[6s / 27s],		train_loss: 3.6333,	val_loss: 5.0301
9:	[2s / 29s],		train_loss: 3.6285,	val_loss: 5.0079
10:	[2s / 32s],		train_loss: 3.6230,	val_loss: 5.0445
11:	[2s / 34s],		train_loss: 3.6155,	val_loss: 5.0227
12:	[2s / 37s],		train_loss: 3.6104,	val_loss: 5.0244
13:	[2s / 39s],		train_loss: 3.6048,	val_loss: 5.0159
14:	[2s / 42s],		train_loss: 3.6038,	val_loss: 5.0234
15:	[2s / 44s],		train_loss: 3.6190,	val_loss: 5.1283


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:39:08,627 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:39:08,634 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smotetomek


2024-11-15 23:39:12,832 - INFO - Missing values imputed using IterativeImputer.
2024-11-15 23:39:12,837 - INFO - Dataframe rebalanced with SMOTE and Tomek.


0:	[2s / 2s],		train_loss: 3.6834,	val_loss: 4.9195
1:	[2s / 5s],		train_loss: 3.6443,	val_loss: 4.9716
2:	[2s / 8s],		train_loss: 3.6111,	val_loss: 4.9026
3:	[6s / 14s],		train_loss: 3.6332,	val_loss: 4.8952
4:	[2s / 17s],		train_loss: 3.6232,	val_loss: 4.9265
5:	[2s / 19s],		train_loss: 3.6095,	val_loss: 4.9254
6:	[2s / 22s],		train_loss: 3.5997,	val_loss: 4.9153
7:	[2s / 25s],		train_loss: 3.6251,	val_loss: 5.0457
8:	[2s / 27s],		train_loss: 3.6175,	val_loss: 4.9683
9:	[2s / 30s],		train_loss: 3.6121,	val_loss: 4.9377
10:	[2s / 33s],		train_loss: 3.6123,	val_loss: 4.9208
11:	[2s / 35s],		train_loss: 3.6026,	val_loss: 4.8810
12:	[2s / 38s],		train_loss: 3.5956,	val_loss: 4.9226
13:	[2s / 41s],		train_loss: 3.5942,	val_loss: 4.9058


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:39:54,822 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-15 23:39:54,828 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-15 23:39:54,833 - INFO - Performing clustering iteration 1 / 20
2024-11-15 23:39:54,834 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:39:54,836 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: clustering


2024-11-15 23:39:55,394 - INFO - Defined medoid for deepsurv model with 3725 clusters.


0:	[0s / 0s],		train_loss: 4.9144,	val_loss: 7.7658
1:	[0s / 0s],		train_loss: 4.8010,	val_loss: 7.7832
2:	[0s / 0s],		train_loss: 4.7803,	val_loss: 7.7904
3:	[0s / 0s],		train_loss: 4.7784,	val_loss: 7.7887
4:	[0s / 0s],		train_loss: 4.7645,	val_loss: 7.7980
5:	[0s / 0s],		train_loss: 4.7691,	val_loss: 7.7994
6:	[0s / 0s],		train_loss: 4.7514,	val_loss: 7.7963
7:	[0s / 0s],		train_loss: 4.7597,	val_loss: 7.8178
8:	[0s / 0s],		train_loss: 4.7688,	val_loss: 7.8023
9:	[0s / 0s],		train_loss: 4.7658,	val_loss: 7.8010
10:	[0s / 0s],		train_loss: 4.7633,	val_loss: 7.7893


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:39:56,143 - INFO - Performing clustering iteration 2 / 20
2024-11-15 23:39:56,143 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:39:56,146 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:39:56,539 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:39:56,717 - INFO - Performing clustering iteration 3 / 20
2024-11-15 23:39:56,717 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:39:56,720 - INFO - Event column 'endpoint' updated with focus on event value 1.


11:	[0s / 0s],		train_loss: 4.8377,	val_loss: 7.7744


2024-11-15 23:39:57,096 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:39:57,292 - INFO - Performing clustering iteration 4 / 20
2024-11-15 23:39:57,292 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:39:57,296 - INFO - Event column 'endpoint' updated with focus on event value 1.


12:	[0s / 0s],		train_loss: 4.9166,	val_loss: 7.7662


2024-11-15 23:39:57,664 - INFO - Defined medoid for deepsurv model with 3725 clusters.


13:	[0s / 0s],		train_loss: 5.0643,	val_loss: 7.7514
14:	[0s / 0s],		train_loss: 5.0404,	val_loss: 7.7497
15:	[3s / 3s],		train_loss: 5.0334,	val_loss: 7.7524
16:	[0s / 3s],		train_loss: 5.0277,	val_loss: 7.7549
17:	[0s / 3s],		train_loss: 5.0254,	val_loss: 7.7560
18:	[0s / 3s],		train_loss: 5.0119,	val_loss: 7.7649
19:	[0s / 3s],		train_loss: 5.0121,	val_loss: 7.7579
20:	[0s / 3s],		train_loss: 5.0099,	val_loss: 7.7723
21:	[0s / 4s],		train_loss: 5.0099,	val_loss: 7.7752
22:	[0s / 4s],		train_loss: 5.0002,	val_loss: 7.7663
23:	[0s / 4s],		train_loss: 5.0047,	val_loss: 7.7761
24:	[0s / 4s],		train_loss: 5.0055,	val_loss: 7.7752


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:02,000 - INFO - Performing clustering iteration 5 / 20
2024-11-15 23:40:02,001 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:02,004 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:40:02,373 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:02,547 - INFO - Performing clustering iteration 6 / 20
2024-11-15 23:40:02,547 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:02,550 - INFO - Event column 'endpoint' updated with focus on event value 1.


25:	[0s / 0s],		train_loss: 5.0442,	val_loss: 7.7525


2024-11-15 23:40:02,905 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:03,086 - INFO - Performing clustering iteration 7 / 20
2024-11-15 23:40:03,087 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:03,090 - INFO - Event column 'endpoint' updated with focus on event value 1.


26:	[0s / 0s],		train_loss: 5.0522,	val_loss: 7.7498


2024-11-15 23:40:03,427 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:03,606 - INFO - Performing clustering iteration 8 / 20
2024-11-15 23:40:03,607 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:03,609 - INFO - Event column 'endpoint' updated with focus on event value 1.


27:	[0s / 0s],		train_loss: 5.0629,	val_loss: 7.7500


2024-11-15 23:40:03,935 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:04,115 - INFO - Performing clustering iteration 9 / 20
2024-11-15 23:40:04,115 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:04,119 - INFO - Event column 'endpoint' updated with focus on event value 1.


28:	[0s / 0s],		train_loss: 5.0598,	val_loss: 7.7498


2024-11-15 23:40:04,455 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:04,638 - INFO - Performing clustering iteration 10 / 20
2024-11-15 23:40:04,639 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:04,642 - INFO - Event column 'endpoint' updated with focus on event value 1.


29:	[0s / 0s],		train_loss: 5.0760,	val_loss: 7.7499


2024-11-15 23:40:04,980 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:05,176 - INFO - Performing clustering iteration 11 / 20
2024-11-15 23:40:05,177 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:05,180 - INFO - Event column 'endpoint' updated with focus on event value 1.


30:	[0s / 0s],		train_loss: 5.0863,	val_loss: 7.7541


2024-11-15 23:40:05,497 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:05,691 - INFO - Performing clustering iteration 12 / 20
2024-11-15 23:40:05,691 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:05,694 - INFO - Event column 'endpoint' updated with focus on event value 1.


31:	[0s / 0s],		train_loss: 5.0876,	val_loss: 7.7519


2024-11-15 23:40:05,994 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:06,169 - INFO - Performing clustering iteration 13 / 20
2024-11-15 23:40:06,169 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:06,172 - INFO - Event column 'endpoint' updated with focus on event value 1.


32:	[0s / 0s],		train_loss: 5.0849,	val_loss: 7.7538


2024-11-15 23:40:06,465 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:06,639 - INFO - Performing clustering iteration 14 / 20
2024-11-15 23:40:06,640 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:06,644 - INFO - Event column 'endpoint' updated with focus on event value 1.


33:	[0s / 0s],		train_loss: 5.1088,	val_loss: 7.7525


2024-11-15 23:40:06,930 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:07,107 - INFO - Performing clustering iteration 15 / 20
2024-11-15 23:40:07,108 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:07,111 - INFO - Event column 'endpoint' updated with focus on event value 1.


34:	[0s / 0s],		train_loss: 5.1025,	val_loss: 7.7537


2024-11-15 23:40:07,394 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:07,588 - INFO - Performing clustering iteration 16 / 20
2024-11-15 23:40:07,589 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:07,592 - INFO - Event column 'endpoint' updated with focus on event value 1.


35:	[0s / 0s],		train_loss: 5.0835,	val_loss: 7.7556


2024-11-15 23:40:07,858 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:08,037 - INFO - Performing clustering iteration 17 / 20
2024-11-15 23:40:08,038 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:08,041 - INFO - Event column 'endpoint' updated with focus on event value 1.


36:	[0s / 0s],		train_loss: 5.1060,	val_loss: 7.7550


2024-11-15 23:40:08,304 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:08,483 - INFO - Performing clustering iteration 18 / 20
2024-11-15 23:40:08,483 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:08,486 - INFO - Event column 'endpoint' updated with focus on event value 1.


37:	[0s / 0s],		train_loss: 5.1001,	val_loss: 7.7504


2024-11-15 23:40:08,749 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:08,936 - INFO - Performing clustering iteration 19 / 20
2024-11-15 23:40:08,937 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:08,940 - INFO - Event column 'endpoint' updated with focus on event value 1.


38:	[0s / 0s],		train_loss: 5.1054,	val_loss: 7.7547


2024-11-15 23:40:09,186 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:40:09,380 - INFO - Performing clustering iteration 20 / 20
2024-11-15 23:40:09,381 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:40:09,384 - INFO - Event column 'endpoint' updated with focus on event value 1.


39:	[0s / 0s],		train_loss: 5.0835,	val_loss: 7.7549


2024-11-15 23:40:09,623 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


40:	[0s / 0s],		train_loss: 5.1034,	val_loss: 7.7561
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:40:10,430 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-15 23:40:10,437 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smoteenn


2024-11-15 23:40:14,202 - INFO - Missing values imputed using IterativeImputer.
2024-11-15 23:40:14,210 - INFO - Dataframe rebalanced with SMOTE and ENN.


0:	[1s / 1s],		train_loss: 4.8324,	val_loss: 7.6427
1:	[1s / 3s],		train_loss: 4.6138,	val_loss: 7.5167
2:	[1s / 5s],		train_loss: 4.5290,	val_loss: 7.4838
3:	[1s / 7s],		train_loss: 4.5476,	val_loss: 7.4889
4:	[1s / 9s],		train_loss: 4.5359,	val_loss: 7.5161
5:	[1s / 10s],		train_loss: 4.5164,	val_loss: 7.5008
6:	[1s / 12s],		train_loss: 4.4945,	val_loss: 7.4864
7:	[1s / 14s],		train_loss: 4.5217,	val_loss: 7.4929
8:	[1s / 16s],		train_loss: 4.5093,	val_loss: 7.4916
9:	[1s / 18s],		train_loss: 4.5003,	val_loss: 7.5183


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:40:33,235 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-15 23:40:33,240 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smotetomek


2024-11-15 23:40:40,623 - INFO - Missing values imputed using IterativeImputer.
2024-11-15 23:40:40,631 - INFO - Dataframe rebalanced with SMOTE and Tomek.


0:	[2s / 2s],		train_loss: 4.6938,	val_loss: 7.4255
1:	[2s / 4s],		train_loss: 4.6640,	val_loss: 7.3824
2:	[2s / 6s],		train_loss: 4.5899,	val_loss: 7.3939
3:	[2s / 8s],		train_loss: 4.6403,	val_loss: 7.4294
4:	[2s / 11s],		train_loss: 4.6187,	val_loss: 7.4090
5:	[2s / 13s],		train_loss: 4.5856,	val_loss: 7.3997
6:	[2s / 15s],		train_loss: 4.5693,	val_loss: 7.4068
7:	[2s / 17s],		train_loss: 4.6241,	val_loss: 7.4783
8:	[2s / 20s],		train_loss: 4.6051,	val_loss: 7.4627
9:	[2s / 22s],		train_loss: 4.5962,	val_loss: 7.5095
10:	[2s / 24s],		train_loss: 4.5839,	val_loss: 7.4644


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:41:05,901 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:41:05,998 - INFO - Performing clustering iteration 1 / 20
2024-11-15 23:41:05,998 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:41:06,002 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: clustering


2024-11-15 23:41:06,605 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-15 23:41:06,606 - INFO - Performing clustering iteration 2 / 20
2024-11-15 23:41:06,607 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:41:06,609 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:41:07,006 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-15 23:41:07,007 - INFO - Performing clustering iteration 3 / 20
2024-11-15 23:41:07,008 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:41:07,010 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:41:07,412 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-15 23:41:07,413 - INFO - Performing clustering iteration 4 / 20
2024-11-15 23:41:07,414 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:41:07,416 - INFO - Event column 'endpoint' updated with focus on event value 1

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[0s / 0s],		train_loss: 3.2189,	val_loss: 5.3828
1:	[0s / 1s],		train_loss: 3.0697,	val_loss: 5.8590
2:	[0s / 2s],		train_loss: 2.2388,	val_loss: 5.5454
3:	[0s / 2s],		train_loss: 2.2063,	val_loss: 5.1473
4:	[0s / 3s],		train_loss: 2.1677,	val_loss: 5.6474
5:	[0s / 4s],		train_loss: 2.0999,	val_loss: 5.1721
6:	[0s / 4s],		train_loss: 2.0678,	val_loss: 5.3049
7:	[0s / 5s],		train_loss: 2.1510,	val_loss: 5.6118
8:	[0s / 6s],		train_loss: 2.1197,	val_loss: 5.7434
9:	[0s / 6s],		train_loss: 2.0947,	val_loss: 5.2586
10:	[0s / 7s],		train_loss: 2.1024,	val_loss: 5.2827
11:	[0s / 8s],		train_loss: 2.0948,	val_loss: 5.0368
12:	[0s / 8s],		train_loss: 2.0674,	val_loss: 5.1209
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1_hazard.pkl.


  self.net.load_state_dict(torch.load(path, **kwargs))


Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:42:10,387 - INFO - Event column 'endpoint' updated with focus on event value 1.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-15 23:42:10,488 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: NearMiss


2024-11-15 23:42:10,788 - INFO - Dataset for deepsurv model undersampled using method 'NearMiss' with sampling strategy 0.05.
2024-11-15 23:42:15,668 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:42:42,697 - INFO - Validation data retrieved


64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[0s / 0s],		train_loss: 5.1360,	val_loss: 7.8974
1:	[0s / 1s],		train_loss: 5.1355,	val_loss: 7.8970
2:	[0s / 1s],		train_loss: 5.1179,	val_loss: 7.8969
3:	[0s / 2s],		train_loss: 5.1235,	val_loss: 7.8965
4:	[0s / 2s],		train_loss: 5.1454,	val_loss: 7.8961
5:	[0s / 3s],		train_loss: 5.1308,	val_loss: 7.8958
6:	[0s / 3s],		train_loss: 5.1366,	val_loss: 7.8958
7:	[0s / 4s],		train_loss: 5.1391,	val_loss: 7.8951
8:	[0s / 4s],		train_loss: 5.1251,	val_loss: 7.8943
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_1_hazard.pkl.


  self.net.load_state_dict(torch.load(path, **kwargs))


Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:42:48,139 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-15 23:42:48,233 - INFO - Performing clustering iteration 1 / 20
2024-11-15 23:42:48,233 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:42:48,236 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: clustering


2024-11-15 23:42:48,772 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-15 23:42:48,773 - INFO - Performing clustering iteration 2 / 20
2024-11-15 23:42:48,773 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:42:48,775 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-15 23:42:49,163 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-15 23:42:49,164 - INFO - Performing clustering iteration 3 / 20
2024-11-15 23:42:49,164 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:42:49,166 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-15 23:42:49,537 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-15 23:42:49,538 - INFO - Performing clustering iteration 4 / 20
2024-11-15 23:42:49,538 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:42:49,541 - INFO - Event column 'endpoint' updated with focus on event value 2

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[1s / 1s],		train_loss: 4.9840
1:	[1s / 2s],		train_loss: 4.9825
2:	[1s / 3s],		train_loss: 4.9566
3:	[1s / 5s],		train_loss: 4.9380
4:	[1s / 6s],		train_loss: 4.8915
5:	[1s / 7s],		train_loss: 4.8752
6:	[1s / 8s],		train_loss: 4.8627
7:	[1s / 10s],		train_loss: 4.8587
8:	[1s / 11s],		train_loss: 4.8425


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:44:30,366 - INFO - Event column 'endpoint' updated with focus on event value 2.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-15 23:44:30,474 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: NearMiss


2024-11-15 23:44:30,800 - INFO - Dataset for deepsurv model undersampled using method 'NearMiss' with sampling strategy 0.05.
2024-11-15 23:44:58,994 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-15 23:45:32,421 - INFO - Validation data retrieved


64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[0s / 0s],		train_loss: 5.0672
1:	[0s / 1s],		train_loss: 5.0364
2:	[0s / 1s],		train_loss: 4.9895
3:	[0s / 2s],		train_loss: 4.9804
4:	[0s / 2s],		train_loss: 4.9804
5:	[0s / 3s],		train_loss: 4.9720
6:	[0s / 4s],		train_loss: 4.9666
7:	[0s / 4s],		train_loss: 4.9780
8:	[0s / 5s],		train_loss: 4.9683
9:	[0s / 5s],		train_loss: 4.9642
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2_hazard.pkl.


  self.net.load_state_dict(torch.load(path, **kwargs))


Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:45:38,855 - INFO - Performing clustering iteration 1 / 20
2024-11-15 23:45:38,855 - INFO - CUDA environment set up and GPU memory cleared.


Initiate training of deephit neural network
model structure: ANN
data balancing method: clustering


2024-11-15 23:45:39,393 - INFO - Defined medoid for deephit model with 4932 clusters.


0:	[0s / 0s],		train_loss: 1.7559,	val_loss: 0.0801
1:	[0s / 0s],		train_loss: 0.5162,	val_loss: 0.0713
2:	[0s / 1s],		train_loss: 0.4664,	val_loss: 0.0699
3:	[0s / 1s],		train_loss: 0.4431,	val_loss: 0.0731
4:	[0s / 2s],		train_loss: 0.4144,	val_loss: 0.0728
5:	[0s / 2s],		train_loss: 0.4015,	val_loss: 0.0743
6:	[0s / 2s],		train_loss: 0.3978,	val_loss: 0.0735


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:45:42,832 - INFO - Performing clustering iteration 2 / 20
2024-11-15 23:45:42,832 - INFO - CUDA environment set up and GPU memory cleared.


7:	[0s / 3s],		train_loss: 0.3859,	val_loss: 0.0745


2024-11-15 23:45:43,222 - INFO - Defined medoid for deephit model with 4932 clusters.


8:	[0s / 0s],		train_loss: 0.4676,	val_loss: 0.0701
9:	[0s / 0s],		train_loss: 0.4190,	val_loss: 0.0683
10:	[0s / 1s],		train_loss: 0.3914,	val_loss: 0.0624
11:	[0s / 1s],		train_loss: 0.3826,	val_loss: 0.0642
12:	[0s / 1s],		train_loss: 0.3750,	val_loss: 0.0659
13:	[0s / 2s],		train_loss: 0.3747,	val_loss: 0.0652
14:	[0s / 2s],		train_loss: 0.3738,	val_loss: 0.0664


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:45:46,488 - INFO - Performing clustering iteration 3 / 20
2024-11-15 23:45:46,489 - INFO - CUDA environment set up and GPU memory cleared.


15:	[0s / 3s],		train_loss: 0.3679,	val_loss: 0.0601


2024-11-15 23:45:46,879 - INFO - Defined medoid for deephit model with 4932 clusters.


16:	[0s / 0s],		train_loss: 0.3614,	val_loss: 0.0636
17:	[0s / 0s],		train_loss: 0.3533,	val_loss: 0.0619
18:	[0s / 1s],		train_loss: 0.3489,	val_loss: 0.0647
19:	[0s / 1s],		train_loss: 0.3464,	val_loss: 0.0656
20:	[0s / 1s],		train_loss: 0.3459,	val_loss: 0.0605
21:	[0s / 2s],		train_loss: 0.3399,	val_loss: 0.0602
22:	[0s / 2s],		train_loss: 0.3382,	val_loss: 0.0615


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:45:50,086 - INFO - Performing clustering iteration 4 / 20
2024-11-15 23:45:50,086 - INFO - CUDA environment set up and GPU memory cleared.


23:	[0s / 3s],		train_loss: 0.3376,	val_loss: 0.0591


2024-11-15 23:45:50,439 - INFO - Defined medoid for deephit model with 4932 clusters.


24:	[0s / 0s],		train_loss: 0.3393,	val_loss: 0.0615
25:	[0s / 0s],		train_loss: 0.3376,	val_loss: 0.0599
26:	[0s / 1s],		train_loss: 0.3395,	val_loss: 0.0612
27:	[0s / 1s],		train_loss: 0.3344,	val_loss: 0.0582
28:	[0s / 1s],		train_loss: 0.3326,	val_loss: 0.0601
29:	[0s / 2s],		train_loss: 0.3362,	val_loss: 0.0599
30:	[0s / 2s],		train_loss: 0.3355,	val_loss: 0.0625


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:45:53,510 - INFO - Performing clustering iteration 5 / 20
2024-11-15 23:45:53,510 - INFO - CUDA environment set up and GPU memory cleared.


31:	[0s / 2s],		train_loss: 0.3382,	val_loss: 0.0561


2024-11-15 23:45:53,872 - INFO - Defined medoid for deephit model with 4932 clusters.


32:	[0s / 0s],		train_loss: 0.3389,	val_loss: 0.0639
33:	[0s / 0s],		train_loss: 0.3331,	val_loss: 0.0656
34:	[0s / 1s],		train_loss: 0.3354,	val_loss: 0.0642
35:	[0s / 1s],		train_loss: 0.3468,	val_loss: 0.0528
36:	[0s / 1s],		train_loss: 0.3347,	val_loss: 0.0602
37:	[0s / 2s],		train_loss: 0.3327,	val_loss: 0.0595
38:	[0s / 2s],		train_loss: 0.3287,	val_loss: 0.0561


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:45:56,927 - INFO - Performing clustering iteration 6 / 20
2024-11-15 23:45:56,927 - INFO - CUDA environment set up and GPU memory cleared.


39:	[0s / 2s],		train_loss: 0.3317,	val_loss: 0.0572


2024-11-15 23:46:00,768 - INFO - Defined medoid for deephit model with 4932 clusters.


40:	[0s / 0s],		train_loss: 0.3375,	val_loss: 0.0605
41:	[0s / 0s],		train_loss: 0.3329,	val_loss: 0.0565
42:	[0s / 1s],		train_loss: 0.3314,	val_loss: 0.0566
43:	[0s / 1s],		train_loss: 0.3311,	val_loss: 0.0508
44:	[0s / 1s],		train_loss: 0.3341,	val_loss: 0.0654
45:	[0s / 2s],		train_loss: 0.3323,	val_loss: 0.0533
46:	[0s / 2s],		train_loss: 0.3336,	val_loss: 0.0545


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:03,866 - INFO - Performing clustering iteration 7 / 20
2024-11-15 23:46:03,866 - INFO - CUDA environment set up and GPU memory cleared.


47:	[0s / 2s],		train_loss: 0.3322,	val_loss: 0.0598


2024-11-15 23:46:04,200 - INFO - Defined medoid for deephit model with 4932 clusters.


48:	[0s / 0s],		train_loss: 0.3351,	val_loss: 0.0613
49:	[0s / 0s],		train_loss: 0.3300,	val_loss: 0.0587
50:	[0s / 1s],		train_loss: 0.3328,	val_loss: 0.0589
51:	[0s / 1s],		train_loss: 0.3262,	val_loss: 0.0555
52:	[0s / 1s],		train_loss: 0.3276,	val_loss: 0.0589


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:06,541 - INFO - Performing clustering iteration 8 / 20
2024-11-15 23:46:06,542 - INFO - CUDA environment set up and GPU memory cleared.


53:	[0s / 2s],		train_loss: 0.3307,	val_loss: 0.0575


2024-11-15 23:46:06,858 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:07,387 - INFO - Performing clustering iteration 9 / 20
2024-11-15 23:46:07,388 - INFO - CUDA environment set up and GPU memory cleared.


54:	[0s / 0s],		train_loss: 0.3305,	val_loss: 0.0613


2024-11-15 23:46:07,708 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:08,257 - INFO - Performing clustering iteration 10 / 20
2024-11-15 23:46:08,257 - INFO - CUDA environment set up and GPU memory cleared.


55:	[0s / 0s],		train_loss: 0.3324,	val_loss: 0.0583


2024-11-15 23:46:08,547 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:09,036 - INFO - Performing clustering iteration 11 / 20
2024-11-15 23:46:09,036 - INFO - CUDA environment set up and GPU memory cleared.


56:	[0s / 0s],		train_loss: 0.3339,	val_loss: 0.0590


2024-11-15 23:46:09,320 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:09,869 - INFO - Performing clustering iteration 12 / 20
2024-11-15 23:46:09,869 - INFO - CUDA environment set up and GPU memory cleared.


57:	[0s / 0s],		train_loss: 0.3350,	val_loss: 0.0584


2024-11-15 23:46:10,154 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:10,644 - INFO - Performing clustering iteration 13 / 20
2024-11-15 23:46:10,644 - INFO - CUDA environment set up and GPU memory cleared.


58:	[0s / 0s],		train_loss: 0.3346,	val_loss: 0.0564


2024-11-15 23:46:10,905 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:11,385 - INFO - Performing clustering iteration 14 / 20
2024-11-15 23:46:11,385 - INFO - CUDA environment set up and GPU memory cleared.


59:	[0s / 0s],		train_loss: 0.3386,	val_loss: 0.0546


2024-11-15 23:46:11,649 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:12,159 - INFO - Performing clustering iteration 15 / 20
2024-11-15 23:46:12,159 - INFO - CUDA environment set up and GPU memory cleared.


60:	[0s / 0s],		train_loss: 0.3403,	val_loss: 0.0530


2024-11-15 23:46:12,418 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:12,912 - INFO - Performing clustering iteration 16 / 20
2024-11-15 23:46:12,912 - INFO - CUDA environment set up and GPU memory cleared.


61:	[0s / 0s],		train_loss: 0.3453,	val_loss: 0.0509


2024-11-15 23:46:13,144 - INFO - Defined medoid for deephit model with 4932 clusters.


62:	[0s / 0s],		train_loss: 0.3780,	val_loss: 0.0493
63:	[0s / 0s],		train_loss: 0.3552,	val_loss: 0.0593
64:	[0s / 1s],		train_loss: 0.3454,	val_loss: 0.0539
65:	[0s / 1s],		train_loss: 0.3388,	val_loss: 0.0555
66:	[0s / 1s],		train_loss: 0.3392,	val_loss: 0.0561
67:	[0s / 2s],		train_loss: 0.3357,	val_loss: 0.0584
68:	[0s / 2s],		train_loss: 0.3364,	val_loss: 0.0540


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:16,239 - INFO - Performing clustering iteration 17 / 20
2024-11-15 23:46:16,239 - INFO - CUDA environment set up and GPU memory cleared.


69:	[0s / 2s],		train_loss: 0.3329,	val_loss: 0.0609


2024-11-15 23:46:16,463 - INFO - Defined medoid for deephit model with 4932 clusters.


70:	[0s / 0s],		train_loss: 0.3514,	val_loss: 0.0510
71:	[0s / 0s],		train_loss: 0.3403,	val_loss: 0.0568


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:17,692 - INFO - Performing clustering iteration 18 / 20
2024-11-15 23:46:17,693 - INFO - CUDA environment set up and GPU memory cleared.


72:	[0s / 1s],		train_loss: 0.3387,	val_loss: 0.0579


2024-11-15 23:46:17,921 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:18,419 - INFO - Performing clustering iteration 19 / 20
2024-11-15 23:46:18,420 - INFO - CUDA environment set up and GPU memory cleared.


73:	[0s / 0s],		train_loss: 0.3518,	val_loss: 0.0585


2024-11-15 23:46:18,621 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-15 23:46:19,157 - INFO - Performing clustering iteration 20 / 20
2024-11-15 23:46:19,157 - INFO - CUDA environment set up and GPU memory cleared.


74:	[0s / 0s],		train_loss: 0.3447,	val_loss: 0.0556


2024-11-15 23:46:19,356 - INFO - Defined medoid for deephit model with 4932 clusters.


75:	[0s / 0s],		train_loss: 0.3463,	val_loss: 0.0570


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.
Initiate training of deephit neural network
model structure: ANN
data balancing method: NearMiss


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-15 23:46:30,859 - INFO - Dataset for deephit model undersampled using method 'NearMiss' with sampling strategy 0.05.


0:	[2s / 2s],		train_loss: 0.1141,	val_loss: 0.0613
1:	[4s / 6s],		train_loss: 0.0735,	val_loss: 0.0353
2:	[1s / 8s],		train_loss: 0.0613,	val_loss: 0.0324
3:	[1s / 9s],		train_loss: 0.0587,	val_loss: 0.0281
4:	[1s / 11s],		train_loss: 0.0556,	val_loss: 0.0271
5:	[1s / 12s],		train_loss: 0.0550,	val_loss: 0.0269
6:	[1s / 14s],		train_loss: 0.0549,	val_loss: 0.0270
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all_hazard.pkl.


  self.net.load_state_dict(torch.load(path, **kwargs))


Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-15 23:46:45,959 - INFO - Performing clustering iteration 1 / 20
2024-11-15 23:46:45,959 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:46:45,965 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deephit neural network
model structure: LSTM
data balancing method: clustering


2024-11-15 23:46:46,391 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-15 23:46:46,392 - INFO - Performing clustering iteration 2 / 20
2024-11-15 23:46:46,393 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:46:46,396 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:46:46,813 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-15 23:46:46,814 - INFO - Performing clustering iteration 3 / 20
2024-11-15 23:46:46,814 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:46:46,818 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-15 23:46:47,222 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-15 23:46:47,223 - INFO - Performing clustering iteration 4 / 20
2024-11-15 23:46:47,224 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-15 23:46:47,226 - INFO - Event column 'endpoint' updated with focus on event value 1

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[1s / 1s],		train_loss: 0.0581,	val_loss: 0.0450
1:	[1s / 2s],		train_loss: 0.0494,	val_loss: 0.0408
2:	[1s / 4s],		train_loss: 0.0387,	val_loss: 0.0465
3:	[1s / 5s],		train_loss: 0.0410,	val_loss: 0.0508
4:	[1s / 6s],		train_loss: 0.0381,	val_loss: 0.0451
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all_hazard.pkl.


  self.net.load_state_dict(torch.load(path, **kwargs))


Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.
Initiate training of deephit neural network
model structure: LSTM
data balancing method: NearMiss


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-15 23:48:20,934 - INFO - Dataset for deephit model undersampled using method 'NearMiss' with sampling strategy 0.05.
2024-11-15 23:48:53,799 - INFO - Validation data retrieved


64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[1s / 1s],		train_loss: 0.4743,	val_loss: 0.0951
1:	[1s / 2s],		train_loss: 0.4158,	val_loss: 0.0645
2:	[4s / 6s],		train_loss: 0.3919,	val_loss: 0.0709
3:	[0s / 7s],		train_loss: 0.3916,	val_loss: 0.0732
4:	[0s / 8s],		train_loss: 0.3853,	val_loss: 0.0715
5:	[1s / 9s],		train_loss: 0.3825,	val_loss: 0.0693
6:	[0s / 10s],		train_loss: 0.3815,	val_loss: 0.0725
7:	[0s / 11s],		train_loss: 0.3855,	val_loss: 0.0761
8:	[0s / 12s],		train_loss: 0.3829,	val_loss: 0.0744


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


### 4. Load models and hazards

In [23]:
def load_model(model, model_config, model_path, baseline_hazard_path):
    """
    Load model weights and baseline hazard data.

    Parameters:
    - create_model_func: Function to create the model architecture (e.g., create_neural_network).
    - model_path: Path to load the model weights (.pt file).
    - baseline_hazard_path: Path to load the baseline hazards (.pkl file).

    Returns:
    - model: The loaded model with weights and baseline hazards.
    """
    
    # Load model weights
    model.load_model_weights(model_path)
    
    # Load baseline hazards and assign to model
    if model_config['model'] == 'deepsurv':
        baseline_hazard = pd.read_pickle(baseline_hazard_path)
        model.baseline_hazards_ = baseline_hazard
        model.baseline_cumulative_hazards_ = baseline_hazard.cumsum()
    
    print(f"Model and baseline hazards loaded from {model_path} and {baseline_hazard_path}.")
    return model

In [24]:
# Dictionary to store loaded models
loaded_models = {}

for model_name in model_ls:
    # Retrieve configuration by dynamically constructing the variable name
    config_var_name = model_name + "_config"
    model_config = globals().get(config_var_name)
    
    if model_config is None:
        print(f"Configuration for {config_var_name} not found.")
        continue

    model_weights_path = f'{model_path}{model_name}.pt'
    model_hazard_path = f'{model_path}{model_name}_hazard.pkl'
    
    # Define the model creation function as a lambda to pass the config
    create_model_func = lambda: create_neural_network(
        config=model_config,
        num_risk=len(X_train_transformed[EVENT_COL].unique()) - 1,
        num_time_bins=len(TIME_GRID)
    )
    model = create_model_func()
    
    # Load the model and store it in the dictionary
    loaded_models[model_name] = load_model(model, model_config, model_weights_path, model_hazard_path)
    print(f'Loaded model {model_name}')

Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1_hazard.pkl.
Loaded model deepsurv_ann_clustering_1
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1_hazard.pkl.
Loaded model deepsurv_ann_smoteenn_1


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1_hazard.pkl.
Loaded model deepsurv_ann_smotetomek_1
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2_hazard.pkl.
Loaded model deepsurv_ann_clustering_2
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2_hazard.pkl.
Loaded model deepsurv_ann_smoteenn_2


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2_hazard.pkl.
Loaded model deepsurv_ann_smotetomek_2
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1_hazard.pkl.
Loaded model deepsurv_lstm_clustering_1


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_1_hazard.pkl.
Loaded model deepsurv_lstm_nearmiss_1
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2_hazard.pkl.
Loaded model deepsurv_lstm_clustering_2


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2_hazard.pkl.
Loaded model deepsurv_lstm_nearmiss_2
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all_hazard.pkl.
Loaded model deephit_ann_clustering_all


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all_hazard.pkl.
Loaded model deephit_ann_nearmiss2_all
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all_hazard.pkl.
Loaded model deephit_lstm_clustering_all
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all_hazard.pkl.
Loaded model deephit_lstm_nearmiss1_all


### 5. Prepare and make prediction on the training set, combine the prediction arrays for training of super learner

In [25]:
def predict_neural_network(model, config, X_test, duration_col, event_col, cluster_col, time_grid=None):
    """
    Function to train a given neural network using the provided datasets.

    Args:
        net (torch.nn.Module): Neural network to be trained.
        config (dict): Configuration dictionary containing model hyperparameters.
        X_train (pd.DataFrame): Training dataset with features.
        X_val (pd.DataFrame): Validation dataset with features.
        duration_col (str): Column representing event durations.
        event_col (str): Column representing event occurrences.
        cluster_col (str): Column for grouping during cross-validation.
        callbacks (list): List of callbacks for training.
        time_grid (np.array, optional): Time grid for evaluation if required. Defaults to None.

    Returns:
        model: Trained PyCox model.
        logs: Training logs.
    """
    gc.collect()
    torch.cuda.empty_cache()
    # Train the model
    if config['model'] == 'deepsurv':
        print('Initiate testing of deepsurv neural network')
        X_test = df_event_focus(X_test, event_col, config['endpoint'])
        if config['net'] == 'ann':
            print('model structure: ANN')
            X_test_processed, y_test = preprocess_data(X_test, config['features'], duration_col, event_col)
            surv = model.predict_surv_df(X_test_processed, batch_size=512)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            X_test_processed, y_test = prepare_validation_data(X_test, config['features'], duration_col, event_col, config, cluster_col, config['model'], time_grid)
            X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32)
            y_test_tensor = (torch.tensor(y_test[0], dtype=torch.float32), torch.tensor(y_test[1], dtype=torch.float32))
            surv = model.predict_surv_df(X_test_tensor, batch_size=512)
    elif config['model'] == 'deephit':
        print('Initiate testing of deephit neural network')
        if config['net'] == 'ann':
            print('model structure: ANN')
            X_test_processed, y_test = preprocess_data(X_test, config['features'], duration_col, event_col, time_grid, discretize=True)
            surv = model.predict_cif(X_test_processed, batch_size=512)
            print('prediction complete, please note that prediction of deephit models are CIF.')
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            X_test_processed, y_test = prepare_validation_data(X_test, config['features'], duration_col, event_col, config, cluster_col, config['model'], time_grid)
            surv = model.predict_cif(X_test_processed, batch_size=512)
            print('prediction complete, please note that prediction of deephit models are CIF.')

    # Free memory after training
    gc.collect()
    torch.cuda.empty_cache()

    return surv, y_test

In [26]:

def align_to_time_grid(surv, time_grid):
    """
    Align the survival DataFrame to the closest indices of the time grid.

    Parameters:
        surv (pd.DataFrame): Survival probabilities DataFrame.
        time_grid (np.array): Array of target time points to align.

    Returns:
        aligned_surv (pd.DataFrame): Aligned survival probabilities.
    """
    # Convert the DataFrame's index to a NumPy array for fast computation
    surv_times = np.array(surv.index)
    
    # Find the closest time in the survival DataFrame for each time in the grid
    closest_indices = [np.argmin(np.abs(surv_times - t)) for t in time_grid]
    
    # Extract the rows corresponding to the closest times
    aligned_surv = surv.iloc[closest_indices].copy()
    
    # Reindex the DataFrame to match the time grid
    aligned_surv.index = range(len(time_grid))  # Standardize indices to 0, 1, 2, ...
    return aligned_surv

#### 5.1 Use lifelines and CoxPH Fitter to get the CIF of both outcomes as the 'ground truth' of the training dataset

In [None]:
from lifelines import CoxPHFitter
from lifelines import AalenJohansenFitter
from sklearn.linear_model import LinearRegression
from joblib import Parallel, delayed

# Convert all non-target events to 0 (censored)
X_train_transformed["event1"] = X_train_transformed[EVENT_COL].apply(lambda x: 1 if x == 1 else 0)
X_train_transformed["event2"] = X_train_transformed[EVENT_COL].apply(lambda x: 1 if x == 2 else 0)

class_counts = X_train_transformed[EVENT_COL].value_counts()
X_train_transformed['weights'] = X_train_transformed[EVENT_COL].map(lambda e: 1 / class_counts[e]).values

# Step 1: Fit a Cox model for each event type
cox_model_event_1 = CoxPHFitter()
cox_model_event_1.fit(X_train_transformed[FEATURE_COLS + [DURATION_COL, 'event1', CLUSTER_COL, 'weights']], duration_col=DURATION_COL, event_col="event1", cluster_col=CLUSTER_COL, weights_col="weights", robust=True)

cox_model_event_2 = CoxPHFitter()
cox_model_event_2.fit(X_train_transformed[FEATURE_COLS + [DURATION_COL, 'event2', CLUSTER_COL, 'weights']], duration_col=DURATION_COL, event_col="event2", cluster_col=CLUSTER_COL, weights_col="weights", robust=True)

# Step 2: Predict individual cumulative hazards for each event type
cumulative_hazard_event_1 = cox_model_event_1.predict_cumulative_hazard(X_train_transformed)
cumulative_hazard_event_2 = cox_model_event_2.predict_cumulative_hazard(X_train_transformed)

# Step 3: Compute overall survival for each individual
# Overall survival: S(t) = exp(- (H1(t) + H2(t)))
overall_survival = np.exp(-(cumulative_hazard_event_1 + cumulative_hazard_event_2))

# Step 4: Calculate CIF for each event type
# CIF_k(t) = ∫ h_k(u) * S(u) du (approximated as cumulative sum)
cif_event_1 = (cumulative_hazard_event_1 * overall_survival).cumsum(axis=0)
cif_event_2 = (cumulative_hazard_event_2 * overall_survival).cumsum(axis=0)

# Step 5: Format and display the CIF predictions
cif_event_1_normalized = cif_event_1.div(cif_event_1.iloc[-1].max(), axis=1)
cif_event_2_normalized = cif_event_2.div(cif_event_2.iloc[-1].max(), axis=1)

# Compute the CIF ground truth
cif_ground_truth = np.zeros((2,6,396424))
cif_ground_truth[0] = align_to_time_grid(cif_event_1_normalized, TIME_GRID).values
cif_ground_truth[1] = align_to_time_grid(cif_event_2_normalized, TIME_GRID).values




<lifelines.CoxPHFitter: fitted with 3 total observations, 2 right-censored observations>

In [None]:
_, y_ground_truth = preprocess_data(X_train_transformed, FEATURE_COLS, DURATION_COL, EVENT_COL, TIME_GRID, discretize=True)
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(cif_ground_truth[i], [0, 1, 2, 3, 4, 5])
    ev = EvalSurv(1-cif, y_ground_truth[0], y_ground_truth[1] == event_interest, censor_surv='km')
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # # Nam and D'Agostino Chi2 statistic for calibration
    # for time in time_grid:
    #     chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
    #         df=df_test, 
    #         duration_col=duration_col, 
    #         event_col=event_col,
    #         surv=(1-cif), 
    #         time=time, 
    #         event_focus=event_interest
    #     )
    #     nam_dagostino_results.append({
    #         'Event': event_interest,
    #         'Year': round(time / 365),
    #         'Chi2_Stat': chi2_stat,
    #         'P_Value': p_value,
    #         'Observed_Events': observed_events.tolist(),
    #         'Expected_Events': expected_events.tolist(),
    #         'Sample_Size': n.tolist()
    #     })
display(concordance_indices)
display(brier_series)
display(integrated_brier_scores)
display(neg_log_likelihoods)


{'Event_1': 0.9818982092210652, 'Event_2': 0.7033707118427875}

{'Event_1': 0       0.000000
 365     0.038203
 730     0.038203
 1095    0.038203
 1460    0.038203
 1825    0.038203
 Name: brier_score, dtype: float64,
 'Event_2': 0       0.000000
 365     0.085103
 730     0.085103
 1095    0.085103
 1460    0.085103
 1825    0.085103
 Name: brier_score, dtype: float64}

{'Event_1': 0.03565589439044665, 'Event_2': 0.07942951892202983}

{'Event_1': 0.17732477425591778, 'Event_2': 0.27434970836708117}

#### 5.2 Get the prediction for each model

In [59]:
gc.collect()
torch.cuda.empty_cache()

# Initialize dictionary to store combined predictions and y_test
combined_predictions = []
combined_y_test = []
cif_ground_truth_test = []

# Get unique keys and split them into 10 groups
unique_keys = X_train_transformed_2['key'].unique()
np.random.shuffle(unique_keys)  # Shuffle keys to ensure randomness
key_folds = np.array_split(unique_keys, 10)

for fold_idx, test_keys in enumerate(key_folds):
    print(f"Processing fold {fold_idx + 1}...")
    # Split the dataset into train and test based on keys
    X_test_fold = X_train_transformed_2[X_train_transformed_2['key'].isin(test_keys)]
    
    # Get the indices of X_test_fold relative to the original dataset
    test_indices = X_test_fold.index.to_numpy()

    # Extract CIF ground truth for these indices
    fold_cif_ground_truth = cif_ground_truth[:, :, test_indices]

    # Stack this fold's CIF ground truth
    cif_ground_truth_test.append(fold_cif_ground_truth)
    
    # Store predictions for this fold
    fold_predictions = {}
    
    X_test, y_test = preprocess_data(X_test_fold, FEATURE_COLS, DURATION_COL, EVENT_COL, TIME_GRID, discretize=True)
    combined_y_test.append(y_test)
    
    for model_name in model_ls:
        # Retrieve configuration by dynamically constructing the variable name
        config_var_name = model_name + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue
        try:
            print(f"Initiating prediction for model: {model_name}")
            
            # Retrieve the loaded model
            model = loaded_models.get(model_name)
            if model is None:
                print(f"Model {model_name} is not loaded.")
                continue
            
            # Predict using the loaded model and configuration
            surv, _ = predict_neural_network(
                model=model,
                config=model_config,
                X_test=X_test_fold,
                duration_col=DURATION_COL,
                event_col=EVENT_COL,
                cluster_col=CLUSTER_COL,
                time_grid=TIME_GRID
            )
            
            # Align survival probabilities (if DeepSurv)
            if model_config['model'] == 'deepsurv':
                surv = align_to_time_grid(surv, TIME_GRID).values  # 2D array
                
                # Structure key dynamically
                key = f"deepsurv_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(np.expand_dims(1 - surv, axis=0))
            
            # Handle DeepHit predictions
            elif model_config['model'] == 'deephit':
                surv = np.array(surv)  # Convert to numpy array
                
                # Structure key dynamically
                key = f"deephit_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(surv)
            
            print(f"Prediction completed for {model_name} on fold {fold_idx + 1}.")
        
        except Exception as e:
            print(f"Error during prediction for {model_name} on fold {fold_idx + 1}: {e}")
            
    combined_predictions.append(fold_predictions)

meta_learner_X_train = {}

for fold_predictions in combined_predictions:
    for model_key, fold_data in fold_predictions.items():
        if model_key not in meta_learner_X_train:
            meta_learner_X_train[model_key] = []
        
        # Align DeepSurv predictions to match DeepHit format
        if "deepsurv" in model_key:
            # Stack competing outcomes and remove the redundant dimension
            meta_learner_X_train[model_key].extend(
                [np.squeeze(np.stack(fold_data, axis=0), axis=1)]  # Squeeze out extra axis
            )
        else:
            # Keep DeepHit predictions as-is
            meta_learner_X_train[model_key].extend(fold_data)

# Combine y_test
meta_learner_y_train = (
    np.concatenate([fold[0] for fold in combined_y_test]),  # Concatenate all first elements
    np.concatenate([fold[1] for fold in combined_y_test])   # Concatenate all second elements
)

# Combine CIF ground truth for all folds
cif_ground_truth_test_stacked = np.concatenate(cif_ground_truth_test, axis=2)

print("Final predictions and y_test combined.")

2024-11-16 17:46:02,084 - INFO - Event column 'endpoint' updated with focus on event value 1.


Processing fold 1...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:46:02,731 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:46:39,846 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:47:00,502 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 1.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:47:01,105 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:47:02,518 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:47:04,464 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:47:04,469 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:47:18,564 - INFO - Validation data retrieved
2024-11-16 17:47:19,230 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:47:19,231 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:47:30,069 - INFO - Validation data retrieved
2024-11-16 17:47:30,668 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:47:30,669 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:47:40,669 - INFO - Validation data retrieved
2024-11-16 17:47:41,221 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:47:41,222 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:47:56,588 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 1.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 1.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 1.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:48:10,963 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 1.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:48:24,909 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 1.
Processing fold 2...
Initiating prediction for model: deepsurv_ann_clustering_1


2024-11-16 17:48:25,333 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:48:25,949 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:48:36,146 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:48:44,552 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 2.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:48:45,195 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:48:46,661 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:48:48,485 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:48:48,486 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:49:02,091 - INFO - Validation data retrieved
2024-11-16 17:49:02,675 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:49:02,677 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:49:12,471 - INFO - Validation data retrieved
2024-11-16 17:49:13,047 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:49:13,048 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:49:22,714 - INFO - Validation data retrieved
2024-11-16 17:49:23,338 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:49:23,339 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:49:37,609 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 2.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 2.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 2.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:49:50,922 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 2.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:50:00,640 - INFO - Validation data retrieved
2024-11-16 17:50:01,006 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 2.
Processing fold 3...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:50:01,632 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:50:13,955 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:50:22,137 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 3.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:50:22,788 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:50:24,218 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:50:25,979 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:50:25,986 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:50:36,077 - INFO - Validation data retrieved
2024-11-16 17:50:36,686 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:50:36,687 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:50:50,176 - INFO - Validation data retrieved
2024-11-16 17:50:50,766 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:50:50,767 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:51:00,605 - INFO - Validation data retrieved
2024-11-16 17:51:01,169 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:51:01,170 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:51:11,815 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 3.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 3.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 3.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:51:28,952 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 3.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:51:38,627 - INFO - Validation data retrieved
2024-11-16 17:51:39,018 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 3.
Processing fold 4...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:51:39,621 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:51:48,104 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:52:03,013 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 4.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:52:03,618 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:52:05,057 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:52:06,879 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:52:06,880 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:52:17,252 - INFO - Validation data retrieved
2024-11-16 17:52:17,766 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:52:17,767 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:52:28,089 - INFO - Validation data retrieved
2024-11-16 17:52:28,777 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:52:28,778 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:52:42,607 - INFO - Validation data retrieved
2024-11-16 17:52:43,146 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:52:43,148 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:52:54,588 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 4.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 4.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 4.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:53:12,367 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 4.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:53:22,664 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 4.
Processing fold 5...
Initiating prediction for model: deepsurv_ann_clustering_1


2024-11-16 17:53:23,064 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:53:23,719 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:53:31,984 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:53:40,317 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 5.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:53:40,961 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:53:42,452 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:53:47,992 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:53:47,994 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:53:57,991 - INFO - Validation data retrieved
2024-11-16 17:53:58,636 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:53:58,637 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:54:08,647 - INFO - Validation data retrieved
2024-11-16 17:54:09,254 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:54:09,256 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:54:19,030 - INFO - Validation data retrieved
2024-11-16 17:54:19,581 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:54:19,582 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:54:34,464 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 5.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 5.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 5.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:54:48,357 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 5.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:55:02,064 - INFO - Validation data retrieved
2024-11-16 17:55:02,426 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 5.
Processing fold 6...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:55:03,016 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:55:11,117 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:55:18,768 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 6.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:55:19,390 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:55:20,920 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:55:22,641 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:55:22,642 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:55:36,117 - INFO - Validation data retrieved
2024-11-16 17:55:36,746 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:55:36,747 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:55:46,521 - INFO - Validation data retrieved
2024-11-16 17:55:47,061 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:55:47,062 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:55:56,501 - INFO - Validation data retrieved
2024-11-16 17:55:57,026 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:55:57,027 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:56:08,162 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 6.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 6.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 6.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:56:25,105 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 6.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:56:34,708 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 6.
Processing fold 7...
Initiating prediction for model: deepsurv_ann_clustering_1


2024-11-16 17:56:35,084 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:56:35,702 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:56:44,058 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:56:57,918 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 7.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:56:58,545 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:57:00,272 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:57:02,118 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:57:02,119 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:57:12,718 - INFO - Validation data retrieved
2024-11-16 17:57:13,361 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:57:13,362 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:57:27,345 - INFO - Validation data retrieved
2024-11-16 17:57:27,924 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:57:27,926 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:57:38,190 - INFO - Validation data retrieved
2024-11-16 17:57:38,788 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:57:38,789 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:57:50,606 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 7.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 7.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 7.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:58:08,729 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 7.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:58:19,278 - INFO - Validation data retrieved
2024-11-16 17:58:19,707 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 7.
Processing fold 8...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:58:20,323 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:58:28,339 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:58:39,859 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 8.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:58:40,447 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:58:41,925 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:58:43,702 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:58:43,703 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:58:53,523 - INFO - Validation data retrieved
2024-11-16 17:58:54,178 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 17:58:54,179 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:59:03,912 - INFO - Validation data retrieved
2024-11-16 17:59:04,426 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:59:04,427 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:59:17,923 - INFO - Validation data retrieved
2024-11-16 17:59:18,458 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 17:59:18,459 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 17:59:29,346 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 8.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 8.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 8.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:59:42,942 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 8.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 17:59:56,272 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 8.
Processing fold 9...
Initiating prediction for model: deepsurv_ann_clustering_1


2024-11-16 17:59:56,620 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 17:59:57,189 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:00:05,006 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:00:12,763 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 9.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:00:13,323 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:00:14,705 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:00:16,566 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 18:00:16,567 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 18:00:30,281 - INFO - Validation data retrieved
2024-11-16 18:00:30,867 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 18:00:30,868 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 18:00:40,773 - INFO - Validation data retrieved
2024-11-16 18:00:41,311 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 18:00:41,312 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 18:00:51,213 - INFO - Validation data retrieved
2024-11-16 18:00:51,736 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 18:00:51,737 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 18:01:06,101 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 9.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 9.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 9.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 18:01:19,804 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 9.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 18:01:29,709 - INFO - Validation data retrieved
2024-11-16 18:01:30,125 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 9.
Processing fold 10...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:01:30,719 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:01:42,441 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:01:50,452 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 10.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:01:51,038 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:01:52,542 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-16 18:01:54,395 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 18:01:54,396 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 18:02:04,127 - INFO - Validation data retrieved
2024-11-16 18:02:04,742 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-16 18:02:04,743 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 18:02:18,385 - INFO - Validation data retrieved
2024-11-16 18:02:18,996 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 18:02:18,997 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 18:02:28,895 - INFO - Validation data retrieved
2024-11-16 18:02:29,440 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-16 18:02:29,441 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-16 18:02:40,509 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 10.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 10.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 10.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 18:02:57,629 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 10.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-16 18:03:07,443 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 10.
Final predictions and y_test combined.


In [60]:
# Combine all 10 items for each model's predictions
final_meta_learner_X_train = {}

for key, predictions in meta_learner_X_train.items():
    # Concatenate the predictions along the last axis
    final_meta_learner_X_train[key] = np.concatenate(predictions, axis=2)  # Combine along the feature axis

# Validate the shapes
for key, combined_prediction in final_meta_learner_X_train.items():
    print(f"{key}: final shape {combined_prediction.shape}")


deepsurv_ann_clustering: final shape (2, 6, 316242)
deepsurv_ann_enn: final shape (2, 6, 316242)
deepsurv_ann_tomek: final shape (2, 6, 316242)
deepsurv_lstm_clustering: final shape (2, 6, 316242)
deepsurv_lstm_NearMiss: final shape (2, 6, 316242)
deephit_ann_clustering: final shape (2, 6, 316242)
deephit_ann_NearMiss: final shape (2, 6, 316242)
deephit_lstm_clustering: final shape (2, 6, 316242)
deephit_lstm_NearMiss: final shape (2, 6, 316242)


In [61]:
display(np.shape(cif_ground_truth_test_stacked))

# Use the duration and event data as targets
durations = meta_learner_y_train[0]  # Time to event or censoring
events = meta_learner_y_train[1]  # Event type (competing risks)

print(durations.shape)
print(events.shape)
print(np.unique(events, return_counts=True))

(2, 6, 316242)

(316242,)
(316242,)
(array([0, 1, 2]), array([310074,   1509,   4659]))


### 6. Ensemble methods

#### 6.1 Prepare each model's prediction on validation set


In [None]:
gc.collect()
torch.cuda.empty_cache()

# Initialize dictionary to store combined predictions and y_test
combined_predictions = []
combined_y_val = []
cif_ground_truth_val = []

# Get unique keys and split them into 10 groups
unique_keys = X_fin_val['key'].unique()
np.random.shuffle(unique_keys)  # Shuffle keys to ensure randomness
key_folds = np.array_split(unique_keys, 10)

for fold_idx, test_keys in enumerate(key_folds):
    print(f"Processing fold {fold_idx + 1}...")
    # Split the dataset into train and test based on keys
    X_val_fold = X_fin_val[X_fin_val['key'].isin(test_keys)]
    
    # Get the indices of X_test_fold relative to the original dataset
    test_indices = X_val_fold.index.to_numpy()

    # Extract CIF ground truth for these indices
    cif_ground_truth_val_fold = cif_ground_truth[:, :, test_indices]

    # Stack this fold's CIF ground truth
    cif_ground_truth_val.append(cif_ground_truth_val_fold)
    
    # Store predictions for this fold
    fold_predictions = {}
    
    X_val, y_val = preprocess_data(X_val_fold, FEATURE_COLS, DURATION_COL, EVENT_COL, TIME_GRID, discretize=True)
    combined_y_val.append(y_val)
    
    for model_name in model_ls:
        # Retrieve configuration by dynamically constructing the variable name
        config_var_name = model_name + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue
        try:
            print(f"Initiating prediction for model: {model_name}")
            
            # Retrieve the loaded model
            model = loaded_models.get(model_name)
            if model is None:
                print(f"Model {model_name} is not loaded.")
                continue
            
            # Predict using the loaded model and configuration
            surv, _ = predict_neural_network(
                model=model,
                config=model_config,
                X_test=X_val_fold,
                duration_col=DURATION_COL,
                event_col=EVENT_COL,
                cluster_col=CLUSTER_COL,
                time_grid=TIME_GRID
            )
            
            # Align survival probabilities (if DeepSurv)
            if model_config['model'] == 'deepsurv':
                surv = align_to_time_grid(surv, TIME_GRID).values  # 2D array
                
                # Structure key dynamically
                key = f"deepsurv_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(np.expand_dims(1 - surv, axis=0))
            
            # Handle DeepHit predictions
            elif model_config['model'] == 'deephit':
                surv = np.array(surv)  # Convert to numpy array
                
                # Structure key dynamically
                key = f"deephit_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(surv)
            
            print(f"Prediction completed for {model_name} on fold {fold_idx + 1}.")
        
        except Exception as e:
            print(f"Error during prediction for {model_name} on fold {fold_idx + 1}: {e}")
            
    combined_predictions.append(fold_predictions)

meta_learner_X_val = {}

for fold_predictions in combined_predictions:
    for model_key, fold_data in fold_predictions.items():
        if model_key not in meta_learner_X_val:
            meta_learner_X_val[model_key] = []
        
        # Align DeepSurv predictions to match DeepHit format
        if "deepsurv" in model_key:
            # Stack competing outcomes and remove the redundant dimension
            meta_learner_X_val[model_key].extend(
                [np.squeeze(np.stack(fold_data, axis=0), axis=1)]  # Squeeze out extra axis
            )
        else:
            # Keep DeepHit predictions as-is
            meta_learner_X_val[model_key].extend(fold_data)

# Combine y_test
meta_learner_y_val = (
    np.concatenate([fold[0] for fold in combined_y_val]),  # Concatenate all first elements
    np.concatenate([fold[1] for fold in combined_y_val])   # Concatenate all second elements
)

# Combine CIF ground truth for all folds
cif_ground_truth_val_stacked = np.concatenate(cif_ground_truth_val, axis=2)

print("Validation predictions and y_val combined.")

2024-11-18 02:36:18,328 - INFO - Event column 'endpoint' updated with focus on event value 1.


Processing fold 1...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:18,790 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:21,687 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:27,567 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 1.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:27,998 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:28,662 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:29,458 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:36:29,459 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:36:31,992 - INFO - Validation data retrieved
2024-11-18 02:36:32,387 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:36:32,388 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:36:34,942 - INFO - Validation data retrieved
2024-11-18 02:36:35,372 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:36:35,373 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:36:37,833 - INFO - Validation data retrieved
2024-11-18 02:36:38,245 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:36:38,246 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:36:40,799 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 1.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 1.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 1.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:36:44,934 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 1.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:36:47,620 - INFO - Validation data retrieved
2024-11-18 02:36:47,961 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 1.
Processing fold 2...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:48,371 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:51,012 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:53,399 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 2.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:53,842 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:54,512 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:36:55,291 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:36:55,292 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:36:57,667 - INFO - Validation data retrieved
2024-11-18 02:37:01,743 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:37:01,744 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:04,139 - INFO - Validation data retrieved
2024-11-18 02:37:04,528 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:37:04,529 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:06,958 - INFO - Validation data retrieved
2024-11-18 02:37:07,360 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:37:07,360 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:09,810 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 2.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 2.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 2.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:37:13,634 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 2.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:37:16,269 - INFO - Validation data retrieved
2024-11-18 02:37:16,617 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 2.
Processing fold 3...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:17,034 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:19,607 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:21,916 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 3.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:22,363 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:23,101 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:23,872 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:37:23,873 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:26,267 - INFO - Validation data retrieved
2024-11-18 02:37:26,706 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:37:26,707 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:29,071 - INFO - Validation data retrieved
2024-11-18 02:37:29,497 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:37:29,498 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:31,982 - INFO - Validation data retrieved
2024-11-18 02:37:32,389 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:37:32,390 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:38,556 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 3.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 3.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 3.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:37:42,391 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 3.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:37:45,029 - INFO - Validation data retrieved
2024-11-18 02:37:45,407 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 3.
Processing fold 4...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:45,825 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:48,101 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:50,258 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 4.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:50,687 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:51,304 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:37:52,010 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:37:52,011 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:54,407 - INFO - Validation data retrieved
2024-11-18 02:37:54,851 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:37:54,852 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:57,178 - INFO - Validation data retrieved
2024-11-18 02:37:57,578 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:37:57,579 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:37:59,919 - INFO - Validation data retrieved
2024-11-18 02:38:00,305 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:38:00,306 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:38:02,687 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 4.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 4.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 4.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:38:06,488 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 4.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:38:09,140 - INFO - Validation data retrieved
2024-11-18 02:38:09,516 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 4.
Processing fold 5...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:09,951 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:15,961 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:18,181 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 5.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:18,603 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:19,256 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:19,947 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:38:19,948 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:38:22,394 - INFO - Validation data retrieved
2024-11-18 02:38:22,793 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:38:22,794 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:38:25,199 - INFO - Validation data retrieved
2024-11-18 02:38:25,574 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:38:25,575 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:38:28,038 - INFO - Validation data retrieved
2024-11-18 02:38:28,440 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:38:28,441 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:38:30,908 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 5.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 5.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 5.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:38:34,838 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 5.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:38:37,624 - INFO - Validation data retrieved
2024-11-18 02:38:37,952 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 5.
Processing fold 6...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:38,424 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:40,902 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:43,296 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 6.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:43,720 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:44,363 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:38:45,109 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:38:45,109 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:38:51,255 - INFO - Validation data retrieved
2024-11-18 02:38:51,666 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:38:51,666 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:38:54,223 - INFO - Validation data retrieved
2024-11-18 02:38:54,624 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:38:54,624 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:38:57,151 - INFO - Validation data retrieved
2024-11-18 02:38:57,563 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:38:57,563 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:00,168 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 6.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 6.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 6.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:39:04,262 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 6.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:39:07,120 - INFO - Validation data retrieved
2024-11-18 02:39:07,481 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 6.
Processing fold 7...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:08,025 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:10,413 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:12,658 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 7.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:13,067 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:13,688 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:14,377 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:39:14,378 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:16,909 - INFO - Validation data retrieved
2024-11-18 02:39:17,318 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:39:17,319 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:19,806 - INFO - Validation data retrieved
2024-11-18 02:39:20,242 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:39:20,242 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:26,324 - INFO - Validation data retrieved
2024-11-18 02:39:26,699 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:39:26,701 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:29,274 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 7.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 7.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 7.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:39:33,254 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 7.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:39:35,957 - INFO - Validation data retrieved
2024-11-18 02:39:36,277 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 7.
Processing fold 8...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:36,738 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:39,288 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:41,482 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 8.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:41,880 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:42,509 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:39:43,179 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:39:43,179 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:45,504 - INFO - Validation data retrieved
2024-11-18 02:39:45,923 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:39:45,924 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:48,201 - INFO - Validation data retrieved
2024-11-18 02:39:48,580 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:39:48,581 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:50,853 - INFO - Validation data retrieved
2024-11-18 02:39:51,284 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:39:51,284 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:39:53,633 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 8.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 8.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 8.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:39:57,328 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 8.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:40:03,476 - INFO - Validation data retrieved
2024-11-18 02:40:03,794 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 8.
Processing fold 9...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:04,227 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:06,538 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:08,682 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 9.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:09,087 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:09,710 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:10,397 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:40:10,397 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:40:12,827 - INFO - Validation data retrieved
2024-11-18 02:40:13,206 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:40:13,207 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:40:15,513 - INFO - Validation data retrieved
2024-11-18 02:40:15,927 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:40:15,929 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:40:18,251 - INFO - Validation data retrieved
2024-11-18 02:40:18,635 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:40:18,636 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:40:21,004 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 9.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 9.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 9.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:40:24,794 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 9.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:40:27,348 - INFO - Validation data retrieved
2024-11-18 02:40:27,686 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 9.
Processing fold 10...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:28,151 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:30,765 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:33,115 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 10.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:33,550 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:34,179 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 02:40:38,629 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:40:38,630 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:40:40,977 - INFO - Validation data retrieved
2024-11-18 02:40:41,422 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 02:40:41,423 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:40:43,838 - INFO - Validation data retrieved
2024-11-18 02:40:44,313 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:40:44,315 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:40:46,699 - INFO - Validation data retrieved
2024-11-18 02:40:47,115 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 02:40:47,116 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 02:40:49,554 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 10.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 10.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 10.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:40:53,374 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 10.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 02:40:55,953 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 10.
Final predictions and y_test combined.


In [None]:
# Combine all 10 items for each model's predictions
final_meta_learner_X_val = {}

for key, predictions in meta_learner_X_val.items():
    # Concatenate the predictions along the last axis
    final_meta_learner_X_val[key] = np.concatenate(predictions, axis=2)  # Combine along the feature axis

# Validate the shapes
for key, combined_prediction in final_meta_learner_X_val.items():
    print(f"{key}: final shape {combined_prediction.shape}")

deepsurv_ann_clustering: final shape (2, 6, 80182)
deepsurv_ann_enn: final shape (2, 6, 80182)
deepsurv_ann_tomek: final shape (2, 6, 80182)
deepsurv_lstm_clustering: final shape (2, 6, 80182)
deepsurv_lstm_NearMiss: final shape (2, 6, 80182)
deephit_ann_clustering: final shape (2, 6, 80182)
deephit_ann_NearMiss: final shape (2, 6, 80182)
deephit_lstm_clustering: final shape (2, 6, 80182)
deephit_lstm_NearMiss: final shape (2, 6, 80182)


In [88]:
# Use the duration and event data as targets
durations_val = meta_learner_y_val[0]  # Time to event or censoring
events_val = meta_learner_y_val[1]  # Event type (competing risks)

print(durations_val.shape)
print(events_val.shape)
print(np.unique(events_val, return_counts=True))

(80182,)
(80182,)
(array([0, 1, 2]), array([78593,   416,  1173]))


#### 6.2 Prepare each model's prediction on test set

In [93]:
gc.collect()
torch.cuda.empty_cache()

# Initialize dictionary to store combined predictions and y_test
combined_predictions = []
combined_y_fin_test = []
cif_ground_truth_fin_test = []

# Get unique keys and split them into 10 groups
unique_keys = X_test_transformed['key'].unique()
np.random.shuffle(unique_keys)  # Shuffle keys to ensure randomness
key_folds = np.array_split(unique_keys, 10)

for fold_idx, test_keys in enumerate(key_folds):
    print(f"Processing fold {fold_idx + 1}...")
    # Split the dataset into train and test based on keys
    X_test_fold = X_test_transformed[X_test_transformed['key'].isin(test_keys)]
    
    # Get the indices of X_test_fold relative to the original dataset
    test_indices = X_test_transformed.index.to_numpy()
    
    # Store predictions for this fold
    fold_predictions = {}
    
    X_fin_test, y_fin_test = preprocess_data(X_test_fold, FEATURE_COLS, DURATION_COL, EVENT_COL, TIME_GRID, discretize=True)
    combined_y_fin_test.append(y_fin_test)
    
    for model_name in model_ls:
        # Retrieve configuration by dynamically constructing the variable name
        config_var_name = model_name + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue
        try:
            print(f"Initiating prediction for model: {model_name}")
            
            # Retrieve the loaded model
            model = loaded_models.get(model_name)
            if model is None:
                print(f"Model {model_name} is not loaded.")
                continue
            
            # Predict using the loaded model and configuration
            surv, _ = predict_neural_network(
                model=model,
                config=model_config,
                X_test=X_test_fold,
                duration_col=DURATION_COL,
                event_col=EVENT_COL,
                cluster_col=CLUSTER_COL,
                time_grid=TIME_GRID
            )
            
            # Align survival probabilities (if DeepSurv)
            if model_config['model'] == 'deepsurv':
                surv = align_to_time_grid(surv, TIME_GRID).values  # 2D array
                
                # Structure key dynamically
                key = f"deepsurv_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(np.expand_dims(1 - surv, axis=0))
            
            # Handle DeepHit predictions
            elif model_config['model'] == 'deephit':
                surv = np.array(surv)  # Convert to numpy array
                
                # Structure key dynamically
                key = f"deephit_{model_config['net']}_{model_config['balance_method']}"
                
                # Store predictions
                if key not in fold_predictions:
                    fold_predictions[key] = []
                fold_predictions[key].append(surv)
            
            print(f"Prediction completed for {model_name} on fold {fold_idx + 1}.")
        
        except Exception as e:
            print(f"Error during prediction for {model_name} on fold {fold_idx + 1}: {e}")
            
    combined_predictions.append(fold_predictions)

meta_learner_X_fin_test = {}

for fold_predictions in combined_predictions:
    for model_key, fold_data in fold_predictions.items():
        if model_key not in meta_learner_X_fin_test:
            meta_learner_X_fin_test[model_key] = []
        
        # Align DeepSurv predictions to match DeepHit format
        if "deepsurv" in model_key:
            # Stack competing outcomes and remove the redundant dimension
            meta_learner_X_fin_test[model_key].extend(
                [np.squeeze(np.stack(fold_data, axis=0), axis=1)]  # Squeeze out extra axis
            )
        else:
            # Keep DeepHit predictions as-is
            meta_learner_X_fin_test[model_key].extend(fold_data)

# Combine y_test
meta_learner_y_fin_test = (
    np.concatenate([fold[0] for fold in combined_y_fin_test]),  # Concatenate all first elements
    np.concatenate([fold[1] for fold in combined_y_fin_test])   # Concatenate all second elements
)

print("Final test predictions and y_fin_test combined.")

2024-11-18 12:56:23,656 - INFO - Event column 'endpoint' updated with focus on event value 1.


Processing fold 1...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:24,047 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:25,427 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:26,713 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 1.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:27,068 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:27,693 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 1.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:28,245 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:56:28,246 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:56:29,379 - INFO - Validation data retrieved
2024-11-18 12:56:29,748 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:56:29,749 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:56:30,917 - INFO - Validation data retrieved
2024-11-18 12:56:31,286 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:56:31,287 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 1.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:56:32,430 - INFO - Validation data retrieved
2024-11-18 12:56:32,776 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:56:32,777 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 1.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:56:33,962 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 1.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 1.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 1.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:56:36,362 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 1.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:56:40,791 - INFO - Validation data retrieved
2024-11-18 12:56:41,106 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 1.
Processing fold 2...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:41,479 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:42,893 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:44,275 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 2.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:44,623 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:45,174 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 2.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:45,718 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:56:45,719 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:56:46,938 - INFO - Validation data retrieved
2024-11-18 12:56:47,292 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:56:47,293 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:56:48,591 - INFO - Validation data retrieved
2024-11-18 12:56:48,969 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:56:48,970 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 2.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:56:50,304 - INFO - Validation data retrieved
2024-11-18 12:56:50,697 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:56:50,698 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 2.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:56:52,056 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 2.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 2.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 2.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:56:54,639 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 2.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:56:56,129 - INFO - Validation data retrieved
2024-11-18 12:56:56,438 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 2.
Processing fold 3...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:56,830 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:58,078 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:59,404 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 3.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:56:59,754 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:00,239 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 3.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:00,771 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:57:00,772 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:02,051 - INFO - Validation data retrieved
2024-11-18 12:57:02,437 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:57:02,438 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:03,548 - INFO - Validation data retrieved
2024-11-18 12:57:03,902 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:57:03,903 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 3.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:04,981 - INFO - Validation data retrieved
2024-11-18 12:57:05,333 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:57:05,334 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 3.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:06,453 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 3.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 3.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 3.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:57:08,688 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 3.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:57:10,076 - INFO - Validation data retrieved
2024-11-18 12:57:10,392 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 3.
Processing fold 4...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:10,794 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:15,113 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:16,417 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 4.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:16,814 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:17,344 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 4.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:17,931 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:57:17,932 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:19,259 - INFO - Validation data retrieved
2024-11-18 12:57:19,638 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:57:19,639 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:20,960 - INFO - Validation data retrieved
2024-11-18 12:57:21,350 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:57:21,351 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 4.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:22,636 - INFO - Validation data retrieved
2024-11-18 12:57:23,022 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:57:23,022 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 4.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:24,364 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 4.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 4.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 4.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:57:26,893 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 4.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:57:28,390 - INFO - Validation data retrieved
2024-11-18 12:57:28,707 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 4.
Processing fold 5...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:29,061 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:30,450 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:31,763 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 5.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:32,167 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:32,814 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 5.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:33,479 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:57:33,480 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:34,738 - INFO - Validation data retrieved
2024-11-18 12:57:35,146 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:57:35,147 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:36,521 - INFO - Validation data retrieved
2024-11-18 12:57:36,878 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:57:36,879 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 5.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:38,059 - INFO - Validation data retrieved
2024-11-18 12:57:38,450 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:57:38,450 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 5.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:39,680 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 5.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 5.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 5.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:57:42,072 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 5.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:57:43,592 - INFO - Validation data retrieved
2024-11-18 12:57:43,905 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 5.
Processing fold 6...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:44,276 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:45,624 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:46,894 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 6.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:47,257 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:50,691 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 6.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:57:51,238 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:57:51,239 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:52,492 - INFO - Validation data retrieved
2024-11-18 12:57:52,881 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:57:52,882 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:54,142 - INFO - Validation data retrieved
2024-11-18 12:57:54,551 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:57:54,551 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 6.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:55,812 - INFO - Validation data retrieved
2024-11-18 12:57:56,308 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:57:56,309 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 6.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:57:57,616 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 6.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 6.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 6.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:58:00,241 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 6.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:58:01,751 - INFO - Validation data retrieved
2024-11-18 12:58:02,069 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 6.
Processing fold 7...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:02,471 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:03,824 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:05,253 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 7.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:05,635 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:06,151 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 7.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:06,818 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:58:06,818 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:08,001 - INFO - Validation data retrieved
2024-11-18 12:58:08,373 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:58:08,374 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:09,532 - INFO - Validation data retrieved
2024-11-18 12:58:09,909 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:58:09,910 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 7.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:11,056 - INFO - Validation data retrieved
2024-11-18 12:58:11,398 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:58:11,399 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 7.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:12,564 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 7.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 7.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 7.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:58:14,899 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 7.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:58:16,345 - INFO - Validation data retrieved
2024-11-18 12:58:16,665 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 7.
Processing fold 8...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:17,050 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:18,468 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:19,908 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 8.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:20,277 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:20,775 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 8.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:21,441 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:58:21,442 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:25,753 - INFO - Validation data retrieved
2024-11-18 12:58:26,160 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:58:26,161 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:27,506 - INFO - Validation data retrieved
2024-11-18 12:58:27,898 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:58:27,899 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 8.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:29,247 - INFO - Validation data retrieved
2024-11-18 12:58:29,656 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:58:29,657 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 8.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:31,058 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 8.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 8.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 8.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:58:33,745 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 8.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:58:35,512 - INFO - Validation data retrieved
2024-11-18 12:58:35,855 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 8.
Processing fold 9...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:36,285 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:37,732 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:39,131 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 9.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:39,521 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:40,006 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 9.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:40,570 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:58:40,571 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:41,766 - INFO - Validation data retrieved
2024-11-18 12:58:42,175 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:58:42,176 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:43,370 - INFO - Validation data retrieved
2024-11-18 12:58:43,755 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:58:43,756 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 9.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:44,953 - INFO - Validation data retrieved
2024-11-18 12:58:45,318 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:58:45,319 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 9.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:46,548 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 9.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 9.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 9.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:58:48,920 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 9.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:58:50,410 - INFO - Validation data retrieved
2024-11-18 12:58:50,729 - INFO - Event column 'endpoint' updated with focus on event value 1.


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 9.
Processing fold 10...
Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:51,094 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:52,589 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smoteenn_1 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:53,927 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smotetomek_1 on fold 10.
Initiating prediction for model: deepsurv_ann_clustering_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:54,289 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smoteenn_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:54,883 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_ann_smoteenn_2 on fold 10.
Initiating prediction for model: deepsurv_ann_smotetomek_2
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-18 12:58:55,444 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:58:55,445 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_smotetomek_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:58:56,853 - INFO - Validation data retrieved
2024-11-18 12:58:57,232 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-18 12:58:57,233 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_lstm_clustering_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_1
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:59:01,548 - INFO - Validation data retrieved
2024-11-18 12:59:01,943 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:59:01,944 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_nearmiss_1 on fold 10.
Initiating prediction for model: deepsurv_lstm_clustering_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:59:03,333 - INFO - Validation data retrieved
2024-11-18 12:59:03,755 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-18 12:59:03,756 - INFO - Event column 'endpoint' updated with focus on event value 2.


Prediction completed for deepsurv_lstm_clustering_2 on fold 10.
Initiating prediction for model: deepsurv_lstm_nearmiss_2
Initiate testing of deepsurv neural network
model structure: LSTM


2024-11-18 12:59:05,181 - INFO - Validation data retrieved


Prediction completed for deepsurv_lstm_nearmiss_2 on fold 10.
Initiating prediction for model: deephit_ann_clustering_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_clustering_all on fold 10.
Initiating prediction for model: deephit_ann_nearmiss2_all
Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_ann_nearmiss2_all on fold 10.
Initiating prediction for model: deephit_lstm_clustering_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:59:07,962 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_clustering_all on fold 10.
Initiating prediction for model: deephit_lstm_nearmiss1_all
Initiate testing of deephit neural network
model structure: LSTM


2024-11-18 12:59:09,536 - INFO - Validation data retrieved


prediction complete, please note that prediction of deephit models are CIF.
Prediction completed for deephit_lstm_nearmiss1_all on fold 10.
Final test predictions and y_fin_test combined.


In [94]:
# Combine all 10 items for each model's predictions
final_meta_learner_X_fin_test = {}

for key, predictions in meta_learner_X_fin_test.items():
    # Concatenate the predictions along the last axis
    final_meta_learner_X_fin_test[key] = np.concatenate(predictions, axis=2)  # Combine along the feature axis

# Validate the shapes
for key, combined_prediction in final_meta_learner_X_fin_test.items():
    print(f"{key}: final shape {combined_prediction.shape}")
    
# Use the duration and event data as targets
durations_fin_test = meta_learner_y_fin_test[0]  # Time to event or censoring
events_fin_test = meta_learner_y_fin_test[1]  # Event type (competing risks)

print(durations_fin_test.shape)
print(events_fin_test.shape)
print(np.unique(events_fin_test, return_counts=True))

deepsurv_ann_clustering: final shape (2, 6, 40513)
deepsurv_ann_enn: final shape (2, 6, 40513)
deepsurv_ann_tomek: final shape (2, 6, 40513)
deepsurv_lstm_clustering: final shape (2, 6, 40513)
deepsurv_lstm_NearMiss: final shape (2, 6, 40513)
deephit_ann_clustering: final shape (2, 6, 40513)
deephit_ann_NearMiss: final shape (2, 6, 40513)
deephit_lstm_clustering: final shape (2, 6, 40513)
deephit_lstm_NearMiss: final shape (2, 6, 40513)
(40513,)
(40513,)
(array([0, 1, 2]), array([39793,   200,   520]))


#### 6.2 Averaging

In [95]:
# Predictions from each model
predictions = [final_meta_learner_X_fin_test[key] for key in final_meta_learner_X_fin_test.keys()]

# Aggregate by majority voting for each time point and event type
average_predictions = np.mean(predictions, axis=0)

print("Average Voting Predictions shape:", average_predictions.shape)


Average Voting Predictions shape: (2, 6, 40513)


In [157]:
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
nam_dagostino_results = []  # Initialize as a list

for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(average_predictions[i], index=[0, 1, 2, 3, 4, 5])  # Use index correctly
    ev = EvalSurv(1 - cif, durations_fin_test, events_fin_test == event_interest, censor_surv='km')

    # Concordance index
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Brier score series
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)

    # Integrated Brier score
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)

    # Negative log-likelihood
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # Nam and D'Agostino Chi2 statistic for calibration
    for time_idx, time in enumerate(TIME_GRID):
        # Filter data for current time point
        mask = durations_fin_test <= time
        durations_filtered = durations_fin_test[mask]
        events_filtered = events_fin_test[mask]
        
        if len(durations_filtered) == 0:
            print(f"Skipping time {time} for event {event_interest} due to empty data.")
            continue
        
        try:
            # Calculate Nam-D'Agostino Chi² statistic
            chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
                df=pd.DataFrame({"durations": durations_filtered, "events": events_filtered}),
                duration_col="durations",
                event_col="events",
                surv=(1 - cif),  # Survival function
                time=time_idx,
                event_focus=event_interest
            )

            # Append results
            nam_dagostino_results.append({
                'Event': event_interest,
                'Year': round(time / 365),
                'Chi2_Stat': chi2_stat,
                'P_Value': p_value,
                'Observed_Events': observed_events.tolist() if isinstance(observed_events, np.ndarray) else observed_events,
                'Expected_Events': expected_events.tolist() if isinstance(expected_events, np.ndarray) else expected_events,
                'Sample_Size': n if isinstance(n, int) else n.tolist()
            })
        except ValueError as e:
            print(f"Error calculating Nam-D'Agostino Chi² at time {time} for event {event_interest}: {e}")

# Display results
print("Concordance Indices:", concordance_indices)
print("Brier Score Series:", brier_series)
print("Integrated Brier Scores:", integrated_brier_scores)
print("Negative Log-Likelihoods:", neg_log_likelihoods)
print("Nam-D'Agostino Results:", nam_dagostino_results)



Error calculating Nam-D'Agostino Chi² at time 0 for event 1: Length of values (40513) does not match length of index (19588)
Error calculating Nam-D'Agostino Chi² at time 0 for event 2: Length of values (40513) does not match length of index (19588)


                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.


Concordance Indices: {'Event_1': 0.9861141445942802, 'Event_2': 0.8047894453585371}
Brier Score Series: {'Event_1': 0       3.783331e-07
365     4.689445e-02
730     4.689445e-02
1095    4.689445e-02
1460    4.689445e-02
1825    4.689445e-02
Name: brier_score, dtype: float64, 'Event_2': 0       6.123067e-07
365     2.107838e-01
730     2.107838e-01
1095    2.107838e-01
1460    2.107838e-01
1825    2.107838e-01
Name: brier_score, dtype: float64}
Integrated Brier Scores: {'Event_1': 0.04376818205145606, 'Event_2': 0.19673160328047123}
Negative Log-Likelihoods: {'Event_1': 0.22729299286338747, 'Event_2': 0.5733443844258161}
Nam-D'Agostino Results: [{'Event': 1, 'Year': 1, 'Chi2_Stat': 0.12214437522851679, 'P_Value': 0.9982093122377609, 'Observed_Events': quantile
0    0.000000
1    0.000000
2    0.000000
3    0.000000
4    0.022289
Name: observed_probs, dtype: float64, 'Expected_Events': quantile
0    0.016495
1    0.018913
2    0.023356
3    0.026458
4    0.074863
Name: predicted_probs, 

                To resolve ties, data is randomly jittered.


In [156]:
np.unique(durations_fin_test)

array([0, 1, 2, 3, 4, 5])

#### 6.3 Stacking

##### 6.3.1 XGBoost

In [159]:
def train_and_predict_with_xgboost(outcome_idx, time_idx):
    """
    Train an XGBoost model using CIF targets for a specific outcome and time point.
    """
    # Training data
    stacking_inputs_train = np.array([
        final_meta_learner_X_train[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_train.keys()
    ]).T  # Shape: (num_train_samples, num_models)
    
    target_train = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Validation data
    stacking_inputs_val = np.array([
        final_meta_learner_X_val[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_val.keys()
    ]).T  # Shape: (num_val_samples, num_models)

    target_val = cif_ground_truth_val_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Test data
    stacking_inputs_test = np.array([
        final_meta_learner_X_fin_test[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_fin_test.keys()
    ]).T  # Shape: (num_test_samples, num_models)

    # Prepare DMatrix for XGBoost
    dtrain = xgb.DMatrix(stacking_inputs_train, label=target_train)
    dval = xgb.DMatrix(stacking_inputs_val, label=target_val)
    dtest = xgb.DMatrix(stacking_inputs_test)

    # Configure GPU training
    params = {'objective': 'reg:squarederror',
        'tree_method': 'gpu_hist',
        'max_depth': 3,
        'learning_rate': 0.014837295326564928,
        'subsample': 0.8168005866659258,
        'colsample_bytree': 0.8290129403377126,
        'lambda': 6.7539372305286465,
        'alpha': 0.5018353832953043}

    # Train the model with early stopping
    booster = xgb.train(
        params, dtrain, num_boost_round=500,
        evals=[(dtrain, "train"), (dval, "validation")],
        early_stopping_rounds=20,  # Stop if no improvement for 20 rounds
        verbose_eval=False
    )

    # Predict CIF for test patients
    cif_predictions_test = booster.predict(dtest)
    
    return outcome_idx, time_idx, cif_predictions_test

# Parallel processing for each outcome and time point
results = Parallel(n_jobs=-1)(
    delayed(train_and_predict_with_xgboost)(outcome_idx, time_idx)
    for outcome_idx in range(2)
    for time_idx in range(6)
)

# Assign results to the xgboost_predictions array
xgboost_predictions = np.zeros((2, 6, X_test_transformed.shape[0]))
for outcome_idx, time_idx, cif_predictions in results:
    xgboost_predictions[outcome_idx, time_idx] = cif_predictions

# Verify the shape of the stacked predictions
print("XGBoost Stacking Predictions Shape:", xgboost_predictions.shape)
# Expected output: (2, 6, 316242)



    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_method = "hist", device = "cuda"


    E.g. tree_metho

XGBoost Stacking Predictions Shape: (2, 6, 40513)


In [160]:
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
nam_dagostino_results = []  # Initialize as a list

for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(xgboost_predictions[i], index=[0, 1, 2, 3, 4, 5])  # Use index correctly
    ev = EvalSurv(1 - cif, durations_fin_test, events_fin_test == event_interest, censor_surv='km')

    # Concordance index
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Brier score series
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)

    # Integrated Brier score
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)

    # Negative log-likelihood
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # Nam and D'Agostino Chi2 statistic for calibration
    for time_idx, time in enumerate(TIME_GRID):
        # Filter data for current time point
        mask = durations_fin_test <= time
        durations_filtered = durations_fin_test[mask]
        events_filtered = events_fin_test[mask]
        
        if len(durations_filtered) == 0:
            print(f"Skipping time {time} for event {event_interest} due to empty data.")
            continue
        
        try:
            # Calculate Nam-D'Agostino Chi² statistic
            chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
                df=pd.DataFrame({"durations": durations_filtered, "events": events_filtered}),
                duration_col="durations",
                event_col="events",
                surv=(1 - cif),  # Survival function
                time=time_idx,
                event_focus=event_interest
            )

            # Append results
            nam_dagostino_results.append({
                'Event': event_interest,
                'Year': round(time / 365),
                'Chi2_Stat': chi2_stat,
                'P_Value': p_value,
                'Observed_Events': observed_events.tolist() if isinstance(observed_events, np.ndarray) else observed_events,
                'Expected_Events': expected_events.tolist() if isinstance(expected_events, np.ndarray) else expected_events,
                'Sample_Size': n if isinstance(n, int) else n.tolist()
            })
        except ValueError as e:
            print(f"Error calculating Nam-D'Agostino Chi² at time {time} for event {event_interest}: {e}")

# Display results
print("Concordance Indices:", concordance_indices)
print("Brier Score Series:", brier_series)
print("Integrated Brier Scores:", integrated_brier_scores)
print("Negative Log-Likelihoods:", neg_log_likelihoods)
print("Nam-D'Agostino Results:", nam_dagostino_results)



Error calculating Nam-D'Agostino Chi² at time 0 for event 1: Length of values (40513) does not match length of index (19588)
Error calculating Nam-D'Agostino Chi² at time 0 for event 2: Length of values (40513) does not match length of index (19588)


                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.


Concordance Indices: {'Event_1': 0.9792338086166885, 'Event_2': 0.7275737517718853}
Brier Score Series: {'Event_1': 0       0.000000
365     0.034227
730     0.034227
1095    0.034227
1460    0.034227
1825    0.034227
Name: brier_score, dtype: float64, 'Event_2': 0       0.000000
365     0.127521
730     0.127521
1095    0.127521
1460    0.127521
1825    0.127521
Name: brier_score, dtype: float64}
Integrated Brier Scores: {'Event_1': 0.03194480400702308, 'Event_2': 0.11901920846077037}
Negative Log-Likelihoods: {'Event_1': 0.1744737147244062, 'Event_2': 0.37606509292105755}
Nam-D'Agostino Results: [{'Event': 1, 'Year': 1, 'Chi2_Stat': 0.02168567931458386, 'P_Value': 0.9999416396119769, 'Observed_Events': quantile
0    0.00000
1    0.00000
2    0.00000
3    0.00000
4    0.01914
Name: observed_probs, dtype: float64, 'Expected_Events': quantile
0    0.001888
1    0.002650
2    0.003851
3    0.007186
4    0.033434
Name: predicted_probs, dtype: float64, 'Sample_Size': [8104, 8101, 8103, 810

                To resolve ties, data is randomly jittered.
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [78]:
import numpy as np
import ray.train
import xgboost as xgb
import ray
from ray import tune
from sklearn.model_selection import train_test_split
from pycox.evaluation import EvalSurv
import pandas as pd

# Random seed for reproducibility
RANDOM_SEED = 42

def xgboost_training_wrapper(config, data):
    """
    A Ray Tune-compatible wrapper to train XGBoost models for all outcomes and time points,
    evaluate predictions using concordance index, and report mean concordance index.

    Args:
        config: Dictionary of hyperparameters provided by Ray Tune.
        data: Dictionary containing final_meta_learner_X_train, cif_ground_truth_test_stacked,
              durations, and events.
    """
    final_meta_learner_X_train = data["final_meta_learner_X_train"]
    cif_ground_truth_test_stacked = data["cif_ground_truth_test_stacked"]
    durations = data["durations"]
    events = data["events"]

    # Initialize predictions array (2 outcomes, 6 time points, 316242 patients)
    xgboost_predictions = np.zeros((2, 6, len(durations)))

    # Train models and make predictions for each outcome and time point
    for outcome_idx in range(2):
        for time_idx in range(6):
            # Prepare data
            stacking_inputs = np.array([
                final_meta_learner_X_train[key][outcome_idx, time_idx]
                for key in final_meta_learner_X_train.keys()
            ]).T  # Shape: (316242, 9)

            target = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Shape: (316242,)

            # Split data into training and validation sets
            X_train, X_val, y_train, y_val = train_test_split(
                stacking_inputs, target, test_size=0.2, random_state=RANDOM_SEED
            )

            # Prepare DMatrix for XGBoost
            dtrain = xgb.DMatrix(X_train, label=y_train)
            dval = xgb.DMatrix(X_val, label=y_val)

            # Train XGBoost model
            booster = xgb.train(
                config,
                dtrain,
                num_boost_round=500,
                evals=[(dtrain, "train"), (dval, "validation")],
                early_stopping_rounds=20,
                verbose_eval=False
            )

            # Predict CIF for all patients
            dtest = xgb.DMatrix(stacking_inputs)
            xgboost_predictions[outcome_idx, time_idx] = booster.predict(dtest)

    # Compute concordance index for each event
    concordance_indices = {}
    for i in range(2):
        event_interest = i + 1
        cif = pd.DataFrame(xgboost_predictions[i], index=[0, 1, 2, 3, 4, 5])
        ev = EvalSurv(1 - cif, durations, events == event_interest, censor_surv="km")
        concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Calculate mean concordance index
    mean_concordance_index = np.mean(list(concordance_indices.values()))

    # Report mean concordance index to Ray Tune
    ray.train.report({'c-index stat': mean_concordance_index})

# Define the search space
search_space = {
    "objective": "reg:squarederror",
    "tree_method": "gpu_hist",  # Use GPU acceleration
    "max_depth": tune.randint(3, 10),  # Maximum depth of trees
    "learning_rate": tune.loguniform(0.01, 0.3),  # Learning rate
    "subsample": tune.uniform(0.5, 1.0),  # Subsampling rate
    "colsample_bytree": tune.uniform(0.5, 1.0),  # Feature subsampling rate
    "lambda": tune.loguniform(1e-4, 10.0),  # L2 regularization term
    "alpha": tune.loguniform(1e-4, 10.0),  # L1 regularization term
}

# Prepare data
data = {
    "final_meta_learner_X_train": final_meta_learner_X_train,
    "cif_ground_truth_test_stacked": cif_ground_truth_test_stacked,
    "durations": durations,
    "events": events,
}

# Run Ray Tune
analysis = tune.run(
    tune.with_parameters(xgboost_training_wrapper, data=data),
    config=search_space,
    resources_per_trial={"cpu": 2, "gpu": 1},  # Adjust based on your resources
    num_samples=50,  # Number of hyperparameter configurations to try
    metric='c-index stat',  # Metric to optimize
    mode="max",  # Maximize the concordance index
    storage_path="/mnt/d/PYDataScience/g3_regress/data/results",  # Directory to store results
)

# Get the best configuration
best_config = analysis.get_best_config(metric="mean_concordance_index", mode="max")
print("Best hyperparameters:", best_config)


2024-11-17 18:31:06,984	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-11-17 20:15:33
Running for:,01:44:26.88
Memory:,100.8/117.9 GiB

Trial name,status,loc,alpha,colsample_bytree,lambda,learning_rate,max_depth,subsample,iter,total time (s),c-index stat
xgboost_training_wrapper_0df1a_00000,TERMINATED,192.168.236.234:1003253,0.0141936,0.872683,0.00338673,0.0666382,4,0.614585,1,114.835,0.85425
xgboost_training_wrapper_0df1a_00001,TERMINATED,192.168.236.234:1003687,0.0063679,0.965151,2.80002,0.0217801,8,0.629694,1,130.689,0.852337
xgboost_training_wrapper_0df1a_00002,TERMINATED,192.168.236.234:1004147,0.0244766,0.600954,0.00310441,0.228248,3,0.803935,1,112.6,0.853303
xgboost_training_wrapper_0df1a_00003,TERMINATED,192.168.236.234:1004568,1.89679,0.520151,7.01459,0.0431147,4,0.88264,1,114.363,0.856744
xgboost_training_wrapper_0df1a_00004,TERMINATED,192.168.236.234:1004988,0.0766019,0.689039,0.0100393,0.0121418,8,0.970722,1,132.607,0.855037
xgboost_training_wrapper_0df1a_00005,TERMINATED,192.168.236.234:1005454,0.000204935,0.638219,0.00213184,0.0719649,8,0.769812,1,126.329,0.848903
xgboost_training_wrapper_0df1a_00006,TERMINATED,192.168.236.234:1005910,0.00897389,0.700659,6.11243,0.0141268,5,0.515068,1,115.955,0.858138
xgboost_training_wrapper_0df1a_00007,TERMINATED,192.168.236.234:1006335,0.261425,0.997296,0.0508392,0.140841,8,0.684653,1,125.169,0.848343
xgboost_training_wrapper_0df1a_00008,TERMINATED,192.168.236.234:1006794,0.00456015,0.958849,0.000103928,0.0159644,5,0.863895,1,119.522,0.857573
xgboost_training_wrapper_0df1a_00009,TERMINATED,192.168.236.234:1007216,0.00350792,0.708339,0.00135998,0.0230098,3,0.719504,1,112.981,0.859644


Trial name,c-index stat
xgboost_training_wrapper_0df1a_00000,0.85425
xgboost_training_wrapper_0df1a_00001,0.852337
xgboost_training_wrapper_0df1a_00002,0.853303
xgboost_training_wrapper_0df1a_00003,0.856744
xgboost_training_wrapper_0df1a_00004,0.855037
xgboost_training_wrapper_0df1a_00005,0.848903
xgboost_training_wrapper_0df1a_00006,0.858138
xgboost_training_wrapper_0df1a_00007,0.848343
xgboost_training_wrapper_0df1a_00008,0.857573
xgboost_training_wrapper_0df1a_00009,0.859644


2024-11-17 18:33:07,651 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:35:20,666 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:37:15,012 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:39:11,679 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:41:26,716 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:43:35,691 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:45:37,053 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:47:44,061 - INFO - Summary name ray/tune/c-index stat is illegal; using ray/tune/c-index_stat instead.
2024-11-17 18:49:45,263 - INFO - Summary name ray/tune/c-index s

Best hyperparameters: None


In [122]:
import os
import json

# Define the base directory containing the trial folders
base_dir = "/mnt/d/PYDataScience/g3_regress/data/results/xgboost_training_wrapper_2024-11-17_18-31-06"

# Initialize variables to store the max c-index and corresponding config
max_c_index = float("-inf")
best_config = None

# Iterate through all folders and parse the result.json files
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file == "result.json":
            file_path = os.path.join(root, file)
            with open(file_path, "r") as f:
                data = json.load(f)
                if "c-index stat" in data:
                    c_index = data["c-index stat"]
                    # Update max c-index and config if a new max is found
                    if c_index > max_c_index:
                        max_c_index = c_index
                        best_config = data.get("config", None)

# Display the results
max_c_index, best_config


(0.8614533246252081,
 {'objective': 'reg:squarederror',
  'tree_method': 'gpu_hist',
  'max_depth': 3,
  'learning_rate': 0.014837295326564928,
  'subsample': 0.8168005866659258,
  'colsample_bytree': 0.8290129403377126,
  'lambda': 6.7539372305286465,
  'alpha': 0.5018353832953043})

##### 6.3.2 Linear Regression

In [142]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import numpy as np
from joblib import Parallel, delayed

# Initialize the final stacking predictions array
lineregression_predictions = np.zeros((2, 6, X_test_transformed.shape[0]))  # (2 outcomes, 6 time points, 316242 patients)

def train_and_predict_with_linear_regression(outcome_idx, time_idx):
    """
    Train a Linear Regression model using CIF targets for a specific outcome and time point.
    """
    # Prepare stacking inputs and targets
    # Training data
    stacking_inputs_train = np.array([
        final_meta_learner_X_train[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_train.keys()
    ]).T  # Shape: (num_train_samples, num_models)
    
    target_train = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Validation data
    stacking_inputs_val = np.array([
        final_meta_learner_X_val[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_val.keys()
    ]).T  # Shape: (num_val_samples, num_models)

    target_val = cif_ground_truth_val_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Test data
    stacking_inputs_test = np.array([
        final_meta_learner_X_fin_test[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_fin_test.keys()
    ]).T  # Shape: (num_test_samples, num_models)

    # Train the Linear Regression model
    model = LinearRegression()
    model.fit(stacking_inputs_train, target_train)

    # Predict CIF for all patients
    cif_predictions = model.predict(stacking_inputs_test)

    return outcome_idx, time_idx, cif_predictions

# Parallel processing for each outcome and time point
results = Parallel(n_jobs=-1)(
    delayed(train_and_predict_with_linear_regression)(outcome_idx, time_idx)
    for outcome_idx in range(2)
    for time_idx in range(6)
)

# Assign results to the lineregression_predictions array
for outcome_idx, time_idx, cif_predictions in results:
    lineregression_predictions[outcome_idx, time_idx] = cif_predictions

# Verify the shape of the stacked predictions
print("Linear Regression Stacking Predictions Shape:", lineregression_predictions.shape)

Linear Regression Stacking Predictions Shape: (2, 6, 40513)


In [161]:
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
nam_dagostino_results = []  # Initialize as a list

for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(lineregression_predictions[i], index=[0, 1, 2, 3, 4, 5])  # Use index correctly
    ev = EvalSurv(1 - cif, durations_fin_test, events_fin_test == event_interest, censor_surv='km')

    # Concordance index
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Brier score series
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)

    # Integrated Brier score
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)

    # Negative log-likelihood
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # Nam and D'Agostino Chi2 statistic for calibration
    for time_idx, time in enumerate(TIME_GRID):
        # Filter data for current time point
        mask = durations_fin_test <= time
        durations_filtered = durations_fin_test[mask]
        events_filtered = events_fin_test[mask]
        
        if len(durations_filtered) == 0:
            print(f"Skipping time {time} for event {event_interest} due to empty data.")
            continue
        
        try:
            # Calculate Nam-D'Agostino Chi² statistic
            chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
                df=pd.DataFrame({"durations": durations_filtered, "events": events_filtered}),
                duration_col="durations",
                event_col="events",
                surv=(1 - cif),  # Survival function
                time=time_idx,
                event_focus=event_interest
            )

            # Append results
            nam_dagostino_results.append({
                'Event': event_interest,
                'Year': round(time / 365),
                'Chi2_Stat': chi2_stat,
                'P_Value': p_value,
                'Observed_Events': observed_events.tolist() if isinstance(observed_events, np.ndarray) else observed_events,
                'Expected_Events': expected_events.tolist() if isinstance(expected_events, np.ndarray) else expected_events,
                'Sample_Size': n if isinstance(n, int) else n.tolist()
            })
        except ValueError as e:
            print(f"Error calculating Nam-D'Agostino Chi² at time {time} for event {event_interest}: {e}")

# Display results
print("Concordance Indices:", concordance_indices)
print("Brier Score Series:", brier_series)
print("Integrated Brier Scores:", integrated_brier_scores)
print("Negative Log-Likelihoods:", neg_log_likelihoods)
print("Nam-D'Agostino Results:", nam_dagostino_results)



Error calculating Nam-D'Agostino Chi² at time 0 for event 1: Length of values (40513) does not match length of index (19588)
Error calculating Nam-D'Agostino Chi² at time 0 for event 2: Length of values (40513) does not match length of index (19588)


                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.


Concordance Indices: {'Event_1': 0.979147976082038, 'Event_2': 0.7546631563758323}
Brier Score Series: {'Event_1': 0       0.00000
365     0.03744
730     0.03744
1095    0.03744
1460    0.03744
1825    0.03744
Name: brier_score, dtype: float64, 'Event_2': 0       0.000000
365     0.116634
730     0.116634
1095    0.116634
1460    0.116634
1825    0.116634
Name: brier_score, dtype: float64}
Integrated Brier Scores: {'Event_1': 0.034943942825916126, 'Event_2': 0.1088581410271849}
Negative Log-Likelihoods: {'Event_1': 0.18104063587078995, 'Event_2': 0.3927945233629699}
Nam-D'Agostino Results: [{'Event': 1, 'Year': 1, 'Chi2_Stat': 0.020654911205768265, 'P_Value': 0.9999470375752154, 'Observed_Events': quantile
0    0.000000
1    0.000000
2    0.000000
3    0.000000
4    0.019646
Name: observed_probs, dtype: float64, 'Expected_Events': quantile
0    0.001286
1    0.002432
2    0.003715
3    0.006436
4    0.035073
Name: predicted_probs, dtype: float64, 'Sample_Size': [8103, 8102, 8103, 8102

                To resolve ties, data is randomly jittered.


##### 6.3.3 RandomForest regression

In [125]:
from sklearn.ensemble import RandomForestRegressor
from joblib import Parallel, delayed
import numpy as np

def train_and_predict_with_rf(outcome_idx, time_idx):
    """
    Train a Random Forest model using CIF targets for a specific outcome and time point.
    Use pre-prepared validation and test predictions.
    """
    # Prepare training data
    stacking_inputs_train = np.array([
        final_meta_learner_X_train[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_train.keys()
    ]).T  # Shape: (num_train_samples, num_models)
    
    target_train = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Validation data
    stacking_inputs_val = np.array([
        final_meta_learner_X_val[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_val.keys()
    ]).T  # Shape: (num_val_samples, num_models)

    target_val = cif_ground_truth_val_stacked[outcome_idx, time_idx, :]  # Correct indexing

    # Test data
    stacking_inputs_test = np.array([
        final_meta_learner_X_fin_test[key][outcome_idx, time_idx]
        for key in final_meta_learner_X_fin_test.keys()
    ]).T  # Shape: (num_test_samples, num_models)

    # Train the Random Forest Regressor
    model = RandomForestRegressor(
        n_estimators=168,
        max_depth=3,
        min_samples_split=3,
        min_samples_leaf=4,
        random_state=RANDOM_SEED,
        max_features='sqrt',
        n_jobs=-1  # Use all available cores,
        
    )
    model.fit(stacking_inputs_train, target_train, )

    # Predict CIF for all patients in the test set
    cif_predictions = model.predict(stacking_inputs_test)

    return outcome_idx, time_idx, cif_predictions

# Parallel processing for each outcome and time point
rf_predictions = np.zeros((2, 6, X_test_transformed.shape[0]))  # Initialize prediction array
results = Parallel(n_jobs=-1)(
    delayed(train_and_predict_with_rf)(outcome_idx, time_idx)
    for outcome_idx in range(2)
    for time_idx in range(6)
)

# Assign results to the rf_predictions array
for outcome_idx, time_idx, cif_predictions in results:
    rf_predictions[outcome_idx, time_idx] = cif_predictions

# Verify the shape of the stacked predictions
print("Random Forest Stacking Predictions Shape:", rf_predictions.shape)
# Expected output: (2, 6, num_test_samples)


Random Forest Stacking Predictions Shape: (2, 6, 40513)


In [162]:
concordance_indices = {}
integrated_brier_scores = {}
neg_log_likelihoods = {}
brier_series = {}
nam_dagostino_results = []  # Initialize as a list

for i in range(0, 2):
    event_interest = i + 1
    cif = pd.DataFrame(rf_predictions[i], index=[0, 1, 2, 3, 4, 5])  # Use index correctly
    ev = EvalSurv(1 - cif, durations_fin_test, events_fin_test == event_interest, censor_surv='km')

    # Concordance index
    concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Brier score series
    brier_series[f"Event_{event_interest}"] = ev.brier_score(TIME_GRID)

    # Integrated Brier score
    integrated_brier_scores[f"Event_{event_interest}"] = ev.integrated_brier_score(TIME_GRID)

    # Negative log-likelihood
    neg_log_likelihoods[f"Event_{event_interest}"] = ev.integrated_nbll(TIME_GRID)

    # Nam and D'Agostino Chi2 statistic for calibration
    for time_idx, time in enumerate(TIME_GRID):
        # Filter data for current time point
        mask = durations_fin_test <= time
        durations_filtered = durations_fin_test[mask]
        events_filtered = events_fin_test[mask]
        
        if len(durations_filtered) == 0:
            print(f"Skipping time {time} for event {event_interest} due to empty data.")
            continue
        
        try:
            # Calculate Nam-D'Agostino Chi² statistic
            chi2_stat, p_value, observed_events, expected_events, n, prob_df = nam_dagostino_chi2(
                df=pd.DataFrame({"durations": durations_filtered, "events": events_filtered}),
                duration_col="durations",
                event_col="events",
                surv=(1 - cif),  # Survival function
                time=time_idx,
                event_focus=event_interest
            )

            # Append results
            nam_dagostino_results.append({
                'Event': event_interest,
                'Year': round(time / 365),
                'Chi2_Stat': chi2_stat,
                'P_Value': p_value,
                'Observed_Events': observed_events.tolist() if isinstance(observed_events, np.ndarray) else observed_events,
                'Expected_Events': expected_events.tolist() if isinstance(expected_events, np.ndarray) else expected_events,
                'Sample_Size': n if isinstance(n, int) else n.tolist()
            })
        except ValueError as e:
            print(f"Error calculating Nam-D'Agostino Chi² at time {time} for event {event_interest}: {e}")

# Display results
print("Concordance Indices:", concordance_indices)
print("Brier Score Series:", brier_series)
print("Integrated Brier Scores:", integrated_brier_scores)
print("Negative Log-Likelihoods:", neg_log_likelihoods)
print("Nam-D'Agostino Results:", nam_dagostino_results)



Error calculating Nam-D'Agostino Chi² at time 0 for event 1: Length of values (40513) does not match length of index (19588)
Error calculating Nam-D'Agostino Chi² at time 0 for event 2: Length of values (40513) does not match length of index (19588)


                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.
                To resolve ties, data is randomly jittered.


Concordance Indices: {'Event_1': 0.9796230921738424, 'Event_2': 0.7554581494270163}
Brier Score Series: {'Event_1': 0       0.000000
365     0.053315
730     0.053315
1095    0.053315
1460    0.053315
1825    0.053315
Name: brier_score, dtype: float64, 'Event_2': 0       0.000000
365     0.099329
730     0.099329
1095    0.099329
1460    0.099329
1825    0.099329
Name: brier_score, dtype: float64}
Integrated Brier Scores: {'Event_1': 0.0497605494173283, 'Event_2': 0.0927068468793533}
Negative Log-Likelihoods: {'Event_1': 0.23971583618508627, 'Event_2': 0.3280149880111438}
Nam-D'Agostino Results: [{'Event': 1, 'Year': 1, 'Chi2_Stat': 0.02078836491212447, 'P_Value': 0.9999463533563095, 'Observed_Events': quantile
0    0.000000
1    0.000000
2    0.000000
3    0.000000
4    0.020235
Name: observed_probs, dtype: float64, 'Expected_Events': quantile
0    0.003513
1    0.003532
2    0.003596
3    0.005635
4    0.032308
Name: predicted_probs, dtype: float64, 'Sample_Size': [12648, 3869, 7812,

                To resolve ties, data is randomly jittered.


In [118]:
def random_forest_training_wrapper(config, data):
    """
    A Ray Tune-compatible wrapper to train Random Forest models for all outcomes and time points,
    evaluate predictions using concordance index, and report mean concordance index.

    Args:
        config: Dictionary of hyperparameters provided by Ray Tune.
        data: Dictionary containing final_meta_learner_X_train, cif_ground_truth_test_stacked,
              durations, and events.
    """
    final_meta_learner_X_train = data["final_meta_learner_X_train"]
    final_meta_learner_X_val = data["final_meta_learner_X_val"]
    cif_ground_truth_test_stacked = data["cif_ground_truth_test_stacked"]
    cif_ground_truth_val_stacked = data["cif_ground_truth_val_stacked"]
    durations_val = data["durations_val"]
    events_val = data["events_val"]

    # Initialize predictions array (2 outcomes, 6 time points, number of patients)
    rf_predictions = np.zeros((2, 6, X_fin_val.shape[0]))

    # Train models and make predictions for each outcome and time point
    for outcome_idx in range(2):
        for time_idx in range(6):
            # Prepare training data
            stacking_inputs_train = np.array([
                final_meta_learner_X_train[key][outcome_idx, time_idx]
                for key in final_meta_learner_X_train.keys()
            ]).T  # Shape: (num_train_samples, num_models)
            
            target_train = cif_ground_truth_test_stacked[outcome_idx, time_idx, :]  # Correct indexing

            # Validation data
            stacking_inputs_val = np.array([
                final_meta_learner_X_val[key][outcome_idx, time_idx]
                for key in final_meta_learner_X_val.keys()
            ]).T  # Shape: (num_val_samples, num_models)

            target_val = cif_ground_truth_val_stacked[outcome_idx, time_idx, :]  # Correct indexing

            # Train Random Forest model
            model = RandomForestRegressor(
                n_estimators=config["n_estimators"],
                max_depth=config["max_depth"],
                min_samples_split=config["min_samples_split"],
                min_samples_leaf=config["min_samples_leaf"],
                max_features=config["max_features"],
                random_state=RANDOM_SEED,
                n_jobs=-1
            )
            model.fit(stacking_inputs_train , target_train)

            # Predict CIF for all patients
            rf_predictions[outcome_idx, time_idx] = model.predict(stacking_inputs_val)

    # Compute concordance index for each event
    concordance_indices = {}
    for i in range(2):
        event_interest = i + 1
        cif = pd.DataFrame(rf_predictions[i], index=[0, 1, 2, 3, 4, 5])
        ev = EvalSurv(1 - cif, durations_val, events_val == event_interest, censor_surv="km")
        concordance_indices[f"Event_{event_interest}"] = ev.concordance_td()

    # Calculate mean concordance index
    mean_concordance_index = np.mean(list(concordance_indices.values()))

    # Report mean concordance index to Ray Tune
    ray.train.report({'c-index_stat': mean_concordance_index})

# Define the search space
search_space = {
    "n_estimators": tune.randint(50, 500),  # Number of trees in the forest
    "max_depth": tune.randint(3, 20),  # Maximum depth of the tree
    "min_samples_split": tune.randint(2, 10),  # Minimum number of samples required to split an internal node
    "min_samples_leaf": tune.randint(1, 10),  # Minimum number of samples required to be at a leaf node
    "max_features": tune.choice(["sqrt", "log2"])  # Number of features to consider when looking for the best split
}

# Prepare data
data = {
    "final_meta_learner_X_train": final_meta_learner_X_train,
    "final_meta_learner_X_val": final_meta_learner_X_val,
    "cif_ground_truth_test_stacked": cif_ground_truth_test_stacked,
    "cif_ground_truth_val_stacked": cif_ground_truth_val_stacked,
    "durations_val": durations_val,
    "events_val": events_val,
}

ray.shutdown()
ray.init()
# Run Ray Tune
analysis = tune.run(
    tune.with_parameters(random_forest_training_wrapper, data=data),
    config=search_space,
    resources_per_trial={"cpu": 20, "gpu": 0},  # Random Forest does not require GPU
    num_samples=50,  # Number of hyperparameter configurations to try
    metric="c-index_stat",  # Metric to optimize
    mode="max",  # Maximize the concordance index
    storage_path="/mnt/d/PYDataScience/g3_regress/data/results",  # Directory to store results
    verbose=2
)

# Get the best configuration
best_config = analysis.get_best_config(metric="c-index stat", mode="max")
print("Best hyperparameters:", best_config)

ray.shutdown()

2024-11-19 07:06:14,475	INFO worker.py:1807 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
2024-11-19 07:06:15,356	INFO tune.py:616 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-11-19 08:39:23
Running for:,01:33:07.76
Memory:,53.2/117.9 GiB

Trial name,status,loc,max_depth,max_features,min_samples_leaf,min_samples_split,n_estimators,iter,total time (s),c-index_stat
random_forest_training_wrapper_b6411_00000,TERMINATED,192.168.236.234:1387505,9,sqrt,5,8,164,1,73.9574,0.857171
random_forest_training_wrapper_b6411_00001,TERMINATED,192.168.236.234:1388561,7,log2,9,2,411,1,139.173,0.859153
random_forest_training_wrapper_b6411_00002,TERMINATED,192.168.236.234:1389744,3,sqrt,3,2,420,1,70.2625,0.861421
random_forest_training_wrapper_b6411_00003,TERMINATED,192.168.236.234:1390733,17,sqrt,3,5,201,1,139.298,0.849436
random_forest_training_wrapper_b6411_00004,TERMINATED,192.168.236.234:1391916,12,sqrt,6,3,368,1,190.577,0.853591
random_forest_training_wrapper_b6411_00005,TERMINATED,192.168.236.234:1393240,5,log2,7,5,97,1,28.2665,0.860848
random_forest_training_wrapper_b6411_00006,TERMINATED,192.168.236.234:1394131,9,log2,8,9,207,1,89.8782,0.857037
random_forest_training_wrapper_b6411_00007,TERMINATED,192.168.236.234:1395194,6,log2,3,9,168,1,57.0302,0.860378
random_forest_training_wrapper_b6411_00008,TERMINATED,192.168.236.234:1396143,3,sqrt,2,8,128,1,24.5496,0.861465
random_forest_training_wrapper_b6411_00009,TERMINATED,192.168.236.234:1397018,9,log2,6,6,429,1,179.317,0.857075




Trial name,c-index_stat
random_forest_training_wrapper_b6411_00000,0.857171
random_forest_training_wrapper_b6411_00001,0.859153
random_forest_training_wrapper_b6411_00002,0.861421
random_forest_training_wrapper_b6411_00003,0.849436
random_forest_training_wrapper_b6411_00004,0.853591
random_forest_training_wrapper_b6411_00005,0.860848
random_forest_training_wrapper_b6411_00006,0.857037
random_forest_training_wrapper_b6411_00007,0.860378
random_forest_training_wrapper_b6411_00008,0.861465
random_forest_training_wrapper_b6411_00009,0.857075


2024-11-19 08:39:23,246	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/mnt/d/PYDataScience/g3_regress/data/results/random_forest_training_wrapper_2024-11-19_07-06-15' in 1.2829s.
2024-11-19 08:39:23,252	INFO tune.py:1041 -- Total run time: 5587.90 seconds (5586.48 seconds for the tuning loop).


Best hyperparameters: None


In [124]:
import os
import json

# Define the base directory containing the trial folders
base_dir = "/mnt/d/PYDataScience/g3_regress/data/results/random_forest_training_wrapper_2024-11-19_07-06-15"

# Initialize variables to store the max c-index and corresponding config
max_c_index = float("-inf")
best_config = None

# Iterate through all folders and parse the result.json files
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file == "result.json":
            file_path = os.path.join(root, file)
            with open(file_path, "r") as f:
                data = json.load(f)
                if "c-index_stat" in data:
                    c_index = data["c-index_stat"]
                    # Update max c-index and config if a new max is found
                    if c_index > max_c_index:
                        max_c_index = c_index
                        best_config = data.get("config", None)

# Display the results
max_c_index, best_config


(0.8617197160068155,
 {'n_estimators': 168,
  'max_depth': 3,
  'min_samples_split': 3,
  'min_samples_leaf': 4,
  'max_features': 'sqrt'})