### 1. Import libraries

In [1]:
import gc
import json
import os
import math
import multiprocessing
import numpy as np
import pandas as pd
import torch
import importlib
import logging
from pathlib import Path
from sklearn.model_selection import GroupKFold, GroupShuffleSplit

# Pycox and PyTorch tuples for survival analysis
import torchtuples as tt
import pycox
from pycox.preprocessing.label_transforms import LabTransDiscreteTime
from pycox.models import CoxPH, DeepHit
from pycox.evaluation import EvalSurv

# Ray for hyperparameter tuning and distributed processing
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.search.bayesopt import BayesOptSearch
from ray.tune.search.optuna import OptunaSearch
from ray.tune.search import ConcurrencyLimiter
from ray.tune.schedulers import ASHAScheduler, PopulationBasedTraining
from ray.air import session
import ray.cloudpickle as pickle

# Custom modules for data handling, balancing, training, evaluation, and model architectures
import dataloader2
import databalancer2
import datatrainer2
import modeleval
import netweaver2

# Reload custom modules to ensure latest changes are available
importlib.reload(dataloader2)
importlib.reload(databalancer2)
importlib.reload(datatrainer2)
importlib.reload(modeleval)
importlib.reload(netweaver2)

# Import specific functions from custom modules to keep code clean and readable
from netweaver2 import (
    lstm_net_init, DHANNWrapper, LSTMWrapper, generalized_ann_net_init
)
from dataloader2 import (
    load_and_transform_data, preprocess_data #stack_sequences, dh_dataset_loader
)
from databalancer2 import (
    define_medoid_general, df_event_focus, rebalance_data, underbalance_data_general, medoid_cluster, 
    dh_rebalance_data
)
from datatrainer2 import (
    recursive_clustering, prepare_training_data, 
    prepare_validation_data, lstm_training
)
from modeleval import (
    dh_test_model, nam_dagostino_chi2, get_baseline_hazard_at_timepoints, combined_test_model
)

import psutil
torch.cuda.empty_cache()
gc.collect()

80

### 2. Define constants, load datasets

In [2]:
# Define Constants and Load Datasets
RANDOM_SEED = 12345
N_SPLIT = 2
FEATURE_COLS = ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
DURATION_COL = 'date_from_sub_60'
EVENT_COL = 'endpoint'
CLUSTER_COL = 'key'
TIME_GRID = np.array([i * 365 for i in range(6)])

# Define Feature Groups
CAT_FEATURES = ['gender', 'dm', 'ht', 'sprint']
LOG_FEATURES = ['a1c', 'po4', 'UACR_mg_g', 'Cr']
STANDARD_FEATURES = ['age', 'alb', 'ca', 'hb', 'hco3']
PASSTHROUGH_FEATURES = ['key', 'date_from_sub_60', 'endpoint']

# Load and Transform Data
BASE_FILENAME = '/mnt/d/pydatascience/g3_regress/data/X/X_20240628'
X_train_transformed, X_test_transformed = load_and_transform_data(
    BASE_FILENAME, CAT_FEATURES, LOG_FEATURES, STANDARD_FEATURES, PASSTHROUGH_FEATURES
)

2024-11-11 01:11:14,040 - INFO - Transforming training data...
2024-11-11 01:11:28,099 - INFO - Transforming test data...


### 3. Train and save models
- model naming: {deepsurv/deephit}\_{nn}\_{resample method}_{outcome}
- for deepsurv model, only the result in time_grid will be retrieved so the result of deepsurv and deephit models are compatible

In [3]:
def create_neural_network(config, num_risk = len(X_train_transformed[EVENT_COL].unique()) - 1, num_time_bins=len(TIME_GRID)):
    """
    Function to create a neural network based on the given configuration.

    Args:
        config (dict): Configuration dictionary containing model type, network type, and hyperparameters.

    Returns:
        torch.nn.Module: Created neural network model.
    """
    gc.collect()
    torch.cuda.empty_cache()
    if config['model'] == 'deepsurv':
        num_risk = None
        num_time_bins=None
    elif config['model'] == 'deephit':
        num_risk = num_risk
        num_time_bins = num_time_bins
    # Create the Neural Network
    if config['net'] == 'ann':
        net = generalized_ann_net_init(
            input_size=len(config['features']),
            num_nodes=config["num_nodes"],
            batch_norm=config["batch_norm"],
            dropout=config["dropout"],
            output_size=1, # Default output size for DeepSurv
            num_risks = num_risk,
            num_time_bins = num_time_bins
        )
    elif config['net'] == 'lstm':
        net = lstm_net_init(
            input_size=len(config['features']),
            num_nodes=config["num_nodes"],
            batch_norm=config["batch_norm"],
            dropout=config["dropout"],
            num_risks = num_risk,
            num_time_bins = num_time_bins
        )
    else:
        raise ValueError("Unknown network type: {}".format(config['net']))

    optimizer = tt.optim.AdamWR(decoupled_weight_decay=1e-6, cycle_eta_multiplier=0.8)
    if config['model'] == 'deepsurv':
        model = CoxPH(net, optimizer)
    elif config['model'] == 'deephit':
        model = DeepHit(net, optimizer)
    model.optimizer.set_lr(config["lr"])
    
    return model

def train_neural_network(model, config, X_train, X_val, duration_col, event_col, cluster_col, callbacks, time_grid=None):
    """
    Function to train a given neural network using the provided datasets.

    Args:
        net (torch.nn.Module): Neural network to be trained.
        config (dict): Configuration dictionary containing model hyperparameters.
        X_train (pd.DataFrame): Training dataset with features.
        X_val (pd.DataFrame): Validation dataset with features.
        duration_col (str): Column representing event durations.
        event_col (str): Column representing event occurrences.
        cluster_col (str): Column for grouping during cross-validation.
        callbacks (list): List of callbacks for training.
        time_grid (np.array, optional): Time grid for evaluation if required. Defaults to None.

    Returns:
        model: Trained PyCox model.
        logs: Training logs.
    """
    gc.collect()
    torch.cuda.empty_cache()
    # Train the model
    if config['model'] == 'deepsurv':
        print('Initiate training of deepsurv neural network')
        X_val = df_event_focus(X_val, event_col, config['endpoint'])
        X_val_processed, y_val = preprocess_data(X_val, config['features'], duration_col, event_col)
        val_data = (X_val_processed, y_val)
        if config['net'] == 'ann':
            print('model structure: ANN')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = recursive_clustering(model, X_train, duration_col, event_col, config, val_data, callbacks, max_repeats=30)
            elif config['balance_method'] == 'enn':
                print('data balancing method: smoteenn')
                X_train = rebalance_data(X_train, event_col, config['endpoint'], CAT_FEATURES, config, RANDOM_SEED, method='ENN')
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data, num_workers=10)
            elif config['balance_method'] == 'tomek':
                print('data balancing method: smotetomek')
                X_train = rebalance_data(X_train, event_col, config['endpoint'], CAT_FEATURES, config, RANDOM_SEED, method='Tomek')
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data, num_workers=10)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
    elif config['model'] == 'deephit':
        print('Initiate training of deephit neural network')
        X_val_processed, y_val = preprocess_data(X_val, config['features'], duration_col, event_col, TIME_GRID, discretize=True)
        val_data = (X_val_processed, y_val)
        if config['net'] == 'ann':
            print('model structure: ANN')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = recursive_clustering(model, X_train, duration_col, event_col, config, val_data, callbacks, max_repeats=30, time_grid=TIME_GRID)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                X_train = underbalance_data_general(X_train, EVENT_COL, CLUSTER_COL, config, version=config['version'])
                X_train, y_train = preprocess_data(X_train, config['features'], duration_col, event_col, TIME_GRID, discretize=True)
                logs = model.fit(X_train, y_train, config['batch_size'], int(config['max_epochs']), callbacks, verbose=True, val_data=val_data)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            if config['balance_method'] == 'clustering':
                print('data balancing method: clustering')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)
            elif config['balance_method'] == 'NearMiss':
                print('data balancing method: NearMiss')
                model, logs = lstm_training(model, X_train, X_val, duration_col, event_col, cluster_col, config, callbacks, time_grid)        

    # Free memory after training
    gc.collect()
    torch.cuda.empty_cache()

    return model, logs

def save_model(params, model, model_path, baseline_hazard_path):
    """
    Save model weights and baseline hazard data.

    Parameters:
    - model: The trained model to save.
    - model_path: Path to save the model weights (.pt file).
    - baseline_hazard_path: Path to save the baseline hazards (.pkl file).
    """
    # Compute baseline hazards and save
    if params['model'] == 'deepsurv':
        baseline_hazard = model.compute_baseline_hazards()
        baseline_hazard.to_pickle(baseline_hazard_path)
    
    # Save model weights
    model.save_model_weights(model_path)
    print(f"Model and baseline hazards saved to {model_path} and {baseline_hazard_path}.")

def training_wrapper(df, config, spliter, model_path, hazard_path, feature_col=FEATURE_COLS, duration_col=DURATION_COL, event_col=EVENT_COL, cluster_col=CLUSTER_COL, time_grid=TIME_GRID):
    """
    Train and save a survival analysis model with grouped cross-validation splits.

    This function performs training on grouped cross-validation splits of the input DataFrame and saves each trained model
    along with its baseline hazards. Memory management is handled to ensure efficient GPU usage.

    Parameters:
    - df (pd.DataFrame): DataFrame containing training data.
    - config (dict): Configuration dictionary for initializing the neural network.
    - spliter (object): Splitter object (e.g., GroupShuffleSplit or StratifiedKFold) used for creating train-validation splits.
    - model_path (str): File path to save the trained model weights (.pt file).
    - hazard_path (str): File path to save the baseline hazards (.pkl file).
    - feature_col (list): List of feature column names in `df` used for model training.
    - duration_col (str): Name of the column representing duration/time-to-event.
    - event_col (str): Name of the column representing the event indicator (0 = censored, 1 = event).
    - cluster_col (str): Name of the column used for grouping (clusters for cross-validation).
    - time_grid (list): List or array defining the time grid for training.

    Returns:
    - None: Saves the model weights and baseline hazard data for each cross-validation split.
    """
    for train_idx, val_idx in spliter.split(X=df[feature_col], y=df[event_col], groups=df[cluster_col]):
        # Clear GPU memory for each split
        gc.collect()
        torch.cuda.empty_cache()
        
        # Define early stopping callback
        callbacks = [tt.cb.EarlyStopping()]
        
        # Create training and validation sets
        train_df = df.iloc[train_idx]
        val_df = df.iloc[val_idx]
        
        # Initialize and train the model
        model = create_neural_network(config)
        model, logs = train_neural_network(
            model, config,
            X_train=train_df, X_val=val_df,
            duration_col=duration_col, event_col=event_col,
            cluster_col=cluster_col, callbacks=callbacks, time_grid=time_grid
        )
        
        # Save the trained model and its baseline hazards
        save_model(config, model, model_path, hazard_path)
        
        # Free memory for the next iteration
        del model, logs
        gc.collect()
        torch.cuda.empty_cache()

    print("Training and saving completed for all cross-validation splits.")

    print("All models have been trained and saved successfully.")

#### 3.1 deepsurv_ann_clustering_1
- features: ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 8 and 4 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.1144793446270997
- learning rate: 0.1
- max epochs: 9
- batch size: 512

In [4]:
gc.collect()
torch.cuda.empty_cache()

deepsurv_ann_clustering_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ['gender', 'dm', 'ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [8, 4],
    'batch_norm': False,
    'dropout': 0.1144793446270997,
    'lr': 0.1,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.2 deepsurv_ann_smoteenn_1
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.3
- 4 hidden layers with 64, 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.09555033386059111
- learning rate: 0.1
- max epochs: 16
- batch size: 512

In [5]:
deepsurv_ann_smoteenn_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'enn',
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [64, 32, 16, 8],
    'batch_norm': True,
    'dropout': 0.09555033386059111,
    'lr': 0.1,
    'max_epochs': 16,
    'batch_size': 512,
    'sampling_strategy': 0.3,
    'seq_length': 1,
}

#### 3.3 deepsurv_ann_smotetomek_1
- features:  ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.2
- 3 hidden layers with 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.23872991564684112
- learning rate: 0.1
- max epochs: 14
- batch size: 512

In [6]:
deepsurv_ann_smotetomek_1_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'tomek',
    'features': ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 1,
    'num_nodes': [32, 16, 8],
    'batch_norm': True,
    'dropout': 0.23872991564684112,
    'lr': 0.1,
    'max_epochs': 14,
    'batch_size': 512,
    'sampling_strategy': 0.2,
    'seq_length': 1,
}

#### 3.4 deepsurv_ann_clustering_2
- features: ["gender", "a1c", "po4", "UACR_mg_g", "Cr"]
- sampling_strategy: 0.05
- 3 hidden layers with 32, 16, 8 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3058921011568742
- learning rate: 0.1
- max epochs: 14
- batch size: 512

In [7]:
deepsurv_ann_clustering_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ["gender", "a1c", "po4", "UACR_mg_g", "Cr"],
    'endpoint': 2,
    'num_nodes': [32, 16, 8],
    'batch_norm': False,
    'dropout': 0.3058921011568742,
    'lr': 0.1,
    'max_epochs': 14,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.5 deepsurv_ann_smoteenn_2
- features: ["gender", "dm", "ht", "sprint", "a1c", "po4", "UACR_mg_g", "Cr", "age", "alb", "ca", "hb", "hco3"]
- sampling_strategy: 0.1, 
- 2 hidden layers with 8, 4 nodes
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.38878203553667456
- learning rate: 0.01
- max epochs: 10
- batch size: 512

In [8]:
deepsurv_ann_smoteenn_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'enn',
    'features': ["gender", "dm", "ht", "sprint", "a1c", "po4", "UACR_mg_g", "Cr", "age", "alb", "ca", "hb", "hco3"],
    'endpoint': 2,
    'num_nodes': [8, 4],
    'batch_norm': False,
    'dropout': 0.38878203553667456,
    'lr': 0.01,
    'max_epochs': 10,
    'batch_size': 512,
    'sampling_strategy': 0.1,
    'seq_length': 1,
}

#### 3.6 deepsurv_ann_smotetomek_2
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling_strategy: 0.05
- 2 hidden layers with 64, 32 nodes
- batch normalization in each hidden layer 
- dropout ratio in each layer: 0.3162398297390827
- learning rate: 0.1
- max epochs: 11
- batch size: 512

In [9]:
deepsurv_ann_smotetomek_2_config = {
    'model': 'deepsurv',
    'net': 'ann',
    'balance_method': 'tomek',
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 2,
    'num_nodes': [64, 32],
    'batch_norm': True,
    'dropout': 0.3162398297390827,
    'lr': 0.1,
    'max_epochs': 11,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.7 deepsurv_lstm_clustering_1
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- sequence length 7
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.2772567071863989
- learning rate: 0.1
- max epochs: 13
- batch size: 512

In [10]:
deepsurv_lstm_clustering_1_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'clustering',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 1,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.2772567071863989,
    'lr': 0.1,
    'max_epochs': 13,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 7,
}

#### 3.8 deepsurv_lstm_nearmiss_1
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- seq_length': 8
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3397308077824205
- learning rate: 0.001
- max epochs: 9
- batch size: 512

In [11]:
deepsurv_lstm_nearmiss_1_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 1,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.3397308077824205,
    'lr': 0.001,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 8,
}

#### 3.9 deepsurv_lstm_clustering_2
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling_strategy: 0.05
- 3 hidden layers with 8, 4, 2 nodes
- seq_length': 8
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.3397308077824205
- learning rate: 0.001
- max epochs: 9
- batch size: 512

In [12]:
deepsurv_lstm_clustering_2_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'clustering',
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 2,
    'num_nodes': [8, 4, 2],
    'batch_norm': False,
    'dropout': 0.3397308077824205,
    'lr': 0.001,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 8,
}

#### 3.10 deepsurv_lstm_nearmiss_2
- features: ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling_strategy: 0.05
- 2 hidden layers with 32, 16 nodes
- seq_length': 2
- no batch normalization in each hidden layer
- dropout ratio in each layer: 0.35763396978044143
- learning rate: 0.1
- max epochs: 10
- batch size: 512

In [13]:
deepsurv_lstm_nearmiss_2_config = {
    'model': 'deepsurv',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'features': ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 2,
    'num_nodes': [32, 16],
    'batch_norm': False,
    'dropout': 0.35763396978044143,
    'lr': 0.1,
    'max_epochs': 10,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 2,
}

#### 3.11 deephit_ann_clustering_all
- features: ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 64 and 32 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.26400151710698067
- learning rate: 0.1
- max epochs: 8
- batch size: 512

In [14]:
deephit_ann_clustering_all_config = {
    'model': 'deephit',
    'net': 'ann',
    'balance_method': 'clustering',
    'features': ['gender', 'dm', 'ht', 'sprint', 'po4', 'UACR_mg_g', 'Cr', 'age', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [64, 32],
    'batch_norm': True,
    'dropout': 0.26400151710698067,
    'lr': 0.1,
    'max_epochs': 8,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.12 deephit_ann_nearmiss2_all
- features: ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- 2 hidden layers with 8, 4 and 2 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.7346754269827496
- learning rate: 0.01
- max epochs: 7
- batch size: 512

In [15]:
deephit_ann_nearmiss2_all_config = {
    'model': 'deephit',
    'net': 'ann',
    'balance_method': 'NearMiss',
    'version': 2,
    'features': ['sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [8, 4, 2],
    'batch_norm': True,
    'dropout': 0.7346754269827496,
    'lr': 0.01,
    'max_epochs': 7,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 1,
}

#### 3.13 deephit_lstm_clustering_all
- features: ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3']
- sampling strategy: 0.05
- seq_length: 6
- 3 hidden layers with 64, 32 and 16 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.46132889488306583
- learning rate: 0.1
- max epochs: 5
- batch size: 512

In [16]:
deephit_lstm_clustering_all_config = {
    'model': 'deephit',
    'net': 'lstm',
    'balance_method': 'clustering',
    'version': 2,
    'features': ['ht', 'sprint', 'a1c', 'po4', 'UACR_mg_g', 'Cr', 'age', 'alb', 'ca', 'hb', 'hco3'],
    'endpoint': 'all',
    'num_nodes': [64, 32, 16],
    'batch_norm': True,
    'dropout': 0.46132889488306583,
    'lr': 0.1,
    'max_epochs': 5,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 6,
}

#### 3.14 deephit_lstm_nearmiss1_all
- features: ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr']
- sampling strategy: 0.05
- seq_length: 9
- 3 hidden layers with 32, 16 and 8 nodes
- batch normalization in each hidden layer
- dropout ratio in each layer: 0.18001924589390816
- learning rate: 0.1
- max epochs: 9
- batch size: 512

In [17]:
deephit_lstm_nearmiss1_all_config = {
    'model': 'deephit',
    'net': 'lstm',
    'balance_method': 'NearMiss',
    'version': 1,
    'features': ['gender', 'a1c', 'po4', 'UACR_mg_g', 'Cr'],
    'endpoint': 'all',
    'num_nodes': [32, 16, 8],
    'batch_norm': True,
    'dropout': 0.18001924589390816,
    'lr': 0.1,
    'max_epochs': 9,
    'batch_size': 512,
    'sampling_strategy': 0.05,
    'seq_length': 9,
}

In [18]:
model_ls = ['deepsurv_ann_clustering_1', 'deepsurv_ann_smoteenn_1', 'deepsurv_ann_smotetomek_1',
            'deepsurv_ann_clustering_2', 'deepsurv_ann_smoteenn_2', 'deepsurv_ann_smotetomek_2',
            'deepsurv_lstm_clustering_1', 'deepsurv_lstm_nearmiss', 'deepsurv_lstm_clustering_2', 'deepsurv_lstm_nearmiss_2',
            'deephit_ann_clustering_all', 'deephit_ann_nearmiss2_all', 'deephit_lstm_clustering_all', 'deephit_lstm_nearmiss1_all']
model_path = '/mnt/d/PYDataScience/g3_regress/code/models/'

In [19]:
gss1 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_SEED)
gss2 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=RANDOM_SEED)
for train_idx_1, fin_val_idx in gss1.split(X=X_train_transformed[FEATURE_COLS], y=X_train_transformed[EVENT_COL], groups=X_train_transformed[CLUSTER_COL]):
    X_train_transformed_2, X_fin_val = X_train_transformed.iloc[train_idx_1, :], X_train_transformed.iloc[fin_val_idx, :]
    gc.collect()
    torch.cuda.empty_cache()
    for model in model_ls:
        config_var_name = model + "_config"
        model_config = globals().get(config_var_name)
        if model_config is None:
            print(f"Configuration for {config_var_name} not found.")
            continue

        model_weights_path = f'{model_path}{model}.pt'
        model_hazard_path = f'{model_path}{model}_hazard.pkl'
        
        training_wrapper(X_train_transformed_2, model_config, gss2, model_weights_path, 
                        model_hazard_path, 
                        feature_col=FEATURE_COLS, duration_col=DURATION_COL, event_col=EVENT_COL, cluster_col=CLUSTER_COL, time_grid=TIME_GRID)
        gc.collect()
        torch.cuda.empty_cache()

2024-11-11 01:11:29,990 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:29,999 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:30,015 - INFO - Performing clustering iteration 1 / 20
2024-11-11 01:11:30,015 - INFO - init
2024-11-11 01:11:30,017 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:30,022 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: clustering


2024-11-11 01:11:31,098 - INFO - Defined medoid for deepsurv model with 1207 clusters.
	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha = 1) (Triggered internally at ../torch/csrc/utils/python_arg_parser.cpp:1581.)
  p.data = p.data.add(-weight_decay * eta, p.data)


0:	[0s / 0s],		train_loss: 4.9830,	val_loss: 7.6472
1:	[0s / 0s],		train_loss: 4.7894,	val_loss: 7.4912
2:	[0s / 0s],		train_loss: 4.7147,	val_loss: 7.4432
3:	[0s / 0s],		train_loss: 4.6912,	val_loss: 7.4037
4:	[0s / 0s],		train_loss: 4.6718,	val_loss: 7.4363
5:	[0s / 0s],		train_loss: 4.6559,	val_loss: 7.3902
6:	[0s / 0s],		train_loss: 4.6536,	val_loss: 7.3836
7:	[0s / 0s],		train_loss: 4.6475,	val_loss: 7.4139
8:	[0s / 0s],		train_loss: 4.6416,	val_loss: 6.8589


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:32,023 - INFO - Performing clustering iteration 2 / 20
2024-11-11 01:11:32,024 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:32,030 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:32,546 - INFO - Defined medoid for deepsurv model with 1207 clusters.


9:	[0s / 0s],		train_loss: 4.5630,	val_loss: 6.6073
10:	[0s / 0s],		train_loss: 4.5284,	val_loss: 6.3454
11:	[0s / 0s],		train_loss: 4.5609,	val_loss: 6.6320
12:	[0s / 0s],		train_loss: 4.5473,	val_loss: 6.8888
13:	[0s / 0s],		train_loss: 4.5494,	val_loss: 6.8011
14:	[0s / 0s],		train_loss: 4.5360,	val_loss: 6.4630
15:	[0s / 0s],		train_loss: 4.5318,	val_loss: 6.7356
16:	[0s / 0s],		train_loss: 4.5259,	val_loss: 6.2433
17:	[0s / 0s],		train_loss: 4.5288,	val_loss: 6.7170


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:33,059 - INFO - Performing clustering iteration 3 / 20
2024-11-11 01:11:33,060 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:33,064 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:33,542 - INFO - Defined medoid for deepsurv model with 1207 clusters.


18:	[0s / 0s],		train_loss: 4.5234,	val_loss: 6.7651
19:	[0s / 0s],		train_loss: 4.5202,	val_loss: 6.1921
20:	[0s / 0s],		train_loss: 4.5133,	val_loss: 6.7178
21:	[0s / 0s],		train_loss: 4.5079,	val_loss: 6.5168
22:	[0s / 0s],		train_loss: 4.5074,	val_loss: 6.4060
23:	[0s / 0s],		train_loss: 4.5092,	val_loss: 6.4155
24:	[0s / 0s],		train_loss: 4.4982,	val_loss: 6.5076
25:	[0s / 0s],		train_loss: 4.5049,	val_loss: 6.4778
26:	[0s / 0s],		train_loss: 4.5045,	val_loss: 6.4605


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:34,043 - INFO - Performing clustering iteration 4 / 20
2024-11-11 01:11:34,044 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:34,048 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:34,492 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


27:	[0s / 0s],		train_loss: 4.5468,	val_loss: 6.2626
28:	[0s / 0s],		train_loss: 4.5285,	val_loss: 6.3278
29:	[0s / 0s],		train_loss: 4.5354,	val_loss: 6.3442


2024-11-11 01:11:34,755 - INFO - Performing clustering iteration 5 / 20
2024-11-11 01:11:34,756 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:34,760 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:35,231 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:35,407 - INFO - Performing clustering iteration 6 / 20
2024-11-11 01:11:35,408 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:35,412 - INFO - Event column 'endpoint' updated with focus on event value 1.


30:	[0s / 0s],		train_loss: 4.5305,	val_loss: 6.8553


2024-11-11 01:11:35,871 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:36,053 - INFO - Performing clustering iteration 7 / 20
2024-11-11 01:11:36,053 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:36,058 - INFO - Event column 'endpoint' updated with focus on event value 1.


31:	[0s / 0s],		train_loss: 4.5263,	val_loss: 6.3886


2024-11-11 01:11:36,510 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:36,702 - INFO - Performing clustering iteration 8 / 20
2024-11-11 01:11:36,702 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:36,706 - INFO - Event column 'endpoint' updated with focus on event value 1.


32:	[0s / 0s],		train_loss: 4.5248,	val_loss: 6.4644


2024-11-11 01:11:37,174 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:37,337 - INFO - Performing clustering iteration 9 / 20
2024-11-11 01:11:37,337 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:37,342 - INFO - Event column 'endpoint' updated with focus on event value 1.


33:	[0s / 0s],		train_loss: 4.5247,	val_loss: 6.3777


2024-11-11 01:11:37,786 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:37,986 - INFO - Performing clustering iteration 10 / 20
2024-11-11 01:11:37,987 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:37,991 - INFO - Event column 'endpoint' updated with focus on event value 1.


34:	[0s / 0s],		train_loss: 4.5203,	val_loss: 6.3242


2024-11-11 01:11:38,499 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:38,714 - INFO - Performing clustering iteration 11 / 20
2024-11-11 01:11:38,715 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:38,719 - INFO - Event column 'endpoint' updated with focus on event value 1.


35:	[0s / 0s],		train_loss: 4.5261,	val_loss: 6.4288


2024-11-11 01:11:39,166 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:39,369 - INFO - Performing clustering iteration 12 / 20
2024-11-11 01:11:39,369 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:39,375 - INFO - Event column 'endpoint' updated with focus on event value 1.


36:	[0s / 0s],		train_loss: 4.5244,	val_loss: 6.3611


2024-11-11 01:11:39,812 - INFO - Defined medoid for deepsurv model with 1207 clusters.


37:	[0s / 0s],		train_loss: 4.5180,	val_loss: 6.1555
38:	[0s / 0s],		train_loss: 4.5295,	val_loss: 6.5595
39:	[0s / 0s],		train_loss: 4.5377,	val_loss: 5.9231
40:	[0s / 0s],		train_loss: 4.5192,	val_loss: 6.4159
41:	[0s / 0s],		train_loss: 4.5152,	val_loss: 6.1324
42:	[0s / 0s],		train_loss: 4.5171,	val_loss: 6.0364
43:	[0s / 0s],		train_loss: 4.5126,	val_loss: 6.1844
44:	[0s / 0s],		train_loss: 4.5125,	val_loss: 6.1942
45:	[0s / 0s],		train_loss: 4.5036,	val_loss: 6.0507


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:40,406 - INFO - Performing clustering iteration 13 / 20
2024-11-11 01:11:40,407 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:40,411 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:40,920 - INFO - Defined medoid for deepsurv model with 1207 clusters.


46:	[0s / 0s],		train_loss: 4.5302,	val_loss: 6.3100
47:	[0s / 0s],		train_loss: 4.5200,	val_loss: 6.2886
48:	[0s / 0s],		train_loss: 4.5132,	val_loss: 5.9895
49:	[0s / 0s],		train_loss: 4.5187,	val_loss: 6.0262


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:41,326 - INFO - Performing clustering iteration 14 / 20
2024-11-11 01:11:41,327 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:41,332 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:41,820 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:42,028 - INFO - Performing clustering iteration 15 / 20
2024-11-11 01:11:42,029 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:42,033 - INFO - Event column 'endpoint' updated with focus on event value 1.


50:	[0s / 0s],		train_loss: 4.5469,	val_loss: 6.1884


2024-11-11 01:11:42,505 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:42,693 - INFO - Performing clustering iteration 16 / 20
2024-11-11 01:11:42,694 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:42,697 - INFO - Event column 'endpoint' updated with focus on event value 1.


51:	[0s / 0s],		train_loss: 4.5249,	val_loss: 6.3582


2024-11-11 01:11:43,144 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:43,350 - INFO - Performing clustering iteration 17 / 20
2024-11-11 01:11:43,351 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:43,354 - INFO - Event column 'endpoint' updated with focus on event value 1.


52:	[0s / 0s],		train_loss: 4.5187,	val_loss: 6.1702


2024-11-11 01:11:43,826 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:44,066 - INFO - Performing clustering iteration 18 / 20
2024-11-11 01:11:44,066 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:44,073 - INFO - Event column 'endpoint' updated with focus on event value 1.


53:	[0s / 0s],		train_loss: 4.5352,	val_loss: 6.1690


2024-11-11 01:11:44,538 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:44,732 - INFO - Performing clustering iteration 19 / 20
2024-11-11 01:11:44,732 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:44,736 - INFO - Event column 'endpoint' updated with focus on event value 1.


54:	[0s / 0s],		train_loss: 4.5292,	val_loss: 6.1099


2024-11-11 01:11:45,218 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:11:45,434 - INFO - Performing clustering iteration 20 / 20
2024-11-11 01:11:45,435 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:11:45,439 - INFO - Event column 'endpoint' updated with focus on event value 1.


55:	[0s / 0s],		train_loss: 4.5350,	val_loss: 6.0536


2024-11-11 01:11:45,890 - INFO - Defined medoid for deepsurv model with 1207 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


56:	[0s / 0s],		train_loss: 4.5276,	val_loss: 5.9846
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:11:49,339 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:11:49,345 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smoteenn


2024-11-11 01:11:53,551 - INFO - Missing values imputed using IterativeImputer.
2024-11-11 01:11:53,557 - INFO - Dataframe rebalanced with SMOTE and ENN.


0:	[2s / 2s],		train_loss: 3.7039,	val_loss: 5.0026
1:	[2s / 5s],		train_loss: 3.6747,	val_loss: 5.1365
2:	[2s / 8s],		train_loss: 3.6364,	val_loss: 4.9951
3:	[2s / 10s],		train_loss: 3.6516,	val_loss: 4.9670
4:	[2s / 13s],		train_loss: 3.6385,	val_loss: 5.0735
5:	[3s / 16s],		train_loss: 3.6231,	val_loss: 5.0273
6:	[2s / 19s],		train_loss: 3.6107,	val_loss: 5.0000
7:	[2s / 22s],		train_loss: 3.6328,	val_loss: 5.0556
8:	[2s / 25s],		train_loss: 3.6288,	val_loss: 5.0316
9:	[5s / 30s],		train_loss: 3.6194,	val_loss: 4.9399
10:	[2s / 33s],		train_loss: 3.6120,	val_loss: 4.9992
11:	[2s / 36s],		train_loss: 3.6053,	val_loss: 4.9803
12:	[2s / 39s],		train_loss: 3.5982,	val_loss: 5.0213
13:	[2s / 41s],		train_loss: 3.5945,	val_loss: 4.9910
14:	[2s / 44s],		train_loss: 3.5897,	val_loss: 4.9887
15:	[2s / 47s],		train_loss: 3.6117,	val_loss: 5.0459


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:12:42,068 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:12:42,079 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smotetomek


2024-11-11 01:12:46,955 - INFO - Missing values imputed using IterativeImputer.
2024-11-11 01:12:46,967 - INFO - Dataframe rebalanced with SMOTE and Tomek.


0:	[3s / 3s],		train_loss: 3.6937,	val_loss: 4.8984
1:	[3s / 6s],		train_loss: 3.6496,	val_loss: 5.1068
2:	[3s / 9s],		train_loss: 3.6155,	val_loss: 4.8960
3:	[5s / 14s],		train_loss: 3.6329,	val_loss: 5.0473
4:	[3s / 17s],		train_loss: 3.6244,	val_loss: 4.9454
5:	[2s / 20s],		train_loss: 3.6105,	val_loss: 4.9168
6:	[3s / 23s],		train_loss: 3.5995,	val_loss: 4.8704
7:	[2s / 26s],		train_loss: 3.6258,	val_loss: 4.8531
8:	[3s / 29s],		train_loss: 3.6212,	val_loss: 4.8991
9:	[3s / 32s],		train_loss: 3.6159,	val_loss: 5.0214
10:	[3s / 35s],		train_loss: 3.6119,	val_loss: 4.9520
11:	[2s / 38s],		train_loss: 3.6045,	val_loss: 4.9148
12:	[3s / 41s],		train_loss: 3.6014,	val_loss: 4.9259
13:	[5s / 46s],		train_loss: 3.5973,	val_loss: 4.9116


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:13:34,780 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-11 01:13:34,785 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-11 01:13:34,792 - INFO - Performing clustering iteration 1 / 20
2024-11-11 01:13:34,793 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:34,797 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: clustering


2024-11-11 01:13:35,467 - INFO - Defined medoid for deepsurv model with 3725 clusters.


0:	[0s / 0s],		train_loss: 4.8377,	val_loss: 7.8246
1:	[0s / 0s],		train_loss: 4.7746,	val_loss: 7.8049
2:	[0s / 0s],		train_loss: 4.7532,	val_loss: 7.8124
3:	[0s / 0s],		train_loss: 4.7500,	val_loss: 7.8242
4:	[0s / 0s],		train_loss: 4.7434,	val_loss: 7.8194
5:	[0s / 0s],		train_loss: 4.7410,	val_loss: 7.8201
6:	[0s / 0s],		train_loss: 4.7261,	val_loss: 7.8172
7:	[0s / 0s],		train_loss: 4.7398,	val_loss: 7.7947
8:	[0s / 0s],		train_loss: 4.7344,	val_loss: 7.7907
9:	[0s / 0s],		train_loss: 4.7263,	val_loss: 7.7879
10:	[0s / 0s],		train_loss: 4.7294,	val_loss: 7.8262
11:	[0s / 0s],		train_loss: 4.7218,	val_loss: 7.8164
12:	[0s / 0s],		train_loss: 4.7225,	val_loss: 7.8102


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:36,647 - INFO - Performing clustering iteration 2 / 20
2024-11-11 01:13:36,647 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:36,651 - INFO - Event column 'endpoint' updated with focus on event value 1.


13:	[0s / 1s],		train_loss: 4.7231,	val_loss: 7.8125


2024-11-11 01:13:37,093 - INFO - Defined medoid for deepsurv model with 3725 clusters.


14:	[0s / 0s],		train_loss: 4.7996,	val_loss: 7.8146
15:	[0s / 0s],		train_loss: 4.7748,	val_loss: 7.8052
16:	[0s / 0s],		train_loss: 4.7459,	val_loss: 7.8373
17:	[0s / 0s],		train_loss: 4.7464,	val_loss: 7.8240


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:37,613 - INFO - Performing clustering iteration 3 / 20
2024-11-11 01:13:37,613 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:37,618 - INFO - Event column 'endpoint' updated with focus on event value 1.


18:	[0s / 0s],		train_loss: 4.7450,	val_loss: 7.8250
19:	[0s / 0s],		train_loss: 4.7419,	val_loss: 7.8279


2024-11-11 01:13:38,039 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:38,229 - INFO - Performing clustering iteration 4 / 20
2024-11-11 01:13:38,230 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:38,234 - INFO - Event column 'endpoint' updated with focus on event value 1.


20:	[0s / 0s],		train_loss: 4.8583,	val_loss: 7.8048


2024-11-11 01:13:38,650 - INFO - Defined medoid for deepsurv model with 3725 clusters.


21:	[0s / 0s],		train_loss: 5.0357,	val_loss: 7.7539
22:	[0s / 0s],		train_loss: 5.0232,	val_loss: 7.7552
23:	[0s / 0s],		train_loss: 5.0247,	val_loss: 7.7596
24:	[0s / 0s],		train_loss: 5.0237,	val_loss: 7.7566
25:	[0s / 0s],		train_loss: 5.0254,	val_loss: 7.7562
26:	[0s / 0s],		train_loss: 5.0245,	val_loss: 7.7558
27:	[0s / 0s],		train_loss: 5.0276,	val_loss: 7.7559
28:	[0s / 0s],		train_loss: 5.0248,	val_loss: 7.7562
29:	[0s / 0s],		train_loss: 5.0239,	val_loss: 7.7564
30:	[0s / 0s],		train_loss: 5.0212,	val_loss: 7.7571
31:	[0s / 0s],		train_loss: 5.0254,	val_loss: 7.7538
32:	[0s / 0s],		train_loss: 5.0254,	val_loss: 7.7577
33:	[0s / 0s],		train_loss: 5.0249,	val_loss: 7.7574
34:	[0s / 0s],		train_loss: 5.0273,	val_loss: 7.7564


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:39,654 - INFO - Performing clustering iteration 5 / 20
2024-11-11 01:13:39,655 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:39,658 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:13:40,070 - INFO - Defined medoid for deepsurv model with 3725 clusters.


35:	[0s / 0s],		train_loss: 5.0410,	val_loss: 7.7629
36:	[0s / 0s],		train_loss: 5.0320,	val_loss: 7.7625
37:	[0s / 0s],		train_loss: 5.0372,	val_loss: 7.7558
38:	[0s / 0s],		train_loss: 5.0327,	val_loss: 7.7621
39:	[0s / 0s],		train_loss: 5.0326,	val_loss: 7.7584
40:	[0s / 0s],		train_loss: 5.0289,	val_loss: 7.7618
41:	[0s / 0s],		train_loss: 5.0335,	val_loss: 7.7576


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:40,636 - INFO - Performing clustering iteration 6 / 20
2024-11-11 01:13:40,636 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:40,640 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:13:41,033 - INFO - Defined medoid for deepsurv model with 3725 clusters.


42:	[0s / 0s],		train_loss: 5.0511,	val_loss: 7.7513
43:	[0s / 0s],		train_loss: 5.0472,	val_loss: 7.7513
44:	[0s / 0s],		train_loss: 5.0537,	val_loss: 7.7513
45:	[0s / 0s],		train_loss: 5.0533,	val_loss: 7.7513
46:	[0s / 0s],		train_loss: 5.0481,	val_loss: 7.7514
47:	[0s / 0s],		train_loss: 5.0486,	val_loss: 7.7513
48:	[0s / 0s],		train_loss: 5.0528,	val_loss: 7.7513
49:	[0s / 0s],		train_loss: 5.0505,	val_loss: 7.7514
50:	[0s / 0s],		train_loss: 5.0523,	val_loss: 7.7514
51:	[0s / 0s],		train_loss: 5.0442,	val_loss: 7.7513
52:	[0s / 0s],		train_loss: 5.0488,	val_loss: 7.7513
53:	[0s / 0s],		train_loss: 5.0467,	val_loss: 7.7513
54:	[0s / 0s],		train_loss: 5.0508,	val_loss: 7.7513
55:	[0s / 0s],		train_loss: 5.0500,	val_loss: 7.7513


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:42,127 - INFO - Performing clustering iteration 7 / 20
2024-11-11 01:13:42,128 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:42,131 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:13:42,522 - INFO - Defined medoid for deepsurv model with 3725 clusters.


56:	[0s / 0s],		train_loss: 5.0576,	val_loss: 7.7513
57:	[0s / 0s],		train_loss: 5.0590,	val_loss: 7.7514
58:	[0s / 0s],		train_loss: 5.0585,	val_loss: 7.7514
59:	[0s / 0s],		train_loss: 5.0570,	val_loss: 7.7514
60:	[0s / 0s],		train_loss: 5.0552,	val_loss: 7.7514
61:	[0s / 0s],		train_loss: 5.0564,	val_loss: 7.7514
62:	[0s / 0s],		train_loss: 5.0568,	val_loss: 7.7514
63:	[0s / 0s],		train_loss: 5.0535,	val_loss: 7.7542
64:	[0s / 0s],		train_loss: 5.0596,	val_loss: 7.7514
65:	[0s / 0s],		train_loss: 5.0576,	val_loss: 7.7513


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:43,302 - INFO - Performing clustering iteration 8 / 20
2024-11-11 01:13:43,303 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:43,307 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:13:43,683 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:43,883 - INFO - Performing clustering iteration 9 / 20
2024-11-11 01:13:43,883 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:43,888 - INFO - Event column 'endpoint' updated with focus on event value 1.


66:	[0s / 0s],		train_loss: 5.0576,	val_loss: 7.7514


2024-11-11 01:13:44,247 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:44,462 - INFO - Performing clustering iteration 10 / 20
2024-11-11 01:13:44,463 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:44,466 - INFO - Event column 'endpoint' updated with focus on event value 1.


67:	[0s / 0s],		train_loss: 5.0770,	val_loss: 7.7523


2024-11-11 01:13:44,871 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:45,095 - INFO - Performing clustering iteration 11 / 20
2024-11-11 01:13:45,095 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:45,099 - INFO - Event column 'endpoint' updated with focus on event value 1.


68:	[0s / 0s],		train_loss: 5.0887,	val_loss: 7.7536


2024-11-11 01:13:45,451 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


69:	[0s / 0s],		train_loss: 5.0887,	val_loss: 7.7524


2024-11-11 01:13:45,752 - INFO - Performing clustering iteration 12 / 20
2024-11-11 01:13:45,753 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:45,758 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:13:46,132 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:46,368 - INFO - Performing clustering iteration 13 / 20
2024-11-11 01:13:46,369 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:46,373 - INFO - Event column 'endpoint' updated with focus on event value 1.


70:	[0s / 0s],		train_loss: 5.0805,	val_loss: 7.7562


2024-11-11 01:13:46,705 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:46,958 - INFO - Performing clustering iteration 14 / 20
2024-11-11 01:13:46,959 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:46,964 - INFO - Event column 'endpoint' updated with focus on event value 1.


71:	[0s / 0s],		train_loss: 5.1157,	val_loss: 7.7569


2024-11-11 01:13:47,350 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:47,588 - INFO - Performing clustering iteration 15 / 20
2024-11-11 01:13:47,589 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:47,594 - INFO - Event column 'endpoint' updated with focus on event value 1.


72:	[0s / 0s],		train_loss: 5.1057,	val_loss: 7.7577


2024-11-11 01:13:47,945 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:48,183 - INFO - Performing clustering iteration 16 / 20
2024-11-11 01:13:48,183 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:48,188 - INFO - Event column 'endpoint' updated with focus on event value 1.


73:	[0s / 0s],		train_loss: 5.0863,	val_loss: 7.7556


2024-11-11 01:13:48,537 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:48,790 - INFO - Performing clustering iteration 17 / 20
2024-11-11 01:13:48,791 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:48,794 - INFO - Event column 'endpoint' updated with focus on event value 1.


74:	[0s / 0s],		train_loss: 5.1062,	val_loss: 7.7576


2024-11-11 01:13:49,108 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:49,313 - INFO - Performing clustering iteration 18 / 20
2024-11-11 01:13:49,314 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:49,318 - INFO - Event column 'endpoint' updated with focus on event value 1.


75:	[0s / 0s],		train_loss: 5.1011,	val_loss: 7.7552


2024-11-11 01:13:49,619 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:49,817 - INFO - Performing clustering iteration 19 / 20
2024-11-11 01:13:49,817 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:49,821 - INFO - Event column 'endpoint' updated with focus on event value 1.


76:	[0s / 0s],		train_loss: 5.1087,	val_loss: 7.7561


2024-11-11 01:13:50,095 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:13:50,294 - INFO - Performing clustering iteration 20 / 20
2024-11-11 01:13:50,295 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:13:50,298 - INFO - Event column 'endpoint' updated with focus on event value 1.


77:	[0s / 0s],		train_loss: 5.0830,	val_loss: 7.7576


2024-11-11 01:13:50,565 - INFO - Defined medoid for deepsurv model with 3725 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))


78:	[0s / 0s],		train_loss: 5.1019,	val_loss: 7.7563
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:13:51,577 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-11 01:13:51,583 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smoteenn


2024-11-11 01:13:55,883 - INFO - Missing values imputed using IterativeImputer.
2024-11-11 01:13:55,889 - INFO - Dataframe rebalanced with SMOTE and ENN.


0:	[2s / 2s],		train_loss: 4.9248,	val_loss: 7.6376
1:	[2s / 4s],		train_loss: 4.8521,	val_loss: 7.5118
2:	[2s / 6s],		train_loss: 4.8133,	val_loss: 7.5078
3:	[2s / 8s],		train_loss: 4.8133,	val_loss: 7.4827
4:	[4s / 13s],		train_loss: 4.8067,	val_loss: 7.4808
5:	[2s / 15s],		train_loss: 4.7945,	val_loss: 7.4821
6:	[2s / 18s],		train_loss: 4.7994,	val_loss: 7.4787
7:	[2s / 20s],		train_loss: 4.7979,	val_loss: 7.4919
8:	[2s / 22s],		train_loss: 4.8081,	val_loss: 7.4771
9:	[2s / 24s],		train_loss: 4.7931,	val_loss: 7.4882


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:14:21,246 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-11 01:14:21,254 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: ANN
data balancing method: smotetomek


2024-11-11 01:14:25,733 - INFO - Missing values imputed using IterativeImputer.
2024-11-11 01:14:25,742 - INFO - Dataframe rebalanced with SMOTE and Tomek.


0:	[2s / 2s],		train_loss: 4.6971,	val_loss: 7.4266
1:	[2s / 5s],		train_loss: 4.6767,	val_loss: 7.4766
2:	[2s / 7s],		train_loss: 4.6137,	val_loss: 7.4052
3:	[2s / 10s],		train_loss: 4.6696,	val_loss: 7.4253
4:	[2s / 12s],		train_loss: 4.6354,	val_loss: 7.4492
5:	[4s / 17s],		train_loss: 4.6036,	val_loss: 7.3843
6:	[2s / 20s],		train_loss: 4.5856,	val_loss: 7.4143
7:	[2s / 22s],		train_loss: 4.6285,	val_loss: 7.3914
8:	[2s / 25s],		train_loss: 4.6278,	val_loss: 7.4165
9:	[2s / 27s],		train_loss: 4.6162,	val_loss: 7.4229
10:	[2s / 30s],		train_loss: 4.6005,	val_loss: 7.4228


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:14:57,049 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:14:57,171 - INFO - Performing clustering iteration 1 / 20
2024-11-11 01:14:57,171 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:14:57,178 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: clustering


2024-11-11 01:14:57,851 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-11 01:14:57,852 - INFO - Performing clustering iteration 2 / 20
2024-11-11 01:14:57,853 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:14:57,856 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:14:58,286 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-11 01:14:58,287 - INFO - Performing clustering iteration 3 / 20
2024-11-11 01:14:58,288 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:14:58,291 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:14:58,748 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-11 01:14:58,749 - INFO - Performing clustering iteration 4 / 20
2024-11-11 01:14:58,750 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:14:58,753 - INFO - Event column 'endpoint' updated with focus on event value 1

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[0s / 0s],		train_loss: 4.5617,	val_loss: 7.2081
1:	[0s / 1s],		train_loss: 2.9387,	val_loss: 5.9918
2:	[0s / 2s],		train_loss: 2.4066,	val_loss: 5.3972
3:	[0s / 3s],		train_loss: 2.6390,	val_loss: 6.5935
4:	[0s / 4s],		train_loss: 2.5702,	val_loss: 5.2999
5:	[0s / 4s],		train_loss: 2.3631,	val_loss: 5.1957
6:	[0s / 5s],		train_loss: 2.3455,	val_loss: 5.1295
7:	[0s / 6s],		train_loss: 2.3875,	val_loss: 5.1657
8:	[0s / 7s],		train_loss: 2.3445,	val_loss: 5.6417
9:	[0s / 8s],		train_loss: 2.3155,	val_loss: 5.8319
10:	[0s / 9s],		train_loss: 2.2676,	val_loss: 5.7024
11:	[1s / 10s],		train_loss: 2.1691,	val_loss: 5.3814
12:	[1s / 11s],		train_loss: 2.1433,	val_loss: 5.2277


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.
Configuration for deepsurv_lstm_nearmiss_config not found.


2024-11-11 01:16:08,806 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-11 01:16:08,942 - INFO - Performing clustering iteration 1 / 20
2024-11-11 01:16:08,944 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:16:08,956 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: clustering


2024-11-11 01:16:09,540 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-11 01:16:09,543 - INFO - Performing clustering iteration 2 / 20
2024-11-11 01:16:09,544 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:16:09,550 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-11 01:16:10,037 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-11 01:16:10,038 - INFO - Performing clustering iteration 3 / 20
2024-11-11 01:16:10,038 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:16:10,043 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-11 01:16:10,512 - INFO - Defined medoid for deepsurv model with 3725 clusters.
2024-11-11 01:16:10,514 - INFO - Performing clustering iteration 4 / 20
2024-11-11 01:16:10,514 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:16:10,518 - INFO - Event column 'endpoint' updated with focus on event value 2

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[1s / 1s],		train_loss: 4.9825
1:	[1s / 3s],		train_loss: 4.9802
2:	[1s / 4s],		train_loss: 4.9777
3:	[1s / 6s],		train_loss: 4.9773
4:	[1s / 8s],		train_loss: 4.9466
5:	[1s / 9s],		train_loss: 4.9355
6:	[1s / 11s],		train_loss: 4.9283
7:	[1s / 12s],		train_loss: 4.9099
8:	[1s / 14s],		train_loss: 4.8834


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:18:02,797 - INFO - Event column 'endpoint' updated with focus on event value 2.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-11 01:18:02,932 - INFO - Event column 'endpoint' updated with focus on event value 2.


Initiate training of deepsurv neural network
model structure: LSTM
data balancing method: NearMiss


2024-11-11 01:18:03,270 - INFO - Dataset for deepsurv model undersampled using method 'NearMiss' with sampling strategy 0.05.
2024-11-11 01:18:36,969 - INFO - Event column 'endpoint' updated with focus on event value 2.
2024-11-11 01:19:13,194 - INFO - Validation data retrieved


64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[0s / 0s],		train_loss: 5.0475
1:	[0s / 1s],		train_loss: 5.0110
2:	[0s / 1s],		train_loss: 4.9827
3:	[0s / 2s],		train_loss: 5.0092
4:	[0s / 3s],		train_loss: 4.9928
5:	[0s / 3s],		train_loss: 4.9793
6:	[2s / 6s],		train_loss: 4.9707
7:	[0s / 7s],		train_loss: 4.9829
8:	[0s / 7s],		train_loss: 5.0025
9:	[0s / 8s],		train_loss: 4.9780


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:19:22,601 - INFO - Performing clustering iteration 1 / 20
2024-11-11 01:19:22,602 - INFO - CUDA environment set up and GPU memory cleared.


Initiate training of deephit neural network
model structure: ANN
data balancing method: clustering


2024-11-11 01:19:23,287 - INFO - Defined medoid for deephit model with 4932 clusters.


0:	[0s / 0s],		train_loss: 0.4790,	val_loss: 0.0682
1:	[0s / 1s],		train_loss: 0.3757,	val_loss: 0.0837
2:	[0s / 1s],		train_loss: 0.3531,	val_loss: 0.0777
3:	[0s / 2s],		train_loss: 0.3406,	val_loss: 0.0724
4:	[0s / 2s],		train_loss: 0.3315,	val_loss: 0.0695
5:	[0s / 3s],		train_loss: 0.3265,	val_loss: 0.0703
6:	[0s / 3s],		train_loss: 0.3247,	val_loss: 0.0695


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:28,126 - INFO - Performing clustering iteration 2 / 20
2024-11-11 01:19:28,126 - INFO - CUDA environment set up and GPU memory cleared.


7:	[0s / 4s],		train_loss: 0.3287,	val_loss: 0.0694


2024-11-11 01:19:28,563 - INFO - Defined medoid for deephit model with 4932 clusters.


8:	[0s / 0s],		train_loss: 0.4116,	val_loss: 0.0737
9:	[0s / 1s],		train_loss: 0.3832,	val_loss: 0.0633
10:	[0s / 1s],		train_loss: 0.3494,	val_loss: 0.0583
11:	[0s / 2s],		train_loss: 0.3435,	val_loss: 0.0605
12:	[0s / 2s],		train_loss: 0.3386,	val_loss: 0.0622
13:	[0s / 3s],		train_loss: 0.3354,	val_loss: 0.0640
14:	[0s / 3s],		train_loss: 0.3358,	val_loss: 0.0630


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:33,066 - INFO - Performing clustering iteration 3 / 20
2024-11-11 01:19:33,066 - INFO - CUDA environment set up and GPU memory cleared.


15:	[0s / 4s],		train_loss: 0.3330,	val_loss: 0.0513


2024-11-11 01:19:33,480 - INFO - Defined medoid for deephit model with 4932 clusters.


16:	[0s / 0s],		train_loss: 0.3407,	val_loss: 0.0590
17:	[0s / 1s],		train_loss: 0.3331,	val_loss: 0.0658
18:	[0s / 1s],		train_loss: 0.3281,	val_loss: 0.0605
19:	[0s / 2s],		train_loss: 0.3256,	val_loss: 0.0645
20:	[0s / 2s],		train_loss: 0.3244,	val_loss: 0.0620
21:	[0s / 3s],		train_loss: 0.3238,	val_loss: 0.0618
22:	[0s / 3s],		train_loss: 0.3241,	val_loss: 0.0584


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:37,883 - INFO - Performing clustering iteration 4 / 20
2024-11-11 01:19:37,884 - INFO - CUDA environment set up and GPU memory cleared.


23:	[0s / 4s],		train_loss: 0.3217,	val_loss: 0.0625


2024-11-11 01:19:38,302 - INFO - Defined medoid for deephit model with 4932 clusters.


24:	[0s / 0s],		train_loss: 0.3375,	val_loss: 0.0585


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:39,542 - INFO - Performing clustering iteration 5 / 20
2024-11-11 01:19:39,542 - INFO - CUDA environment set up and GPU memory cleared.


25:	[0s / 1s],		train_loss: 0.3302,	val_loss: 0.0603


2024-11-11 01:19:39,982 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:40,656 - INFO - Performing clustering iteration 6 / 20
2024-11-11 01:19:40,656 - INFO - CUDA environment set up and GPU memory cleared.


26:	[0s / 0s],		train_loss: 0.3379,	val_loss: 0.0613


2024-11-11 01:19:41,038 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:41,771 - INFO - Performing clustering iteration 7 / 20
2024-11-11 01:19:41,771 - INFO - CUDA environment set up and GPU memory cleared.


27:	[0s / 0s],		train_loss: 0.3419,	val_loss: 0.0637


2024-11-11 01:19:42,170 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:42,885 - INFO - Performing clustering iteration 8 / 20
2024-11-11 01:19:42,886 - INFO - CUDA environment set up and GPU memory cleared.


28:	[0s / 0s],		train_loss: 0.3477,	val_loss: 0.0592


2024-11-11 01:19:43,277 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:43,983 - INFO - Performing clustering iteration 9 / 20
2024-11-11 01:19:43,984 - INFO - CUDA environment set up and GPU memory cleared.


29:	[0s / 0s],		train_loss: 0.3492,	val_loss: 0.0544


2024-11-11 01:19:44,368 - INFO - Defined medoid for deephit model with 4932 clusters.


30:	[0s / 0s],		train_loss: 0.3674,	val_loss: 0.0508
31:	[0s / 1s],		train_loss: 0.3583,	val_loss: 0.0511
32:	[0s / 1s],		train_loss: 0.3464,	val_loss: 0.0609
33:	[0s / 2s],		train_loss: 0.3377,	val_loss: 0.0602
34:	[0s / 2s],		train_loss: 0.3343,	val_loss: 0.0580
35:	[0s / 3s],		train_loss: 0.3348,	val_loss: 0.0552
36:	[0s / 3s],		train_loss: 0.3330,	val_loss: 0.0566


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:48,967 - INFO - Performing clustering iteration 10 / 20
2024-11-11 01:19:48,968 - INFO - CUDA environment set up and GPU memory cleared.


37:	[0s / 4s],		train_loss: 0.3307,	val_loss: 0.0573


2024-11-11 01:19:49,362 - INFO - Defined medoid for deephit model with 4932 clusters.


38:	[0s / 0s],		train_loss: 0.3501,	val_loss: 0.0610
39:	[0s / 1s],		train_loss: 0.3385,	val_loss: 0.0581


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:51,174 - INFO - Performing clustering iteration 11 / 20
2024-11-11 01:19:51,175 - INFO - CUDA environment set up and GPU memory cleared.


40:	[0s / 1s],		train_loss: 0.3378,	val_loss: 0.0582


2024-11-11 01:19:51,554 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:54,454 - INFO - Performing clustering iteration 12 / 20
2024-11-11 01:19:54,455 - INFO - CUDA environment set up and GPU memory cleared.


41:	[2s / 2s],		train_loss: 0.3508,	val_loss: 0.0592


2024-11-11 01:19:54,767 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:55,432 - INFO - Performing clustering iteration 13 / 20
2024-11-11 01:19:55,433 - INFO - CUDA environment set up and GPU memory cleared.


42:	[0s / 0s],		train_loss: 0.3518,	val_loss: 0.0595


2024-11-11 01:19:55,775 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:56,450 - INFO - Performing clustering iteration 14 / 20
2024-11-11 01:19:56,451 - INFO - CUDA environment set up and GPU memory cleared.


43:	[0s / 0s],		train_loss: 0.3481,	val_loss: 0.0582


2024-11-11 01:19:56,798 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:57,535 - INFO - Performing clustering iteration 15 / 20
2024-11-11 01:19:57,536 - INFO - CUDA environment set up and GPU memory cleared.


44:	[0s / 0s],		train_loss: 0.3506,	val_loss: 0.0617


2024-11-11 01:19:57,829 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:58,520 - INFO - Performing clustering iteration 16 / 20
2024-11-11 01:19:58,521 - INFO - CUDA environment set up and GPU memory cleared.


45:	[0s / 0s],		train_loss: 0.3534,	val_loss: 0.0582


2024-11-11 01:19:58,784 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:19:59,584 - INFO - Performing clustering iteration 17 / 20
2024-11-11 01:19:59,585 - INFO - CUDA environment set up and GPU memory cleared.


46:	[0s / 0s],		train_loss: 0.3536,	val_loss: 0.0595


2024-11-11 01:19:59,839 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:20:00,630 - INFO - Performing clustering iteration 18 / 20
2024-11-11 01:20:00,631 - INFO - CUDA environment set up and GPU memory cleared.


47:	[0s / 0s],		train_loss: 0.3515,	val_loss: 0.0577


2024-11-11 01:20:00,887 - INFO - Defined medoid for deephit model with 4932 clusters.


48:	[0s / 0s],		train_loss: 0.3550,	val_loss: 0.0545


  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:20:01,788 - INFO - Performing clustering iteration 19 / 20
2024-11-11 01:20:01,789 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:20:02,042 - INFO - Defined medoid for deephit model with 4932 clusters.
  self.net.load_state_dict(torch.load(path, **kwargs))
2024-11-11 01:20:02,938 - INFO - Performing clustering iteration 20 / 20
2024-11-11 01:20:02,938 - INFO - CUDA environment set up and GPU memory cleared.


49:	[0s / 0s],		train_loss: 0.3567,	val_loss: 0.0585


2024-11-11 01:20:03,182 - INFO - Defined medoid for deephit model with 4932 clusters.


50:	[0s / 0s],		train_loss: 0.3539,	val_loss: 0.0618


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.
Initiate training of deephit neural network
model structure: ANN
data balancing method: NearMiss


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-11 01:20:16,008 - INFO - Dataset for deephit model undersampled using method 'NearMiss' with sampling strategy 0.05.


0:	[2s / 2s],		train_loss: 0.1217,	val_loss: 0.0696
1:	[1s / 4s],		train_loss: 0.0738,	val_loss: 0.0373
2:	[1s / 6s],		train_loss: 0.0619,	val_loss: 0.0344
3:	[2s / 8s],		train_loss: 0.0578,	val_loss: 0.0281
4:	[1s / 9s],		train_loss: 0.0555,	val_loss: 0.0273
5:	[4s / 14s],		train_loss: 0.0549,	val_loss: 0.0270
6:	[1s / 16s],		train_loss: 0.0549,	val_loss: 0.0270
Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all_hazard.pkl.


  self.net.load_state_dict(torch.load(path, **kwargs))


Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


2024-11-11 01:20:33,628 - INFO - Performing clustering iteration 1 / 20
2024-11-11 01:20:33,628 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:20:33,633 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate training of deephit neural network
model structure: LSTM
data balancing method: clustering


2024-11-11 01:20:34,101 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-11 01:20:34,103 - INFO - Performing clustering iteration 2 / 20
2024-11-11 01:20:34,103 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:20:34,106 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:20:34,620 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-11 01:20:34,621 - INFO - Performing clustering iteration 3 / 20
2024-11-11 01:20:34,622 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:20:34,625 - INFO - Event column 'endpoint' updated with focus on event value 1.
2024-11-11 01:20:35,080 - INFO - Defined medoid for deepsurv model with 1207 clusters.
2024-11-11 01:20:35,082 - INFO - Performing clustering iteration 4 / 20
2024-11-11 01:20:35,083 - INFO - CUDA environment set up and GPU memory cleared.
2024-11-11 01:20:35,086 - INFO - Event column 'endpoint' updated with focus on event value 1

64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[1s / 1s],		train_loss: 0.0546,	val_loss: 0.0508
1:	[1s / 3s],		train_loss: 0.0422,	val_loss: 0.0467
2:	[1s / 5s],		train_loss: 0.0285,	val_loss: 0.0505
3:	[1s / 7s],		train_loss: 0.0338,	val_loss: 0.0507
4:	[1s / 9s],		train_loss: 0.0263,	val_loss: 0.0544


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.
Initiate training of deephit neural network
model structure: LSTM
data balancing method: NearMiss


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['_original_index'] = df.index
2024-11-11 01:22:34,957 - INFO - Dataset for deephit model undersampled using method 'NearMiss' with sampling strategy 0.05.
2024-11-11 01:23:12,848 - INFO - Validation data retrieved


64249


  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


0:	[1s / 1s],		train_loss: 0.4642,	val_loss: 0.0951
1:	[1s / 2s],		train_loss: 0.4103,	val_loss: 0.0848
2:	[1s / 4s],		train_loss: 0.3984,	val_loss: 0.0821
3:	[1s / 5s],		train_loss: 0.3965,	val_loss: 0.0723
4:	[1s / 6s],		train_loss: 0.3833,	val_loss: 0.0728
5:	[1s / 8s],		train_loss: 0.3557,	val_loss: 0.0676
6:	[3s / 12s],		train_loss: 0.3368,	val_loss: 0.0642
7:	[1s / 13s],		train_loss: 0.3466,	val_loss: 0.0571
8:	[1s / 15s],		train_loss: 0.3259,	val_loss: 0.0536


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards saved to /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all_hazard.pkl.
Training and saving completed for all cross-validation splits.
All models have been trained and saved successfully.


### 4. Load models amd hazards

In [20]:
def load_model(model, model_config, model_path, baseline_hazard_path):
    """
    Load model weights and baseline hazard data.

    Parameters:
    - create_model_func: Function to create the model architecture (e.g., create_neural_network).
    - model_path: Path to load the model weights (.pt file).
    - baseline_hazard_path: Path to load the baseline hazards (.pkl file).

    Returns:
    - model: The loaded model with weights and baseline hazards.
    """
    
    # Load model weights
    model.load_model_weights(model_path)
    
    # Load baseline hazards and assign to model
    if model_config['model'] == 'deepsurv':
        baseline_hazard = pd.read_pickle(baseline_hazard_path)
        model.baseline_hazards_ = baseline_hazard
        model.baseline_cumulative_hazards_ = baseline_hazard.cumsum()
    
    print(f"Model and baseline hazards loaded from {model_path} and {baseline_hazard_path}.")
    return model

# model_ls = ['deepsurv_ann_clustering_1', 'deepsurv_ann_smoteenn_1', 'deepsurv_ann_smotetomek_1',
#             'deepsurv_ann_clustering_2', 'deepsurv_ann_smoteenn_2', 'deepsurv_ann_smotetomek_2',
#             'deepsurv_lstm_clustering_1', 'deepsurv_lstm_nearmiss', 'deepsurv_lstm_clustering_2', 'deepsurv_lstm_nearmiss_2',
#             'deephit_ann_clustering_all', 'deephit_ann_nearmiss2_all', 'deephit_lstm_clustering_all', 'deephit_lstm_nearmiss1_all']
# model_path = '/mnt/d/PYDataScience/g3_regress/code/models/'

In [24]:
# Dictionary to store loaded models
loaded_models = {}

for model_name in model_ls:
    # Retrieve configuration by dynamically constructing the variable name
    config_var_name = model_name + "_config"
    model_config = globals().get(config_var_name)
    
    if model_config is None:
        print(f"Configuration for {config_var_name} not found.")
        continue

    model_weights_path = f'{model_path}{model_name}.pt'
    model_hazard_path = f'{model_path}{model_name}_hazard.pkl'
    
    # Define the model creation function as a lambda to pass the config
    create_model_func = lambda: create_neural_network(
        config=model_config,
        num_risk=len(X_train_transformed[EVENT_COL].unique()) - 1,
        num_time_bins=len(TIME_GRID)
    )
    model = create_model_func()
    
    # Load the model and store it in the dictionary
    loaded_models[model_name] = load_model(model, model_config, model_weights_path, model_hazard_path)
    print(f'Loaded model {model_name}')

Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_1_hazard.pkl.
Loaded model deepsurv_ann_clustering_1
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_1_hazard.pkl.
Loaded model deepsurv_ann_smoteenn_1


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_1_hazard.pkl.
Loaded model deepsurv_ann_smotetomek_1
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_clustering_2_hazard.pkl.
Loaded model deepsurv_ann_clustering_2


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smoteenn_2_hazard.pkl.
Loaded model deepsurv_ann_smoteenn_2
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_ann_smotetomek_2_hazard.pkl.
Loaded model deepsurv_ann_smotetomek_2


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_1_hazard.pkl.
Loaded model deepsurv_lstm_clustering_1
Configuration for deepsurv_lstm_nearmiss_config not found.
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_clustering_2_hazard.pkl.
Loaded model deepsurv_lstm_clustering_2


  self.net.load_state_dict(torch.load(path, **kwargs))
  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2.pt and /mnt/d/PYDataScience/g3_regress/code/models/deepsurv_lstm_nearmiss_2_hazard.pkl.
Loaded model deepsurv_lstm_nearmiss_2
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_clustering_all_hazard.pkl.
Loaded model deephit_ann_clustering_all


  self.net.load_state_dict(torch.load(path, **kwargs))


Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_ann_nearmiss2_all_hazard.pkl.
Loaded model deephit_ann_nearmiss2_all
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_clustering_all_hazard.pkl.
Loaded model deephit_lstm_clustering_all
Model and baseline hazards loaded from /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all.pt and /mnt/d/PYDataScience/g3_regress/code/models/deephit_lstm_nearmiss1_all_hazard.pkl.
Loaded model deephit_lstm_nearmiss1_all


### 5. Prepare test dataset and make prediction on the 

In [21]:
def predict_neural_network(model, config, X_test, duration_col, event_col, cluster_col, time_grid=None):
    """
    Function to train a given neural network using the provided datasets.

    Args:
        net (torch.nn.Module): Neural network to be trained.
        config (dict): Configuration dictionary containing model hyperparameters.
        X_train (pd.DataFrame): Training dataset with features.
        X_val (pd.DataFrame): Validation dataset with features.
        duration_col (str): Column representing event durations.
        event_col (str): Column representing event occurrences.
        cluster_col (str): Column for grouping during cross-validation.
        callbacks (list): List of callbacks for training.
        time_grid (np.array, optional): Time grid for evaluation if required. Defaults to None.

    Returns:
        model: Trained PyCox model.
        logs: Training logs.
    """
    gc.collect()
    torch.cuda.empty_cache()
    # Train the model
    if config['model'] == 'deepsurv':
        print('Initiate testing of deepsurv neural network')
        X_test = df_event_focus(X_test, event_col, config['endpoint'])
        if config['net'] == 'ann':
            print('model structure: ANN')
            X_test_processed, y_test = preprocess_data(X_test, config['features'], duration_col, event_col)
            surv = model.predict_surv_df(X_test_processed, batch_size=512)
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            X_test_processed, y_test = prepare_validation_data(X_test, config['features'], duration_col, event_col, config, cluster_col, config['model'], time_grid)
            X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32)
            y_test_tensor = (torch.tensor(y_test[0], dtype=torch.float32), torch.tensor(y_test[1], dtype=torch.float32))
            surv = model.predict_surv_df(X_test_tensor, batch_size=512)
    elif config['model'] == 'deephit':
        print('Initiate testing of deephit neural network')
        if config['net'] == 'ann':
            print('model structure: ANN')
            X_test_processed, y_test = preprocess_data(X_test, config['features'], duration_col, event_col, time_grid, discretize=True)
            surv = model.predict_cif(X_test_processed, batch_size=512)
            print('prediction complete, please note that prediction of deephit models are CIF.')
        elif config['net'] == 'lstm':
            print('model structure: LSTM')
            X_test_processed, y_test = prepare_validation_data(X_test, config['features'], duration_col, event_col, config, cluster_col, config['model'], time_grid)
            surv = model.predict_cif(X_test_processed, batch_size=512)
            print('prediction complete, please note that prediction of deephit models are CIF.')

    # Free memory after training
    gc.collect()
    torch.cuda.empty_cache()

    return surv, y_test

In [22]:

def align_to_time_grid(surv, time_grid):
    """
    Align the survival DataFrame to the closest indices of the time grid.

    Parameters:
        surv (pd.DataFrame): Survival probabilities DataFrame.
        time_grid (np.array): Array of target time points to align.

    Returns:
        aligned_surv (pd.DataFrame): Aligned survival probabilities.
    """
    # Convert the DataFrame's index to a NumPy array for fast computation
    surv_times = np.array(surv.index)
    
    # Find the closest time in the survival DataFrame for each time in the grid
    closest_indices = [np.argmin(np.abs(surv_times - t)) for t in time_grid]
    
    # Extract the rows corresponding to the closest times
    aligned_surv = surv.iloc[closest_indices].copy()
    
    # Reindex the DataFrame to match the time grid
    aligned_surv.index = range(len(time_grid))  # Standardize indices to 0, 1, 2, ...
    return aligned_surv

In [33]:
gc.collect()
surv, y_test = predict_neural_network(model=loaded_models['deepsurv_ann_smoteenn_1'], config=deepsurv_ann_smoteenn_1_config, X_test=X_train_transformed_2, duration_col=DURATION_COL,
                    event_col=EVENT_COL, cluster_col=CLUSTER_COL, time_grid=TIME_GRID)
surv = align_to_time_grid(surv, TIME_GRID)
gc.collect()

2024-11-11 00:59:25,848 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiate testing of deepsurv neural network
model structure: ANN


KeyboardInterrupt: 

In [43]:
gc.collect()
surv, y_test = predict_neural_network(model=loaded_models['deephit_ann_nearmiss2_all'], config=deephit_ann_nearmiss2_all_config, X_test=X_train_transformed_2, duration_col=DURATION_COL,
                    event_col=EVENT_COL, cluster_col=CLUSTER_COL, time_grid=TIME_GRID)
surv1 = pd.DataFrame(surv[0], index=loaded_models['deephit_ann_nearmiss2_all'].duration_index)
ev = EvalSurv(1- surv1, y_test[0], y_test[1] == 1, censor_surv='km')
display(ev.concordance_td())
gc.collect()

Initiate testing of deephit neural network
model structure: ANN
prediction complete, please note that prediction of deephit models are CIF.


0.7593451986384061

0

In [64]:
# model_ls = ['deepsurv_ann_clustering_1', 'deepsurv_ann_smoteenn_1', 'deepsurv_ann_smotetomek_1',
#             'deepsurv_ann_clustering_2', 'deepsurv_ann_smoteenn_2', 'deepsurv_ann_smotetomek_2',
#             'deepsurv_lstm_clustering_1', 'deepsurv_lstm_nearmiss', 'deepsurv_lstm_clustering_2', 'deepsurv_lstm_nearmiss_2',
            # 'deephit_ann_clustering_all', 'deephit_ann_nearmiss2_all', 'deephit_lstm_clustering_all', 'deephit_lstm_nearmiss1_all'
            # ]
gc.collect()
torch.cuda.empty_cache()
model_predictions = {}
for model_name in model_ls:
    # Retrieve configuration by dynamically constructing the variable name
    config_var_name = model_name + "_config"
    model_config = globals().get(config_var_name)
    if model_config is None:
        print(f"Configuration for {config_var_name} not found.")
        continue
    try:
        print(f"Initiating prediction for model: {model_name}")
        
        # Retrieve the loaded model
        model = loaded_models.get(model_name)
        if model is None:
            print(f"Model {model_name} is not loaded.")
            continue
        
        # Predict using the loaded model and configuration
        surv, y_test = predict_neural_network(
            model=model,
            config=model_config,
            X_test=X_train_transformed_2,
            duration_col=DURATION_COL,
            event_col=EVENT_COL,
            cluster_col=CLUSTER_COL,
            time_grid=TIME_GRID
        )
        
        if model_config['model'] == 'deepsurv':
            surv = align_to_time_grid(surv, TIME_GRID).values  # 2D array
            
            # Structure key dynamically
            key = f"deepsurv_{model_config['net']}_{model_config['balance_method']}"
            
            # Initialize if not exists
            if key not in model_predictions:
                model_predictions[key] = np.zeros((2, *surv.shape), dtype=np.float32)
            
            # Store predictions for the corresponding endpoint
            if model_config['endpoint'] == 1:
                model_predictions[key][0] = 1 - surv
            elif model_config['endpoint'] == 2:
                model_predictions[key][1] = 1 - surv
        elif model_config['model'] == 'deephit':
            surv = np.array(surv)  # Convert to numpy array
            # Structure key dynamically
            key = f"deephit_{model_config['net']}_{model_config['balance_method']}"
            model_predictions[key] = surv
        
        print(f"Prediction completed for {model_name}.")
    
    except Exception as e:
        print(f"Error during prediction for {model_name}: {e}")

2024-11-11 00:13:58,107 - INFO - Event column 'endpoint' updated with focus on event value 1.


Initiating prediction for model: deepsurv_ann_clustering_1
Initiate testing of deepsurv neural network
model structure: ANN


2024-11-11 00:14:01,915 - INFO - Event column 'endpoint' updated with focus on event value 1.


Prediction completed for deepsurv_ann_clustering_1.
Initiating prediction for model: deepsurv_ann_smoteenn_1
Initiate testing of deepsurv neural network
model structure: ANN


: 

In [62]:
display(model_predictions['deepsurv_lstm_clustering'][0].shape)
display(model_predictions['deephit_lstm_clustering'][0].shape)

(6, 40513)

(6, 316242)