## Load libraries

In [None]:
%load_ext autoreload
%autoreload 2
import gc
import numpy as np
import os
import pickle
import random
import warnings
from math import sqrt
from datetime import datetime
from tqdm import tqdm
import ml_insights as mli

# Data processing
import pandas as pd

# Machine learning
from sklearn.metrics import (
    average_precision_score, roc_curve, 
    roc_auc_score, accuracy_score, f1_score
)
from sklearn.model_selection import GroupKFold
from sklearn.utils import shuffle
from sklearn.calibration import IsotonicRegression
from sklearn.metrics import brier_score_loss
from sklearn.linear_model import LinearRegression
from sklearn.metrics import confusion_matrix
from scipy.stats import chi2

# Deep learning
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau

# XGBoost
import xgboost as xgb

# Visualization
import shap
import optuna

# Custom imports
from model_code import *
from model.blocks import FinalModel
from team_code import *
from helper_code import *
from plot_model import *
from delong import *

# Settings
# Seed NumPy's global RNG for this session.
np.random.seed(0)
# Silence every Python warning for the rest of the notebook.
warnings.filterwarnings("ignore")
# Use the first CUDA device when available, otherwise fall back to the CPU.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Load SHAP's JavaScript so its interactive plots can render in the notebook.
shap.initjs()

'''
# Imports for ICD 10 code extraction
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.preprocessing.text import Tokenizer
import json
'''

In [2]:
# Modify CustomCNN to take dropout_rate as an argument
def collate_fn(batch):
    """Collate dataset samples into batched float32 tensors.

    Samples are read positionally: item 0 is the model input, item 1 the
    target vector, item 3 the age and item 4 the gender (item 2 is ignored).

    Returns:
        Tuple ``(inputs, targets, age, gender)``. ``inputs`` and ``targets``
        are stacked along a new leading batch axis; ``age`` and ``gender``
        hold one single-element row per sample.
    """
    signals, labels, ages, genders = [], [], [], []
    for sample in batch:
        signals.append(torch.tensor(sample[0], dtype=torch.float32))
        labels.append(torch.tensor(sample[1], dtype=torch.float32))
        ages.append([sample[3]])
        genders.append([sample[4]])

    inputs = torch.stack(signals)
    targets = torch.stack(labels)
    age = torch.tensor(ages, dtype=torch.float32)
    gender = torch.tensor(genders, dtype=torch.float32)
    return inputs, targets, age, gender

## Youden index
def youden(y_true, y_score):
    """Return the ROC threshold that maximises Youden's J (TPR - FPR)."""
    false_pos_rate, true_pos_rate, cutoffs = roc_curve(y_true, y_score)
    youden_j = true_pos_rate - false_pos_rate
    return cutoffs[np.argmax(youden_j)]

def roc_auc_ci(y_true, y_score, positive=1):
    """Return an approximate 95% confidence interval for the AUROC.

    The standard error uses the closed-form Hanley & McNeil expression
    (``Q1 = AUC / (2 - AUC)``, ``Q2 = 2 * AUC**2 / (1 + AUC)``) and the
    interval is ``AUC +/- 1.96 * SE``, clipped to ``[0, 1]``.

    Args:
        y_true: Ground-truth labels. Lists, 1-D arrays and column vectors are
            accepted; they are flattened before the classes are counted.
        y_score: Predicted scores, passed unchanged to ``roc_auc_score``.
        positive: Label value counted as the positive class. It only affects
            the class counts; the AUROC itself comes from ``roc_auc_score``.

    Returns:
        ``[lower, upper]`` bounds of the interval.
    """
    AUC = roc_auc_score(y_true, y_score)

    # Flatten first so that lists and (n, 1) arrays give plain integer counts
    # instead of raising or producing size-1 arrays.
    labels = np.asarray(y_true).ravel()
    N1 = int(np.count_nonzero(labels == positive))
    N2 = int(labels.size - N1)

    Q1 = AUC / (2 - AUC)
    Q2 = 2*AUC**2 / (1 + AUC)
    SE_AUC = sqrt((AUC*(1 - AUC) + (N1 - 1)*(Q1 - AUC**2) + (N2 - 1)*(Q2 - AUC**2)) / (N1*N2))

    lower = max(AUC - 1.96*SE_AUC, 0)
    upper = min(AUC + 1.96*SE_AUC, 1)
    return [lower, upper]

# Function to train the model
def train_model(train_loader, model, criterion, optimizer, scheduler, device=DEVICE):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        train_loader: Iterable yielding ``(inputs, labels, age, gender)``.
        model: Network called as ``model(inputs, age, gender)``.
        criterion: Loss applied to the model output and column 1 of ``labels``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Accepted for interface compatibility; it is not stepped
            inside this function.
        device: Device the batch tensors are moved to.

    Returns:
        Sum of the batch losses divided by the number of batches.
    """
    model.train()
    n_batch = len(train_loader)
    loss_total = 0.0

    pbar = tqdm(enumerate(train_loader), total=n_batch)
    for i_batch, batch in pbar:
        inputs, labels, age, gender = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        outputs = model(inputs, age, gender)
        # Only column 1 of the label matrix is used as the binary target.
        target = labels[:, 1].unsqueeze(1)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        loss_total += loss.item()
        pbar.set_description(f'Training ... {1 + i_batch}/{n_batch}')

    return loss_total / n_batch

def evaluate_model(valid_loader, model, device=DEVICE):
    """Run inference over a loader and collect labels, probabilities and hidden states.

    The model is switched to eval mode and ``model.return_hidden`` is set to
    ``True`` for the duration of the pass so that each forward call returns
    ``(logits, hidden)``. The flag is set back to ``False`` afterwards, even
    if inference raises.

    Args:
        valid_loader: Iterable yielding ``(inputs, labels, age, gender)``.
        model: Network called as ``model(inputs, age, gender)``.
        device: Device the model inputs are moved to.

    Returns:
        Tuple ``(y_true, y_pred, hidden_states)`` of NumPy arrays stacked over
        batches. ``y_true`` (column 1 of the labels) and ``y_pred`` (sigmoid
        of the logits) are column vectors. All three are ``None`` when the
        loader yields no batches.
    """
    model.eval()
    model.return_hidden = True  # Enable hidden state return

    true_chunks, pred_chunks, hidden_chunks = [], [], []
    n_batch = len(valid_loader)
    pbar = tqdm(enumerate(valid_loader), total=n_batch)

    try:
        with torch.no_grad():
            for i_batch, (X_batch, y_true_batch, age_batch, gender_batch) in pbar:
                X_batch = X_batch.to(device)
                age_batch, gender_batch = age_batch.to(device), gender_batch.to(device)

                logits, hidden_batch = model(X_batch, age_batch, gender_batch)
                probs = torch.sigmoid(logits)

                # Labels are only collected, so they never need to visit the device.
                pred_chunks.append(probs.cpu().numpy().reshape((-1, 1)))
                true_chunks.append(y_true_batch[:, 1].unsqueeze(1).cpu().numpy().reshape((-1, 1)))
                hidden_chunks.append(hidden_batch.cpu().numpy())

                pbar.set_description(f'Evaluating ... {1 + i_batch}/{n_batch}')
    finally:
        # Always restore single-output mode so later training calls keep working.
        model.return_hidden = False  # Reset hidden state return

    if not pred_chunks:
        return None, None, None

    # One concatenation per output instead of re-copying the arrays every batch.
    y_true = np.concatenate(true_chunks, axis=0)
    y_pred = np.concatenate(pred_chunks, axis=0)
    hidden_states = np.concatenate(hidden_chunks, axis=0)
    return y_true, y_pred, hidden_states

# Random search
def loguniform(low, high, size=None):
    """Draw samples whose logarithm is uniform between ``log(low)`` and ``log(high)``.

    Uses NumPy's global random state; ``size`` is forwarded to
    ``np.random.uniform``.
    """
    log_lo, log_hi = np.log(low), np.log(high)
    log_draws = np.random.uniform(log_lo, log_hi, size)
    return np.exp(log_draws)

def calc_unreliability(y_true, y_pred):
    """Compute an unreliability index ``U`` and its p-value for probabilistic predictions.

    The Bernoulli log-likelihood of the raw predictions is compared with that
    of predictions recalibrated by an ordinary least-squares line fitted on
    the probabilities. The likelihood-ratio statistic is referred to a
    chi-square distribution with 2 degrees of freedom.

    NOTE(review): the recalibration model is a linear regression on the
    probability scale, so ``U`` can come out negative; confirm this matches
    the intended definition of the index.

    Args:
        y_true: 1-D array of binary labels.
        y_pred: 1-D array of predicted probabilities.

    Returns:
        Tuple ``(U, p_value)`` where ``U`` is the statistic divided by the
        number of samples.
    """
    # Add small epsilon to avoid log(0)
    eps = 1e-15
    y_pred = np.clip(y_pred, eps, 1-eps)
    n_samples = len(y_true)

    # Log likelihood of uncalibrated predictions
    ll_uncal = np.sum(y_true * np.log(y_pred) + (1-y_true) * np.log(1-y_pred))

    # Fit calibration model
    cal_model = LinearRegression()
    cal_model.fit(y_pred.reshape(-1,1), y_true)
    y_cal = cal_model.predict(y_pred.reshape(-1,1))
    y_cal = np.clip(y_cal, eps, 1-eps)

    # Log likelihood of calibrated predictions
    ll_cal = np.sum(y_true * np.log(y_cal) + (1-y_true) * np.log(1-y_cal))

    # Likelihood-ratio statistic and the per-sample U statistic
    lr_stat = -2 * (ll_uncal - ll_cal)
    U = lr_stat / n_samples

    # Survival function instead of 1 - cdf: same quantity, but it does not
    # round to exactly 0 for large statistics.
    p_value = chi2.sf(lr_stat, df=2)

    return U, p_value

In [None]:
# Reproducibility
RANDOM_SEED = 98
def seed_everything(random_seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        random_seed: Integer seed applied to every generator.
    """
    random.seed(random_seed)
    np.random.seed(random_seed)
    # Without this, weight initialisation and any torch-side randomness stay
    # unseeded even though the function claims to seed "everything".
    torch.manual_seed(random_seed)
    # Exported for interpreters started from this process; the hash seed of
    # the already-running interpreter is fixed at startup.
    os.environ['PYTHONHASHSEED'] = str(random_seed)

seed_everything(RANDOM_SEED)
# ==================================================
# Global configuration for the random hyperparameter search below.
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
N_TRIAL                 = 50   # number of random-search trials
N_FOLD                  = 5    # number of cross-validation folds
N_EPOCH                 = 100  # maximum epochs per fold (early stopping may end sooner)

# Train hyperparams
BATCH_SIZE_LIST_GLOBAL  = [32,64,128]  # candidate batch sizes

# Bounds for the initial learning rate (sampled log-uniformly)
LR_INITIAL_LO           = 5e-4
LR_INITIAL_HI           = 5e-3

# Bounds for the LR scheduler settings (used as ReduceLROnPlateau patience / factor)
LR_STEP_SIZE_LO         = 2
LR_STEP_SIZE_HI         = 3
LR_STEP_GAMMA_LO        = 0.05
LR_STEP_GAMMA_HI        = 0.3

# Bounds for the early-stopping patience (in epochs)
EARLY_STOP_PATIENCE_LO  = 3
EARLY_STOP_PATIENCE_HI  = 4

# Architecture hyperparams
# Candidate values forwarded to the FinalModel constructor arguments of the same name.
BLOCK_SIZE_GLOBAL = [12,16,24]
BLOCK_DEPTH_GLOBAL = [3,4]
BLOCK_LAYERS_GLOBAL = [3,4]
HIDDEN_SIZE_GLOBAL = [32,64,128]
KERNEL_NUM_GLOBAL = [5,7,9]
# ==================================================

In [5]:
def _save_fold_models(fold_models, score, model_name):
    """Write the score file and one ``.ubj`` model file per fold under ``model/<model_name>/``."""
    out_dir = f'model/{model_name}'
    os.makedirs(out_dir, exist_ok=True)
    with open(f'{out_dir}/best_model_score_{model_name}.txt', 'w') as f:
        f.write(str(score))
    for idx, fold_model in enumerate(fold_models):
        fold_model.save_model(f'{out_dir}/best_model_fold_{idx}_{model_name}.ubj')


def objective(trial, df_train, df_test, model_name, cv=N_FOLD):
    """Optuna objective: train one XGBoost classifier per fold and score the fold ensemble.

    ``df_train`` is split by patient (column ``hid``) into ``cv`` folds. Each
    fold model is fitted with early stopping on its validation part and then
    predicts ``df_test``. The returned value is the AUROC of the fold-averaged
    predictions on ``df_test``.

    NOTE(review): the score that drives the search, and the "best so far"
    model saving, is computed on ``df_test`` - confirm that this set is not
    also reported as the final held-out test set.

    Args:
        trial: Optuna trial used to sample hyperparameters and report pruning.
        df_train: Frame with feature columns plus ``hid`` and ``label``.
        df_test: Frame with the same columns, used for scoring.
        model_name: Name used for the ``model/<model_name>/`` output directory
            and file names.
        cv: Number of folds.

    Returns:
        AUROC of the averaged fold predictions on ``df_test``.
    """
    # ``step`` / ``log`` are passed by keyword and through suggest_float
    # instead of the deprecated positional-step and
    # suggest_discrete_uniform / suggest_loguniform forms.
    param_grid = {
        'objective': 'binary:logistic',
        'tree_method': 'gpu_hist',
        'predictor': 'gpu_predictor',
        'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 70, step=3),
        'tweedie_variance_power': trial.suggest_float('tweedie_variance_power', 1.0, 2.0, step=0.1),
        'max_depth': trial.suggest_int('max_depth', 5, 10), # Extremely prone to overfitting!
        'n_estimators': trial.suggest_int('n_estimators', 400, 2000, step=200), # Extremely prone to overfitting!
        'eta': trial.suggest_float('eta', 0.007, 0.013), # Most important parameter.
        'subsample': trial.suggest_float('subsample', 0.2, 0.9, step=0.1),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 0.9, step=0.1),
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.2, 0.9, step=0.1),
        'min_child_weight': trial.suggest_float('min_child_weight', 1e-4, 1e4, log=True), # I've had trouble with LB score until tuning this.
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 1e4, log=True), # L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 1e4, log=True), # L1 regularization
        #'max_delta_step': trial.suggest_float('max_delta_step', 0, 1e1),
        'gamma': trial.suggest_float('gamma', 1e-4, 1e4, log=True),
        'random_state': RANDOM_SEED,
        'enable_categorical': True
    }

    # Split by patient: every unique hid is its own group, so no patient
    # appears in both the training and validation part of a fold.
    h = df_train['hid'].unique()
    groups = np.arange(len(h))
    h_shuffled, groups_shuffled = shuffle(h, groups, random_state=RANDOM_SEED)
    group_kf = GroupKFold(n_splits=cv)

    non_feature_cols = ['hid', 'label']
    X_test, y_test = df_test.drop(columns=non_feature_cols), df_test['label'].values
    best_models = []
    y_pred_proba = []

    for train_h, val_h in group_kf.split(h_shuffled, groups=groups_shuffled):
        train_mask = df_train['hid'].isin(h_shuffled[train_h])
        valid_mask = df_train['hid'].isin(h_shuffled[val_h])

        X_train = df_train[train_mask].drop(columns=non_feature_cols)
        X_valid = df_train[valid_mask].drop(columns=non_feature_cols)
        y_train = df_train.loc[train_mask, 'label'].values
        y_valid = df_train.loc[valid_mask, 'label'].values

        model = xgb.XGBClassifier(**param_grid,
                                  eval_metric='auc', early_stopping_rounds=20,callbacks=[optuna.integration.XGBoostPruningCallback(trial, "validation_0-auc")])
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_valid, y_valid)]
        )
        best_models.append(model)
        y_pred_proba.append(model.predict_proba(X_test)[:, 1])

    y_pred_proba_mean = np.mean(y_pred_proba, axis=0)
    mean_cv_score = roc_auc_score(y_test, y_pred_proba_mean)

    # Save models if this is the best trial so far. ``best_value`` raises
    # ValueError while the study has no completed trial yet (first trial).
    try:
        best_so_far = trial.study.best_value
    except ValueError:
        best_so_far = None
    if best_so_far is None or mean_cv_score > best_so_far:
        _save_fold_models(best_models, mean_cv_score, model_name)

    return mean_cv_score

def get_metrics(y_true, y_pred_proba, threshold):
    """Summarise binary-classification performance at a probability threshold.

    Predictions are positive when ``y_pred_proba >= threshold``. The result
    holds the point estimates, 95% intervals and the confusion-matrix counts.
    Accuracy is expressed in percent; every other rate is a fraction. The
    AUROC interval comes from ``roc_auc_ci``; all other intervals are Wilson
    score intervals.

    Returns:
        Dict keyed by metric name (see the literal below for the exact keys).
    """
    def wilson_ci(p, n):
        # Wilson score interval for a proportion p observed over n cases (95%).
        z = 1.96
        scale = 1 + z**2/n
        midpoint = p + z**2/(2*n)
        half_width = z * np.sqrt(p*(1-p)/n + z**2/(4*n**2))
        return (midpoint - half_width)/scale, (midpoint + half_width)/scale

    y_pred = (y_pred_proba >= threshold).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()

    # Denominators of each rate, reused as the sample size of its interval.
    n_actual_pos = tp + fn
    n_actual_neg = tn + fp
    n_pred_pos = tp + fp
    n_samples = len(y_true)

    sensitivity = tp / n_actual_pos
    specificity = tn / n_actual_neg
    precision = tp / n_pred_pos
    f1 = f1_score(y_true, y_pred)
    accuracy = (tp + tn) / (tp + tn + fp + fn) * 100
    auroc = roc_auc_score(y_true, y_pred_proba)
    auprc = average_precision_score(y_true, y_pred_proba)

    metrics = {'Threshold': threshold}
    metrics['F1-Score'] = f1
    metrics['F1-Score CI'] = wilson_ci(f1, n_samples)
    metrics['Sensitivity'] = sensitivity
    metrics['Sensitivity CI'] = wilson_ci(sensitivity, n_actual_pos)
    metrics['Specificity'] = specificity
    metrics['Specificity CI'] = wilson_ci(specificity, n_actual_neg)
    metrics['Precision'] = precision
    metrics['Precision CI'] = wilson_ci(precision, n_pred_pos)
    metrics['AUROC'] = auroc
    metrics['AUROC CI'] = roc_auc_ci(y_true, y_pred_proba)
    metrics['AUPRC'] = auprc
    metrics['AUPRC CI'] = wilson_ci(auprc, n_samples)
    metrics['TN'] = tn
    metrics['FP'] = fp
    metrics['FN'] = fn
    metrics['TP'] = tp
    metrics['Accuracy'] = accuracy
    return metrics

In [None]:
'''
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, droprate):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.droprate = droprate
        self.ff_dim = ff_dim
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential([layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(droprate)
        self.dropout2 = layers.Dropout(droprate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        return {'embed_dim': self.embed_dim, 'num_heads': self.num_heads, 'ff_dim':self.ff_dim, 'droprate':self.droprate}

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        return {'maxlen': self.maxlen, 'vocab_size': self.vocab_size, 'embed_dim': self.embed_dim}
'''

## Train Test split

In [None]:
# Try different random seeds to find one with balanced distributions
# Trackers for the best seed seen so far and its worst-case KL divergence.
best_seed = None
min_kl_div = float('inf')
# NOTE(review): `threshold` is not read anywhere in this cell - confirm whether it is still needed.
threshold = 0.1

# One row per recording; 'hid' identifies the patient and 'label' the class.
df = pd.read_csv('data_labels_M_N.csv')
# Splits are made over patients so that a patient never spans two sets.
unique_patient_ids = df['hid'].unique()

# Function to calculate KL divergence between distributions
def kl_divergence(p, q):
    """Return the KL divergence ``sum(p * log(p / q))`` of two aligned probability arrays."""
    ratio = p / q
    return np.sum(p * np.log(ratio))

def get_label_dist(patient_ids, df):
    """Return the normalised label frequencies of the rows belonging to ``patient_ids``.

    Args:
        patient_ids: Patient identifiers matched against ``df['hid']``.
        df: Frame with at least the columns ``hid`` and ``label``.

    Returns:
        Series indexed by label value holding each label's share of the
        selected rows (as produced by ``value_counts(normalize=True)``).
    """
    subset = df[df['hid'].isin(patient_ids)]
    return subset['label'].value_counts(normalize=True)

# Get original distribution as reference
orig_dist = df['label'].value_counts(normalize=True)
reference_dist = orig_dist.values


def _aligned_label_dist(patient_ids):
    """Label shares for ``patient_ids`` in the label order of ``orig_dist`` (absent labels -> 0)."""
    # value_counts() orders by frequency, so a subset can list the labels in a
    # different order, or omit one. Reindexing keeps every vector comparable
    # element-wise with the reference.
    return get_label_dist(patient_ids, df).reindex(orig_dist.index, fill_value=0.0).values


best_cross_val_sets = None
best_test_patients = None

# Try seeds from 0 to 999
for seed in range(1000):
    random.seed(seed)
    np.random.seed(seed)

    # Shuffle and split patients: 90% train/validation pool, 10% test
    shuffled_ids = unique_patient_ids.copy()
    random.shuffle(shuffled_ids)
    num_tval_patients = int(len(shuffled_ids) * 0.90)
    tval_patients = shuffled_ids[:num_tval_patients]
    test_patients = shuffled_ids[num_tval_patients:]

    # Split into cross validation folds; the last fold absorbs the remainder
    num_groups = N_FOLD
    group_size = len(tval_patients) // num_groups
    cross_val_sets = []

    for i in range(num_groups):
        start_idx = i * group_size
        end_idx = (i + 1) * group_size if i != num_groups - 1 else len(tval_patients)
        valid_patients = tval_patients[start_idx:end_idx]
        train_patients = np.concatenate([tval_patients[:start_idx], tval_patients[end_idx:]])
        cross_val_sets.append((train_patients, valid_patients))

    # Calculate max KL divergence across all splits. A subset that lacks a
    # label yields an infinite divergence and is therefore never selected.
    max_kl = 0
    for train_patients, valid_patients in cross_val_sets:
        max_kl = max(max_kl, kl_divergence(reference_dist, _aligned_label_dist(train_patients)))
        max_kl = max(max_kl, kl_divergence(reference_dist, _aligned_label_dist(valid_patients)))

    max_kl = max(max_kl, kl_divergence(reference_dist, _aligned_label_dist(test_patients)))

    # Update best seed if this distribution is more balanced
    if max_kl < min_kl_div:
        min_kl_div = max_kl
        best_seed = seed
        best_cross_val_sets = cross_val_sets
        best_test_patients = test_patients

if best_cross_val_sets is None:
    # Fail here with a clear message instead of a NameError further down.
    raise RuntimeError('No seed produced a split in which every subset contains every label.')

# Use the best seed found
print(f"Best random seed found: {best_seed} with max KL divergence: {min_kl_div:.4f}")
# Re-seed the global RNGs with the winning seed for the code that follows.
random.seed(best_seed)
np.random.seed(best_seed)

# Save the splits using best seed
# One pair of pickle files per fold, holding arrays of patient ids.
for i, (train_patients, valid_patients) in enumerate(best_cross_val_sets):
    with open(f'train_patients_cv_{i}.pkl', 'wb') as f:
        pickle.dump(train_patients, f)
    with open(f'valid_patients_cv_{i}.pkl', 'wb') as f:
        pickle.dump(valid_patients, f)

# The held-out test patients are shared by every fold.
with open('test_patients.pkl', 'wb') as f:
    pickle.dump(best_test_patients, f)

# Print distributions for verification
print("\nFinal distributions:")
print("Original distribution:")
print(orig_dist)

print("\nCross validation sets distributions:")
for i, (train_patients, valid_patients) in enumerate(best_cross_val_sets):
    print(f"\nFold {i}:")
    print("Train distribution:")
    print(get_label_dist(train_patients, df))
    print("Validation distribution:")
    print(get_label_dist(valid_patients, df))

print("\nTest set distribution:")
print(get_label_dist(best_test_patients, df))

## Hyperparameter tuning

In [None]:
df = pd.read_csv('data_labels_M_N.csv')

# Load the held-out test patient ids written by the split cell
with open('test_patients.pkl', 'rb') as f:
    test_patients = pickle.load(f)

# Build the test dataset from the files that belong to the test patients.
test= dataset(header_files=df.loc[df['hid'].isin(test_patients),'filename'].to_list())
test.num_leads = 12
# NOTE(review): `sample` is True for the training set and False here and for validation - presumably it toggles random sampling/augmentation; confirm in `dataset`.
test.sample = False
test.files.reset_index(drop=True, inplace=True)

In [None]:
DIAGNOSIS = 'MACCE'
# Make study directory: model/<yymmdd>_lead12_<DIAGNOSIS>_<NN>, where NN is
# one more than the highest existing study number with the same prefix.
study_date = datetime.now().strftime('%y%m%d')
study_folder_prefix = f'{study_date}_lead12_{DIAGNOSIS}'
study_num = 0
# Make sure the root exists so that listing it works on a fresh checkout.
os.makedirs('model', exist_ok=True)
for folder in sorted(os.listdir('model'), reverse=True):
    if folder.startswith(study_folder_prefix):
        ch_idx = len(study_folder_prefix) + 1
        study_num = int(folder[ch_idx:ch_idx + 2]) + 1
        break
STUDY_DIR = f'model/{study_folder_prefix}_{study_num:02d}'
os.makedirs(STUDY_DIR, exist_ok=True)
# Save configuration (Hyperparam search space)
search_space_train = {
    'BATCH_SIZE_LIST'       : BATCH_SIZE_LIST_GLOBAL,
    'LR_INITIAL_LO'         : LR_INITIAL_LO,
    'LR_INITIAL_HI'         : LR_INITIAL_HI,
    'LR_STEP_SIZE_LO'       : LR_STEP_SIZE_LO,
    'LR_STEP_SIZE_HI'       : LR_STEP_SIZE_HI,
    'LR_STEP_GAMMA_LO'      : LR_STEP_GAMMA_LO,
    'LR_STEP_GAMMA_HI'      : LR_STEP_GAMMA_HI,
    'EARLY_STOP_PATIENCE_LO'  : EARLY_STOP_PATIENCE_LO,
    'EARLY_STOP_PATIENCE_HI'  : EARLY_STOP_PATIENCE_HI
}
search_space_arch = {
    'BLOCK_SIZE'          : BLOCK_SIZE_GLOBAL,
    'BLOCK_DEPTH'        : BLOCK_DEPTH_GLOBAL,
    'BLOCK_LAYERS'       : BLOCK_LAYERS_GLOBAL,
    'HIDDEN_SIZE'        : HIDDEN_SIZE_GLOBAL,
    'KERNEL_NUM'         : KERNEL_NUM_GLOBAL,
}

SEARCH_SPACE_ARCH_FILENAME  = f'{STUDY_DIR}/search_space_arch.pkl'
SEARCH_SPACE_TRAIN_FILENAME = f'{STUDY_DIR}/search_space_train.pkl'

# Context managers close (and flush) the files deterministically.
with open(SEARCH_SPACE_ARCH_FILENAME, 'wb') as f:
    pickle.dump(search_space_arch, f)
with open(SEARCH_SPACE_TRAIN_FILENAME, 'wb') as f:
    pickle.dump(search_space_train, f)

# Random search: each trial samples one hyperparameter set, trains one model
# per cross-validation fold from identical initial weights, and scores the
# fold ensemble on the test loader.
# NOTE(review): trials are ranked by test-set AUROC (the trial folder is
# renamed with it), so that number is not an unbiased final estimate.
for i_trial in range(N_TRIAL):
    # The search space is re-read from disk at the start of every trial.
    with open(SEARCH_SPACE_ARCH_FILENAME, 'rb') as f:
        search_space_arch = pickle.load(f)
    with open(SEARCH_SPACE_TRAIN_FILENAME, 'rb') as f:
        search_space_train = pickle.load(f)

    # Train hparams
    batch_size          = np.random.choice(search_space_train['BATCH_SIZE_LIST'], size=1)[0]
    lr_initial          = loguniform(low=search_space_train['LR_INITIAL_LO'], high=search_space_train['LR_INITIAL_HI'], size=1)[0]
    # np.random.randint excludes `high`; the +1 makes the *_HI bounds
    # reachable (without it a LO/HI pair one apart always yields LO).
    lr_step_size        = np.random.randint(low=search_space_train['LR_STEP_SIZE_LO'], high=search_space_train['LR_STEP_SIZE_HI'] + 1, size=1)[0]
    lr_step_gamma       = np.random.uniform(low=search_space_train['LR_STEP_GAMMA_LO'], high=search_space_train['LR_STEP_GAMMA_HI'], size=1)[0]
    early_stop_pat      = np.random.randint(low=search_space_train['EARLY_STOP_PATIENCE_LO'], high=search_space_train['EARLY_STOP_PATIENCE_HI'] + 1, size=1)[0]

    # Architecture hparams
    block_size              = np.random.choice(search_space_arch['BLOCK_SIZE'], size=1)[0]
    block_depth             = np.random.choice(search_space_arch['BLOCK_DEPTH'], size=1)[0]
    block_layers            = np.random.choice(search_space_arch['BLOCK_LAYERS'], size=1)[0]
    hidden_size             = np.random.choice(search_space_arch['HIDDEN_SIZE'], size=1)[0]
    kernel_num              = np.random.choice(search_space_arch['KERNEL_NUM'], size=1)[0]

    # Trial directory (the folder name encodes the sampled hyperparameters)
    trial_folder = ''
    trial_folder += f'batch={batch_size}_'
    trial_folder += f'lr={lr_initial:.5f}_step={lr_step_size}_gam={lr_step_gamma:.3f}_pat={early_stop_pat}_'
    trial_folder += f'block_size={block_size}_block_depth={block_depth}_hidden_size={hidden_size}_block_layers={block_layers}_kernel_num={kernel_num}'

    trial_dir = f'{STUDY_DIR}/{trial_folder}'
    os.makedirs(trial_dir, exist_ok=True)

    # Save configurations
    hparams_train = {
        'N_FOLD'            : N_FOLD,
        'N_EPOCH'           : N_EPOCH,
        'BATCH_SIZE'        : batch_size,
        'LR_INITIAL'        : lr_initial,
        'LR_STEP_SIZE'      : lr_step_size,
        'LR_STEP_GAMMA'     : lr_step_gamma,
        'EARLY_STOP_PAT'    : early_stop_pat,
    }
    hparams_arch = {
        'BLOCK_SIZE'        : block_size,
        'BLOCK_DEPTH'       : block_depth,
        'BLOCK_LAYERS'      : block_layers,
        'HIDDEN_SIZE'       : hidden_size,
        'KERNEL_NUM'        : kernel_num,
    }
    dataset_config = {
        'DIAGNOSIS'         : DIAGNOSIS,
    }

    print('='*100)
    print(f'{DIAGNOSIS} RANDOM SEARCH TRIAL {1 + i_trial}/{N_TRIAL}')
    print('='*100)
    print()

    HPARAMS_TRAIN_FILENAME  = f'{trial_dir}/hparams_train.pkl'
    HPARAMS_ARCH_FILENAME   = f'{trial_dir}/hparams_arch.pkl'
    DATASET_CONFIG_FILENAME = f'{trial_dir}/dataset_config.pkl'

    with open(HPARAMS_TRAIN_FILENAME, 'wb') as f:
        pickle.dump(hparams_train, f)
    with open(HPARAMS_ARCH_FILENAME, 'wb') as f:
        pickle.dump(hparams_arch, f)
    with open(DATASET_CONFIG_FILENAME, 'wb') as f:
        pickle.dump(dataset_config, f)

    # ========== #
    # File Paths #
    # ========== #
    TRIAL_DIR                   = trial_dir
    TRIAL_FOLDER                = TRIAL_DIR.split('/')[-1]
    STUDY_DIR                   = TRIAL_DIR[:-(len(TRIAL_FOLDER) + 1)]

    HPARAMS_ARCH_FILENAME       = f'{TRIAL_DIR}/hparams_arch.pkl'
    HPARAMS_TRAIN_FILENAME      = f'{TRIAL_DIR}/hparams_train.pkl'
    DATASET_CONFIG_FILENAME     = f'{TRIAL_DIR}/dataset_config.pkl'

    # ==================== #
    # Load Hyperparameters #
    # ==================== #
    # Values are read back from the files just written, so training uses
    # exactly what was persisted for this trial.

    # Architecture
    with open(HPARAMS_ARCH_FILENAME, 'rb') as f:
        hparams_arch = pickle.load(f)

    BLOCK_SIZE      = int(hparams_arch['BLOCK_SIZE'])
    BLOCK_DEPTH     = int(hparams_arch['BLOCK_DEPTH'])
    BLOCK_LAYERS    = int(hparams_arch['BLOCK_LAYERS'])
    HIDDEN_SIZE     = int(hparams_arch['HIDDEN_SIZE'])
    KERNEL_NUM      = int(hparams_arch['KERNEL_NUM'])
    # Train
    with open(HPARAMS_TRAIN_FILENAME, 'rb') as f:
        hparams_train = pickle.load(f)

    N_FOLD          = int(hparams_train['N_FOLD'])
    N_EPOCH         = int(hparams_train['N_EPOCH'])
    BATCH_SIZE      = int(hparams_train['BATCH_SIZE'])
    LR_INITIAL      = hparams_train['LR_INITIAL']
    LR_STEP_SIZE    = hparams_train['LR_STEP_SIZE']
    LR_STEP_GAMMA   = hparams_train['LR_STEP_GAMMA']
    EARLY_STOP_PAT  = hparams_train['EARLY_STOP_PAT']

    #N_WORKERS       = 8
    #PREFETCH_FACTOR = 4

    # Dataset configs
    with open(DATASET_CONFIG_FILENAME, 'rb') as f:
        dataset_config = pickle.load(f)

    DIAGNOSIS       = dataset_config['DIAGNOSIS']

    # =========== #
    # Train Model #
    # =========== #

    # Initialize model & save weights so every fold starts from the same state
    model = FinalModel(block_size =BLOCK_SIZE, block_depth =BLOCK_DEPTH, block_layers=BLOCK_LAYERS, hidden_size=HIDDEN_SIZE, kernel_num=KERNEL_NUM).to(DEVICE)
    torch.save(model.state_dict(), f'{TRIAL_DIR}/initial_weights.pth')

    # Create test data loader
    test_loader = DataLoader(test, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=False)
    y_pred_test_list = []

    # The label table is identical for every fold, so read it once per trial.
    df = pd.read_csv('data_labels_M_N.csv')

    # [Main] N_FOLD-fold cross validation
    # Create datasets & data loaders
    for i_fold in range(N_FOLD):
        # Load the train patient ids
        with open(f'train_patients_cv_{i_fold}.pkl', 'rb') as f:
            train_patients = pickle.load(f)
        # Load the validation patient ids
        with open(f'valid_patients_cv_{i_fold}.pkl', 'rb') as f:
            valid_patients = pickle.load(f)

        # Create the fold's datasets
        train = dataset(header_files=df.loc[df['hid'].isin(train_patients),'filename'].to_list())
        train.num_leads = 12
        train.sample = True

        valid= dataset(header_files=df.loc[df['hid'].isin(valid_patients),'filename'].to_list())
        valid.num_leads = 12
        valid.sample = False
        valid.files.reset_index(drop=True, inplace=True)

        train_loader = DataLoader(train, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=True)
        valid_loader = DataLoader(valid, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=False)

        # Reset the model to the shared initial weights for this fold
        model.load_state_dict(torch.load(f'{TRIAL_DIR}/initial_weights.pth', weights_only=True))
        weight_cache = f'{TRIAL_DIR}/weights_fold_{i_fold+1}.pth'

        criterion = nn.BCEWithLogitsLoss()
        optimizer = Adam(model.parameters(), lr=LR_INITIAL)
        # mode='max' because the scheduler is stepped with the validation AUROC
        scheduler = ReduceLROnPlateau(optimizer, patience=LR_STEP_SIZE, factor=LR_STEP_GAMMA, min_lr=1e-5, mode='max')

        best_val_auroc = 0.0
        early_stop_count = 0

        # Model train & validation
        for epoch in range(N_EPOCH):
            print(f'FOLD {i_fold+1} - EPOCH {1 + epoch}/{N_EPOCH}')
            train_loss = train_model(train_loader, model, criterion, optimizer, scheduler)
            y_true_valid, y_pred_valid,_ = evaluate_model(valid_loader, model)
            valid_loss = F.binary_cross_entropy(torch.FloatTensor(y_pred_valid), torch.FloatTensor(y_true_valid))
            valid_auroc = roc_auc_score(y_true_valid, y_pred_valid)
            print(f'train_loss: {train_loss:.4f} - val_loss: {valid_loss:.4f} - val_auroc: {valid_auroc:.4f}')

            scheduler.step(valid_auroc)
            # Update best model
            if valid_auroc > best_val_auroc:
                print(f'>> val_auroc increased from {best_val_auroc:.4f} to {valid_auroc:.4f}\n>> Saving weights to [{weight_cache}]')
                torch.save(model.state_dict(), weight_cache)

                best_val_auroc = valid_auroc
                early_stop_count = 0

            else:
                early_stop_count += 1

            # Early stopping
            if early_stop_count >= EARLY_STOP_PAT:
                break

            print()

        # Load best weights & inference on test set
        model.load_state_dict(torch.load(weight_cache, weights_only=True))
        y_true_test, y_pred_test, _ = evaluate_model(test_loader, model)
        test_auroc = roc_auc_score(y_true_test, y_pred_test)
        y_pred_test_list.append(y_pred_test)

        print()
        print(f'>> TEST AUROC = {test_auroc:.4f}')
        print()

    # ================ #
    # Ensemble Results #
    # ================ #

    # Average the per-fold test probabilities
    y_pred_test_ensemble = np.mean(np.array(y_pred_test_list), axis=0)
    test_auroc_ensemble = roc_auc_score(y_true_test, y_pred_test_ensemble)

    # Print results
    print('*' * 100)
    print('TEST RESULTS (AUROC)')
    for i in range(N_FOLD):
        print(f'Fold #{i}  : {roc_auc_score(y_true_test, y_pred_test_list[i]):.4f}')
    print()
    print(f'Ensemble : {test_auroc_ensemble:.4f}')
    print('*' * 100)

    # Rename trial folder so that it carries the ensemble AUROC as a prefix
    TRIAL_FOLDER_NEW = f'auc={test_auroc_ensemble:.4f}_{TRIAL_FOLDER}'
    TRIAL_DIR_NEW = f'{STUDY_DIR}/{TRIAL_FOLDER_NEW}'
    os.rename(TRIAL_DIR, TRIAL_DIR_NEW)

    gc.collect()

## Inference

In [7]:
######### Code for rechecking score #############
# Path of the trial whose saved hyperparameters and weights are re-evaluated.
trial_dir = "model/241211_lead12_MACCE_00/auc=0.8772_batch=64_lr=0.00086_step=2_gam=0.164_pat=4_block_size=16_block_depth=3_hidden_size=32_block_layers=3_kernel_num=5"
# ========== #
# File Paths #
# ========== #
TRIAL_DIR                   = trial_dir
TRIAL_FOLDER                = TRIAL_DIR.split('/')[-1]
STUDY_DIR                   = TRIAL_DIR[:-(len(TRIAL_FOLDER) + 1)]

HPARAMS_ARCH_FILENAME       = f'{TRIAL_DIR}/hparams_arch.pkl'
HPARAMS_TRAIN_FILENAME      = f'{TRIAL_DIR}/hparams_train.pkl'
DATASET_CONFIG_FILENAME     = f'{TRIAL_DIR}/dataset_config.pkl'

# ==================== #
# Load Hyperparameters #
# ==================== #
# Files are opened with context managers so the handles are always closed.
# Architecture
with open(HPARAMS_ARCH_FILENAME, 'rb') as f:
    hparams_arch = pickle.load(f)

BLOCK_SIZE      = int(hparams_arch['BLOCK_SIZE'])
BLOCK_DEPTH     = int(hparams_arch['BLOCK_DEPTH'])
BLOCK_LAYERS    = int(hparams_arch['BLOCK_LAYERS'])
HIDDEN_SIZE     = int(hparams_arch['HIDDEN_SIZE'])
KERNEL_NUM      = int(hparams_arch['KERNEL_NUM'])
# Train
with open(HPARAMS_TRAIN_FILENAME, 'rb') as f:
    hparams_train = pickle.load(f)

N_FOLD          = int(hparams_train['N_FOLD'])
N_EPOCH         = int(hparams_train['N_EPOCH'])
BATCH_SIZE      = int(hparams_train['BATCH_SIZE'])
LR_INITIAL      = hparams_train['LR_INITIAL']
LR_STEP_SIZE    = hparams_train['LR_STEP_SIZE']
LR_STEP_GAMMA   = hparams_train['LR_STEP_GAMMA']
EARLY_STOP_PAT  = hparams_train['EARLY_STOP_PAT']

#N_WORKERS       = 8
#PREFETCH_FACTOR = 4
# Dataset configs
with open(DATASET_CONFIG_FILENAME, 'rb') as f:
    dataset_config = pickle.load(f)

DIAGNOSIS       = dataset_config['DIAGNOSIS']

### 1) Evaluate time series model

In [None]:
df = pd.read_csv('data_labels_M_N.csv')

##############################################################
# Load the held-out test patient ids
with open('test_patients.pkl', 'rb') as f:
    test_patients = pickle.load(f)

test= dataset(header_files=df.loc[df['hid'].isin(test_patients),'filename'].to_list())
test.num_leads = 12
test.sample = False
test.files.reset_index(drop=True, inplace=True)

# Create test data loader
test_loader = DataLoader(test, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=False)
y_pred_test_list = []

# Run every fold model on the test set
for i_fold in range(N_FOLD):
    # Build the architecture, then restore this fold's best weights
    model = FinalModel(block_size =BLOCK_SIZE, block_depth =BLOCK_DEPTH, block_layers=BLOCK_LAYERS, hidden_size=HIDDEN_SIZE, kernel_num=KERNEL_NUM).to(DEVICE)
    weight_cache = f'{TRIAL_DIR}/weights_fold_{i_fold+1}.pth'
    # map_location lets weights saved on a GPU be loaded on a CPU-only host
    model.load_state_dict(torch.load(weight_cache, weights_only=True, map_location=DEVICE))
    y_true_test, y_pred_test, hidden_states = evaluate_model(test_loader, model)
    test_auroc = roc_auc_score(y_true_test, y_pred_test)
    y_pred_test_list.append(y_pred_test)
    print()
    print(f'>> TEST AUROC = {test_auroc:.4f}')
    print()

# Ensemble: average the per-fold probabilities
y_pred_test = np.mean(np.array(y_pred_test_list), axis=0)
y_true_test = y_true_test.ravel()  # Flatten to 1D array
y_pred_test = y_pred_test.ravel()  # Flatten to 1D array
##############################################################

# Train and validation patients of fold 0 are concatenated into one
# train+validation ("tval") patient list.
with open('train_patients_cv_0.pkl', 'rb') as f:
    train_patients = pickle.load(f)
with open('valid_patients_cv_0.pkl', 'rb') as f:
    val_patients = pickle.load(f)
tval_patients = np.concatenate([train_patients, val_patients])

tval= dataset(header_files=df.loc[df['hid'].isin(tval_patients),'filename'].to_list())
tval.num_leads = 12
tval.sample = False
tval.files.reset_index(drop=True, inplace=True)

# Create train+validation data loader
tval_loader = DataLoader(tval, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=False)
y_pred_tval_list = []

# Run every fold model on the train+validation set
# NOTE(review): each fold model is evaluated here on patients that include
# its own training patients, so these predictions are partly in-sample.
for i_fold in range(N_FOLD):
    # Build the architecture, then restore this fold's best weights
    model = FinalModel(block_size =BLOCK_SIZE, block_depth =BLOCK_DEPTH, block_layers=BLOCK_LAYERS, hidden_size=HIDDEN_SIZE, kernel_num=KERNEL_NUM).to(DEVICE)
    weight_cache = f'{TRIAL_DIR}/weights_fold_{i_fold+1}.pth'
    # map_location lets weights saved on a GPU be loaded on a CPU-only host
    model.load_state_dict(torch.load(weight_cache, weights_only=True, map_location=DEVICE))
    y_true_tval, y_pred_tval, hidden_states = evaluate_model(tval_loader, model)
    tval_auroc = roc_auc_score(y_true_tval, y_pred_tval)
    y_pred_tval_list.append(y_pred_tval)
    print()
    print(f'>> TVAL AUROC = {tval_auroc:.4f}')
    print()
# Ensemble: average the per-fold probabilities
y_pred_tval = np.mean(np.array(y_pred_tval_list), axis=0)
y_true_tval = y_true_tval.ravel()  # Flatten to 1D array
y_pred_tval = y_pred_tval.ravel()  # Flatten to 1D array

# Save train/val predictions
with open('y_pred_tval_cnn.pkl', 'wb') as f:
    pickle.dump(y_pred_tval, f)

######################################################################
# Reload the cached tval ensemble predictions so this part of the cell can
# be re-run without repeating the fold inference.
with open('y_pred_tval_cnn.pkl', 'rb') as f:
    y_pred_tval = pickle.load(f)

# --- Calibration metrics on the raw (uncalibrated) test predictions ---
# Intercept/slope come from a LinearRegression fit of the observed label on
# the predicted probability.
cal_model = LinearRegression()
cal_model.fit(y_pred_test.reshape(-1, 1), y_true_test)
cal_intercept = cal_model.intercept_
cal_slope = cal_model.coef_[0]
brier_before = brier_score_loss(y_true_test, y_pred_test)
U_before, p_before = calc_unreliability(y_true_test, y_pred_test)

# --- Isotonic calibration: fitted on tval predictions, applied to test ---
ir = IsotonicRegression(out_of_bounds='clip')
ir.fit(y_pred_tval, y_true_tval)
y_pred_test_cal = ir.predict(y_pred_test)

# Keep calibrated probabilities strictly inside (0, 1).
y_pred_test_cal = np.clip(y_pred_test_cal, 1e-15, 1 - 1e-15)

# --- Same calibration metrics on the calibrated test predictions ---
cal_model = LinearRegression()
cal_model.fit(y_pred_test_cal.reshape(-1, 1), y_true_test)
cal_intercept_after = cal_model.intercept_
cal_slope_after = cal_model.coef_[0]
brier_after = brier_score_loss(y_true_test, y_pred_test_cal)
U_after, p_after = calc_unreliability(y_true_test, y_pred_test_cal)

print("Before Calibration:")
print(f"Calibration Intercept: {cal_intercept:.4f}")
print(f"Calibration Slope: {cal_slope:.4f}")
print(f"Brier Score: {brier_before:.4f}")
print(f"Unreliability Index: {U_before:.4f}")
print(f"Unreliability p-value: {p_before:.4f}")

# The calibrator is isotonic regression; the previous heading
# ("After Linear Regression") named the metric model, not the calibrator.
print("\nAfter Calibration:")
print(f"Calibration Intercept: {cal_intercept_after:.4f}")
print(f"Calibration Slope: {cal_slope_after:.4f}")
print(f"Brier Score: {brier_after:.4f}")
print(f"Unreliability Index: {U_after:.4f}")
print(f"Unreliability p-value: {p_after:.4f}")

# Calibration curve of the uncalibrated test predictions.
prob_true, prob_pred = calibration_curve(y_true_test, y_pred_test, n_bins=10)
draw_calibration_plot(prob_true, prob_pred, y_pred_test, 10, 1.0)

# Decision threshold chosen on tval predictions (not on the test set).
Youden = youden(y_true_tval, y_pred_tval)
print(Youden)

AUROC = roc_auc_score(y_true_test, y_pred_test)
AUPRC = average_precision_score(y_true_test, y_pred_test)
# Vectorized thresholding; same 0/1 labels as the element-wise comparison.
y_pred_label = (np.asarray(y_pred_test) > Youden).astype(int)
F1_score = f1_score(y_true_test, y_pred_label)

# Compute the interval once so both bounds come from the same call.
auroc_ci = roc_auc_ci(y_true_test, y_pred_test)

print(f'AUROC : {AUROC:.4f}')
print(f'AUROC CI(95%): ({auroc_ci[0]:.4f},{auroc_ci[1]:.4f})')
print(f'AUPRC : {AUPRC:.4f}')
print(f'F1 Score : {F1_score:.4f}')
print(f'Test Accuracy : {round(100*accuracy_score(y_true_test, y_pred_label), 2)}% ')

draw_roc_curve(y_true_test, y_pred_test)
draw_confusion_matrix(y_true_test, y_pred_label)
draw_y_test_proba(y_pred_test)

# Save test labels and raw test probabilities.
# NOTE(review): weights elsewhere in this notebook are read from TRIAL_DIR,
# while these files go under lowercase `trial_dir` - confirm both names are
# defined and that the difference is intended.
with open(f'{trial_dir}/y_test_cnn.pkl', 'wb') as f:
    pickle.dump(y_true_test, f)
with open(f'{trial_dir}/y_test_proba_cnn.pkl', 'wb') as f:
    pickle.dump(y_pred_test, f)

### 2) Evaluate GBM model

#### 2-1) Extract ICD 10 codes


In [38]:
# Attach anesthesia duration and ASA class to the label table, keyed by filename.
df = pd.read_csv('data_labels_M_N.csv')
df2 = pd.read_csv('ecg_labels_w_features_lab_asa_3mo_N.csv')

features_by_file = df2.set_index('filename')
for target_col, source_col in (('andur', 'andur'), ('asa', 'final_asa')):
    df[target_col] = df['filename'].map(features_by_file[source_col])

df.to_csv('data_labels_gbm.csv', index=False)

In [None]:
# Run every fold's network over all recordings and append its hidden-state
# vector to the label table as columns hidden_<fold>_<unit>.
df = pd.read_csv('data_labels_gbm.csv')

# Named `all_dataset` rather than `all`, so the builtin all() is not shadowed
# for every cell that runs afterwards in this session.
all_dataset = dataset(header_files=df['filename'].to_list())
all_dataset.num_leads = 12
all_dataset.sample = False
all_dataset.files.reset_index(drop=True, inplace=True)

# Unshuffled loader so hidden-state rows stay in the same order as `df`.
all_loader = DataLoader(all_dataset, collate_fn=collate_fn, batch_size=128, shuffle=False)

hidden_frames = []
for i_fold in range(N_FOLD):
    # Rebuild the architecture, then restore this fold's weights from disk.
    model = FinalModel(block_size=BLOCK_SIZE, block_depth=BLOCK_DEPTH, block_layers=BLOCK_LAYERS, hidden_size=HIDDEN_SIZE, kernel_num=KERNEL_NUM).to(DEVICE)
    weight_cache = f'{TRIAL_DIR}/weights_fold_{i_fold+1}.pth'
    model.load_state_dict(torch.load(weight_cache, weights_only=True))
    # Only the hidden states are used here. Labels/predictions go to throwaway
    # names so the test-set y_true_test / y_pred_test held by the evaluation
    # cell are not silently replaced with all-data values.
    _y_true_all, _y_pred_all, hidden_states = evaluate_model(all_loader, model)
    hidden_frames.append(
        pd.DataFrame(hidden_states, columns=[f'hidden_{i_fold+1}_{i}' for i in range(HIDDEN_SIZE)])
    )

# One concat at the end instead of re-copying the growing frame every fold;
# column order (fold 1, fold 2, ...) is unchanged.
df = pd.concat([df, *hidden_frames], axis=1)

df.to_csv('data_labels_gbm_hidden.csv', index=False)

In [None]:
'''
# Imports for ICD 10 code extraction
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, droprate):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.droprate = droprate
        self.ff_dim = ff_dim
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential([layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(droprate)
        self.dropout2 = layers.Dropout(droprate)

    def call(self, inputs, training):
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        return {'embed_dim': self.embed_dim, 'num_heads': self.num_heads, 'ff_dim':self.ff_dim, 'droprate':self.droprate}

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super().__init__()
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        return {'maxlen': self.maxlen, 'vocab_size': self.vocab_size, 'embed_dim': self.embed_dim}

# SNUH data
# system, operation, body part, approach
df3 = pd.read_csv('data_labels_gbm.csv', dtype=str)
df3 = df3.merge(pd.read_csv('transformer/all_data_labels_241106(asa_updated, opname).csv', usecols=['opname','filename'], dtype=str), how='left', on='filename')  # the label manually checked by hclee
# merge the name with opname and opname_final
df3['src'] = 'snuh'
df3 = df3.merge(pd.read_csv('transformer/icd10_mapping_hclee.csv', usecols=['opname', 'p', 'o', 'a'], dtype=str), how='left', on='opname')
df3['name'] = df3['opname']
df = df3.copy()
# remove duplicate rows
df = df.drop_duplicates(subset=['filename'], keep='first')

# shuffling
df = df.sample(frac=1).reset_index(drop=True)
# Ensure all elements in x are strings
x = np.copy(df['name'].values.astype(str))  # Convert all elements to strings
# Process x
vocab_size = 4000
t_x = Tokenizer(vocab_size)
t_x.fit_on_texts(x)
x = t_x.texts_to_sequences(x)
maxlen = 158  # Instead of dynamically calculating maxlen
x = keras.preprocessing.sequence.pad_sequences(x, maxlen=maxlen)

prefix = 'transformer_res'
for target in ('p', 'o', 'a'):
    print(f'Processing {target}...')
    opath = f'transformer/{prefix}_{target}.csv'
    
    print('Loading tokenizer...')
    t_y = Tokenizer()
    tokenizer_config = json.loads(open(f'transformer/tokenizer_y_{target}.json').read())
    tokenizer_config = tokenizer_config['config']
    # Parse string representations back to dictionaries
    t_y.word_index = json.loads(tokenizer_config['word_index']) if isinstance(tokenizer_config['word_index'], str) else tokenizer_config['word_index']
    t_y.index_word = json.loads(tokenizer_config['index_word']) if isinstance(tokenizer_config['index_word'], str) else tokenizer_config['index_word']
    t_y.word_counts = json.loads(tokenizer_config['word_counts']) if isinstance(tokenizer_config['word_counts'], str) else tokenizer_config['word_counts']
    # Convert index_word keys to integers
    t_y.index_word = {int(k): v for k, v in t_y.index_word.items()}
    t_y.document_count = tokenizer_config['document_count']

    print('Loading model...')
    custom_objects = {
        'TokenAndPositionEmbedding': TokenAndPositionEmbedding,
        'TransformerBlock': TransformerBlock
    }
    model = keras.models.model_from_json(
        open(f'transformer/model_{target}.json').read(),
        custom_objects=custom_objects
    )
    model.load_weights(f'transformer/tuned_weights_{target}.h5')

    print('Running inference...')
    pred = model.predict(x, verbose=1)
    
    print('Post-processing predictions...')
    df['pred'] = pd.Series(t_y.sequences_to_texts(np.argmax(pred, axis=1)[...,None] + 1)).str.upper()
    df['conf'] = pred.max(axis=1)
    df['matched'] = (df['pred'] == df[target]).astype(int)

    print(f'Saving results to {opath}...')
    df[df['src'] == 'snuh'].drop(columns='src').to_csv(opath, index=False, encoding='utf-8-sig')
    print(f'Finished processing {target}\n')

# merge
df_a = pd.read_csv(f'transformer/{prefix}_a.csv', dtype=str, usecols=['filename', 'pred', 'matched', 'conf'])
df_o = pd.read_csv(f'transformer/{prefix}_o.csv', dtype=str, usecols=['filename', 'pred', 'matched', 'conf'])
df = pd.read_csv(f'transformer/{prefix}_p.csv', dtype=str)
df = df.merge(df_a, how='left', on='filename', suffixes=('', '_a'))
df = df.merge(df_o, how='left', on='filename', suffixes=('', '_o'))
df.rename(columns={'pred':'pred_p', 'matched':'matched_p'}, inplace=True)

# Fill null values in p,o,a columns with predicted values
df['p'] = df['p'].fillna(df['pred_p'])
df['o'] = df['o'].fillna(df['pred_o']) 
df['a'] = df['a'].fillna(df['pred_a'])

# Keep only columns up to 'a' 
df = df[['filename', 'hid', 'age', 'gender', 'asa', 'andur', 'label', 'p', 'o', 'a']]

df2 = pd.read_csv('data_labels_gbm_hidden.csv', dtype=str)
df2 = df2.merge(df[['filename', 'p', 'o', 'a']], how='left', on='filename')
df2.to_csv('data_labels_gbm_hidden_icd.csv', index=False, encoding='utf-8-sig')
'''

#### 2-2) Training

In [97]:
#### OPID ICD Matching
'''
df2 = pd.read_csv('df_data.csv')
df2['AcquisitionDate'] = pd.to_datetime(df2['AcquisitionDate'])

df3 = pd.read_csv('opid_macce_orig.csv')
df3['opid'] = df3['opid'].astype(str).str.zfill(9)
df3['op_date'] = pd.to_datetime(df3['opid'].str[:6].apply(lambda x: f'20{x[:2]}-{x[2:4]}-{x[4:6]}'))

opid_map = {}
for _, row in df3.iterrows():
    hid = row['hid']
    opdate = row['op_date']
    opid = row['opid']
    
    # Create key as tuple of hid and opdate
    key = (hid, opdate)
    if key not in opid_map:
        opid_map[key] = []
    opid_map[key].append(opid)

def find_opid(row):
    hid = row['hid']
    acq_date = row['AcquisitionDate']
    end_date = acq_date + pd.DateOffset(months=3)
    # Check all dates in range
    for op_date in pd.date_range(acq_date, end_date):
        key = (hid, op_date)
        if key in opid_map:
            return opid_map[key][0] # Return first matching opid
    return None

df2['opid'] = df2.apply(find_opid, axis=1)

df = pd.read_csv('data_labels_gbm_hidden_icd.csv')
filename_to_opid = df2.set_index('filename')['opid'].to_dict()
df['opid'] = df['filename'].map(filename_to_opid)

# Create new dataframe without 'hidden' columns
df_x = df.loc[:, ~df.columns.str.contains('hidden', case=False)]
df_x.to_csv('opid_icd_matching.csv', index=False)
'''

In [26]:
# Load the GBM table and choose which feature columns the model will see.
df = pd.read_csv('data_labels_gbm_hidden_icd.csv')

# Categorical inputs (candidates: 'gender', 'asa', 'p', 'o', 'a').
df_catcol = ['gender', 'p', 'o', 'a']
# Numeric inputs (candidates: 'age', 'andur') plus every fold's hidden-state columns.
df_numcol = ['age'] + [
    f'hidden_{i_fold+1}_{i}'
    for i_fold in range(N_FOLD)
    for i in range(HIDDEN_SIZE)
]
labels = 'label'
# Keep model_name in sync with the df_catcol / df_numcol combination above.
model_name = 'xgb_demo_icd'

df[df_catcol] = df[df_catcol].astype('category')
# Restrict the frame to the selected features plus label and patient id.
selected_columns = df_catcol + df_numcol + ['label', 'hid']
df = df[selected_columns]

In [16]:
'''
# Get unique hospital IDs and split into train/test
# Load the train dataset
i_fold =0
with open('test_patients.pkl', 'rb') as f:
    test_hids = pickle.load(f)
with open(f'train_patients_cv_{i_fold}.pkl', 'rb') as f:
    train_hids = pickle.load(f)
with open(f'valid_patients_cv_{i_fold}.pkl', 'rb') as f:
    valid_hids = pickle.load(f)

train_val_hids = np.concatenate((train_hids, valid_hids))

# Save all HID groups to pickle file
hid_splits = {
    'test_hids': test_hids,
    'tval_hids': train_val_hids
}

with open(f'hid_splits.pkl', 'wb') as f:
    pickle.dump(hid_splits, f)
'''

In [27]:
# Collapse label 3 into the positive class; 0 and 1 stay as they are.
df['label'] = df['label'].replace({3: 1})

# Patient-level split: rows are assigned by hospital id, using the stored id lists.
with open('hid_splits.pkl', 'rb') as f:
    hid_splits = pickle.load(f)

in_tval = df['hid'].isin(hid_splits['tval_hids'])
in_test = df['hid'].isin(hid_splits['test_hids'])
df_train = df[in_tval]
df_test = df[in_test]

#df.to_csv(f'df_MACCE.csv', index=False)
#df_train.to_csv(f'df_train_MACCE.csv', index=False)
#df_test.to_csv(f'df_test_MACCE.csv', index=False)

In [None]:
# Make sure the output directory for this model variant exists.
# exist_ok=True replaces the check-then-create pair and cannot fail if the
# directory appears between the check and the creation.
os.makedirs(f'model/{model_name}', exist_ok=True)

study = optuna.create_study(direction="maximize", study_name="XGB Classifier")


def func(trial):
    """Optuna objective wrapper binding the data frames and model name."""
    # NOTE(review): df_test is handed to the tuning objective; if `objective`
    # scores trials on it, hyper-parameters are selected on the test set -
    # confirm against the objective() implementation.
    return objective(trial, df_train, df_test, model_name)


study.optimize(func, n_trials=20)

In [None]:
# Evaluate the saved per-fold XGBoost models on the test split: average the
# fold probabilities, calibrate with isotonic regression fitted on the
# df_train predictions, then report calibration and discrimination metrics.
X_test = df_test.drop(['label', 'hid'], axis=1)
X_train = df_train.drop(['label', 'hid'], axis=1)
# Series.ravel() is deprecated in recent pandas; to_numpy() gives the same 1-D array.
y_test = df_test['label'].to_numpy()
y_train = df_train['label'].to_numpy()

# Load every fold model once and score both splits with it (previously each
# model file was loaded twice, once per split).
test_fold_proba = []
train_fold_proba = []
for idx in range(N_FOLD):
    best_model = xgb.XGBClassifier()
    best_model.load_model(f"model/{model_name}/best_model_fold_{idx}_{model_name}.ubj")
    test_fold_proba.append(best_model.predict_proba(X_test)[:, 1])
    train_fold_proba.append(best_model.predict_proba(X_train)[:, 1])
y_test_proba = np.mean(test_fold_proba, axis=0)
y_train_proba = np.mean(train_fold_proba, axis=0)

# --- Calibration metrics on the raw (uncalibrated) test probabilities ---
# Intercept/slope come from a LinearRegression fit of the observed label on
# the predicted probability.
cal_model = LinearRegression()
cal_model.fit(y_test_proba.reshape(-1, 1), y_test)
cal_intercept = cal_model.intercept_
cal_slope = cal_model.coef_[0]
brier_before = brier_score_loss(y_test, y_test_proba)
U_before, p_before = calc_unreliability(y_test, y_test_proba)

# --- Isotonic calibration: fitted on df_train predictions, applied to test ---
ir = IsotonicRegression(out_of_bounds='clip')
ir.fit(y_train_proba, y_train)
y_test_proba_cal = ir.predict(y_test_proba)

# Persist the fitted calibrator.
with open("isotonic_calibration.pkl", 'wb') as f:
    pickle.dump(ir, f)

# --- Same calibration metrics on the calibrated test probabilities ---
cal_model = LinearRegression()
cal_model.fit(y_test_proba_cal.reshape(-1, 1), y_test)
cal_intercept_after = cal_model.intercept_
cal_slope_after = cal_model.coef_[0]
brier_after = brier_score_loss(y_test, y_test_proba_cal)
U_after, p_after = calc_unreliability(y_test, y_test_proba_cal)

print("Before Calibration:")
print(f"Calibration Intercept: {cal_intercept:.4f}")
print(f"Calibration Slope: {cal_slope:.4f}")
print(f"Brier Score: {brier_before:.4f}")
print(f"Unreliability Index: {U_before:.4f}")
print(f"Unreliability p-value: {p_before:.4f}")

# The calibrator is isotonic regression; the previous heading
# ("After Linear Regression") named the metric model, not the calibrator.
print("\nAfter Calibration:")
print(f"Calibration Intercept: {cal_intercept_after:.4f}")
print(f"Calibration Slope: {cal_slope_after:.4f}")
print(f"Brier Score: {brier_after:.4f}")
print(f"Unreliability Index: {U_after:.4f}")
print(f"Unreliability p-value: {p_after:.4f}")

# Calibration curve of the calibrated test probabilities.
prob_true, prob_pred = calibration_curve(y_test, y_test_proba_cal, n_bins=10)
draw_calibration_plot(prob_true, prob_pred, y_test_proba_cal, 10, 1.0)
########################

# Threshold chosen on the raw df_train probabilities.
Youden = youden(y_train, y_train_proba)
print(f'Youden Index : {Youden:.4f}')

AUROC = roc_auc_score(y_test, y_test_proba_cal)
AUPRC = average_precision_score(y_test, y_test_proba_cal)
# The threshold lives on the raw probability scale, so it is applied to the
# raw test probabilities. Comparing it against the isotonic-calibrated values
# (as before) mixed two different scales.
# Hard labels get their own name so the CNN cell's y_pred_test probabilities
# are not overwritten.
y_pred_label = (y_test_proba > Youden).astype(int)
F1_score = f1_score(y_test, y_pred_label)

# Compute the interval once so both bounds come from the same call.
auroc_ci = roc_auc_ci(y_test, y_test_proba_cal)

print(f'AUROC : {AUROC:.4f}')
print(f'AUROC CI(95%): ({auroc_ci[0]:.4f},{auroc_ci[1]:.4f})')
print(f'AUPRC : {AUPRC:.4f}')
print(f'F1 Score : {F1_score:.4f}')
print(f'Test Accuracy : {round(100*accuracy_score(y_test, y_pred_label), 2)}% ')

draw_roc_curve(y_test, y_test_proba_cal)
draw_confusion_matrix(y_test, y_pred_label)
draw_y_test_proba(y_test_proba_cal)

# Save test labels and calibrated test probabilities.
with open(f'model/{model_name}/y_test_{model_name}.pkl', 'wb') as f:
    pickle.dump(y_test, f)
with open(f'model/{model_name}/y_test_proba_{model_name}.pkl', 'wb') as f:
    pickle.dump(y_test_proba_cal, f)

# NOTE(review): this threshold is derived from the test set itself, so the
# thresholded metrics below are optimistic - confirm this is intended.
threshold_model = youden(y_test, y_test_proba_cal)

# Full metric report at that threshold.
metrics_model = get_metrics(y_test, y_test_proba_cal, threshold_model)

models = {
    'Model': metrics_model,
}

# The loop variable is deliberately NOT called model_name: reusing that name
# replaced the model configuration name ('xgb_demo_icd', ...) with 'Model'
# for every cell executed afterwards.
for display_name, metrics in models.items():
    print(f"\n{display_name}:")
    print(f"Threshold: {metrics['Threshold']:.4f}")
    print(f"AUROC: {metrics['AUROC']:.4f} (95% CI: {metrics['AUROC CI'][0]:.4f}-{metrics['AUROC CI'][1]:.4f})")
    print(f"AUPRC: {metrics['AUPRC']:.4f} (95% CI: {metrics['AUPRC CI'][0]:.4f}-{metrics['AUPRC CI'][1]:.4f})")
    print(f"F1-Score: {metrics['F1-Score']:.4f} (95% CI: {metrics['F1-Score CI'][0]:.4f}-{metrics['F1-Score CI'][1]:.4f})")
    print(f"Sensitivity: {metrics['Sensitivity']:.4f} (95% CI: {metrics['Sensitivity CI'][0]:.4f}-{metrics['Sensitivity CI'][1]:.4f})")
    print(f"Specificity: {metrics['Specificity']:.4f} (95% CI: {metrics['Specificity CI'][0]:.4f}-{metrics['Specificity CI'][1]:.4f})")
    print(f"Precision: {metrics['Precision']:.4f} (95% CI: {metrics['Precision CI'][0]:.4f}-{metrics['Precision CI'][1]:.4f})")
    print(f"TN: {metrics['TN']}, FP: {metrics['FP']}, FN: {metrics['FN']}, TP: {metrics['TP']}")
    print(f"Accuracy: {metrics['Accuracy']:.2f}%")

In [None]:
# Compare two saved GBM variants on the same test labels with DeLong's test.
model_name_1 = 'xgb_demo_icd'
model_name_2 = 'xgb'

dir_1 = f'model/{model_name_1}'
dir_2 = f'model/{model_name_2}'

# Load the stored test labels and both models' test probabilities.
with open(f'{dir_1}/y_test_{model_name_1}.pkl', 'rb') as f:
    y_test = pickle.load(f)
with open(f'{dir_1}/y_test_proba_{model_name_1}.pkl', 'rb') as f:
    y_test_proba_1 = pickle.load(f)
with open(f'{dir_2}/y_test_proba_{model_name_2}.pkl', 'rb') as f:
    y_test_proba_2 = pickle.load(f)

# Alternative comparator: the CNN probabilities saved under trial_dir.
#with open(f'{trial_dir}/y_test_proba_cnn.pkl', 'rb') as f:
#    y_test_proba_2 = pickle.load(f)

# DeLong test p-value for the difference between the two AUROCs.
pvalue = delong_roc_test(y_test, y_test_proba_1, y_test_proba_2)
print(f'p_value: {pvalue[0][0]:.9f}')

## Comparison with RCRI and Troponin

In [None]:
'''
FILTER_MONTHS=3
df_1 = pd.read_csv('opid_macce_orig_lab_asa.csv')
df_2 = pd.read_csv('opid_macce_'+str(FILTER_MONTHS)+'mo_N.csv')
df_3 = pd.read_csv('ecg_labels_w_features_lab_asa_'+str(FILTER_MONTHS)+'mo_N.csv')

df_1= df_1.merge(df_2[['opid','hid']], on ='opid')
df_1= df_1[pd.notnull(df_1['hid'])]
df_1.drop(columns=['dept'], inplace=True)

df_1.rename(columns={'age': 'real_PatientAge', 'sex': 'real_Gender'}, inplace=True)
df_1.replace({'real_Gender': {'F': 0, 'M': 1}}, inplace=True)

df_1['orin']= pd.to_datetime(df_1['opid'].astype(str).str.zfill(9).str[:2] + '/' + df_1['opid'].astype(str).str.zfill(9).str[2:4] + '/' + df_1['opid'].astype(str).str.zfill(9).str[4:6], format='%y/%m/%d')

df_3['AcquisitionDate']= pd.to_datetime(df_3['AcquisitionDate'])
for i in tqdm(df_3.index):
    cond1 = (df_1['hid']==df_3.loc[i,'hid'])
    cond2 = (df_1['orin']>=df_3.loc[i,'AcquisitionDate'])
    cond3 = (df_1['orin']<=df_3.loc[i,'AcquisitionDate']+pd.DateOffset(months=FILTER_MONTHS))
    if len(df_1[cond1 & cond2 & cond3])>0:
        df_3.loc[i, 'opid'] = df_1[cond1&cond2&cond3].sort_values(by='orin').iloc[0]['opid']
df_1.drop(columns=['hid'], inplace=True)

df_3 = df_3.merge(df_1, on ='opid')
#df_3.drop(columns=['PatientAge','Gender'], inplace=True)
df_3.to_csv('all_data_labels_w_troponin.csv')

df_4 = pd.read_csv('df_test_MACCE.csv')

for i in tqdm(df_4.index):
    matching_opids = df_3.loc[df_3['filename']==df_4.loc[i, 'filename'], 'opid']
    if len(matching_opids) > 0:
        df_4.loc[i, 'opid'] = matching_opids.values[0]
    else:
        print(df_4.loc[i, 'filename'])

df_5= pd.read_csv('opid_rcri_orig.csv')

for i in tqdm(df_4.index):
    matching_rcri = df_5.loc[df_5['opid']==df_4.loc[i, 'opid'], 'rcri_score'].values
    if len(matching_rcri) > 0:
        df_4.loc[i, 'rcri'] = matching_rcri[0]/6

df_6= pd.read_csv('opid_troponin_orig.csv')

for i in tqdm(df_4.index):
    matching_tni = df_6.loc[df_6['opid']==df_4.loc[i, 'opid'], 'pre_troponin_I']
    if len(matching_tni) == 0:
        continue
    df_4.loc[i, 'TnI'] = matching_tni.values[0]

df_4.to_csv('df_test_MACCE_w_rcri_troponin.csv', index=False)
'''

In [None]:
'''
df_3 = pd.read_csv('all_data_labels_w_troponin.csv')
df_4 = pd.read_csv('df_MACCE.csv')

for i in tqdm(df_4.index):
    matching_opids = df_3.loc[df_3['filename']==df_4.loc[i, 'filename'], 'opid']
    if len(matching_opids) > 0:
        df_4.loc[i, 'opid'] = matching_opids.values[0]

df_5= pd.read_csv('opid_rcri_orig.csv')

for i in tqdm(df_4.index):
    matching_rcri = df_5.loc[df_5['opid']==df_4.loc[i, 'opid'], 'rcri_score'].values
    if len(matching_rcri) > 0:
        df_4.loc[i, 'rcri'] = matching_rcri[0]/6

df_6= pd.read_csv('opid_troponin_orig.csv')

for i in tqdm(df_4.index):
    matching_tni = df_6.loc[df_6['opid']==df_4.loc[i, 'opid'], 'pre_troponin_I']
    if len(matching_tni) == 0:
        continue
    df_4.loc[i, 'TnI'] = matching_tni.values[0]
    #df_4.loc[i, 'TnT'] = df_6.loc[df_6['opid']==df_4.loc[i, 'opid'], 'pre_troponin_T'].values[0]

df_4.to_csv('df_MACCE_w_rcri_troponin.csv', index=False)
'''

In [None]:
## RCRI
df_4 = pd.read_csv('df_test_MACCE_w_rcri_troponin.csv')
# Supplementary RCRI scores, keyed by opid.
x = pd.read_csv('rcri_missing.csv')

# Overwrite df_4['rcri'] with rcri_score / 6 wherever the opid appears in `x`.
# One vectorized lookup replaces the per-row boolean scan over `x`.
# The first occurrence of each opid wins, as before. Rows whose opid is
# missing are never matched, because NaN opids are dropped from the lookup.
rcri_lookup = (
    x.dropna(subset=['opid'])
     .drop_duplicates(subset='opid', keep='first')
     .set_index('opid')['rcri_score']
)
has_match = df_4['opid'].isin(rcri_lookup.index)
df_4.loc[has_match, 'rcri'] = df_4.loc[has_match, 'opid'].map(rcri_lookup) / 6

# Attach saved model probabilities.
# NOTE(review): assignment is positional - this assumes the pickled arrays are
# in the same row order as df_test_MACCE_w_rcri_troponin.csv; confirm.
model_name_1 = 'xgb_demo_icd'
with open(f'model/{model_name_1}/y_test_proba_{model_name_1}.pkl', 'rb') as f:
    y_test_proba_1 = pickle.load(f)
df_4['proba'] = y_test_proba_1

model_name_2 = 'xgb_wo_cnn_demo_icd'
with open(f'model/{model_name_2}/y_test_proba_{model_name_2}.pkl', 'rb') as f:
    y_test_proba_2 = pickle.load(f)
df_4['proba_wo_cnn'] = y_test_proba_2

'''
test= dataset(header_files=df_4['filename'].to_list())
test.num_leads = 12
test.sample = False
test.files.reset_index(drop=True, inplace=True)

# Create test data loader
test_loader = DataLoader(test, collate_fn=collate_fn, batch_size=BATCH_SIZE, shuffle=False)
y_pred_test_list = []

# [Main] 4-Fold Validation
# Create datasets & data loaders
for i_fold in range(N_FOLD):
    # Initialize model & save weights
    model = FinalModel(block_size =BLOCK_SIZE, block_depth =BLOCK_DEPTH, block_layers=BLOCK_LAYERS, hidden_size=HIDDEN_SIZE, kernel_num=KERNEL_NUM).to(DEVICE)
    weight_cache = f'{TRIAL_DIR}/weights_fold_{i_fold+1}.pth'
    # Load best weights & inference on test set
    model.load_state_dict(torch.load(weight_cache, weights_only=True))
    y_true_test, y_pred_test, hidden_states = evaluate_model(test_loader, model)
    test_auroc = roc_auc_score(y_true_test, y_pred_test)
    y_pred_test_list.append(y_pred_test)
    print()

y_pred_test = np.mean(np.array(y_pred_test_list), axis=0)
y_test_proba_cnn = y_pred_test.ravel()  # Flatten to 1D array
'''
# Probabilities of the 'xgb' model, used below as the ECG-only comparator.
model_name_3 = 'xgb'
with open(f'model/{model_name_3}/y_test_proba_{model_name_3}.pkl', 'rb') as f:
    y_test_proba_cnn = pickle.load(f)

# Labels and the score of every predictor being compared.
y_test = df_4['label']
y_test_proba_rcri = df_4['rcri']
y_test_proba_model = df_4['proba']
y_test_proba_model_wo_cnn = df_4['proba_wo_cnn']
y_test_proba_asa = df_4['asa']

# Discrimination metrics per predictor.
AUROC_rcri = roc_auc_score(y_test, y_test_proba_rcri)
AUPRC_rcri = average_precision_score(y_test, y_test_proba_rcri)

AUROC_model = roc_auc_score(y_test, y_test_proba_model)
AUPRC_model = average_precision_score(y_test, y_test_proba_model)

AUROC_cnn = roc_auc_score(y_test, y_test_proba_cnn)
AUPRC_cnn = average_precision_score(y_test, y_test_proba_cnn)

AUROC_model_wo_cnn = roc_auc_score(y_test, y_test_proba_model_wo_cnn)

AUROC_asa = roc_auc_score(y_test, y_test_proba_asa)

# One interval computation per predictor, so the lower and upper bound always
# come from the same call (previously each bound triggered its own call).
ci_rcri = roc_auc_ci(y_test, y_test_proba_rcri)
ci_model = roc_auc_ci(y_test, y_test_proba_model)
ci_cnn = roc_auc_ci(y_test, y_test_proba_cnn)
ci_model_wo_cnn = roc_auc_ci(y_test, y_test_proba_model_wo_cnn)
ci_asa = roc_auc_ci(y_test, y_test_proba_asa)

print(f'RCRI AUROC : {AUROC_rcri:.4f}')
print(f'RCRI AUROC CI(95%): ({ci_rcri[0]:.4f},{ci_rcri[1]:.4f})')
print(f'RCRI AUPRC : {AUPRC_rcri:.4f}')

print(f'\nModel AUROC : {AUROC_model:.4f}')
print(f'Model AUROC CI(95%): ({ci_model[0]:.4f},{ci_model[1]:.4f})')
print(f'Model AUPRC : {AUPRC_model:.4f}')

# ROC curve coordinates per predictor.
fpr_rcri, tpr_rcri, _ = roc_curve(y_test, y_test_proba_rcri)
fpr_model, tpr_model, _ = roc_curve(y_test, y_test_proba_model)
fpr_cnn, tpr_cnn, _ = roc_curve(y_test, y_test_proba_cnn)
fpr_model_wo_cnn, tpr_model_wo_cnn, _ = roc_curve(y_test, y_test_proba_model_wo_cnn)
fpr_asa, tpr_asa, _ = roc_curve(y_test, y_test_proba_asa)

# (fpr, tpr, colour, legend name, AUROC, CI) in drawing order.
roc_series = [
    (fpr_model, tpr_model, 'blue', 'Multimodal GBM', AUROC_model, ci_model),
    (fpr_model_wo_cnn, tpr_model_wo_cnn, 'brown', 'Baseline GBM', AUROC_model_wo_cnn, ci_model_wo_cnn),
    (fpr_cnn, tpr_cnn, 'green', 'GBM with only ECG', AUROC_cnn, ci_cnn),
    (fpr_rcri, tpr_rcri, 'orange', 'RCRI', AUROC_rcri, ci_rcri),
    (fpr_asa, tpr_asa, 'red', 'ASA', AUROC_asa, ci_asa),
]

plt.figure(figsize=(8, 6))
for fpr, tpr, colour, legend_name, auroc, ci in roc_series:
    plt.plot(fpr, tpr, color=colour,
             label=f'{legend_name} (AUC = {auroc:.3f}, 95% CI: {ci[0]:.3f}-{ci[1]:.3f})')

# Chance diagonal.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()

# DeLong test p-value: RCRI vs. the multimodal model.
pvalue = delong_roc_test(df_4['label'], df_4['rcri'], df_4['proba'])
print(f'\nDelong p_value: {pvalue[0][0]:.9f}')

In [None]:
# Youden-index threshold per predictor.
# NOTE(review): thresholds are derived from the test labels themselves, so the
# thresholded metrics below are optimistic - confirm this is intended.
threshold_model = youden(y_test, y_test_proba_model)
threshold_cnn = youden(y_test, y_test_proba_cnn)
threshold_model_wo_cnn = youden(y_test, y_test_proba_model_wo_cnn)
threshold_rcri = youden(y_test, y_test_proba_rcri)
threshold_asa = youden(y_test, y_test_proba_asa)

# Full metric report per predictor at its own threshold.
metrics_model = get_metrics(y_test, y_test_proba_model, threshold_model)
metrics_cnn = get_metrics(y_test, y_test_proba_cnn, threshold_cnn)
metrics_model_wo_cnn = get_metrics(y_test, y_test_proba_model_wo_cnn, threshold_model_wo_cnn)
metrics_rcri = get_metrics(y_test, y_test_proba_rcri, threshold_rcri)
metrics_asa = get_metrics(y_test, y_test_proba_asa, threshold_asa)

# NOTE(review): the ROC plot legend in this notebook calls y_test_proba_model
# 'Multimodal GBM' and y_test_proba_model_wo_cnn 'Baseline GBM'; the display
# names below differ - confirm which naming is intended.
models = {
    'Baseline GBM': metrics_model,
    'DNN': metrics_cnn,
    'GBM without DNN features': metrics_model_wo_cnn,
    'RCRI': metrics_rcri,
    'ASA': metrics_asa
}

# The loop variable is deliberately NOT called model_name: reusing that name
# replaced the notebook-level model configuration name with the last display
# name ('ASA') for every cell executed afterwards.
for display_name, metrics in models.items():
    print(f"\n{display_name}:")
    print(f"Threshold: {metrics['Threshold']:.4f}")
    print(f"AUROC: {metrics['AUROC']:.4f} (95% CI: {metrics['AUROC CI'][0]:.4f}-{metrics['AUROC CI'][1]:.4f})")
    print(f"AUPRC: {metrics['AUPRC']:.4f} (95% CI: {metrics['AUPRC CI'][0]:.4f}-{metrics['AUPRC CI'][1]:.4f})")
    print(f"F1-Score: {metrics['F1-Score']:.4f} (95% CI: {metrics['F1-Score CI'][0]:.4f}-{metrics['F1-Score CI'][1]:.4f})")
    print(f"Sensitivity: {metrics['Sensitivity']:.4f} (95% CI: {metrics['Sensitivity CI'][0]:.4f}-{metrics['Sensitivity CI'][1]:.4f})")
    print(f"Specificity: {metrics['Specificity']:.4f} (95% CI: {metrics['Specificity CI'][0]:.4f}-{metrics['Specificity CI'][1]:.4f})")
    print(f"Precision: {metrics['Precision']:.4f} (95% CI: {metrics['Precision CI'][0]:.4f}-{metrics['Precision CI'][1]:.4f})")
    print(f"TN: {metrics['TN']}, FP: {metrics['FP']}, FN: {metrics['FN']}, TP: {metrics['TP']}")
    print(f"Accuracy: {metrics['Accuracy']:.2f}%")

In [None]:
# DeLong test of y_test_proba_model against each comparator, printed in this
# order: model without CNN features, CNN/ECG-only model, RCRI, ASA.
comparators = (
    y_test_proba_model_wo_cnn,
    y_test_proba_cnn,
    y_test_proba_rcri,
    y_test_proba_asa,
)

for comparator in comparators:
    pvalue = delong_roc_test(y_test, y_test_proba_model, comparator)
    print(f'p_value: {pvalue[0][0]:.9f}')

In [None]:
# Fit a spline calibrator per predictor and report calibration metrics before
# and after calibration.
# NOTE(review): the calibrator is fitted and evaluated on the same (y_proba,
# y_test) pairs, so the "after" metrics are in-sample - confirm intended.
calib = mli.SplineCalib(unity_prior=False, unity_prior_weight=100, random_state=42, max_iter=500)
#calib = IsotonicRegression(out_of_bounds='clip')

# Calibrated probabilities, keyed by display name.
calibrated_probas = {}
models_data = {
    'Baseline GBM': y_test_proba_model,
    'DNN': y_test_proba_cnn,
    'GBM without DNN features': y_test_proba_model_wo_cnn,
    'RCRI': y_test_proba_rcri,
    'ASA': y_test_proba_asa
}

print("\nCalibration Metrics:")
print("-" * 50)

# The loop variable is deliberately NOT called model_name, so the notebook-level
# model configuration name is not overwritten by a display name.
for display_name, y_proba in models_data.items():
    print(f"\n{display_name}:")

    # Several entries are pandas Series (columns of df_4), which have no
    # .reshape(); convert to a plain float array before the 2-D reshape below.
    y_proba = np.asarray(y_proba, dtype=float)

    # Metrics before calibration: intercept/slope from a LinearRegression fit
    # of the observed label on the predicted score.
    cal_model = LinearRegression()
    cal_model.fit(y_proba.reshape(-1, 1), y_test)
    cal_intercept = cal_model.intercept_
    cal_slope = cal_model.coef_[0]
    brier_before = brier_score_loss(y_test, y_proba)
    U_before, p_before = calc_unreliability(y_test, y_proba)

    # Refit the shared calibrator on this predictor and calibrate its scores.
    calib.fit(y_proba, y_test)
    y_proba_cal = calib.calibrate(y_proba)
    #y_proba_cal = calib.predict(y_proba)
    # Keep calibrated probabilities strictly inside (0, 1).
    y_proba_cal = np.clip(y_proba_cal, 1e-15, 1 - 1e-15)
    calibrated_probas[display_name] = y_proba_cal

    # Same metrics after calibration.
    cal_model = LinearRegression()
    cal_model.fit(y_proba_cal.reshape(-1, 1), y_test)
    cal_intercept_after = cal_model.intercept_
    cal_slope_after = cal_model.coef_[0]
    brier_after = brier_score_loss(y_test, y_proba_cal)
    U_after, p_after = calc_unreliability(y_test, y_proba_cal)

    print("Before Calibration:")
    print(f"Calibration Intercept: {cal_intercept:.4f}")
    print(f"Calibration Slope: {cal_slope:.4f}")
    print(f"Brier Score: {brier_before:.4f}")
    print(f"Unreliability Index: {U_before:.4f}")
    print(f"Unreliability p-value: {p_before:.4f}")

    print("\nAfter Calibration:")
    print(f"Calibration Intercept: {cal_intercept_after:.4f}")
    print(f"Calibration Slope: {cal_slope_after:.4f}")
    print(f"Brier Score: {brier_after:.4f}")
    print(f"Unreliability Index: {U_after:.4f}")
    print(f"Unreliability p-value: {p_after:.4f}")

In [None]:
## TnI
# Link each ECG row to the earliest operation of the same `hid` that falls
# strictly after the ECG and strictly before ECG date + FILTER_MONTHS, attach
# the pre-operative troponin values, and score raw TnI against `label`.
FILTER_MONTHS = 3
df_1 = pd.read_csv('opid_macce_orig_lab_asa.csv')
df_2 = pd.read_csv(f'opid_macce_{FILTER_MONTHS}mo.csv')
df_3 = pd.read_csv(f'ecg_labels_w_features_{FILTER_MONTHS}mo.csv')

# Attach `hid` to every operation, keep operations that have one, drop `dept`.
# Reassigning (instead of inplace edits on a filtered frame) keeps df_1 an
# independent object.
df_1 = df_1.merge(df_2[['opid', 'hid']], on='opid')
df_1 = df_1[df_1['hid'].notna()].drop(columns=['dept'])

df_1 = df_1.rename(columns={'age': 'real_PatientAge', 'sex': 'real_Gender'})
df_1 = df_1.replace({'real_Gender': {'F': 0, 'M': 1}})

# The first six digits of the zero-padded (9-wide) opid are parsed as YYMMDD.
opid_str = df_1['opid'].astype(str).str.zfill(9)
df_1['orin'] = pd.to_datetime(opid_str.str[:6], format='%y%m%d')

df_3['AcquisitionDate'] = pd.to_datetime(df_3['AcquisitionDate'])
window = pd.DateOffset(months=FILTER_MONTHS)
for i in tqdm(df_3.index):
    ecg_hid = df_3.loc[i, 'hid']
    ecg_date = df_3.loc[i, 'AcquisitionDate']
    # Build the candidate set once per ECG row (it used to be filtered twice).
    candidates = df_1[
        (df_1['hid'] == ecg_hid)
        & (df_1['orin'] > ecg_date)
        & (df_1['orin'] < ecg_date + window)
    ]
    if not candidates.empty:
        df_3.loc[i, 'opid'] = candidates.sort_values(by='orin').iloc[0]['opid']
df_1 = df_1.drop(columns=['hid'])

# Inner merge: ECG rows without a linked operation are dropped here.
df_3 = df_3.merge(df_1, on='opid')
df_3.drop(columns=['PatientAge', 'Gender'], inplace=True)

df_6 = pd.read_csv('opid_troponin_orig.csv')

# Vectorised lookup of the first troponin record per opid. The previous
# per-row `.values[0]` raised IndexError for any opid missing from df_6 and
# rescanned df_6 twice per row; a missing opid now yields NaN and is removed
# by the TnI filter below.
troponin = df_6.drop_duplicates(subset='opid').set_index('opid')
df_3['TnI'] = df_3['opid'].map(troponin['pre_troponin_I'])
df_3['TnT'] = df_3['opid'].map(troponin['pre_troponin_T'])

# Only rows with a TnI measurement are evaluated.
df_3 = df_3[df_3['TnI'].notna()]

df_3.to_csv('all_data_labels_w_troponin.csv')

y_test = df_3['label']
y_test_proba = df_3['TnI']  # raw TnI value used directly as the score

AUROC = roc_auc_score(y_test, y_test_proba)
AUPRC = average_precision_score(y_test, y_test_proba)
auroc_ci = roc_auc_ci(y_test, y_test_proba)  # computed once, both bounds reused

print(f'AUROC : {AUROC:.4f}')
print(f'AUROC CI(95%): ({auroc_ci[0]:.4f},{auroc_ci[1]:.4f})')
print(f'AUPRC : {AUPRC:.4f}')

draw_roc_curve(y_test, y_test_proba)

## No TnI vs TnI GBM

In [13]:
# Build the modelling frame for the TnI subset: rows of the GBM feature table
# whose `filename` has a TnI value and is not on the exclusion list.
df = pd.read_csv('df_MACCE_w_rcri_troponin.csv')
df = df[df['TnI'].notna()]
with open('exclude_files.pkl', 'rb') as f:
    exclude_files = pickle.load(f)
df = df[~df['filename'].isin(exclude_files)]

# Left-join TnI onto the feature table, then keep only the matched rows.
df2 = pd.read_csv('data_labels_gbm_hidden_icd.csv')
df2 = pd.merge(df2, df[['filename', 'TnI']], on='filename', how='left')
# Explicit copy: columns are assigned into this frame below, which would
# otherwise be a chained assignment on a filtered view of df2.
df = df2[df2['TnI'].notna()].copy()

df_catcol = ['gender', 'p', 'o', 'a']  # 'gender','p','o','a'
df_numcol = ['age', 'TnI']  # 'age', 'TnI'
# Add HIDDEN_SIZE "hidden_<fold>_<i>" columns per fold (fold index is 1-based).
for i_fold in range(N_FOLD):
    df_numcol.extend([f'hidden_{i_fold+1}_{i}' for i in range(HIDDEN_SIZE)])
labels = 'label'
model_name = 'xgb_tni_subset_demo_icd_tni'  # 'xgb_tni_subset_demo_icd_tni', 'xgb_demo_icd'

df[df_catcol] = df[df_catcol].astype('category')
# Filter dataframe to only include specified columns
df = df[df_catcol + df_numcol + ['label', 'hid']]

In [108]:
'''
# Get unique hospital IDs and shuffle them
unique_hids = df['hid'].unique()
np.random.shuffle(unique_hids)

# Calculate split index for 80-20 split
split_idx = int(len(unique_hids) * 0.8)

# Split hospital IDs into train/val and test sets
tval_hids = unique_hids[:split_idx]
test_hids = unique_hids[split_idx:]

# Save hospital ID splits for TnI subset
hid_tni_subset_splits = {
    'tval_hids': tval_hids,
    'test_hids': test_hids
}

with open('hid_tni_subset_splits.pkl', 'wb') as f:
    pickle.dump(hid_tni_subset_splits, f)
'''

In [None]:
'''
with open(f'hid_splits.pkl', 'rb') as f:
    hid_splits= pickle.load(f)
df_train = df[df['hid'].isin(hid_splits['tval_hids'])]
df_test = df[df['hid'].isin(hid_splits['test_hids'])]
print(len(df_train), len(df_test))
'''

In [None]:
# Reload the persisted `hid` split for the TnI subset.
with open('hid_tni_subset_splits.pkl', 'rb') as f:
    hid_tni_subset_splits = pickle.load(f)

tval_hids, test_hids = (
    hid_tni_subset_splits[key] for key in ('tval_hids', 'test_hids')
)

# Rows are assigned to train/validation or test by membership of their `hid`.
in_tval = df['hid'].isin(tval_hids)
in_test = df['hid'].isin(test_hids)
df_train, df_test = df[in_tval], df[in_test]

print(len(df_train), len(df_test))

In [None]:
# Make sure the output directory for this model exists. exist_ok=True replaces
# the separate exists() check, so there is no gap between check and creation.
os.makedirs(f'model/{model_name}', exist_ok=True)

# Optuna maximises the value returned by `objective`.
study = optuna.create_study(direction="maximize", study_name="XGB Classifier")


def func(trial):
    """Run `objective` for one Optuna trial on the current train/test frames."""
    # NOTE(review): df_test is passed into the tuning objective; confirm it is
    # not used to select hyper-parameters.
    return objective(trial, df_train, df_test, model_name)


#func = lambda trial: objective(trial, pd.DataFrame(X_train, columns=feature_names), pd.DataFrame(y_train, columns=['label']), pd.DataFrame(h_train, columns=['hid']))
study.optimize(func, n_trials=10)

In [None]:
# Confirm test results
# Score two saved fold ensembles on the TnI-subset test split and compare them
# with raw TnI:
#   * model 1 ('xgb_tni_subset_demo_icd')     - features without the TnI column
#   * model 2 ('xgb_tni_subset_demo_icd_tni') - features including TnI
# Each ensemble prediction is the mean of the per-fold positive-class
# probabilities, recalibrated with an isotonic regression fitted on the
# ensemble's training-split predictions.

# Labels as plain 1-D arrays (to_numpy replaces the deprecated Series.ravel).
y_test = df_test['label'].to_numpy()
y_train = df_train['label'].to_numpy()

########################## model 1: without TnI
X_test = df_test.drop(['label', 'hid', 'TnI'], axis=1)
X_train = df_train.drop(['label', 'hid', 'TnI'], axis=1)

model_name = 'xgb_tni_subset_demo_icd'

# Load each fold's saved model once and reuse it for both splits.
fold_models = []
for idx in range(N_FOLD):
    best_model = xgb.XGBClassifier()
    best_model.load_model(f"model/{model_name}/best_model_fold_{idx}_{model_name}.ubj")
    fold_models.append(best_model)

# Predictions on the test split
y_pred_proba = [m.predict_proba(X_test)[:, 1] for m in fold_models]
y_test_proba_raw = np.mean(y_pred_proba, axis=0)
# Predictions on the training split
y_pred_proba = [m.predict_proba(X_train)[:, 1] for m in fold_models]
y_train_proba = np.mean(y_pred_proba, axis=0)

# NOTE(review): the calibrator is fitted on predictions for the training
# split; confirm whether out-of-fold predictions were intended instead.
ir = IsotonicRegression(out_of_bounds='clip')
ir.fit(y_train_proba, y_train)
y_test_proba = ir.predict(y_test_proba_raw)

# Calculate and plot calibration curve
#prob_true, prob_pred = calibration_curve(y_test, y_test_proba , n_bins=10)
#draw_calibration_plot(prob_true, prob_pred, y_test_proba, 10, 1.0)

# The cut-off is derived from the UNCALIBRATED training scores.
Youden = youden(y_train, y_train_proba)
# Optional: persist the cut-off.
# with open(f'model/{model_name}/youden_{model_name}.pkl', 'wb') as f:
#     pickle.dump(Youden, f)
print(f'Youden Index : {Youden:.4f}')

AUROC = roc_auc_score(y_test, y_test_proba)
AUPRC = average_precision_score(y_test, y_test_proba)
# Apply the cut-off on the scale it was derived on (raw ensemble scores).
# Comparing it with the isotonic-calibrated probabilities, as before, mixed
# two different score scales.
y_pred_test = (y_test_proba_raw > Youden).astype(int)
F1_score = f1_score(y_test, y_pred_test)
auroc_ci = roc_auc_ci(y_test, y_test_proba)

print(f'AUROC : {AUROC:.4f}')
print(f'AUROC CI(95%): ({auroc_ci[0]:.4f},{auroc_ci[1]:.4f})')
print(f'AUPRC : {AUPRC:.4f}')
print(f'F1 Score : {F1_score:.4f}')
print(f'Test Accuracy : {round(100*accuracy_score(y_test, y_pred_test), 2)}% ')

draw_roc_curve(y_test, y_test_proba)
draw_confusion_matrix(y_test, y_pred_test)
draw_y_test_proba(y_test_proba)

# Optional: save test results.
# with open(f'model/{model_name}/y_test_{model_name}_tni.pkl', 'wb') as f:
#     pickle.dump(y_test, f)
# with open(f'model/{model_name}/y_test_proba_{model_name}_tni.pkl', 'wb') as f:
#     pickle.dump(y_test_proba, f)

########################## model 2: with TnI
X_test_2 = df_test.drop(['label', 'hid'], axis=1)
X_train_2 = df_train.drop(['label', 'hid'], axis=1)
model_name_2 = 'xgb_tni_subset_demo_icd_tni'

fold_models_2 = []
for idx in range(N_FOLD):
    best_model_2 = xgb.XGBClassifier()
    best_model_2.load_model(f"model/{model_name_2}/best_model_fold_{idx}_{model_name_2}.ubj")
    fold_models_2.append(best_model_2)

# Predictions on the test split
y_pred_proba_2 = [m.predict_proba(X_test_2)[:, 1] for m in fold_models_2]
y_test_proba_2 = np.mean(y_pred_proba_2, axis=0)
# Predictions on the training split
y_pred_proba_2 = [m.predict_proba(X_train_2)[:, 1] for m in fold_models_2]
y_train_proba_2 = np.mean(y_pred_proba_2, axis=0)

ir = IsotonicRegression(out_of_bounds='clip')
ir.fit(y_train_proba_2, y_train)
y_test_proba_2 = ir.predict(y_test_proba_2)

# Calculate ROC curves for both models and for raw TnI
fpr_model, tpr_model, _ = roc_curve(y_test, y_test_proba)
fpr_model_2, tpr_model_2, _ = roc_curve(y_test, y_test_proba_2)
fpr_tni, tpr_tni, _ = roc_curve(y_test, df_test['TnI'])

# Calculate AUROCs
AUROC_model = roc_auc_score(y_test, y_test_proba)
AUROC_model_2 = roc_auc_score(y_test, y_test_proba_2)
AUROC_tni = roc_auc_score(y_test, df_test['TnI'])

# Each interval is computed once; both bounds are reused in the legend.
ci_model = roc_auc_ci(y_test, y_test_proba)
ci_tni = roc_auc_ci(y_test, df_test["TnI"])

# Plot ROC curves
plt.figure(figsize=(8, 6))
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr_model, tpr_model, color='blue',
         label=f'Multimodal GBM (AUC = {AUROC_model:.3f}, 95% CI: {ci_model[0]:.3f}-{ci_model[1]:.3f})')
#plt.plot(fpr_model_2, tpr_model_2, color='green',
#         label=f'TnI-GBM (AUC = {AUROC_model_2:.3f}, 95% CI: {roc_auc_ci(y_test, y_test_proba_2)[0]:.3f}-{roc_auc_ci(y_test, y_test_proba_2)[1]:.3f})')
plt.plot(fpr_tni, tpr_tni, color='orange',
         label=f'TnI (AUC = {AUROC_tni:.3f}, 95% CI: {ci_tni[0]:.3f}-{ci_tni[1]:.3f})')

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
#plt.title('ROC Curve')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()

# Delong test p-values, in order: TnI vs model 1, TnI vs model 2,
# model 1 vs model 2.
pvalue = delong_roc_test(y_test, df_test["TnI"], y_test_proba)
print(f'\nDelong p_value: {pvalue[0][0]:.9f}')
pvalue = delong_roc_test(y_test, df_test["TnI"], y_test_proba_2)
print(f'\nDelong p_value: {pvalue[0][0]:.9f}')
pvalue = delong_roc_test(y_test, y_test_proba, y_test_proba_2)
print(f'\nDelong p_value: {pvalue[0][0]:.9f}')


In [None]:
model_name_1 = 'xgb_demo_icd'
model_name_2 = 'xgb_tni_subset_demo_icd_tni'


def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)


# Load previously saved test labels and predictions. The labels are taken
# from model 1's directory and reused for both prediction vectors.
y_test = _read_pickle(f'model/{model_name_1}/y_test_{model_name_1}_tni.pkl')
y_test_proba_1 = _read_pickle(f'model/{model_name_1}/y_test_proba_{model_name_1}_tni.pkl')
y_test_proba_2 = _read_pickle(f'model/{model_name_2}/y_test_proba_{model_name_2}_tni.pkl')

#with open(f'{trial_dir}/y_test_proba_cnn.pkl', 'rb') as f:
#    y_test_proba_2 = pickle.load(f)

# Delong test p-value for model 1 vs model 2 on the shared labels
pvalue = delong_roc_test(y_test, y_test_proba_1, y_test_proba_2)
print(f'p_value: {pvalue[0][0]:.9f}')