Connected to .venv (Python 3.12.10)

In [1]:
# Environment probe: ask TensorFlow which GPU devices it can see.
# The cell's displayed value is that list; an empty list means TensorFlow found no GPU.
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[]

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    confusion_matrix as sk_confusion_matrix,
    roc_auc_score,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.preprocessing import StandardScaler
import logging
import time
import os
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from catboost import CatBoostClassifier
from tensorflow.keras.utils import to_categorical 
import tensorflow as tf 
import logging
import gc 
import matplotlib.pyplot as plt

In [3]:
# Root logger: INFO level, each record prefixed with timestamp and severity.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [4]:
# Soft-vote weight applied to each CNN's class probabilities, in model order (1, 2, 3).
ENSEMBLE_WEIGHTS = [0.4, 0.3, 0.3]

# Number of target classes; sizes the softmax heads and the one-hot encoding.
num_classes = 2

# Module-level training defaults. The cross-validation cell passes its own
# params dict to the training function rather than reading these three.
epochs, batch_size, learning_rate = 50, 16, 0.001

# Every class index, used to keep confusion matrices a fixed size.
all_possible_labels = [label for label in range(num_classes)]

In [5]:
def create_cnn_model_1(input_shape, num_classes):
    """Build CNN variant 1: two Conv1D layers, max-pooling, an 8-unit dense layer, softmax head.

    Args:
        input_shape: Shape of one sample as passed to ``Input`` (the caller in
            this file uses ``(n_features, 1)``).
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model``.
    """
    input_layer = Input(shape=input_shape)
    conv1 = Conv1D(filters=10, kernel_size=6, activation='relu', padding='same')(input_layer)
    conv2 = Conv1D(filters=10, kernel_size=6, activation='relu', padding='same')(conv1)
    pool = MaxPooling1D(pool_size=2, padding='same')(conv2)
    flatten = Flatten()(pool)
    dense1 = Dense(units=8, activation='relu')(flatten)
    # The head was previously named 'ecn2_output' (copy-paste from model 2);
    # name it after this model so summaries and by-name weight loading are unambiguous.
    output_layer = Dense(units=num_classes, activation='softmax', name='ecn1_output')(dense1)
    return Model(inputs=input_layer, outputs=output_layer)


def create_cnn_model_2(input_shape, num_classes):
    """Build CNN variant 2: like variant 1's layout plus 50% dropout before the softmax head.

    Args:
        input_shape: Shape of one sample as passed to ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Two identical convolution layers stacked back to back.
    x = inputs
    for _ in range(2):
        x = Conv1D(filters=10, kernel_size=6, activation='relu', padding='same')(x)

    x = MaxPooling1D(pool_size=2, padding='same')(x)
    x = Flatten()(x)
    x = Dense(units=8, activation='relu')(x)
    x = Dropout(0.5)(x)

    outputs = Dense(units=num_classes, activation='softmax', name='ecn2_output')(x)
    return Model(inputs=inputs, outputs=outputs)


def create_cnn_model_3(input_shape, num_classes):
    """Build CNN variant 3: two Conv1D layers, max-pooling, then the softmax head directly.

    Args:
        input_shape: Shape of one sample as passed to ``Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Two identical convolution layers stacked back to back.
    x = inputs
    for _ in range(2):
        x = Conv1D(filters=10, kernel_size=6, activation='relu', padding='same')(x)

    x = MaxPooling1D(pool_size=2, padding='same')(x)
    x = Flatten()(x)

    # No hidden dense layer in this variant: flattened features feed the classifier.
    outputs = Dense(units=num_classes, activation='softmax', name='ecn3_output')(x)
    return Model(inputs=inputs, outputs=outputs)

In [6]:
def weighted_ensemble_predictions(predictions, weights):
    """Combine per-model class scores by weighted sum and return the winning class per row.

    Args:
        predictions: Sequence with one score array per model; each array is
            indexed as (sample, class).
        weights: Sequence with one scalar weight per model.

    Returns:
        1-D array holding, for every sample, the index of the class with the
        largest weighted score sum.

    Raises:
        ValueError: If the number of score arrays differs from the number of weights.
    """
    n_models = len(predictions)
    if n_models != len(weights):
        raise ValueError("Number of models must match number of weights")

    # Scale every model's scores by its weight, then add the models together.
    scaled_scores = np.array(
        [model_scores * weight for model_scores, weight in zip(predictions, weights)]
    )
    combined_scores = scaled_scores.sum(axis=0)
    return np.argmax(combined_scores, axis=1)

In [7]:
def _ensemble_roc_auc(y_true, class_probabilities):
    """Return ROC AUC for soft-vote probabilities, or NaN when it is undefined.

    Args:
        y_true: 1-D integer class labels.
        class_probabilities: Array indexed as (sample, class) whose rows sum to 1.
    """
    if np.unique(y_true).size < 2:
        logging.warning("ROC AUC is undefined: validation labels contain a single class.")
        return float('nan')
    n_score_columns = class_probabilities.shape[1]
    if n_score_columns == 2:
        # Binary case: score with the probability of the positive class (label 1).
        return roc_auc_score(y_true, class_probabilities[:, 1])
    return roc_auc_score(y_true, class_probabilities, multi_class='ovr',
                         labels=np.arange(n_score_columns))


def train_and_evaluate_cnn_ensemble(X_train, y_train, X_val, y_val, params):
    """Train the three CNN variants on one split and evaluate their weighted soft vote.

    Features are standardised with statistics fitted on ``X_train`` only, given
    a trailing channel axis, and fed to the three models. Validation
    predictions are combined with ``ENSEMBLE_WEIGHTS``.

    Args:
        X_train, X_val: Feature matrices (DataFrame or array-like), one column per feature.
        y_train, y_val: Integer-castable class labels (Series or array-like).
        params: Dict with keys ``'learning_rate'``, ``'epochs'`` and ``'batch_size'``.

    Returns:
        Tuple ``(metrics, training_time, prediction_time)``. ``metrics`` is a
        dict with accuracy, macro precision/recall/F1, confusion matrix (nested
        list, rows = true class), ROC AUC, specificity and sensitivity, or
        ``None`` if metric computation failed. When ``X_train`` has zero
        feature columns nothing is trained and ``(None, 0.0, 0.0)`` is
        returned, so callers can always unpack three values.
    """
    start_time = time.time()
    logging.info("Training CNN Ensemble...")

    X_train_np = X_train.values if isinstance(X_train, pd.DataFrame) else np.array(X_train)
    y_train_np = y_train.values if isinstance(y_train, pd.Series) else np.array(y_train)
    X_val_np = X_val.values if isinstance(X_val, pd.DataFrame) else np.array(X_val)
    y_val_np = y_val.values if isinstance(y_val, pd.Series) else np.array(y_val)

    y_train_np = y_train_np.astype(int)
    y_val_np = y_val_np.astype(int)

    # Check before scaling: StandardScaler itself rejects a zero-column matrix,
    # so a guard placed after it could never fire.
    if X_train_np.ndim == 2 and X_train_np.shape[1] == 0:
        logging.error("Error: X_train has 0 features after preprocessing/selection.")
        return None, 0.0, 0.0

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_np)
    X_val_scaled = scaler.transform(X_val_np)

    # Conv1D expects (samples, steps, channels); add a single channel axis.
    X_train_reshaped = np.expand_dims(X_train_scaled, axis=-1).astype(np.float32)
    X_val_reshaped = np.expand_dims(X_val_scaled, axis=-1).astype(np.float32)
    input_shape = (X_train_reshaped.shape[1], 1)

    y_train_categorical = to_categorical(y_train_np, num_classes=num_classes)
    y_val_categorical = to_categorical(y_val_np, num_classes=num_classes)

    model_builders = (create_cnn_model_1, create_cnn_model_2, create_cnn_model_3)
    cnn_models = [build(input_shape, num_classes) for build in model_builders]

    # Softmax outputs with one-hot targets call for categorical cross-entropy.
    # For two classes it is numerically the same objective as the previous
    # 'binary_crossentropy', and it stays correct if num_classes grows.
    loss_function = 'categorical_crossentropy'
    for model in cnn_models:
        model.compile(optimizer=Adam(learning_rate=params['learning_rate']),
                      loss=loss_function, metrics=['accuracy'])

    train_start_time = time.time()
    logging.info("Fitting CNN Models...")
    for model in cnn_models:
        model.fit(X_train_reshaped, y_train_categorical, epochs=params['epochs'],
                  batch_size=params['batch_size'], verbose=0,
                  validation_data=(X_val_reshaped, y_val_categorical))
    training_time = time.time() - train_start_time
    logging.info(f"CNN Ensemble trained in {training_time:.2f} seconds.")

    predict_start_time = time.time()
    logging.info("Predicting with CNN Models...")
    cnn_predictions = [
        model.predict(X_val_reshaped, batch_size=params['batch_size'], verbose=0)
        for model in cnn_models
    ]
    ensemble_predictions = weighted_ensemble_predictions(cnn_predictions, ENSEMBLE_WEIGHTS)
    prediction_time = time.time() - predict_start_time
    logging.info(f"CNN Ensemble predicted in {prediction_time:.2f} seconds.")

    logging.info("Calculating Evaluation Metrics...")
    try:
        # Weighted-average class probabilities; ROC AUC must be computed from
        # scores, not from the hard arg-max labels.
        ensemble_proba = (
            sum(weight * proba for weight, proba in zip(ENSEMBLE_WEIGHTS, cnn_predictions))
            / float(sum(ENSEMBLE_WEIGHTS))
        )

        # One confusion matrix, always num_classes x num_classes thanks to labels=.
        cm = sk_confusion_matrix(y_val_np, ensemble_predictions, labels=all_possible_labels)
        true_neg, false_pos = cm[0, 0], cm[0, 1]
        false_neg, true_pos = cm[1, 0], cm[1, 1]
        negatives = true_neg + false_pos
        positives = false_neg + true_pos

        metrics = {
            'accuracy': accuracy_score(y_val_np, ensemble_predictions),
            'precision': precision_score(y_val_np, ensemble_predictions, average='macro', zero_division=0),
            'recall': recall_score(y_val_np, ensemble_predictions, average='macro', zero_division=0),
            'f1_score': f1_score(y_val_np, ensemble_predictions, average='macro', zero_division=0),
            'confusion_matrix': cm.tolist(),
            'roc_auc': _ensemble_roc_auc(y_val_np, ensemble_proba),
            # NaN rather than a divide-by-zero warning when a class is absent.
            'specificity': float(true_neg / negatives) if negatives else float('nan'),
            'sensitivity': float(true_pos / positives) if positives else float('nan'),
        }
        logging.info(f"Evaluation metrics: {metrics}")
    except Exception as e:
        # Best effort: a metric failure must not lose the timing results.
        logging.error(f"Error calculating metrics: {e}", exc_info=True)
        metrics = None

    logging.info("Cleaning up models...")
    del cnn_models, cnn_predictions
    tf.keras.backend.clear_session()
    gc.collect()

    logging.info(f"CNN Ensemble trained and evaluated in {time.time() - start_time:.2f} seconds.")

    return metrics, training_time, prediction_time

In [8]:
def load_and_preprocess_data_and_split(data_path, target_column='target'):
    """Load a CSV, impute missing feature values, and make a stratified 80/20 split.

    Args:
        data_path: Path of the CSV file to read.
        target_column: Name of the label column.

    Returns:
        Tuple ``(data, X_train, y_train, X_val, y_val)`` where ``data`` is the
        cleaned full frame (features plus target).

    Raises:
        KeyError: If ``target_column`` is not present in the file.
    """
    logging.info(f"Loading data from: {data_path}")

    data = pd.read_csv(data_path)
    # Drop the index column that a previous to_csv() may have written.
    data = data.drop(columns='Unnamed: 0', errors='ignore')

    if target_column not in data.columns:
        raise KeyError(f"Target column '{target_column}' not found in {data_path}")

    # A mean-imputed label is meaningless, so rows without a label are dropped
    # instead of being filled like the features.
    missing_target = data[target_column].isna()
    if missing_target.any():
        logging.warning(f"Dropping {int(missing_target.sum())} rows with missing '{target_column}'.")
        data = data.loc[~missing_target]

    # numeric_only=True keeps this working when the file has non-numeric
    # columns; the target is excluded so labels are never altered.
    # NOTE: the means are computed on all rows, before the split below.
    feature_means = data.mean(numeric_only=True).drop(labels=target_column, errors='ignore')
    data = data.fillna(feature_means)
    logging.info(f"Data shape after cleaning: {data.shape}")

    X = data.drop(target_column, axis=1)
    y = data[target_column]

    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    logging.info("Data loaded and preprocessed.")
    logging.info(f"Training set shape: {X_train.shape}, Validation set shape: {X_val.shape}")
    return data, X_train, y_train, X_val, y_val

In [9]:
def _mean_abs_shap_importance(shap_values):
    """Collapse SHAP output into one non-negative importance score per feature.

    Handles the three layouts the explainer output is checked for: a list of
    per-class 2-D arrays, a single 3-D array, or a single 2-D array.
    Non-finite scores are replaced by 0.
    """
    if isinstance(shap_values, list):
        if not shap_values:
            raise ValueError("SHAP explainer returned an empty list.")
        logging.info("SHAP values appear to be multiclass (list).")
        per_class = [np.abs(np.array(vals)).mean(axis=0) for vals in shap_values]
        importance = np.mean(per_class, axis=0)
    elif isinstance(shap_values, np.ndarray) and shap_values.ndim == 3:
        logging.info(f"SHAP values: Multiclass (3D Array), Shape: {shap_values.shape}")
        importance = np.abs(shap_values).mean(axis=(0, 2))
    elif isinstance(shap_values, np.ndarray):
        logging.info(f"SHAP values appear to be binary/regression (shape: {shap_values.shape}).")
        importance = np.abs(shap_values).mean(axis=0)
    else:
        raise TypeError(f"Expected shap_values to be list or numpy array, got {type(shap_values)}")

    if not np.isfinite(importance).all():
        logging.warning("NaN or Inf detected in shap_importance values.")
        importance = np.nan_to_num(importance, nan=0.0, posinf=0.0, neginf=0.0)
    return importance


def reduce_dimensionality(X, y, top_n_features=50, save_path='reduced_data.pkl'):
    """Select features ranked highly by both CatBoost importance and mean |SHAP|.

    A CatBoost classifier is fitted on ``(X, y)``. The ``top_n_features`` by
    its built-in importance and the ``top_n_features`` by mean absolute SHAP
    value are intersected. If SHAP is unavailable or fails, the CatBoost
    ranking alone is used. Results are cached at ``save_path``.

    Args:
        X: Feature DataFrame.
        y: Labels aligned with ``X``.
        top_n_features: Size of each ranking before intersecting.
        save_path: Pickle cache location; falsy disables caching.

    Returns:
        Tuple ``(X_reduced, selected_features, top_features_catboost,
        top_features_shap)``. The same four values are returned on a cache hit
        and on a fresh computation. ``selected_features`` keeps CatBoost's
        ranking order so the column order is reproducible.
    """
    start_time = time.time()
    logging.info("Starting dimensionality reduction...")

    if save_path and os.path.exists(save_path):
        logging.info(f"Loading reduced data from: {save_path}")
        # NOTE(security): unpickling can execute arbitrary code; only load
        # cache files written by this notebook.
        saved_data = pd.read_pickle(save_path)
        selected_features = list(saved_data['features'])
        known_columns = set(X.columns)
        if all(feature in known_columns for feature in selected_features):
            # Slice the current X so the rows always match what the caller
            # passed, even if the cache came from a differently split run.
            X_reduced = X[selected_features]
            logging.info(f"Loaded reduced data from {save_path} in {time.time() - start_time:.2f} seconds.")
            # Caches written before the rankings were stored lack these keys;
            # fall back to the selected features.
            return (
                X_reduced,
                selected_features,
                saved_data.get('top_features_catboost', selected_features),
                saved_data.get('top_features_shap', selected_features),
            )
        logging.warning(f"Cached features in {save_path} are not all present in X; recomputing.")

    from catboost import CatBoostClassifier, CatBoostError

    # early_stopping_rounds is kept from the original configuration; no
    # eval_set is passed to fit() below.
    catboost_params = dict(iterations=200,
                           depth=4,
                           learning_rate=0.1,
                           loss_function='Logloss',
                           verbose=1,
                           random_seed=42,
                           early_stopping_rounds=200)
    model = CatBoostClassifier(task_type='GPU', **catboost_params)
    try:
        model.fit(X, y, verbose=1)
    except CatBoostError as e:
        # Keep the notebook runnable on machines without a usable GPU.
        logging.warning(f"CatBoost GPU training failed ({e}); retrying on CPU.")
        model = CatBoostClassifier(task_type='CPU', **catboost_params)
        model.fit(X, y, verbose=1)

    importance_df = pd.DataFrame({'feature': X.columns,
                                  'importance': model.get_feature_importance()})
    top_features_catboost = importance_df.nlargest(top_n_features,
                                                   'importance')['feature'].tolist()

    logging.info("Calculating SHAP values...")
    try:
        # Imported inside the try so a missing shap package takes the same
        # CatBoost-only fallback as a SHAP runtime failure.
        import shap
        explainer = shap.TreeExplainer(model)
        shap_importance = _mean_abs_shap_importance(explainer.shap_values(X))
        logging.info(f"Length comparison: len(X.columns)={len(X.columns)}, len(shap_importance)={len(shap_importance)}")

        shap_importance_df = pd.DataFrame({'feature': X.columns,
                                           'importance': shap_importance})
        top_features_shap = shap_importance_df.nlargest(top_n_features, 'importance')['feature'].tolist()
        logging.info("SHAP values calculated.")
    except Exception as e:
        logging.error(f"Error calculating SHAP values: {e}")
        logging.warning("Using only CatBoost features due to SHAP error.")
        top_features_shap = top_features_catboost

    # Ordered intersection: a plain set intersection yields a hash-dependent
    # column order, which changes the input layout seen by the CNNs.
    shap_feature_set = set(top_features_shap)
    selected_features = [feature for feature in top_features_catboost if feature in shap_feature_set]
    if not selected_features:
        logging.warning("CatBoost and SHAP rankings share no features; using CatBoost features only.")
        selected_features = list(top_features_catboost)
    X_reduced = X[selected_features]

    if save_path:
        data_to_save = {'X': X_reduced,
                        'features': selected_features,
                        'top_features_catboost': top_features_catboost,
                        'top_features_shap': top_features_shap}
        pd.to_pickle(data_to_save, save_path)
        logging.info(f"Saved reduced data to {save_path}")

    logging.info(f"Dimensionality reduction completed in {time.time() - start_time:.2f} seconds.")
    return X_reduced, selected_features, top_features_catboost, top_features_shap

In [10]:
from sklearn.model_selection import StratifiedKFold
import seaborn as sns

if __name__ == '__main__':
    total_start_time = time.time()
    # The dataset location can be overridden through the environment; the
    # original absolute path remains the default.
    data_path = os.environ.get('MERGED_DATA_PATH', 'C:/Class/HK6/IPrj/Datasets/merged_3374_data.csv')

    data, X_train, y_train, X_test, y_test = load_and_preprocess_data_and_split(data_path)

    logging.info("--- CatBoost26 Model ---")
    # Fixed 26-probe subset; X_filtered is not used further in this cell.
    filtered_features = ["234632_x_at","209603_at","230527_at","229963_at","217901_at","214719_at","219513_s_at","210789_x_at","204777_s_at","203294_s_at","230753_at","242056_at","217680_x_at","214945_at","222312_s_at","214705_at","241688_at","241611_s_at","236952_at","207636_at","243659_at","226311_at","211772_x_at","244719_at","239766_at","243272_at"]
    X_filtered = X_train[filtered_features]

    # Cross-validation runs over all rows: the train/test parts returned by
    # the loader are stitched back together here.
    X = pd.concat([X_train, X_test], axis=0)
    y = pd.concat([y_train, y_test], axis=0)

    params_cnn = {
        'learning_rate': 0.001,
        'epochs': 50,
        'batch_size': 32,
    }

    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    all_metrics = []
    all_training_times = []
    all_prediction_times = []

    for fold, (train_index, val_index) in enumerate(kf.split(X, y)):
        logging.info(f"Fold {fold + 1}/{kf.get_n_splits()}")

        # .iloc with an index array already yields new frames, so no extra
        # .copy() of the (large) fold data is needed.
        X_fold_train, X_fold_val = X.iloc[train_index], X.iloc[val_index]
        y_fold_train, y_fold_val = y.iloc[train_index], y.iloc[val_index]

        # Feature selection is fitted on the training part of the fold only.
        reduction = reduce_dimensionality(
            X_fold_train, y_fold_train,
            save_path=f'cnn_ensemble_reduced_data_fold_{fold}.pkl'
        )
        # Index instead of unpacking a fixed arity: a cache hit may return
        # only (X_reduced, selected_features).
        X_fold_train_reduced, selected_features = reduction[0], reduction[1]
        top_features_catboost, top_features_shap = (
            (reduction[2], reduction[3]) if len(reduction) >= 4 else (None, None)
        )
        X_fold_val_reduced = X_fold_val[selected_features]

        fold_result = train_and_evaluate_cnn_ensemble(
            X_fold_train_reduced, y_fold_train, X_fold_val_reduced, y_fold_val,
            params_cnn
        )
        if fold_result is None:
            logging.warning(f"Fold {fold + 1}: training was skipped; fold excluded from the summary.")
            continue

        metrics_cnn, training_time, prediction_time = fold_result
        if metrics_cnn is None:
            logging.warning(f"Fold {fold + 1}: metrics unavailable; fold excluded from the averages.")
        else:
            all_metrics.append(metrics_cnn)
        all_training_times.append(training_time)
        all_prediction_times.append(prediction_time)

    avg_metrics = {}
    if not all_metrics:
        logging.error("No fold produced metrics; nothing to average.")
    else:
        for metric in all_metrics[0]:
            if metric != 'confusion_matrix':
                # nanmean: a metric that is undefined in one fold must not
                # turn the whole average into NaN.
                avg_metrics[metric] = float(np.nanmean([fold_metrics[metric] for fold_metrics in all_metrics]))
            else:
                # Confusion matrices are summed across folds, not averaged.
                avg_cm = np.sum([np.array(fold_metrics[metric]) for fold_metrics in all_metrics], axis=0)
                avg_metrics[metric] = avg_cm.tolist()

    logging.info("--- Cross-Validation Completed ---")
    logging.info(f"Average CNN Ensemble Metrics: {avg_metrics}")

    for fold, (train_t, pred_t) in enumerate(zip(all_training_times, all_prediction_times), 1):
        print(f"Fold {fold}: Training time = {train_t:.2f} s, Prediction time = {pred_t:.2f} s")

    if all_training_times:
        print(f"\nAverage training time: {np.mean(all_training_times):.2f} s")
        print(f"Average prediction time: {np.mean(all_prediction_times):.2f} s")

    total_end_time = time.time()
    total_duration = total_end_time - total_start_time
    logging.info(f"Total cross-validation execution time: {total_duration:.2f} seconds")

2025-07-18 09:52:51,088 - INFO - Loading data from: C:/Class/HK6/IPrj/Datasets/merged_3374_data.csv


(3374, 44755)


2025-07-18 09:55:06,518 - INFO - Data loaded and preprocessed.
2025-07-18 09:55:06,518 - INFO - Training set shape: (2699, 44754), Validation set shape: (675, 44754)
2025-07-18 09:55:06,629 - INFO - --- CatBoost26 Model ---
2025-07-18 09:55:09,002 - INFO - Fold 1/10
2025-07-18 09:55:17,275 - INFO - Starting dimensionality reduction...


0:	learn: 0.4878172	total: 374ms	remaining: 1m 14s
1:	learn: 0.3294566	total: 655ms	remaining: 1m 4s
2:	learn: 0.2392160	total: 4.58s	remaining: 5m
3:	learn: 0.1804072	total: 4.92s	remaining: 4m 1s
4:	learn: 0.1351421	total: 5.22s	remaining: 3m 23s
5:	learn: 0.1115542	total: 8.27s	remaining: 4m 27s
6:	learn: 0.0911198	total: 8.51s	remaining: 3m 54s
7:	learn: 0.0757133	total: 8.74s	remaining: 3m 29s
8:	learn: 0.0628936	total: 8.97s	remaining: 3m 10s
9:	learn: 0.0556788	total: 12.8s	remaining: 4m 3s
10:	learn: 0.0497294	total: 13.1s	remaining: 3m 45s
11:	learn: 0.0461480	total: 13.5s	remaining: 3m 30s
12:	learn: 0.0421591	total: 17.6s	remaining: 4m 12s
13:	learn: 0.0380767	total: 17.8s	remaining: 3m 57s
14:	learn: 0.0349028	total: 18.1s	remaining: 3m 43s
15:	learn: 0.0313259	total: 18.4s	remaining: 3m 31s
16:	learn: 0.0276910	total: 22.4s	remaining: 4m 1s
17:	learn: 0.0257083	total: 22.7s	remaining: 3m 49s
18:	learn: 0.0243686	total: 22.9s	remaining: 3m 38s
19:	learn: 0.0225466	total: 23

  from .autonotebook import tqdm as notebook_tqdm
2025-07-18 09:57:03,297 - INFO - Calculating SHAP values...
2025-07-18 09:57:24,061 - INFO - SHAP values appear to be binary/regression (shape: (3036, 44754)).
2025-07-18 09:57:24,643 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 09:57:24,643 - INFO - Shape of shap_importance: (44754,)
2025-07-18 09:57:24,643 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 09:57:24,643 - INFO - Shape of X.columns: (44754,)
2025-07-18 09:57:24,643 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 09:57:24,666 - INFO - SHAP values calculated.
2025-07-18 09:57:24,676 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_0.pkl
2025-07-18 09:57:24,678 - INFO - Dimensionality reduction completed in 127.42 seconds.
2025-07-18 09:57:24,689 - INFO - Training CNN Ensemble...
2025-07-18 09:57:24,917 - INFO - Fitting CNN Models...
2025-07-18 09:58:24,281 - INFO - CN





2025-07-18 09:58:27,574 - INFO - CNN Ensemble trained and evaluated in 62.89 seconds.
2025-07-18 09:58:27,574 - INFO - Fold 2/10
2025-07-18 09:58:36,838 - INFO - Starting dimensionality reduction...


0:	learn: 0.5159912	total: 2.42s	remaining: 8m 1s
1:	learn: 0.3681818	total: 2.63s	remaining: 4m 20s
2:	learn: 0.2648510	total: 2.86s	remaining: 3m 8s
3:	learn: 0.2018491	total: 3.08s	remaining: 2m 30s
4:	learn: 0.1455406	total: 3.35s	remaining: 2m 10s
5:	learn: 0.1167807	total: 3.77s	remaining: 2m 1s
6:	learn: 0.0941282	total: 4.14s	remaining: 1m 54s
7:	learn: 0.0788792	total: 4.44s	remaining: 1m 46s
8:	learn: 0.0686376	total: 4.73s	remaining: 1m 40s
9:	learn: 0.0620943	total: 5.02s	remaining: 1m 35s
10:	learn: 0.0541207	total: 5.27s	remaining: 1m 30s
11:	learn: 0.0471537	total: 5.48s	remaining: 1m 25s
12:	learn: 0.0427132	total: 5.7s	remaining: 1m 21s
13:	learn: 0.0382321	total: 5.92s	remaining: 1m 18s
14:	learn: 0.0340942	total: 6.13s	remaining: 1m 15s
15:	learn: 0.0308115	total: 6.35s	remaining: 1m 13s
16:	learn: 0.0288154	total: 6.66s	remaining: 1m 11s
17:	learn: 0.0263765	total: 6.95s	remaining: 1m 10s
18:	learn: 0.0242370	total: 7.2s	remaining: 1m 8s
19:	learn: 0.0225850	total: 

2025-07-18 09:59:47,958 - INFO - Calculating SHAP values...
2025-07-18 10:00:06,158 - INFO - SHAP values appear to be binary/regression (shape: (3036, 44754)).
2025-07-18 10:00:06,671 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:00:06,671 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:00:06,671 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:00:06,671 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:00:06,674 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:00:06,678 - INFO - SHAP values calculated.
2025-07-18 10:00:06,678 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_1.pkl
2025-07-18 10:00:06,678 - INFO - Dimensionality reduction completed in 89.84 seconds.
2025-07-18 10:00:06,945 - INFO - Training CNN Ensemble...
2025-07-18 10:00:07,054 - INFO - Fitting CNN Models...
2025-07-18 10:01:53,449 - INFO - CNN Ensemble trained in 106.39 seconds.
2025-07-18 10

0:	learn: 0.4951475	total: 497ms	remaining: 1m 38s
1:	learn: 0.3457989	total: 796ms	remaining: 1m 18s
2:	learn: 0.2551226	total: 1.07s	remaining: 1m 10s
3:	learn: 0.1914008	total: 1.39s	remaining: 1m 7s
4:	learn: 0.1398764	total: 1.67s	remaining: 1m 5s
5:	learn: 0.1115756	total: 1.97s	remaining: 1m 3s
6:	learn: 0.0936625	total: 2.29s	remaining: 1m 3s
7:	learn: 0.0793864	total: 2.58s	remaining: 1m 1s
8:	learn: 0.0707729	total: 2.86s	remaining: 1m
9:	learn: 0.0597125	total: 3.16s	remaining: 1m
10:	learn: 0.0524479	total: 3.44s	remaining: 59.1s
11:	learn: 0.0447361	total: 3.74s	remaining: 58.5s
12:	learn: 0.0402628	total: 4.01s	remaining: 57.7s
13:	learn: 0.0366277	total: 4.32s	remaining: 57.4s
14:	learn: 0.0337526	total: 4.59s	remaining: 56.6s
15:	learn: 0.0309158	total: 4.91s	remaining: 56.4s
16:	learn: 0.0295542	total: 5.2s	remaining: 56s
17:	learn: 0.0274506	total: 5.51s	remaining: 55.7s
18:	learn: 0.0254136	total: 5.78s	remaining: 55.1s
19:	learn: 0.0230975	total: 6.1s	remaining: 54.

2025-07-18 10:03:19,897 - INFO - Calculating SHAP values...
2025-07-18 10:04:01,091 - INFO - SHAP values appear to be binary/regression (shape: (3036, 44754)).
2025-07-18 10:04:02,108 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:04:02,108 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:04:02,108 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:04:02,108 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:04:02,108 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:04:02,124 - INFO - SHAP values calculated.
2025-07-18 10:04:02,157 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_2.pkl
2025-07-18 10:04:02,157 - INFO - Dimensionality reduction completed in 117.52 seconds.
2025-07-18 10:04:02,601 - INFO - Training CNN Ensemble...
2025-07-18 10:04:02,967 - INFO - Fitting CNN Models...
2025-07-18 10:05:24,107 - INFO - CNN Ensemble trained in 81.14 seconds.
2025-07-18 10

0:	learn: 0.4890783	total: 401ms	remaining: 1m 19s
1:	learn: 0.3342391	total: 610ms	remaining: 1m
2:	learn: 0.2368283	total: 816ms	remaining: 53.6s
3:	learn: 0.1788971	total: 1.03s	remaining: 50.7s
4:	learn: 0.1277930	total: 1.24s	remaining: 48.5s
5:	learn: 0.0976945	total: 1.45s	remaining: 46.8s
6:	learn: 0.0812907	total: 1.65s	remaining: 45.5s
7:	learn: 0.0700338	total: 1.88s	remaining: 45.2s
8:	learn: 0.0607785	total: 2.11s	remaining: 44.8s
9:	learn: 0.0550846	total: 2.32s	remaining: 44s
10:	learn: 0.0493406	total: 2.54s	remaining: 43.6s
11:	learn: 0.0451952	total: 2.74s	remaining: 43s
12:	learn: 0.0412090	total: 2.99s	remaining: 43s
13:	learn: 0.0373375	total: 3.2s	remaining: 42.6s
14:	learn: 0.0345083	total: 3.4s	remaining: 42s
15:	learn: 0.0316360	total: 3.62s	remaining: 41.6s
16:	learn: 0.0294512	total: 3.84s	remaining: 41.4s
17:	learn: 0.0259679	total: 4.07s	remaining: 41.1s
18:	learn: 0.0230734	total: 4.27s	remaining: 40.7s
19:	learn: 0.0214097	total: 4.49s	remaining: 40.4s
20

2025-07-18 10:06:23,782 - INFO - Calculating SHAP values...
2025-07-18 10:06:42,698 - INFO - SHAP values appear to be binary/regression (shape: (3036, 44754)).
2025-07-18 10:06:43,445 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:06:43,445 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:06:43,445 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:06:43,445 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:06:43,445 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:06:43,460 - INFO - SHAP values calculated.
2025-07-18 10:06:43,473 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_3.pkl
2025-07-18 10:06:43,475 - INFO - Dimensionality reduction completed in 72.26 seconds.
2025-07-18 10:06:43,758 - INFO - Training CNN Ensemble...
2025-07-18 10:06:43,921 - INFO - Fitting CNN Models...
2025-07-18 10:07:53,026 - INFO - CNN Ensemble trained in 69.11 seconds.
2025-07-18 10:

0:	learn: 0.4763791	total: 427ms	remaining: 1m 24s
1:	learn: 0.3333178	total: 688ms	remaining: 1m 8s
2:	learn: 0.2486264	total: 936ms	remaining: 1m 1s
3:	learn: 0.1846759	total: 1.19s	remaining: 58.1s
4:	learn: 0.1422757	total: 1.44s	remaining: 56.3s
5:	learn: 0.1114990	total: 1.68s	remaining: 54.4s
6:	learn: 0.0917384	total: 1.95s	remaining: 53.8s
7:	learn: 0.0731718	total: 2.18s	remaining: 52.3s
8:	learn: 0.0625680	total: 2.41s	remaining: 51.2s
9:	learn: 0.0528602	total: 2.64s	remaining: 50.3s
10:	learn: 0.0466759	total: 2.86s	remaining: 49.1s
11:	learn: 0.0427000	total: 3.1s	remaining: 48.6s
12:	learn: 0.0387658	total: 3.35s	remaining: 48.1s
13:	learn: 0.0349138	total: 3.57s	remaining: 47.5s
14:	learn: 0.0329327	total: 3.79s	remaining: 46.7s
15:	learn: 0.0304728	total: 4.08s	remaining: 46.9s
16:	learn: 0.0284821	total: 4.34s	remaining: 46.7s
17:	learn: 0.0268783	total: 4.6s	remaining: 46.5s
18:	learn: 0.0250280	total: 4.92s	remaining: 46.8s
19:	learn: 0.0230618	total: 5.2s	remaining

2025-07-18 10:09:07,530 - INFO - Calculating SHAP values...
2025-07-18 10:09:29,864 - INFO - SHAP values appear to be binary/regression (shape: (3037, 44754)).
2025-07-18 10:09:30,620 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:09:30,622 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:09:30,623 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:09:30,623 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:09:30,624 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:09:30,634 - INFO - SHAP values calculated.
2025-07-18 10:09:30,639 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_4.pkl
2025-07-18 10:09:30,639 - INFO - Dimensionality reduction completed in 91.13 seconds.
2025-07-18 10:09:30,909 - INFO - Training CNN Ensemble...
2025-07-18 10:09:31,071 - INFO - Fitting CNN Models...
2025-07-18 10:11:05,654 - INFO - CNN Ensemble trained in 94.58 seconds.
2025-07-18 10:

0:	learn: 0.4935212	total: 443ms	remaining: 1m 28s
1:	learn: 0.3387461	total: 658ms	remaining: 1m 5s
2:	learn: 0.2483694	total: 901ms	remaining: 59.1s
3:	learn: 0.1809107	total: 1.11s	remaining: 54.4s
4:	learn: 0.1418665	total: 1.31s	remaining: 51.3s
5:	learn: 0.1088018	total: 1.54s	remaining: 49.9s
6:	learn: 0.0931420	total: 1.77s	remaining: 48.9s
7:	learn: 0.0752948	total: 1.98s	remaining: 47.5s
8:	learn: 0.0619456	total: 2.2s	remaining: 46.8s
9:	learn: 0.0550095	total: 2.41s	remaining: 45.9s
10:	learn: 0.0499049	total: 2.62s	remaining: 45.1s
11:	learn: 0.0455867	total: 2.85s	remaining: 44.6s
12:	learn: 0.0410493	total: 3.06s	remaining: 44s
13:	learn: 0.0359031	total: 3.27s	remaining: 43.5s
14:	learn: 0.0324876	total: 3.5s	remaining: 43.1s
15:	learn: 0.0296961	total: 3.7s	remaining: 42.6s
16:	learn: 0.0283042	total: 3.9s	remaining: 42s
17:	learn: 0.0269316	total: 4.13s	remaining: 41.8s
18:	learn: 0.0249321	total: 4.36s	remaining: 41.5s
19:	learn: 0.0221007	total: 4.59s	remaining: 41.

2025-07-18 10:12:29,106 - INFO - Calculating SHAP values...
2025-07-18 10:12:48,418 - INFO - SHAP values appear to be binary/regression (shape: (3037, 44754)).
2025-07-18 10:12:49,157 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:12:49,157 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:12:49,157 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:12:49,157 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:12:49,157 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:12:49,157 - INFO - SHAP values calculated.
2025-07-18 10:12:49,176 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_5.pkl
2025-07-18 10:12:49,178 - INFO - Dimensionality reduction completed in 96.70 seconds.
2025-07-18 10:12:49,370 - INFO - Training CNN Ensemble...
2025-07-18 10:12:49,483 - INFO - Fitting CNN Models...
2025-07-18 10:15:12,963 - INFO - CNN Ensemble trained in 143.48 seconds.
2025-07-18 10

0:	learn: 0.4867330	total: 476ms	remaining: 1m 34s
1:	learn: 0.3442705	total: 793ms	remaining: 1m 18s
2:	learn: 0.2516065	total: 1.12s	remaining: 1m 13s
3:	learn: 0.1936998	total: 1.42s	remaining: 1m 9s
4:	learn: 0.1438140	total: 1.77s	remaining: 1m 8s
5:	learn: 0.1168810	total: 2.06s	remaining: 1m 6s
6:	learn: 0.0942434	total: 2.4s	remaining: 1m 6s
7:	learn: 0.0733261	total: 2.7s	remaining: 1m 4s
8:	learn: 0.0600183	total: 3.06s	remaining: 1m 4s
9:	learn: 0.0526408	total: 3.4s	remaining: 1m 4s
10:	learn: 0.0463510	total: 3.72s	remaining: 1m 3s
11:	learn: 0.0394077	total: 4.05s	remaining: 1m 3s
12:	learn: 0.0366196	total: 4.38s	remaining: 1m 2s
13:	learn: 0.0331056	total: 4.74s	remaining: 1m 2s
14:	learn: 0.0299857	total: 5.12s	remaining: 1m 3s
15:	learn: 0.0280755	total: 5.43s	remaining: 1m 2s
16:	learn: 0.0260079	total: 5.74s	remaining: 1m 1s
17:	learn: 0.0233894	total: 6.11s	remaining: 1m 1s
18:	learn: 0.0206631	total: 6.59s	remaining: 1m 2s
19:	learn: 0.0196211	total: 6.96s	remaini

2025-07-18 10:17:57,122 - INFO - Calculating SHAP values...
2025-07-18 10:18:16,379 - INFO - SHAP values appear to be binary/regression (shape: (3037, 44754)).
2025-07-18 10:18:16,956 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:18:16,956 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:18:16,956 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:18:16,971 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:18:16,971 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:18:16,978 - INFO - SHAP values calculated.
2025-07-18 10:18:16,985 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_6.pkl
2025-07-18 10:18:16,986 - INFO - Dimensionality reduction completed in 169.66 seconds.
2025-07-18 10:18:17,216 - INFO - Training CNN Ensemble...
2025-07-18 10:18:17,325 - INFO - Fitting CNN Models...
2025-07-18 10:19:54,437 - INFO - CNN Ensemble trained in 97.11 seconds.
2025-07-18 10

0:	learn: 0.4895717	total: 453ms	remaining: 1m 30s
1:	learn: 0.3457973	total: 725ms	remaining: 1m 11s
2:	learn: 0.2545460	total: 984ms	remaining: 1m 4s
3:	learn: 0.1884798	total: 1.29s	remaining: 1m 3s
4:	learn: 0.1450466	total: 1.57s	remaining: 1m 1s
5:	learn: 0.1149338	total: 1.86s	remaining: 1m
6:	learn: 0.0933028	total: 2.14s	remaining: 59.1s
7:	learn: 0.0769289	total: 2.4s	remaining: 57.7s
8:	learn: 0.0667301	total: 2.67s	remaining: 56.6s
9:	learn: 0.0559543	total: 2.95s	remaining: 56s
10:	learn: 0.0495417	total: 3.23s	remaining: 55.4s
11:	learn: 0.0449514	total: 3.48s	remaining: 54.5s
12:	learn: 0.0423064	total: 3.77s	remaining: 54.2s
13:	learn: 0.0393403	total: 4.02s	remaining: 53.5s
14:	learn: 0.0365648	total: 4.33s	remaining: 53.4s
15:	learn: 0.0335230	total: 4.58s	remaining: 52.6s
16:	learn: 0.0312299	total: 4.89s	remaining: 52.6s
17:	learn: 0.0289189	total: 5.14s	remaining: 52s
18:	learn: 0.0268020	total: 5.45s	remaining: 51.9s
19:	learn: 0.0253473	total: 5.71s	remaining: 51

2025-07-18 10:22:14,239 - INFO - Calculating SHAP values...
2025-07-18 10:22:43,679 - INFO - SHAP values appear to be binary/regression (shape: (3037, 44754)).
2025-07-18 10:22:44,332 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:22:44,332 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:22:44,332 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:22:44,332 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:22:44,338 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:22:44,348 - INFO - SHAP values calculated.
2025-07-18 10:22:44,367 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_7.pkl
2025-07-18 10:22:44,368 - INFO - Dimensionality reduction completed in 154.58 seconds.
2025-07-18 10:22:44,637 - INFO - Training CNN Ensemble...
2025-07-18 10:22:44,814 - INFO - Fitting CNN Models...
2025-07-18 10:25:18,113 - INFO - CNN Ensemble trained in 153.30 seconds.
2025-07-18 1

0:	learn: 0.4729150	total: 438ms	remaining: 1m 27s
1:	learn: 0.3043730	total: 680ms	remaining: 1m 7s
2:	learn: 0.2308962	total: 912ms	remaining: 59.9s
3:	learn: 0.1724635	total: 1.14s	remaining: 56.1s
4:	learn: 0.1337870	total: 1.37s	remaining: 53.5s
5:	learn: 0.1037084	total: 1.59s	remaining: 51.4s
6:	learn: 0.0877794	total: 1.81s	remaining: 49.8s
7:	learn: 0.0770721	total: 2.02s	remaining: 48.6s
8:	learn: 0.0674856	total: 2.24s	remaining: 47.6s
9:	learn: 0.0584791	total: 2.46s	remaining: 46.7s
10:	learn: 0.0531049	total: 2.68s	remaining: 46.1s
11:	learn: 0.0465897	total: 2.9s	remaining: 45.4s
12:	learn: 0.0420545	total: 3.11s	remaining: 44.8s
13:	learn: 0.0381824	total: 3.34s	remaining: 44.3s
14:	learn: 0.0343771	total: 3.56s	remaining: 43.9s
15:	learn: 0.0308688	total: 3.78s	remaining: 43.5s
16:	learn: 0.0290251	total: 4.01s	remaining: 43.1s
17:	learn: 0.0263685	total: 4.23s	remaining: 42.8s
18:	learn: 0.0232120	total: 4.45s	remaining: 42.4s
19:	learn: 0.0223170	total: 4.69s	remaini

2025-07-18 10:27:45,590 - INFO - Calculating SHAP values...
2025-07-18 10:28:21,096 - INFO - SHAP values appear to be binary/regression (shape: (3037, 44754)).
2025-07-18 10:28:22,035 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:28:22,035 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:28:22,051 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:28:22,051 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:28:22,051 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:28:22,114 - INFO - SHAP values calculated.
2025-07-18 10:28:22,130 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_8.pkl
2025-07-18 10:28:22,146 - INFO - Dimensionality reduction completed in 170.73 seconds.
2025-07-18 10:28:22,571 - INFO - Training CNN Ensemble...
2025-07-18 10:28:22,959 - INFO - Fitting CNN Models...
2025-07-18 10:31:08,622 - INFO - CNN Ensemble trained in 165.66 seconds.
2025-07-18 1

0:	learn: 0.5002450	total: 326ms	remaining: 1m 4s
1:	learn: 0.3534442	total: 507ms	remaining: 50.2s
2:	learn: 0.2567063	total: 688ms	remaining: 45.2s
3:	learn: 0.1784751	total: 864ms	remaining: 42.4s
4:	learn: 0.1429671	total: 1.04s	remaining: 40.7s
5:	learn: 0.1166317	total: 1.22s	remaining: 39.6s
6:	learn: 0.0957286	total: 1.41s	remaining: 38.9s
7:	learn: 0.0802093	total: 1.59s	remaining: 38.2s
8:	learn: 0.0661870	total: 1.77s	remaining: 37.7s
9:	learn: 0.0566937	total: 1.95s	remaining: 37.1s
10:	learn: 0.0494016	total: 2.15s	remaining: 36.9s
11:	learn: 0.0436243	total: 2.34s	remaining: 36.6s
12:	learn: 0.0403446	total: 2.52s	remaining: 36.3s
13:	learn: 0.0374289	total: 2.71s	remaining: 36s
14:	learn: 0.0350064	total: 2.9s	remaining: 35.7s
15:	learn: 0.0322327	total: 3.08s	remaining: 35.5s
16:	learn: 0.0300120	total: 3.27s	remaining: 35.2s
17:	learn: 0.0274143	total: 3.46s	remaining: 35s
18:	learn: 0.0250130	total: 3.65s	remaining: 34.8s
19:	learn: 0.0237026	total: 3.84s	remaining: 3

2025-07-18 10:31:59,159 - INFO - Calculating SHAP values...
2025-07-18 10:32:16,264 - INFO - SHAP values appear to be binary/regression (shape: (3037, 44754)).
2025-07-18 10:32:16,701 - INFO - Type of shap_importance: <class 'numpy.ndarray'>
2025-07-18 10:32:16,701 - INFO - Shape of shap_importance: (44754,)
2025-07-18 10:32:16,701 - INFO - Type of X.columns: <class 'pandas.core.indexes.base.Index'>
2025-07-18 10:32:16,701 - INFO - Shape of X.columns: (44754,)
2025-07-18 10:32:16,701 - INFO - Length comparison: len(X.columns)=44754, len(shap_importance)=44754
2025-07-18 10:32:16,712 - INFO - SHAP values calculated.
2025-07-18 10:32:16,724 - INFO - Saved reduced data to cnn_ensemble_reduced_data_fold_9.pkl
2025-07-18 10:32:16,725 - INFO - Dimensionality reduction completed in 61.84 seconds.
2025-07-18 10:32:16,867 - INFO - Training CNN Ensemble...
2025-07-18 10:32:16,964 - INFO - Fitting CNN Models...
2025-07-18 10:33:19,907 - INFO - CNN Ensemble trained in 62.94 seconds.
2025-07-18 10:

Fold 1: Training time = 59.36 s, Prediction time = 0.60 s
Fold 2: Training time = 106.39 s, Prediction time = 1.26 s
Fold 3: Training time = 81.14 s, Prediction time = 0.58 s
Fold 4: Training time = 69.11 s, Prediction time = 0.78 s
Fold 5: Training time = 94.58 s, Prediction time = 0.67 s
Fold 6: Training time = 143.48 s, Prediction time = 2.10 s
Fold 7: Training time = 97.11 s, Prediction time = 1.92 s
Fold 8: Training time = 153.30 s, Prediction time = 1.82 s
Fold 9: Training time = 165.66 s, Prediction time = 0.69 s
Fold 10: Training time = 62.94 s, Prediction time = 0.83 s

Average training time: 103.31 s
Average prediction time: 1.12 s
