In [1]:
import pandas as pd
import sklearn as sklearn
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, recall_score, f1_score, precision_recall_curve, 
    auc, matthews_corrcoef, balanced_accuracy_score
)
from imblearn.metrics import geometric_mean_score
import joblib 
import tensorflow as tf

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from imblearn.ensemble import BalancedRandomForestClassifier

import matplotlib.pyplot as plt
import os
import seaborn as sns

In [2]:
df = pd.read_csv('processed_ctg_results.csv')
X = df[['LTV', 'baseline', 'std_FHR']]
y = df['target']

In [3]:
# Load the saved splits
all_outer_splits = joblib.load('outer_splits.pkl')

In [4]:
X

Unnamed: 0,LTV,baseline,std_FHR
0,16.509977,136.896563,17.552659
1,17.251002,149.519386,20.116324
2,15.700698,123.414978,9.082117
3,16.558716,137.675271,17.507201
4,15.488042,120.472210,16.209344
...,...,...,...
535,15.811623,125.181141,9.921944
536,15.694979,123.849656,19.191513
537,17.518647,153.592949,9.616529
538,15.180447,116.156978,19.246689


In [5]:
def run_model_cv(model_name, model, param_grid, X, y, all_outer_splits):
    """
    Runs nested cross-validation for a single model and returns a DataFrame of results.
    
    Parameters:
        model_name (str): Name of the model.
        model: Scikit-learn model instance.
        param_grid (dict): Hyperparameter grid for GridSearchCV.
        X (pd.DataFrame): Feature matrix.
        y (pd.Series): Target variable.
        all_outer_splits (list): Precomputed outer splits.
    
    Returns:
        pd.DataFrame: Results of cross-validation for the given model.
    """
    results = []
    
    # Create directory to save models
    model_dir = f"{model_name}_models"
    os.makedirs(model_dir, exist_ok=True)
    
    for split_num, outer_splits in enumerate(all_outer_splits, start=1):
        print(f"\nProcessing Outer Split {split_num} for {model_name}...")
        
        for fold_num, (train_idx, test_idx) in enumerate(outer_splits, start=1):
            print(f"Processing Fold {fold_num} of Split {split_num}...")
            
            # Split data
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            y_train, y_test = y.iloc[train_idx].values.ravel(), y.iloc[test_idx].values.ravel()
            
            # Inner cross-validation
            inner_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
            
            # Define pipeline
            pipeline = Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', model)
            ])
            
            # Grid search
            grid_search = GridSearchCV(
                estimator=pipeline,
                param_grid=param_grid,
                cv=inner_cv,
                scoring='balanced_accuracy',
                n_jobs=-1
            )
            grid_search.fit(X_train, y_train)
            best_model = grid_search.best_estimator_

            # Save the best model
            model_filename = os.path.join(
                model_dir,
                f"{model_name}_split{split_num}_fold{fold_num}.joblib"
            )
            joblib.dump(best_model, model_filename)
            
            # Make predictions
            y_pred = best_model.predict(X_test)
            y_pred_proba = best_model.predict_proba(X_test)[:, 1]
            
            # Compute metrics
            precision, recall_curve, _ = precision_recall_curve(y_test, y_pred_proba)
            pr_auc = auc(recall_curve, precision)
            
            # Store results
            fold_results = {
                'split': split_num,
                'fold': fold_num,
                'model': model_name,
                'best_params': grid_search.best_params_,
                'accuracy': accuracy_score(y_test, y_pred),
                'recall': recall_score(y_test, y_pred),
                'f1_score': f1_score(y_test, y_pred),
                'mcc': matthews_corrcoef(y_test, y_pred),
                'balanced_accuracy': balanced_accuracy_score(y_test, y_pred),
                'g_mean': geometric_mean_score(y_test, y_pred),
                'pr_auc': pr_auc
            }
            results.append(fold_results)
            
            print(f"{model_name} - Split {split_num}, Fold {fold_num}: "
                  f"Best Params: {grid_search.best_params_} | "
                  f"F1: {fold_results['f1_score']:.3f} | "
                  f"Recall: {fold_results['recall']:.3f} | "
                  f"PR-AUC: {fold_results['pr_auc']:.3f} | "
                  f"MCC: {fold_results['mcc']:.3f}")
    
    # Convert results to DataFrame
    df_results = pd.DataFrame(results)
    
    # Calculate mean values and append as a new row
    mean_values = df_results.drop(columns=['split', 'fold', 'model', 'best_params']).mean().to_dict()
    mean_values.update({'split': 'mean', 'fold': 'mean', 'model': model_name, 'best_params': 'N/A'})
    df_results = pd.concat([df_results, pd.DataFrame([mean_values])], ignore_index=True)
    
    # Save results
    df_results.to_csv(f'{model_name.lower()}_cv_results.csv', index=False)
    print(f"Results for {model_name} saved to {model_name.lower()}_cv_results.csv")
    
    return df_results

In [3]:
def process_model_results(file_name):
    try:
        df = pd.read_csv(file_name)
        
        # Select numerical columns for aggregation
        numerical_cols = ["accuracy", "recall", "f1_score", "mcc", "balanced_accuracy", "g_mean", "pr_auc"]
        
        # Compute mean per split
        summary_df = df.groupby("split")[numerical_cols].mean()
        
        # Reset index for better readability
        summary_df.reset_index(inplace=True)
        
        return summary_df
    except FileNotFoundError:
        print(f"File {file_name} not found. Ensure the model ran successfully.")
        return None

In [7]:
# Load the saved outer splits
all_outer_splits = joblib.load('outer_splits.pkl')

# Define models and their hyperparameter grids
models = {
    "BalancedRandomForest": (BalancedRandomForestClassifier(random_state=42),
    {
        'classifier__n_estimators': [50, 100],
        'classifier__max_depth': [None, 10, 20, 50],
        'classifier__min_samples_split': [2, 5, 10]
    } ),
    "SVM": (SVC(probability=True, random_state=42), {
        'classifier__C': [0.1, 1, 10, 100],
        'classifier__kernel': ['linear', 'rbf', 'poly'],
        'classifier__gamma': ['scale', 'auto'],
        'classifier__class_weight': [None, 'balanced'],
    }),
    "LogisticRegression": (LogisticRegression(random_state=42, max_iter=1000), {
        'classifier__C': [0.01, 0.1, 1, 10, 100],
        'classifier__penalty': ['l1', 'l2'],
        'classifier__solver': ['liblinear', 'saga'],
        'classifier__class_weight': [None, 'balanced'],
    })
}

In [8]:
# Run cross-validation for each model
for model_name, (model, param_grid) in models.items():
    run_model_cv(model_name, model, param_grid, X, y, all_outer_splits)


Processing Outer Split 1 for BalancedRandomForest...
Processing Fold 1 of Split 1...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 1, Fold 1: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.235 | Recall: 0.600 | PR-AUC: 0.214 | MCC: 0.145
Processing Fold 2 of Split 1...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 1, Fold 2: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.203 | Recall: 0.600 | PR-AUC: 0.145 | MCC: 0.094
Processing Fold 3 of Split 1...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 1, Fold 3: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100} | F1: 0.240 | Recall: 0.545 | PR-AUC: 0.119 | MCC: 0.129
Processing Fold 4 of Split 1...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 1, Fold 4: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100} | F1: 0.286 | Recall: 0.727 | PR-AUC: 0.150 | MCC: 0.212
Processing Fold 5 of Split 1...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 1, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.273 | Recall: 0.818 | PR-AUC: 0.142 | MCC: 0.208

Processing Outer Split 2 for BalancedRandomForest...
Processing Fold 1 of Split 2...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 2, Fold 1: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.328 | Recall: 1.000 | PR-AUC: 0.158 | MCC: 0.338
Processing Fold 2 of Split 2...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 2, Fold 2: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100} | F1: 0.182 | Recall: 0.400 | PR-AUC: 0.126 | MCC: 0.059
Processing Fold 3 of Split 2...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 2, Fold 3: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 50} | F1: 0.305 | Recall: 0.818 | PR-AUC: 0.197 | MCC: 0.253
Processing Fold 4 of Split 2...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 2, Fold 4: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.264 | Recall: 0.636 | PR-AUC: 0.138 | MCC: 0.171
Processing Fold 5 of Split 2...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 2, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100} | F1: 0.231 | Recall: 0.545 | PR-AUC: 0.179 | MCC: 0.115

Processing Outer Split 3 for BalancedRandomForest...
Processing Fold 1 of Split 3...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 3, Fold 1: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 50} | F1: 0.267 | Recall: 0.800 | PR-AUC: 0.437 | MCC: 0.216
Processing Fold 2 of Split 3...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 3, Fold 2: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 50} | F1: 0.182 | Recall: 0.500 | PR-AUC: 0.120 | MCC: 0.054
Processing Fold 3 of Split 3...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 3, Fold 3: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100} | F1: 0.291 | Recall: 0.727 | PR-AUC: 0.125 | MCC: 0.219
Processing Fold 4 of Split 3...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 3, Fold 4: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.302 | Recall: 0.727 | PR-AUC: 0.191 | MCC: 0.234
Processing Fold 5 of Split 3...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 3, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.314 | Recall: 0.727 | PR-AUC: 0.135 | MCC: 0.249

Processing Outer Split 4 for BalancedRandomForest...
Processing Fold 1 of Split 4...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 4, Fold 1: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100} | F1: 0.276 | Recall: 0.800 | PR-AUC: 0.174 | MCC: 0.229
Processing Fold 2 of Split 4...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 4, Fold 2: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.276 | Recall: 0.800 | PR-AUC: 0.250 | MCC: 0.229
Processing Fold 3 of Split 4...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 4, Fold 3: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.179 | Recall: 0.455 | PR-AUC: 0.122 | MCC: 0.026
Processing Fold 4 of Split 4...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 4, Fold 4: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 50} | F1: 0.255 | Recall: 0.636 | PR-AUC: 0.226 | MCC: 0.157
Processing Fold 5 of Split 4...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 4, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.087 | Recall: 0.182 | PR-AUC: 0.092 | MCC: -0.102

Processing Outer Split 5 for BalancedRandomForest...
Processing Fold 1 of Split 5...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 5, Fold 1: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100} | F1: 0.250 | Recall: 0.700 | PR-AUC: 0.122 | MCC: 0.177
Processing Fold 2 of Split 5...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 5, Fold 2: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.208 | Recall: 0.500 | PR-AUC: 0.106 | MCC: 0.099
Processing Fold 3 of Split 5...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 5, Fold 3: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.226 | Recall: 0.545 | PR-AUC: 0.107 | MCC: 0.108
Processing Fold 4 of Split 5...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 5, Fold 4: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.250 | Recall: 0.636 | PR-AUC: 0.177 | MCC: 0.150
Processing Fold 5 of Split 5...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 5, Fold 5: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 50} | F1: 0.286 | Recall: 0.636 | PR-AUC: 0.154 | MCC: 0.201

Processing Outer Split 6 for BalancedRandomForest...
Processing Fold 1 of Split 6...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 6, Fold 1: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.197 | Recall: 0.600 | PR-AUC: 0.116 | MCC: 0.082
Processing Fold 2 of Split 6...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 6, Fold 2: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.314 | Recall: 0.800 | PR-AUC: 0.213 | MCC: 0.277
Processing Fold 3 of Split 6...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 6, Fold 3: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100} | F1: 0.254 | Recall: 0.727 | PR-AUC: 0.236 | MCC: 0.166
Processing Fold 4 of Split 6...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 6, Fold 4: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 50} | F1: 0.340 | Recall: 0.727 | PR-AUC: 0.252 | MCC: 0.281
Processing Fold 5 of Split 6...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 6, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.200 | Recall: 0.455 | PR-AUC: 0.133 | MCC: 0.066

Processing Outer Split 7 for BalancedRandomForest...
Processing Fold 1 of Split 7...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 7, Fold 1: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.264 | Recall: 0.700 | PR-AUC: 0.145 | MCC: 0.197
Processing Fold 2 of Split 7...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 7, Fold 2: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.226 | Recall: 0.700 | PR-AUC: 0.123 | MCC: 0.140
Processing Fold 3 of Split 7...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 7, Fold 3: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 50} | F1: 0.333 | Recall: 0.818 | PR-AUC: 0.246 | MCC: 0.289
Processing Fold 4 of Split 7...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 7, Fold 4: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100} | F1: 0.203 | Recall: 0.545 | PR-AUC: 0.128 | MCC: 0.068
Processing Fold 5 of Split 7...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 7, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.286 | Recall: 0.727 | PR-AUC: 0.219 | MCC: 0.212

Processing Outer Split 8 for BalancedRandomForest...
Processing Fold 1 of Split 8...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 8, Fold 1: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.320 | Recall: 0.800 | PR-AUC: 0.263 | MCC: 0.284
Processing Fold 2 of Split 8...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 8, Fold 2: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.196 | Recall: 0.500 | PR-AUC: 0.166 | MCC: 0.079
Processing Fold 3 of Split 8...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 8, Fold 3: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100} | F1: 0.204 | Recall: 0.455 | PR-AUC: 0.163 | MCC: 0.072
Processing Fold 4 of Split 8...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 8, Fold 4: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 50} | F1: 0.136 | Recall: 0.364 | PR-AUC: 0.134 | MCC: -0.055
Processing Fold 5 of Split 8...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 8, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 50} | F1: 0.207 | Recall: 0.545 | PR-AUC: 0.127 | MCC: 0.075

Processing Outer Split 9 for BalancedRandomForest...
Processing Fold 1 of Split 9...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 9, Fold 1: Best Params: {'classifier__max_depth': 10, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100} | F1: 0.233 | Recall: 0.700 | PR-AUC: 0.126 | MCC: 0.152
Processing Fold 2 of Split 9...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 9, Fold 2: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.231 | Recall: 0.600 | PR-AUC: 0.210 | MCC: 0.138
Processing Fold 3 of Split 9...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 9, Fold 3: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 50} | F1: 0.179 | Recall: 0.455 | PR-AUC: 0.133 | MCC: 0.026
Processing Fold 4 of Split 9...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 9, Fold 4: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.264 | Recall: 0.636 | PR-AUC: 0.155 | MCC: 0.171
Processing Fold 5 of Split 9...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 9, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.304 | Recall: 0.636 | PR-AUC: 0.238 | MCC: 0.225

Processing Outer Split 10 for BalancedRandomForest...
Processing Fold 1 of Split 10...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 10, Fold 1: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 50} | F1: 0.250 | Recall: 0.700 | PR-AUC: 0.364 | MCC: 0.177
Processing Fold 2 of Split 10...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 10, Fold 2: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.281 | Recall: 0.800 | PR-AUC: 0.173 | MCC: 0.235
Processing Fold 3 of Split 10...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 10, Fold 3: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100} | F1: 0.250 | Recall: 0.636 | PR-AUC: 0.123 | MCC: 0.150
Processing Fold 4 of Split 10...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 10, Fold 4: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 50} | F1: 0.185 | Recall: 0.455 | PR-AUC: 0.222 | MCC: 0.039
Processing Fold 5 of Split 10...


  warn(
  warn(
  warn(


BalancedRandomForest - Split 10, Fold 5: Best Params: {'classifier__max_depth': None, 'classifier__min_samples_split': 10, 'classifier__n_estimators': 100} | F1: 0.239 | Recall: 0.727 | PR-AUC: 0.176 | MCC: 0.141
Results for BalancedRandomForest saved to balancedrandomforest_cv_results.csv

Processing Outer Split 1 for SVM...
Processing Fold 1 of Split 1...
SVM - Split 1, Fold 1: Best Params: {'classifier__C': 1, 'classifier__class_weight': 'balanced', 'classifier__gamma': 'scale', 'classifier__kernel': 'linear'} | F1: 0.217 | Recall: 0.500 | PR-AUC: 0.258 | MCC: 0.113
Processing Fold 2 of Split 1...
SVM - Split 1, Fold 2: Best Params: {'classifier__C': 100, 'classifier__class_weight': 'balanced', 'classifier__gamma': 'scale', 'classifier__kernel': 'linear'} | F1: 0.217 | Recall: 0.500 | PR-AUC: 0.139 | MCC: 0.113
Processing Fold 3 of Split 1...
SVM - Split 1, Fold 3: Best Params: {'classifier__C': 100, 'classifier__class_weight': 'balanced', 'classifier__gamma': 'scale', 'classifier__

In [4]:
# List of model names
model_names = ["BalancedRandomForest", "SVM", "LogisticRegression", "NeuralNet"]

# Dictionary to store DataFrames
model_dfs = {}
summary_dfs = {}

# Load and process each model's results
for model_name in model_names:
    file_name = f"{model_name.lower()}_cv_results.csv"
    summary_df = process_model_results(file_name)
    
    if summary_df is not None:
        summary_file = f"{model_name.lower()}_summary.csv"
        summary_df.to_csv(summary_file, index=False)
        summary_dfs[model_name] = summary_df
        
        print(f"Summary for {model_name} saved to {summary_file}")
        display(summary_df)


Summary for BalancedRandomForest saved to balancedrandomforest_summary.csv


Unnamed: 0,split,accuracy,recall,f1_score,mcc,balanced_accuracy,g_mean,pr_auc
0,1,0.607407,0.658182,0.247425,0.157685,0.629911,0.62557,0.154054
1,10,0.592593,0.663636,0.240939,0.148333,0.624391,0.619451,0.211618
2,2,0.635185,0.68,0.261938,0.187119,0.65518,0.64343,0.159585
3,3,0.62963,0.696364,0.271001,0.194354,0.659354,0.656354,0.201786
4,4,0.605556,0.574545,0.21436,0.107516,0.592238,0.573351,0.172774
5,5,0.633333,0.603636,0.244093,0.147,0.620101,0.618179,0.133192
6,6,0.625926,0.661818,0.260968,0.174208,0.642039,0.638211,0.189919
7,7,0.611111,0.698182,0.262479,0.18125,0.649943,0.647314,0.172194
8,8,0.609259,0.532727,0.21253,0.091196,0.575326,0.570197,0.170808
9,9,0.624074,0.605455,0.242235,0.142341,0.615929,0.614058,0.172487


Summary for SVM saved to svm_summary.csv


Unnamed: 0,split,accuracy,recall,f1_score,mcc,balanced_accuracy,g_mean,pr_auc
0,1,0.681481,0.618182,0.276401,0.194719,0.653042,0.645672,0.271844
1,10,0.690741,0.643636,0.291266,0.214488,0.669819,0.667609,0.257711
2,2,0.67963,0.567273,0.254076,0.162377,0.62967,0.614271,0.314228
3,3,0.712963,0.563636,0.286508,0.197242,0.646314,0.638956,0.20836
4,4,0.690741,0.609091,0.274054,0.193284,0.654672,0.644506,0.274486
5,5,0.727778,0.418182,0.221549,0.11842,0.59004,0.546467,0.210683
6,6,0.687037,0.641818,0.285979,0.209486,0.666912,0.661551,0.325137
7,7,0.705556,0.581818,0.29078,0.207671,0.650282,0.638294,0.250904
8,8,0.666667,0.601818,0.262428,0.172897,0.637539,0.630706,0.256779
9,9,0.672222,0.567273,0.257777,0.159911,0.625473,0.621781,0.225665


Summary for LogisticRegression saved to logisticregression_summary.csv


Unnamed: 0,split,accuracy,recall,f1_score,mcc,balanced_accuracy,g_mean,pr_auc
0,1,0.657407,0.618182,0.264939,0.176095,0.639724,0.636578,0.277669
1,10,0.657407,0.627273,0.264111,0.177833,0.644175,0.638938,0.248976
2,2,0.662963,0.643636,0.267456,0.189547,0.654534,0.642536,0.311038
3,3,0.664815,0.621818,0.27301,0.184897,0.645613,0.641917,0.298753
4,4,0.657407,0.647273,0.266218,0.186325,0.653228,0.646518,0.286768
5,5,0.644444,0.565455,0.234159,0.133616,0.609226,0.595922,0.217802
6,6,0.668519,0.605455,0.262154,0.174631,0.640493,0.633193,0.318988
7,7,0.666667,0.616364,0.269802,0.182368,0.643875,0.638069,0.257912
8,8,0.67037,0.641818,0.27868,0.197675,0.657539,0.652559,0.257286
9,9,0.662963,0.627273,0.268813,0.18218,0.647257,0.642507,0.265409


Summary for NeuralNet saved to neuralnet_summary.csv


Unnamed: 0,split,accuracy,recall,f1_score,mcc,balanced_accuracy,g_mean,pr_auc
0,1,0.637037,0.758182,0.310352,0.252611,0.691494,0.660772,0.259585
1,10,0.707407,0.663636,0.314159,0.24313,0.688109,0.686319,0.225539
2,2,0.712963,0.641818,0.30224,0.232172,0.681355,0.672465,0.236996
3,3,0.744444,0.598182,0.320069,0.273633,0.678767,0.64641,0.259414
4,4,0.657407,0.701818,0.291679,0.21894,0.677303,0.676196,0.23292
5,5,0.705556,0.672727,0.309293,0.244775,0.690424,0.680465,0.231723
6,6,0.722222,0.685455,0.320693,0.263048,0.706308,0.69476,0.244201
7,7,0.661111,0.698182,0.302782,0.229338,0.677736,0.674682,0.226917
8,8,0.662963,0.718182,0.301795,0.234353,0.68762,0.686376,0.252231
9,9,0.687037,0.701818,0.304404,0.241309,0.693892,0.688853,0.246439
