# AVF Acoustic Analysis: Machine Learning Classification

This notebook performs:
- Phase 1: Comparison of engineered vs. perceptual acoustic features
- Phase 2: Comparison of acoustic-only vs. acoustic + clinical features

Methods:
- Patient-level data splitting
- Stability selection with bootstrap (500 iterations)
- Logistic Regression and Random Forest classifiers
- SHAP analysis for model interpretability

In [None]:
import sys
import pandas as pd
import numpy as np
import re
import warnings
from pathlib import Path
from collections import Counter

# Colab-only setup: runs only when the google.colab module is already loaded.
if "google.colab" in sys.modules:
    # "!" lines are IPython shell escapes, so this cell needs a notebook kernel.
    # Installs the extra Python packages (quietly) and the Liberation font package.
    !pip install python-docx statsmodels shap openpyxl -q
    !apt-get -qq install -y fonts-liberation
    # Mount Google Drive at /content/drive.
    from google.colab import drive
    drive.mount('/content/drive')

from sklearn.model_selection import train_test_split, GroupKFold
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    classification_report, roc_auc_score, RocCurveDisplay,
    brier_score_loss
)
from sklearn.feature_selection import RFE
from sklearn.base import clone
from sklearn.calibration import CalibrationDisplay
from sklearn.compose import make_column_selector, ColumnTransformer

from scipy.stats import mannwhitneyu, chi2_contingency, norm
import matplotlib.pyplot as plt
from docx import Document
from statsmodels.stats.multitest import multipletests

# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so it hides all warning categories.
warnings.filterwarnings('ignore')
# Default font family for all matplotlib figures.
plt.rcParams['font.family'] = 'Liberation Sans'

In [None]:
# Outcome column that is dichotomised into the binary classification target.
TARGET_COLUMN_FOR_LABELING = 'FV'  # 'FV' or 'RI'
# Cut-off applied as `value >= GROUP_THRESHOLD` to form the positive class.
GROUP_THRESHOLD = 400              # FV: 400, RI: 0.6

# Placeholder paths: point these at the real data folder and workbook before running.
DRIVE_BASE_PATH = Path("/path/to/your/data/folder/")
INPUT_EXCEL_PATH = DRIVE_BASE_PATH / "your_data.xlsx"
# Results folder, named after the target column and threshold.
OUTPUT_DIR = DRIVE_BASE_PATH / f"{TARGET_COLUMN_FOR_LABELING}_{GROUP_THRESHOLD}_results"

N_FEATURES_TO_SELECT = 5        # features kept per feature set by stability selection
N_SPLITS_CV = 5                 # number of GroupKFold folds
N_BOOTSTRAPS_STABILITY = 500    # resampling rounds for stability selection
N_BOOTSTRAPS_CI = 1000          # resampling rounds for the AUC confidence intervals
UNIFIED_RANDOM_STATE = 243      # seed shared by the split, the models and the resampling

In [None]:
def load_and_preprocess_data(file_path: Path, target_column: str, threshold: float):
    """Load the Excel workbook and add a binary target column.

    The target column is named ``f"{target_column}>={threshold}"`` and holds
    1 where ``target_column >= threshold`` and 0 otherwise.

    Rows whose ``target_column`` is missing are excluded (and reported):
    comparing NaN with the threshold evaluates to False, which would
    otherwise silently label an unknown measurement as the negative class.

    Args:
        file_path: Path of the Excel file to read.
        target_column: Name of the continuous column to dichotomise.
        threshold: Cut-off; values greater than or equal to it are class 1.

    Returns:
        Tuple ``(target_variable, df)``: the name of the new binary column and
        the loaded DataFrame containing it (original index labels are kept).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        KeyError: If ``target_column`` is not a column of the workbook.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    df = pd.read_excel(file_path)
    if target_column not in df.columns:
        raise KeyError(f"Column '{target_column}' not found in '{file_path.name}'")

    # Unknown outcomes cannot be labelled; drop them instead of calling them 0.
    missing_target = df[target_column].isna()
    n_missing = int(missing_target.sum())
    if n_missing > 0:
        df = df.loc[~missing_target].copy()

    target_variable = f'{target_column}>={threshold}'
    df[target_variable] = (df[target_column] >= threshold).astype(int)

    print(f"Loaded data from '{file_path.name}'")
    if n_missing > 0:
        print(f"Excluded {n_missing} rows with missing '{target_column}'")
    print(f"Created binary target '{target_variable}' from '{target_column}'")
    print(f"n={len(df)} samples")

    return target_variable, df


def split_data_by_patient(df: pd.DataFrame, target_variable: str,
                          test_size: float = 0.2, random_state: int = None):
    """Split rows into train/test so that each patient lands in one side only.

    Patients (column ``'Pt No'``) are the unit of splitting. The split is
    stratified on each patient's label, taken from that patient's first row.

    Args:
        df: Sample-level table with a ``'Pt No'`` column and the target column.
        target_variable: Name of the binary target column.
        test_size: Fraction of patients assigned to the test side.
        random_state: Seed forwarded to ``train_test_split``.

    Returns:
        Tuple ``(train_indices, test_indices)`` of index labels of ``df``.
    """
    patient_ids = df['Pt No'].unique()

    # One label per patient: the label on the first row seen for that patient.
    first_rows = df.drop_duplicates(subset=['Pt No'])
    label_by_patient = first_rows.set_index('Pt No')[target_variable]
    strata = label_by_patient.reindex(patient_ids)

    train_patients, test_patients = train_test_split(
        patient_ids,
        test_size=test_size,
        random_state=random_state,
        stratify=strata,
    )

    in_train = df['Pt No'].isin(train_patients)
    in_test = df['Pt No'].isin(test_patients)
    train_indices = df[in_train].index
    test_indices = df[in_test].index

    print(f"\nPatient-level split: Train={len(train_patients)}, Test={len(test_patients)}")
    print(f"Sample-level split: Train={len(train_indices)}, Test={len(test_indices)}")

    return train_indices, test_indices

In [None]:
def _format_p_value(p_val) -> str:
    """Format a p-value for the table; an undefined (NaN) p-value becomes 'N/A'."""
    # NaN compares False with everything, so it must be caught before the
    # threshold test or it would be printed as "<0.01".
    if pd.isna(p_val):
        return "N/A"
    return f"{p_val:.2f}" if p_val >= 0.01 else "<0.01"


def create_patient_characteristics_table(df: pd.DataFrame, target_variable: str) -> pd.DataFrame:
    """Build Table 1: patient characteristics compared between the two outcome groups.

    Works on one row per patient (the first row of each ``'Pt No'``).
    Continuous variables are summarised as mean ± SD and compared with the
    Mann-Whitney U test. Binary variables are summarised as n (%) of the
    positive level and compared with a chi-square test on the 2x2 table.
    When a test cannot be computed (an empty group, or a contingency table
    that is not 2x2) the p-value is reported as ``"N/A"``.

    The column headers use the module-level ``TARGET_COLUMN_FOR_LABELING`` and
    ``GROUP_THRESHOLD`` settings.

    Args:
        df: Sample-level table containing ``'Pt No'`` and ``target_variable``.
        target_variable: Name of the binary (0/1) grouping column.

    Returns:
        DataFrame with one row per characteristic found in ``df``.
    """
    print("\n--- Creating Table 1: Patient Characteristics ---")
    df_patients = df.drop_duplicates(subset=['Pt No']).copy()

    group0 = df_patients[df_patients[target_variable] == 0]
    group1 = df_patients[df_patients[target_variable] == 1]

    header0 = f'{TARGET_COLUMN_FOR_LABELING} < {GROUP_THRESHOLD} (n={len(group0)})'
    header1 = f'{TARGET_COLUMN_FOR_LABELING} >= {GROUP_THRESHOLD} (n={len(group1)})'

    table_data = []

    continuous_vars = ['age', 'POD', 'FV', 'RI', 'RA diameter', 'shunt diameter']
    for var in continuous_vars:
        if var not in df_patients.columns:
            continue

        n_missing0 = group0[var].isnull().sum()
        n_missing1 = group1[var].isnull().sum()

        mean0, std0 = group0[var].mean(), group0[var].std()
        mean1, std1 = group1[var].mean(), group1[var].std()

        mean0_str = f"{mean0:.1f} ± {std0:.1f}"
        if n_missing0 > 0:
            mean0_str += f" (missing: {n_missing0})"

        mean1_str = f"{mean1:.1f} ± {std1:.1f}"
        if n_missing1 > 0:
            mean1_str += f" (missing: {n_missing1})"

        values0 = group0[var].dropna()
        values1 = group1[var].dropna()
        # The test is undefined when either group has no observed values.
        if len(values0) > 0 and len(values1) > 0:
            _, p_val = mannwhitneyu(values0, values1)
        else:
            p_val = np.nan

        table_data.append({
            'Characteristic': var,
            header0: mean0_str,
            header1: mean1_str,
            'P-Value': _format_p_value(p_val)
        })

    categorical_vars = {
        'men': 'Male Sex (n, %)',
        'arterial calcification': 'Arterial Calcification (n, %)',
        'DM': 'DM (n, %)',
        'HTN': 'HTN (n, %)',
        'Heart disease': 'Heart Disease (n, %)',
        'tabaco site': 'tabaco site (n, %)',
        'left': 'left (n, %)'
    }

    # Accepted spellings of the two levels; anything else is treated as missing.
    mapping_dict = {'y': 1, 'n': 0, 'male': 1, 'female': 0, '1': 1, '0': 0, '1.0': 1, '0.0': 0}

    for var, name in categorical_vars.items():
        if var not in df_patients.columns:
            continue

        numeric_col = var + '_numeric'
        df_patients[numeric_col] = df_patients[var].astype(str).str.lower().map(mapping_dict)
        df_filtered = df_patients.dropna(subset=[numeric_col])

        group0_cat = df_filtered[df_filtered[target_variable] == 0]
        group1_cat = df_filtered[df_filtered[target_variable] == 1]

        crosstab = pd.crosstab(df_filtered[numeric_col], df_filtered[target_variable])

        # A chi-square test needs both levels in both groups; otherwise no
        # test was performed and the table should say so.
        p_val = chi2_contingency(crosstab)[1] if crosstab.shape == (2, 2) else np.nan

        n0_pos = crosstab.loc[1, 0] if 1 in crosstab.index and 0 in crosstab.columns else 0
        n1_pos = crosstab.loc[1, 1] if 1 in crosstab.index and 1 in crosstab.columns else 0

        # Percentages use the patients with a recognised value as denominator.
        percent0 = (n0_pos / len(group0_cat) * 100) if len(group0_cat) > 0 else 0
        percent1 = (n1_pos / len(group1_cat) * 100) if len(group1_cat) > 0 else 0

        table_data.append({
            'Characteristic': name,
            header0: f"{n0_pos} ({percent0:.1f})",
            header1: f"{n1_pos} ({percent1:.1f})",
            'P-Value': _format_p_value(p_val)
        })

    return pd.DataFrame(table_data)

In [None]:
def select_stable_features(X_train_df: pd.DataFrame, y_train: pd.Series,
                          groups_train: pd.Series, feature_list: list,
                          model_for_select, n_features_to_select: int,
                          n_bootstraps: int, random_state: int) -> list:
    """Pick the features most often retained by RFE across patient-level bootstraps.

    In each round, patients are drawn with replacement and every draw
    contributes that patient's rows, so a patient drawn twice appears twice.
    RFE is fitted on the resample and the retained features are tallied.
    The ``n_features_to_select`` most frequently retained features are returned.

    Args:
        X_train_df: Training features; must contain every name in ``feature_list``.
        y_train: Training labels.
        groups_train: Patient identifier for each training row.
        feature_list: Candidate feature names.
        model_for_select: Estimator used inside RFE. For a ``Pipeline`` the
            first step is used as scaler and the last step as RFE estimator.
        n_features_to_select: Number of features RFE keeps and this function returns.
        n_bootstraps: Number of resampling rounds.
        random_state: Seed for the patient resampling.

    Returns:
        List of selected feature names, most frequently selected first. If no
        round succeeds, the first ``n_features_to_select`` entries of
        ``feature_list`` are returned as a fallback.
    """
    print(f"-> Starting stability selection (bootstrap iterations: {n_bootstraps})...")

    feature_counter = Counter()
    rng = np.random.RandomState(random_state)

    if isinstance(model_for_select, Pipeline):
        scaler = clone(model_for_select.steps[0][1])
        estimator = clone(model_for_select.steps[-1][1])
    else:
        scaler = None
        estimator = clone(model_for_select)

    selector = RFE(estimator=estimator, n_features_to_select=n_features_to_select, step=1)
    X_train_subset = X_train_df[feature_list]

    # Rows are addressed by position below, so bring X and y into the row
    # order of groups_train if their indexes are not already identical.
    if not X_train_subset.index.equals(groups_train.index):
        X_train_subset = X_train_subset.loc[groups_train.index]
    if not y_train.index.equals(groups_train.index):
        y_train = y_train.loc[groups_train.index]
    y_values = np.asarray(y_train)

    # Row positions of each patient, so repeated draws keep their multiplicity
    # (an isin() mask would collapse a patient drawn twice into a single copy).
    group_values = np.asarray(groups_train)
    rows_by_patient = [np.flatnonzero(group_values == pid) for pid in pd.unique(group_values)]
    n_patients = len(rows_by_patient)

    valid_iterations = 0
    failed_iterations = 0
    first_error = None
    for i in range(n_bootstraps):
        if (i + 1) % 100 == 0:
            print(f"  ... Bootstrap {i+1}/{n_bootstraps} complete")

        if n_patients == 0:
            break

        sampled = rng.choice(n_patients, size=n_patients, replace=True)
        rows = np.concatenate([rows_by_patient[k] for k in sampled])
        y_boot = y_values[rows]

        # RFE needs both classes in the resample.
        if rows.size == 0 or len(np.unique(y_boot)) < 2:
            continue

        X_boot = X_train_subset.iloc[rows]

        try:
            X_boot_scaled = scaler.fit_transform(X_boot) if scaler is not None else X_boot
            selector.fit(X_boot_scaled, y_boot)
        except Exception as exc:
            # Best effort: one failed round must not abort the whole selection,
            # but failures are counted and reported instead of being hidden.
            failed_iterations += 1
            if first_error is None:
                first_error = exc
            continue

        selected = X_boot.columns[selector.support_].tolist()
        feature_counter.update(selected)
        valid_iterations += 1

    if failed_iterations > 0:
        print(f"Warning: {failed_iterations} bootstrap iterations failed "
              f"(first error: {type(first_error).__name__}: {first_error})")

    if valid_iterations == 0:
        print("Warning: No valid bootstrap iterations.")
        return feature_list[:n_features_to_select]

    most_common = [f for f, _ in feature_counter.most_common(n_features_to_select)]

    print(f"-> Stability selection complete. ({valid_iterations}/{n_bootstraps} valid iterations)")
    print("--- Selected features (by frequency) ---")
    for feature, count in feature_counter.most_common(10):
        print(f"  {feature}: {count} ({count/valid_iterations:.1%})")

    return most_common

In [None]:
def evaluate_model_with_cv(X_selected: pd.DataFrame, y: pd.Series,
                          groups: pd.Series, model) -> pd.DataFrame:
    """Cross-validate ``model`` with GroupKFold and summarise the fold metrics.

    A fresh clone of ``model`` is fitted per fold (``N_SPLITS_CV`` folds,
    grouped by ``groups``). AUC is NaN for a fold whose held-out part
    contains a single class.

    Args:
        X_selected: Feature table.
        y: Labels, row-aligned with ``X_selected``.
        groups: Group identifier per row, used to keep groups within one fold.
        model: Unfitted estimator providing ``predict_proba`` and ``predict``.

    Returns:
        DataFrame indexed by metric (auc, accuracy, sensitivity, specificity,
        f1_score) with columns ``mean`` and ``std`` over the folds.
    """
    metric_names = ['auc', 'accuracy', 'sensitivity', 'specificity', 'f1_score']
    fold_rows = []

    splitter = GroupKFold(n_splits=N_SPLITS_CV)
    for fit_pos, eval_pos in splitter.split(X_selected, y, groups):
        X_eval = X_selected.iloc[eval_pos]
        y_eval = y.iloc[eval_pos]

        fold_model = clone(model)
        fold_model.fit(X_selected.iloc[fit_pos], y.iloc[fit_pos])

        positive_proba = fold_model.predict_proba(X_eval)[:, 1]
        predicted = fold_model.predict(X_eval)

        # AUC is only defined when both classes are present in the held-out fold.
        if len(np.unique(y_eval)) > 1:
            fold_auc = roc_auc_score(y_eval, positive_proba)
        else:
            fold_auc = np.nan

        report = classification_report(y_eval, predicted, output_dict=True, zero_division=0)
        fold_rows.append({
            'auc': fold_auc,
            'accuracy': report['accuracy'],
            # Recall of class 1 / class 0; NaN if that class is absent from the report.
            'sensitivity': report.get('1', {}).get('recall', np.nan),
            'specificity': report.get('0', {}).get('recall', np.nan),
            'f1_score': report['weighted avg']['f1-score'],
        })

    per_fold = pd.DataFrame(fold_rows, columns=metric_names)
    return per_fold.agg(['mean', 'std']).T


def calculate_bootstrap_ci(y_test: pd.Series, y_pred_proba: np.ndarray,
                          test_patient_ids: pd.Series) -> tuple:
    """Percentile 95% CI of the test AUC from a patient-level bootstrap.

    Patients are drawn with replacement and every draw contributes all rows of
    that patient, so a patient drawn twice is counted twice. Selecting rows
    with an ``isin`` mask would drop that multiplicity and turn each replicate
    into a subsample of the distinct patients drawn, which gives a CI that is
    too narrow.

    Uses the module-level ``N_BOOTSTRAPS_CI`` and ``UNIFIED_RANDOM_STATE``.

    Args:
        y_test: True labels of the test rows.
        y_pred_proba: Predicted positive-class probabilities, in the row order
            of ``y_test``.
        test_patient_ids: Patient identifier of each test row.

    Returns:
        Tuple ``(lower, upper)`` with the 2.5th and 97.5th percentiles of the
        bootstrap AUCs, or ``(nan, nan)`` if no replicate contained both classes.
    """
    # Align the patient ids to y_test by label when both carry an index, then
    # work positionally so duplicated draws can be represented.
    if (isinstance(test_patient_ids, pd.Series) and isinstance(y_test, pd.Series)
            and not test_patient_ids.index.equals(y_test.index)):
        test_patient_ids = test_patient_ids.reindex(y_test.index)

    y_true = np.asarray(y_test)
    proba = np.asarray(y_pred_proba)
    patient_values = np.asarray(test_patient_ids)

    rows_by_patient = [np.flatnonzero(patient_values == pid) for pid in pd.unique(patient_values)]
    n_patients = len(rows_by_patient)
    if n_patients == 0:
        return np.nan, np.nan

    rng = np.random.RandomState(UNIFIED_RANDOM_STATE)

    bootstrapped_scores = []
    for _ in range(N_BOOTSTRAPS_CI):
        sampled = rng.choice(n_patients, size=n_patients, replace=True)
        rows = np.concatenate([rows_by_patient[k] for k in sampled])

        if rows.size == 0:
            continue

        y_true_bs = y_true[rows]
        # AUC is undefined when a replicate contains a single class.
        if len(np.unique(y_true_bs)) < 2:
            continue

        bootstrapped_scores.append(roc_auc_score(y_true_bs, proba[rows]))

    if not bootstrapped_scores:
        return np.nan, np.nan

    return np.percentile(bootstrapped_scores, 2.5), np.percentile(bootstrapped_scores, 97.5)


def get_feature_importances(model, columns: list) -> pd.Series:
    """Return per-feature importance scores, largest first.

    For a ``Pipeline`` the last step is inspected. The scores come from
    ``feature_importances_`` if present, otherwise from the absolute values of
    the first row of ``coef_``, otherwise they are all zero.

    Args:
        model: Fitted estimator or Pipeline.
        columns: Feature names, in the order the model was fitted on.

    Returns:
        Series of scores indexed by feature name, sorted in descending order.
    """
    fitted = model
    if isinstance(model, Pipeline):
        fitted = model.steps[-1][1]

    if hasattr(fitted, 'feature_importances_'):
        scores = fitted.feature_importances_
    elif hasattr(fitted, 'coef_'):
        # Magnitude only: the sign of a coefficient is not an importance.
        scores = np.abs(fitted.coef_[0])
    else:
        scores = np.zeros(len(columns))

    by_feature = pd.Series(scores, index=columns)
    return by_feature.sort_values(ascending=False)

In [None]:
def compute_midrank(x):
    """Return the 1-based midranks of ``x`` (tied values share their average rank).

    Args:
        x: One-dimensional array-like of scores.

    Returns:
        Float array of the same length with the midrank of each element.
    """
    x = np.asarray(x)
    J = np.argsort(x)
    Z = x[J]
    N = len(x)
    T = np.zeros(N, dtype=float)
    i = 0
    while i < N:
        # [i, j) is a run of equal values in sorted order; all of them get the
        # average of the 0-based positions i .. j-1.
        j = i
        while j < N and Z[j] == Z[i]:
            j += 1
        T[i:j] = 0.5 * (i + j - 1)
        i = j
    T2 = np.empty(N, dtype=float)
    T2[J] = T + 1  # back to the original order, converted to 1-based ranks
    return T2


def _delong_components(y_true, y_pred):
    """Return DeLong's structural components ``(v_10, v_01)`` for one score vector.

    ``v_10[i]`` is the fraction of negatives scored below positive ``i`` and
    ``v_01[j]`` the fraction of positives scored above negative ``j``; ties
    count one half. The mean of either vector is the AUC.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred, dtype=float)

    positives = y_pred[y_true == 1]
    negatives = y_pred[y_true == 0]
    m, n = positives.size, negatives.size

    # Rank in the pooled sample minus rank within the own class counts the
    # members of the other class that lie below (ties counted one half).
    rank_pooled = compute_midrank(np.concatenate([positives, negatives]))
    v_10 = (rank_pooled[:m] - compute_midrank(positives)) / n
    v_01 = 1.0 - (rank_pooled[m:] - compute_midrank(negatives)) / m
    return v_10, v_01


def fast_delong_auc_cov(y_true, y_pred):
    """Compute the AUC and the two DeLong variance components of one model.

    The variance of the AUC estimate is ``var_10 / m + var_01 / n``, with
    ``m`` positives and ``n`` negatives.

    Args:
        y_true: Binary labels (1 = positive, 0 = negative).
        y_pred: Scores, higher meaning more likely positive.

    Returns:
        Tuple ``(auc, var_10, var_01)``: the AUC, the sample variance of the
        per-positive components and of the per-negative components. A variance
        is NaN when its class has fewer than two members; all three values are
        NaN when a class is empty.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    m = int(np.sum(y_true == 1))
    n = int(np.sum(y_true == 0))
    if m == 0 or n == 0:
        return np.nan, np.nan, np.nan

    v_10, v_01 = _delong_components(y_true, y_pred)

    auc = float(v_10.mean())
    var_10 = float(np.var(v_10, ddof=1)) if m > 1 else np.nan
    var_01 = float(np.var(v_01, ddof=1)) if n > 1 else np.nan

    return auc, var_10, var_01


def delong_roc_test(y_true, scores_a, scores_b):
    """Two-sided DeLong test for the difference of two correlated AUCs.

    Both score vectors must refer to the same samples in the same order.

    Args:
        y_true: Binary labels (1 = positive, 0 = negative).
        scores_a: Scores of the first model.
        scores_b: Scores of the second model.

    Returns:
        The two-sided p-value. ``1.0`` is returned when the test cannot be
        computed: a class has fewer than two members, or the estimated
        variance of the AUC difference is (numerically) zero.
    """
    y_true = np.asarray(y_true)
    scores_a = np.asarray(scores_a)
    scores_b = np.asarray(scores_b)

    m = int(np.sum(y_true == 1))
    n = int(np.sum(y_true == 0))

    # The covariance estimates need at least two members per class.
    if m < 2 or n < 2:
        return 1.0

    v_a_10, v_a_01 = _delong_components(y_true, scores_a)
    v_b_10, v_b_01 = _delong_components(y_true, scores_b)

    auc_a = v_a_10.mean()
    auc_b = v_b_10.mean()

    # 2x2 covariance of (AUC_a, AUC_b): S10 / m + S01 / n.
    s_10 = np.cov(np.vstack([v_a_10, v_b_10]))
    s_01 = np.cov(np.vstack([v_a_01, v_b_01]))
    cov_auc = s_10 / m + s_01 / n

    var_diff = cov_auc[0, 0] + cov_auc[1, 1] - 2 * cov_auc[0, 1]

    if var_diff <= 1e-8:
        return 1.0

    z = (auc_a - auc_b) / np.sqrt(var_diff)
    p = 2 * norm.sf(np.abs(z))

    return float(p)


def compute_auc_diff_paired(y_test: pd.Series, y_pred_probas: pd.DataFrame,
                           test_patient_ids: pd.Series, model_pair: tuple) -> dict:
    """Paired patient-level bootstrap of the AUC difference between two models.

    Each replicate draws patients with replacement and evaluates both models
    on the same resampled rows. A patient drawn twice contributes its rows
    twice; an ``isin`` mask would drop that multiplicity and understate the
    variability of the difference.

    Uses the module-level ``N_BOOTSTRAPS_CI`` and ``UNIFIED_RANDOM_STATE``.

    Args:
        y_test: True labels of the test rows.
        y_pred_probas: Mapping or DataFrame from model name to predicted
            positive-class probabilities for the test rows.
        test_patient_ids: Patient identifier of each test row.
        model_pair: ``(model_a, model_b)`` keys into ``y_pred_probas``.

    Returns:
        Dict with ``model_pair``, ``auc_diff_mean`` (mean of AUC_a - AUC_b),
        ``ci_lower`` / ``ci_upper`` (2.5th / 97.5th percentiles) and a
        two-sided bootstrap ``p_value``. The numeric entries are NaN if no
        replicate contained both classes.
    """
    model_a, model_b = model_pair
    pair_label = f"{model_a} vs {model_b}"

    def _aligned(values):
        # Match rows to y_test by index label when possible, then go positional.
        if (isinstance(values, pd.Series) and isinstance(y_test, pd.Series)
                and not values.index.equals(y_test.index)):
            values = values.reindex(y_test.index)
        return np.asarray(values)

    y_true = np.asarray(y_test)
    proba_a = _aligned(y_pred_probas[model_a])
    proba_b = _aligned(y_pred_probas[model_b])
    patient_values = _aligned(test_patient_ids)

    rows_by_patient = [np.flatnonzero(patient_values == pid) for pid in pd.unique(patient_values)]
    n_patients = len(rows_by_patient)
    rng = np.random.RandomState(UNIFIED_RANDOM_STATE)

    auc_diffs = []
    for _ in range(N_BOOTSTRAPS_CI if n_patients > 0 else 0):
        sampled = rng.choice(n_patients, size=n_patients, replace=True)
        rows = np.concatenate([rows_by_patient[k] for k in sampled])

        if rows.size == 0:
            continue

        y_true_bs = y_true[rows]
        # AUC is undefined when a replicate contains a single class.
        if len(np.unique(y_true_bs)) < 2:
            continue

        auc_a = roc_auc_score(y_true_bs, proba_a[rows])
        auc_b = roc_auc_score(y_true_bs, proba_b[rows])
        auc_diffs.append(auc_a - auc_b)

    if not auc_diffs:
        return {
            'model_pair': pair_label,
            'auc_diff_mean': np.nan,
            'ci_lower': np.nan,
            'ci_upper': np.nan,
            'p_value': np.nan
        }

    auc_diffs = np.array(auc_diffs)
    mean_diff = auc_diffs.mean()
    ci_lower = np.percentile(auc_diffs, 2.5)
    ci_upper = np.percentile(auc_diffs, 97.5)

    # Two-sided p-value: twice the smaller tail mass on either side of zero,
    # capped at 1.
    p_val = min(
        np.mean(auc_diffs <= 0),
        np.mean(auc_diffs >= 0)
    ) * 2
    p_val = min(p_val, 1.0)

    return {
        'model_pair': pair_label,
        'auc_diff_mean': mean_diff,
        'ci_lower': ci_lower,
        'ci_upper': ci_upper,
        'p_value': p_val
    }

In [None]:
def analyze_and_plot_shap(model, X_train, X_test, output_dir, model_name, target_name):
    """Compute SHAP values for the fitted model and save a summary plot as SVG.

    For a ``Pipeline`` the final estimator is explained. That estimator was
    fitted on the output of the preceding steps (e.g. the scaler), so the data
    is passed through those steps before it reaches the explainer; explaining
    it on raw inputs would attribute predictions on a different feature scale
    than the one the model was trained on.

    Args:
        model: Fitted estimator or Pipeline.
        X_train: Training features, used as SHAP background data.
        X_test: Test features to explain.
        output_dir: Directory (``Path``) in which the figure is written.
        model_name: Model label shown in the figure title.
        target_name: Target label used in the output file name.

    Returns:
        A one-element list with the path of the saved summary figure.
    """
    import shap

    print("\n" + "="*70)
    print("--- SHAP Analysis ---")
    print("="*70)

    plt.rcParams['font.family'] = 'Liberation Sans'

    X_background, X_explain = X_train, X_test
    if isinstance(model, Pipeline):
        model_to_explain = model.steps[-1][1]
        if len(model.steps) > 1:
            # Fitted preprocessing steps (everything except the final estimator).
            preprocessor = model[:-1]

            def _preprocess(X):
                transformed = preprocessor.transform(X)
                # Keep the feature names when the column layout is unchanged.
                if isinstance(X, pd.DataFrame) and np.shape(transformed)[1] == X.shape[1]:
                    return pd.DataFrame(transformed, index=X.index, columns=X.columns)
                return transformed

            X_background = _preprocess(X_train)
            X_explain = _preprocess(X_test)
    else:
        model_to_explain = model
    print(f"Initializing SHAP explainer for: {type(model_to_explain).__name__}")

    explainer = shap.Explainer(model_to_explain, X_background)
    explanation = explainer(X_explain)

    # Some explainers return one slice per class (3-D values); keep class 1.
    explanation_plot = explanation[:, :, 1] if hasattr(explanation, 'values') and explanation.values.ndim == 3 else explanation

    print("Generating SHAP Summary Plot...")
    plt.figure(figsize=(10, 8))
    # The raw X_test is passed as `features` so the colour scale reflects the
    # original feature values rather than the preprocessed ones.
    shap.summary_plot(explanation_plot, features=X_test, show=False, plot_size=None)
    fig_summary = output_dir / f'Figure2_SHAP_Summary_{target_name}.svg'
    plt.title(f'Figure 2: SHAP Summary Plot ({model_name})', fontsize=16, weight='bold')
    plt.tight_layout()
    plt.savefig(fig_summary, format='svg', bbox_inches='tight')
    plt.show()
    print(f"Saved to '{fig_summary}'")

    return [fig_summary]


def plot_calibration_curve(model, X_test, y_test, model_name, output_dir, target_name):
    """Print the Brier score and save a 5-bin calibration plot as SVG.

    Args:
        model: Fitted estimator providing ``predict_proba``.
        X_test: Test features.
        y_test: True test labels.
        model_name: Label used in the printout, the legend and the title.
        output_dir: Directory (``Path``) in which the figure is written.
        target_name: Target label used in the output file name.

    Returns:
        Path of the saved figure.
    """
    banner = "="*70
    print("\n" + banner)
    print("--- Calibration Analysis ---")
    print(banner)

    positive_proba = model.predict_proba(X_test)[:, 1]
    brier = brier_score_loss(y_test, positive_proba)

    print(f"Brier Score for '{model_name}': {brier:.4f}")

    fig, ax = plt.subplots(figsize=(10, 8))
    CalibrationDisplay.from_predictions(
        y_test,
        positive_proba,
        n_bins=5,
        strategy='uniform',
        name=model_name,
        ax=ax,
    )

    axis_label_style = {'fontsize': 12, 'weight': 'bold'}
    ax.set_title(f'Figure 4: Calibration Plot ({model_name})', fontsize=16, weight='bold')
    ax.set_xlabel("Mean Predicted Probability", **axis_label_style)
    ax.set_ylabel("Fraction of Positives", **axis_label_style)
    ax.grid(linestyle=':', alpha=0.6)

    fig_path = output_dir / f'Figure4_Calibration_{target_name}.svg'
    plt.tight_layout()
    plt.savefig(fig_path, format='svg', bbox_inches='tight')
    plt.show()

    print(f"Saved to '{fig_path}'")
    return fig_path


def plot_roc_curves(final_results: dict, table2_df: pd.DataFrame, output_file: Path):
    """Draw the test-set ROC curve of every model in one figure and save it as SVG.

    Each legend entry shows two numbers: the cross-validated AUC read from the
    ``'AUC'`` column of ``table2_df`` (the part before ``' ±'``), labelled
    "CV AUC", and the test-set AUC that ``RocCurveDisplay`` appends itself as
    "(AUC = ...)". Labelling the first value explicitly avoids a legend with
    two indistinguishable "AUC =" entries.

    Args:
        final_results: Mapping from model id to a dict with keys ``'model'``,
            ``'X_test'`` and ``'y_test'``.
        table2_df: Summary table with columns ``'Model'`` and ``'AUC'``.
        output_file: Path of the SVG file to write.
    """
    plt.figure(figsize=(12, 10))
    ax = plt.gca()

    for model_id in table2_df['Model'].unique():
        if model_id not in final_results:
            continue

        result = final_results[model_id]
        cv_auc = table2_df.loc[table2_df['Model'] == model_id, 'AUC'].values[0].split(' ±')[0]
        # RocCurveDisplay appends " (AUC = <test AUC>)" to this name.
        label = f"{model_id} (CV AUC = {float(cv_auc):.3f}), test"

        RocCurveDisplay.from_estimator(
            result['model'], result['X_test'], result['y_test'],
            name=label, ax=ax
        )

    # Chance line.
    plt.plot([0, 1], [0, 1], color='black', lw=1, linestyle='--')
    plt.title('Figure 1: ROC Curves', fontsize=16, weight='bold')
    plt.xlabel('False Positive Rate', fontsize=12, weight='bold')
    plt.ylabel('True Positive Rate', fontsize=12, weight='bold')
    plt.grid(linestyle=':', alpha=0.6)
    plt.legend(fontsize=10, loc='lower right')
    plt.tight_layout()
    plt.savefig(output_file, format='svg', bbox_inches='tight')
    plt.show()

    print(f"Saved to '{output_file}'")

In [None]:
def main():
    """Phase 1: Engineered vs Perceptual features.

    Loads the data, prints Table 1, splits by patient, selects stable features
    per feature set on the training rows, then evaluates Logistic Regression
    and Random Forest on each feature set (grouped CV on the training rows plus
    a bootstrap CI of the test AUC) and plots the test ROC curves.

    Missing feature values are imputed with the mean of the *training* rows
    only, after the split, so that no information from the test patients
    enters preprocessing.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    TARGET_VARIABLE, df_orig = load_and_preprocess_data(
        INPUT_EXCEL_PATH, TARGET_COLUMN_FOR_LABELING, GROUP_THRESHOLD
    )

    table1_df = create_patient_characteristics_table(df_orig, TARGET_VARIABLE)
    print(table1_df.to_string())

    df_processed = df_orig.copy()
    y = df_processed[TARGET_VARIABLE]
    groups = df_processed['Pt No']

    # Feature columns are recognised by name: a letter A-D or E followed by
    # digits. str() keeps non-string column labels from breaking re.match.
    feature_sets = {
        "A-D": [col for col in df_processed.columns if re.match(r"^[A-D]\d+", str(col))],
        "E_only": [col for col in df_processed.columns if re.match(r"^E\d+", str(col))],
    }

    numeric_cols = list(dict.fromkeys(f for fs in feature_sets.values() for f in fs))
    # Coerce first; non-numeric entries become NaN and are imputed after the split.
    for col in numeric_cols:
        df_processed[col] = pd.to_numeric(df_processed[col], errors='coerce')

    models = {
        "LR": Pipeline([
            ('scaler', StandardScaler()),
            ('model', LogisticRegression(
                random_state=UNIFIED_RANDOM_STATE,
                class_weight='balanced',
                max_iter=1000
            ))
        ]),
        "RF": RandomForestClassifier(
            random_state=UNIFIED_RANDOM_STATE,
            class_weight='balanced'
        )
    }

    train_indices, test_indices = split_data_by_patient(
        df_processed, TARGET_VARIABLE, test_size=0.2, random_state=UNIFIED_RANDOM_STATE
    )

    # Impute with training-set means only (computed on the coerced values).
    if numeric_cols:
        train_means = df_processed.loc[train_indices, numeric_cols].mean()
        df_processed[numeric_cols] = df_processed[numeric_cols].fillna(train_means)

    print("\n--- Selecting stable features using training data only ---")
    df_train = df_processed.loc[train_indices]
    y_train = y.loc[train_indices]
    groups_train = groups.loc[train_indices]
    y_test = y.loc[test_indices]
    groups_test = groups.loc[test_indices]

    selected_feature_dfs = {}
    for name, f_list in feature_sets.items():
        print(f"\nFeature set '{name}' stability selection...")
        selected_cols = select_stable_features(
            df_train, y_train, groups_train, f_list,
            models["RF"], N_FEATURES_TO_SELECT,
            N_BOOTSTRAPS_STABILITY, UNIFIED_RANDOM_STATE
        )
        selected_feature_dfs[name] = df_processed[selected_cols]

    final_results = {}
    summary_data = []
    y_pred_probas_test = {}

    print("\n--- Evaluating models ---")
    for model_key, model_pipeline in models.items():
        for feature_set_name, X_selected in selected_feature_dfs.items():
            model_id = f"{model_key}_{feature_set_name}"
            print(f"\nProcessing: {model_id}")

            X_train = X_selected.loc[train_indices]
            X_test = X_selected.loc[test_indices]

            # Grouped CV on the training rows. The features were selected on
            # the full training set, so these CV scores are somewhat optimistic.
            cv_perf = evaluate_model_with_cv(X_train, y_train, groups_train, model_pipeline)

            final_model = clone(model_pipeline).fit(X_train, y_train)

            proba_test = final_model.predict_proba(X_test)[:, 1]
            y_pred_probas_test[model_id] = pd.Series(proba_test, index=X_test.index)

            ci_lower, ci_upper = calculate_bootstrap_ci(y_test, proba_test, groups_test)

            # 'AUC' is the CV mean ± SD; 'AUC (95% CI)' is the bootstrap
            # interval on the held-out test set.
            summary_data.append({
                'Model': model_id,
                'Features': X_selected.shape[1],
                'AUC': f"{cv_perf.loc['auc', 'mean']:.3f} ± {cv_perf.loc['auc', 'std']:.3f}",
                'AUC (95% CI)': f"{ci_lower:.3f} - {ci_upper:.3f}",
                'Accuracy': f"{cv_perf.loc['accuracy', 'mean']:.3f} ± {cv_perf.loc['accuracy', 'std']:.3f}",
                'Sensitivity': f"{cv_perf.loc['sensitivity', 'mean']:.3f} ± {cv_perf.loc['sensitivity', 'std']:.3f}",
                'Specificity': f"{cv_perf.loc['specificity', 'mean']:.3f} ± {cv_perf.loc['specificity', 'std']:.3f}",
            })

            final_results[model_id] = {
                'model': final_model,
                'X_test': X_test,
                'y_test': y_test,
                'X_train': X_train
            }

    table2_df = pd.DataFrame(summary_data)

    print("\n" + "="*50)
    print("--- Table 2: Model Performance Summary ---")
    print("="*50)
    print(table2_df.to_string(index=False))

    plot_roc_curves(
        final_results, table2_df,
        OUTPUT_DIR / f'Figure1_ROC_Curve_{TARGET_COLUMN_FOR_LABELING}.svg'
    )

    print("\nPhase 1 analysis complete.")


# Run the Phase 1 analysis when this module is executed as the main program.
if __name__ == "__main__":
    main()