# Model Optimisation

This notebook performs hyperparameter optimisation for the six classical ML models across the nine feature-sets, using three search strategies:
- RandomizedSearchCV
- GridSearchCV
- BayesSearchCV

In [1]:
import warnings
warnings.filterwarnings("ignore")
from pathlib import Path
import numpy as np
import pandas as pd
import joblib
import time
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from scipy.stats import randint, uniform

# FEATURES_BASE is only read from; the other three roots receive this notebook's outputs.
FEATURES_BASE, PROC_BASE, MODEL_BASE, FIG_BASE = (
    Path(location)
    for location in (
        "../data/processed/features",
        "../data/processed/ml",
        "../models/ml",
        "../figures/ml",
    )
)

# Make sure every output root exists before anything is written to it.
for p in (PROC_BASE, MODEL_BASE, FIG_BASE):
    p.mkdir(parents=True, exist_ok=True)

# Short codes of the nine feature-sets; each one is a sub-folder of FEATURES_BASE.
METHODS = "rfe skb fscs etc pc mi mir mu vt".split()

# Outputs of the optimised runs go to a folder named "o" + <feature-set code>.
OPT_MAP = {code: f"o{code}" for code in METHODS}

# Single seed reused for CV shuffling, the estimators and the stochastic searches.
RANDOM_STATE = 42
# Shared 3-fold stratified splitter so every search strategy is scored on the same folds.
CV = StratifiedKFold(n_splits=3, shuffle=True, random_state=RANDOM_STATE)

# BayesSearchCV comes from scikit-optimize, which is not part of scikit-learn.
# One guarded import is enough: retrying the identical import cannot succeed where
# the first attempt failed. The broad `except Exception` is kept so that any
# import-time failure is still re-raised as ImportError with the original cause chained.
try:
    from skopt import BayesSearchCV
except Exception as e:
    raise ImportError("skopt (scikit-optimize) not found. Install scikit-optimize to use Bayesian optimisation.") from e

print("Configuration OK. Methods:", METHODS)

Configuration OK. Methods: ['rfe', 'skb', 'fscs', 'etc', 'pc', 'mi', 'mir', 'mu', 'vt']


## Hyperparameter Spaces

In [2]:
# Sampling distributions for RandomizedSearchCV, keyed by the model names used in MODELS.
# scipy conventions: uniform(loc, scale) draws from [loc, loc + scale];
# randint(low, high) draws integers from low up to high - 1.
PARAM_DIST = {
    "Logistic Regression": {
        "C": uniform(0.001, 10),              # [0.001, 10.001]
        "penalty": ["l2"],
        "solver": ["lbfgs", "saga"],
        # Same iteration budget as the base estimator in MODELS (max_iter=1000).
        # A lower value here would apply to the randomized search only and make it
        # incomparable with the grid / Bayes searches, which do not tune max_iter.
        "max_iter": [1000]
    },
    "Gradient Boosting": {
        "n_estimators": randint(50, 300),     # 50..299
        "learning_rate": uniform(0.01, 0.5),  # [0.01, 0.51]
        "max_depth": randint(2, 8),           # 2..7
        "subsample": uniform(0.5, 0.5)        # [0.5, 1.0]
    },
    "KNN": {
        "n_neighbors": randint(1, 31),        # 1..30
        "weights": ["uniform", "distance"],
        "p": [1, 2]
    },
    "Random Forest": {
        "n_estimators": randint(50, 400),     # 50..399
        "max_depth": randint(3, 20),          # 3..19
        "max_features": ["sqrt", "log2", None]
    },
    "Decision Tree": {
        "max_depth": randint(1, 20),          # 1..19
        "min_samples_split": randint(2, 20),  # 2..19
        "min_samples_leaf": randint(1, 20)    # 1..19
    },
    "SVM": {
        "C": uniform(0.01, 100),              # [0.01, 100.01]
        "kernel": ["rbf", "poly"],
        "gamma": ["scale", "auto"]
    }
}

# Exhaustive candidate values for GridSearchCV, keyed by the model names used in MODELS.
PARAM_GRID = {
    "Logistic Regression": dict(
        C=[0.01, 0.1, 1, 10],
        solver=["lbfgs"],
        penalty=["l2"],
    ),
    "Gradient Boosting": dict(
        n_estimators=[100, 200],
        learning_rate=[0.01, 0.1],
        max_depth=[3, 5],
    ),
    "KNN": dict(
        n_neighbors=[3, 5, 7, 9],
        weights=["uniform", "distance"],
        p=[1, 2],
    ),
    "Random Forest": dict(
        n_estimators=[100, 200],
        max_depth=[None, 10, 20],
    ),
    "Decision Tree": dict(
        max_depth=[None, 5, 10],
        min_samples_split=[2, 5, 10],
    ),
    "SVM": dict(
        C=[0.1, 1, 10],
        kernel=["rbf"],
        gamma=["scale"],
    ),
}

# Search spaces handed to BayesSearchCV, keyed by the model names used in MODELS.
# Tuples are (low, high) numeric ranges with an optional third element naming the
# prior; lists of strings are categorical choices.
# NOTE(review): the two-int list for KNN "p" is presumably read by skopt as the
# integer range 1..2 rather than as categories - same candidates either way; confirm.
BAYES_SPACE = {
    "Logistic Regression": dict(
        C=(1e-3, 100.0, "log-uniform"),
        solver=["lbfgs"],
        penalty=["l2"],
    ),
    "Gradient Boosting": dict(
        n_estimators=(50, 300),
        learning_rate=(0.01, 0.5, "log-uniform"),
        max_depth=(2, 8),
    ),
    "KNN": dict(
        n_neighbors=(1, 31),
        weights=["uniform", "distance"],
        p=[1, 2],
    ),
    "Random Forest": dict(
        n_estimators=(50, 400),
        max_depth=(3, 30),
    ),
    "Decision Tree": dict(
        max_depth=(1, 30),
        min_samples_split=(2, 50),
    ),
    "SVM": dict(
        C=(1e-2, 100.0, "log-uniform"),
        kernel=["rbf"],
        gamma=["scale", "auto"],
    ),
}

## Helper Functions

In [3]:
def compute_metrics(y_true, y_pred):
    """Score predictions against the true labels.

    Returns a dict with the keys "Accuracy", "Precision", "Recall" and "F1".
    Precision, recall and F1 use weighted averaging over the classes and report
    0 (instead of warning) where a class has no predicted/true samples.
    """
    weighted = {"average": "weighted", "zero_division": 0}
    scores = {"Accuracy": accuracy_score(y_true, y_pred)}
    for label, scorer in (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1", f1_score),
    ):
        scores[label] = scorer(y_true, y_pred, **weighted)
    return scores

def save_confusion_matrix(y_true, y_pred, out_path, title):
    """Render the confusion matrix of `y_pred` vs `y_true` as a heatmap and save it.

    Parameters
    ----------
    y_true, y_pred : label sequences passed straight to `confusion_matrix`.
    out_path : destination of the image (written at 300 dpi, tight bounding box).
    title : title shown above the heatmap.

    The figure is always closed, even if drawing or saving fails, so that a
    failure swallowed by the caller does not leave open figures accumulating.
    """
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(5,4))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("True")
        ax.set_title(title)
        fig.savefig(out_path, dpi=300, bbox_inches="tight")
    finally:
        # Release the figure whether or not the save succeeded.
        plt.close(fig)

def run_optimizer(name, model, param_space, X_train, y_train, optimizer_type="random", n_iter=30):
    """Fit one hyperparameter search on the training data and time it.

    Parameters
    ----------
    name : model label; accepted for call-site compatibility, not used here.
    model : estimator handed to the search object.
    param_space : search space in the format expected by the chosen search
        (distributions for "random", a grid for "grid", skopt spaces for "bayes").
    X_train, y_train : training data passed to `search.fit`.
    optimizer_type : "random", "grid" or "bayes".
    n_iter : number of sampled candidates; ignored for "grid".

    Returns
    -------
    (search, seconds) : the fitted search object and the duration of `fit`.

    Raises
    ------
    ValueError : if `optimizer_type` is not one of the three supported values.
    """
    if optimizer_type == "random":
        search = RandomizedSearchCV(model, param_distributions=param_space, n_iter=n_iter, cv=CV,
                                    scoring="accuracy", n_jobs=-1, random_state=RANDOM_STATE, verbose=0)
    elif optimizer_type == "grid":
        search = GridSearchCV(model, param_grid=param_space, cv=CV, scoring="accuracy", n_jobs=-1, verbose=0)
    elif optimizer_type == "bayes":
        search = BayesSearchCV(model, search_spaces=param_space, n_iter=n_iter, cv=CV, scoring="accuracy",
                               n_jobs=-1, random_state=RANDOM_STATE, verbose=0)
    else:
        raise ValueError(
            f"Unknown optimizer_type {optimizer_type!r}; expected 'random', 'grid' or 'bayes'"
        )

    # perf_counter is monotonic, so the duration cannot be skewed by system clock adjustments.
    start = time.perf_counter()
    search.fit(X_train, y_train)
    elapsed = time.perf_counter() - start
    return search, elapsed

## Model Definitions

In [4]:
# Base estimators, keyed by the display names that also index PARAM_DIST,
# PARAM_GRID and BAYES_SPACE. Every estimator that accepts a seed gets RANDOM_STATE.
# NOTE(review): SVC(probability=True) adds an internal cross-validated calibration
# to every fit; this notebook only calls .predict(), so confirm that downstream
# consumers of the saved models need predict_proba.
MODELS = dict([
    ("Logistic Regression", LogisticRegression(max_iter=1000, random_state=RANDOM_STATE)),
    ("Gradient Boosting", GradientBoostingClassifier(random_state=RANDOM_STATE)),
    ("KNN", KNeighborsClassifier()),
    ("Random Forest", RandomForestClassifier(random_state=RANDOM_STATE)),
    ("Decision Tree", DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ("SVM", SVC(probability=True, random_state=RANDOM_STATE)),
])

## Main Optimisation Loop

In [5]:
# Number of sampled candidates for the two stochastic searches.
n_iter_rand = 30
n_iter_bayes = 30

# Name of the label column in every train.csv / test.csv.
TARGET_COL = "DepressionEncoded"

# One entry per search strategy, so the per-model work is written once instead of
# three times:
#   label   - value of the "Optimizer" column; lower-cased it is the file-name tag
#   search  - class name used in the log lines
#   type    - `optimizer_type` understood by run_optimizer
#   spaces  - per-model search spaces for this strategy
#   n_iter  - candidate budget, or None for the exhaustive grid
#   csv     - per-feature-set results file
OPTIMIZER_RUNS = [
    {"label": "Randomized", "search": "RandomizedSearchCV", "type": "random",
     "spaces": PARAM_DIST, "n_iter": n_iter_rand, "csv": "randomized_search_results.csv"},
    {"label": "Grid", "search": "GridSearchCV", "type": "grid",
     "spaces": PARAM_GRID, "n_iter": None, "csv": "grid_search_results.csv"},
    {"label": "Bayes", "search": "BayesSearchCV", "type": "bayes",
     "spaces": BAYES_SPACE, "n_iter": n_iter_bayes, "csv": "bayes_search_results.csv"},
]

for method in tqdm(METHODS, desc="Methods"):
    feat_dir = FEATURES_BASE / method
    if not feat_dir.exists():
        print(f"⚠️ Missing feature folder for {method}; skipping.")
        continue

    # Rows without a label cannot be used for fitting or scoring.
    train_df = pd.read_csv(feat_dir / "train.csv").dropna(subset=[TARGET_COL])
    test_df  = pd.read_csv(feat_dir / "test.csv").dropna(subset=[TARGET_COL])

    # Select test features by training column name: the arrays below drop the
    # column names, so a differently ordered test.csv would otherwise be scored
    # against the wrong features without any error.
    feature_cols = [col for col in train_df.columns if col != TARGET_COL]
    missing_cols = [col for col in feature_cols if col not in test_df.columns]
    if missing_cols:
        print(f"⚠️ test.csv for {method} is missing training features {missing_cols}; skipping.")
        continue

    X_train = train_df[feature_cols].values
    y_train = train_df[TARGET_COL].astype(int).values
    X_test  = test_df[feature_cols].values
    y_test  = test_df[TARGET_COL].astype(int).values

    opt_folder = OPT_MAP.get(method, method)
    proc_out_base = PROC_BASE / opt_folder
    model_out_base = MODEL_BASE / opt_folder
    fig_out_base = FIG_BASE / opt_folder
    for p in [proc_out_base, model_out_base, fig_out_base]:
        p.mkdir(parents=True, exist_ok=True)

    # Result rows per strategy for this feature-set.
    rand_results, grid_results, bayes_results = [], [], []
    results_by_label = {"Randomized": rand_results, "Grid": grid_results, "Bayes": bayes_results}

    for model_name, model in tqdm(MODELS.items(), desc=f"Models ({method})", leave=False):
        print(f"\n=== {method.upper()} :: {model_name} ===")
        slug = model_name.lower().replace(' ', '_')

        for run in OPTIMIZER_RUNS:
            label, search_cls, n_iter = run["label"], run["search"], run["n_iter"]
            tag = label.lower()
            space = run["spaces"].get(model_name, {})

            # Best effort: a failing search is reported and the remaining ones still run.
            try:
                if n_iter is None:
                    print(f"-> {search_cls}")
                else:
                    print(f"-> {search_cls} (n_iter=", n_iter, ")")

                if run["type"] == "grid" and not space:
                    print("  No grid defined for", model_name, " — skipping GridSearch")
                    continue

                budget = {} if n_iter is None else {"n_iter": n_iter}
                search, elapsed = run_optimizer(model_name, model, space, X_train, y_train,
                                                optimizer_type=run["type"], **budget)
                best_model = search.best_estimator_
                y_pred = best_model.predict(X_test)
                metrics = compute_metrics(y_test, y_pred)
                metrics.update({"Method": method, "Model": model_name, "Optimizer": label, "TimeSec": elapsed})
                results_by_label[label].append(metrics)

                joblib.dump(best_model, model_out_base / f"{slug}_{tag}.pkl")
                save_confusion_matrix(y_test, y_pred, fig_out_base / f"{slug}_{tag}_confusion.png",
                                      f"{model_name} {label} ({method})")
                print(f" {label} done. Metrics:", metrics)
            except Exception as e:
                print(f" {search_cls} failed for", model_name, ":", e)

    # Write one CSV per strategy, only when that strategy produced at least one row.
    for run in OPTIMIZER_RUNS:
        rows = results_by_label[run["label"]]
        if rows:
            pd.DataFrame(rows).to_csv(proc_out_base / run["csv"], index=False)

    print(f"\nSaved results for method: {method} -> {proc_out_base}")

Methods:   0%|          | 0/9 [00:00<?, ?it/s]

Models (rfe):   0%|          | 0/6 [00:00<?, ?it/s]


=== RFE :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7876543209876543, 'Precision': 0.7879100229434902, 'Recall': 0.7876543209876543, 'F1': 0.7868788029704153, 'Method': 'rfe', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 6.985701560974121}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.7876543209876543, 'Precision': 0.7886274129339851, 'Recall': 0.7876543209876543, 'F1': 0.7871443551932665, 'Method': 'rfe', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.08793115615844727}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.7876543209876543, 'Precision': 0.7886274129339851, 'Recall': 0.7876543209876543, 'F1': 0.7871443551932665, 'Method': 'rfe', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 15.414624214172363}

=== RFE :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7654320987654321, 'Prec

Models (skb):   0%|          | 0/6 [00:00<?, ?it/s]


=== SKB :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7901234567901234, 'Precision': 0.7911687718850705, 'Recall': 0.7901234567901234, 'F1': 0.7890762813842568, 'Method': 'skb', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 4.678386211395264}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.7901234567901234, 'Precision': 0.791502472129668, 'Recall': 0.7901234567901234, 'F1': 0.7890148834596109, 'Method': 'skb', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.4046809673309326}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.7901234567901234, 'Precision': 0.7911687718850705, 'Recall': 0.7901234567901234, 'F1': 0.7890762813842568, 'Method': 'skb', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 70.2349796295166}

=== SKB :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7308641975308642, 'Precisio

Models (fscs):   0%|          | 0/6 [00:00<?, ?it/s]


=== FSCS :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7679012345679013, 'Precision': 0.7717094703533359, 'Recall': 0.7679012345679013, 'F1': 0.7688258622025148, 'Method': 'fscs', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 2.3311879634857178}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.7679012345679013, 'Precision': 0.7717094703533359, 'Recall': 0.7679012345679013, 'F1': 0.7688258622025148, 'Method': 'fscs', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.23371672630310059}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.7654320987654321, 'Precision': 0.7720771877804342, 'Recall': 0.7654320987654321, 'F1': 0.7664392474251562, 'Method': 'fscs', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 50.8630895614624}

=== FSCS :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7333333333333333, '

Models (etc):   0%|          | 0/6 [00:00<?, ?it/s]


=== ETC :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.8, 'Precision': 0.7987000107476299, 'Recall': 0.8, 'F1': 0.7987742016516626, 'Method': 'etc', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 3.3767905235290527}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.8, 'Precision': 0.7987540649531613, 'Recall': 0.8, 'F1': 0.7984704108482832, 'Method': 'etc', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.17650961875915527}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.8, 'Precision': 0.7984192884368655, 'Recall': 0.8, 'F1': 0.7987608641124032, 'Method': 'etc', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 45.20108914375305}

=== ETC :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7679012345679013, 'Precision': 0.7686936828421638, 'Recall': 0.7679012345679013, 'F1': 0.7663867478749187, 'Metho

Models (pc):   0%|          | 0/6 [00:00<?, ?it/s]


=== PC :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7901234567901234, 'Precision': 0.7911687718850705, 'Recall': 0.7901234567901234, 'F1': 0.7890762813842568, 'Method': 'pc', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 4.8030688762664795}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.7901234567901234, 'Precision': 0.791502472129668, 'Recall': 0.7901234567901234, 'F1': 0.7890148834596109, 'Method': 'pc', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.20495986938476562}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.7901234567901234, 'Precision': 0.7911687718850705, 'Recall': 0.7901234567901234, 'F1': 0.7890762813842568, 'Method': 'pc', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 51.773319482803345}

=== PC :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7308641975308642, 'Precision

Models (mi):   0%|          | 0/6 [00:00<?, ?it/s]


=== MI :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.782716049382716, 'Precision': 0.783310470964792, 'Recall': 0.782716049382716, 'F1': 0.7810032088282496, 'Method': 'mi', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 1.8756027221679688}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.7876543209876543, 'Precision': 0.7876156470428777, 'Recall': 0.7876543209876543, 'F1': 0.7857719708975298, 'Method': 'mi', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.0877530574798584}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.782716049382716, 'Precision': 0.783310470964792, 'Recall': 0.782716049382716, 'F1': 0.7810032088282496, 'Method': 'mi', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 28.305572509765625}

=== MI :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7308641975308642, 'Precision': 0.7

Models (mir):   0%|          | 0/6 [00:00<?, ?it/s]


=== MIR :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7679012345679013, 'Precision': 0.7672775504207979, 'Recall': 0.7679012345679013, 'F1': 0.767410831721002, 'Method': 'mir', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 1.595515489578247}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.7679012345679013, 'Precision': 0.7672775504207979, 'Recall': 0.7679012345679013, 'F1': 0.767410831721002, 'Method': 'mir', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.09590721130371094}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.7679012345679013, 'Precision': 0.7672775504207979, 'Recall': 0.7679012345679013, 'F1': 0.767410831721002, 'Method': 'mir', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 13.112157344818115}

=== MIR :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.745679012345679, 'Precisio

Models (mu):   0%|          | 0/6 [00:00<?, ?it/s]


=== MU :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7333333333333333, 'Precision': 0.736444418483399, 'Recall': 0.7333333333333333, 'F1': 0.733969923077532, 'Method': 'mu', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 4.545609951019287}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.7333333333333333, 'Precision': 0.736444418483399, 'Recall': 0.7333333333333333, 'F1': 0.733969923077532, 'Method': 'mu', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.32160115242004395}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.7333333333333333, 'Precision': 0.736444418483399, 'Recall': 0.7333333333333333, 'F1': 0.733969923077532, 'Method': 'mu', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 21.101855754852295}

=== MU :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7061728395061728, 'Precision': 0.7

Models (vt):   0%|          | 0/6 [00:00<?, ?it/s]


=== VT :: Logistic Regression ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.8049382716049382, 'Precision': 0.8050983081847279, 'Recall': 0.8049382716049382, 'F1': 0.8041277897313581, 'Method': 'vt', 'Model': 'Logistic Regression', 'Optimizer': 'Randomized', 'TimeSec': 1.446943998336792}
-> GridSearchCV
 Grid done. Metrics: {'Accuracy': 0.8049382716049382, 'Precision': 0.8050983081847279, 'Recall': 0.8049382716049382, 'F1': 0.8041277897313581, 'Method': 'vt', 'Model': 'Logistic Regression', 'Optimizer': 'Grid', 'TimeSec': 0.08745408058166504}
-> BayesSearchCV (n_iter= 30 )
 Bayes done. Metrics: {'Accuracy': 0.8074074074074075, 'Precision': 0.8076007022804617, 'Recall': 0.8074074074074075, 'F1': 0.8064619684866211, 'Method': 'vt', 'Model': 'Logistic Regression', 'Optimizer': 'Bayes', 'TimeSec': 12.302035570144653}

=== VT :: Gradient Boosting ===
-> RandomizedSearchCV (n_iter= 30 )
 Randomized done. Metrics: {'Accuracy': 0.7802469135802469, 'Precision

## Combine all optimizer results into a single report

In [6]:
# Gather every per-feature-set results CSV that exists, tagging each frame with the
# folder and file it came from.
combined_all = []
for method in METHODS:
    opt_folder = OPT_MAP.get(method, method)
    proc_out_base = PROC_BASE / opt_folder
    present = (
        fname
        for fname in ("randomized_search_results.csv", "grid_search_results.csv", "bayes_search_results.csv")
        if (proc_out_base / fname).exists()
    )
    combined_all.extend(
        pd.read_csv(proc_out_base / fname).assign(OptFolder=opt_folder, SourceFile=fname)
        for fname in present
    )

if not combined_all:
    print("No optimisation results found to combine.")
else:
    summary_path = PROC_BASE / "all_optimised_results_summary.csv"
    combined_df = pd.concat(combined_all, ignore_index=True)
    combined_df.to_csv(summary_path, index=False)
    # Sorted for display only; the saved CSV keeps the concatenation order.
    display(combined_df.sort_values(["OptFolder", "Model", "Optimizer"], ascending=True))
    print("Combined optimisation summary saved to:", summary_path)

Unnamed: 0,Accuracy,Precision,Recall,F1,Method,Model,Optimizer,TimeSec,OptFolder,SourceFile
70,0.723457,0.720939,0.723457,0.719512,etc,Decision Tree,Bayes,45.191621,oetc,bayes_search_results.csv
64,0.679012,0.686419,0.679012,0.671676,etc,Decision Tree,Grid,0.096770,oetc,grid_search_results.csv
58,0.681481,0.687382,0.681481,0.678948,etc,Decision Tree,Randomized,0.282390,oetc,randomized_search_results.csv
67,0.760494,0.762083,0.760494,0.758086,etc,Gradient Boosting,Bayes,247.829043,oetc,bayes_search_results.csv
61,0.762963,0.765628,0.762963,0.763984,etc,Gradient Boosting,Grid,25.527264,oetc,grid_search_results.csv
...,...,...,...,...,...,...,...,...,...,...
153,0.765432,0.765237,0.765432,0.762856,vt,Random Forest,Grid,1.889214,ovt,grid_search_results.csv
147,0.762963,0.763226,0.762963,0.760826,vt,Random Forest,Randomized,10.860789,ovt,randomized_search_results.csv
161,0.770370,0.770550,0.770370,0.769091,vt,SVM,Bayes,17.804526,ovt,bayes_search_results.csv
155,0.797531,0.798466,0.797531,0.796489,vt,SVM,Grid,0.712471,ovt,grid_search_results.csv


Combined optimisation summary saved to: ..\data\processed\ml\all_optimised_results_summary.csv
