Hyperparameter Tuning

Traditional ML — Grid / Random Search

In [4]:
from pathlib import Path

import joblib
import numpy as np
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

FEAT = Path("features")          #  ←  no “..” here
X_train = np.load(FEAT / "X_train.npy")
y_train = np.load(FEAT / "y_train.npy")
X_val   = np.load(FEAT / "X_val.npy")
y_val   = np.load(FEAT / "y_val.npy")

print("shapes:", X_train.shape, y_train.shape)   # should print e.g. (7836, 1280) (7836,)


shapes: (7836, 1280) (7836,)


In [None]:
# Logistic Regression – GridSearch

from sklearn.linear_model import LogisticRegression

pipe = Pipeline([
    ("scaler",  StandardScaler()),            # improves convergence
    ("clf", LogisticRegression(max_iter=2000, solver="saga", n_jobs=-1,
                               multi_class="multinomial"))
])

param_grid = {
    "clf__C":        [0.1, 1, 3, 10],
    "clf__penalty":  ["l1", "l2"]
}

gs_log = GridSearchCV(pipe, param_grid, cv=3, scoring="accuracy",
                      n_jobs=-1, verbose=1)
gs_log.fit(X_train, y_train)
print("Best LogReg:", gs_log.best_params_, "→", gs_log.best_score_)
joblib.dump(gs_log.best_estimator_, "logreg_best.pkl")


Fitting 3 folds for each of 8 candidates, totalling 24 fits




In [None]:
# Linear SVM – RandomizedSearch

from sklearn.linear_model import SGDClassifier
from scipy.stats import loguniform

svm_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("clf", SGDClassifier(loss="hinge", max_iter=4000, n_jobs=-1))
])

param_dist = {
    "clf__alpha": loguniform(1e-6, 1e-3),   # 1/C
    "clf__learning_rate": ["optimal", "invscaling", "adaptive"],
    "clf__eta0": [1e-3, 5e-3, 1e-2]
}

rs_svm = RandomizedSearchCV(svm_pipe, param_dist, n_iter=20, cv=3,
                            scoring="accuracy", n_jobs=-1, verbose=1, random_state=42)
rs_svm.fit(X_train, y_train)
print("Best SVM:", rs_svm.best_params_, "→", rs_svm.best_score_)
joblib.dump(rs_svm.best_estimator_, "svm_best.pkl")


In [None]:
# K‑NN – GridSearch

from sklearn.neighbors import KNeighborsClassifier

knn_pipe = Pipeline([
    ("scaler", StandardScaler(with_mean=False)),   # cosine dist doesn’t need centering
    ("clf", KNeighborsClassifier(metric="cosine"))
])

param_grid = {
    "clf__n_neighbors": [3, 5, 7, 9],
    "clf__weights": ["uniform", "distance"]
}

gs_knn = GridSearchCV(knn_pipe, param_grid, cv=3, scoring="accuracy",
                      n_jobs=-1, verbose=1)
gs_knn.fit(X_train, y_train)
print("Best KNN:", gs_knn.best_params_, "→", gs_knn.best_score_)
joblib.dump(gs_knn.best_estimator_, "knn_best.pkl")



In [None]:
import pandas as pd
pd.DataFrame(gs_log.cv_results_).to_csv("logreg_grid_results.csv", index=False)

CNN — Optuna Bayesian Search (PyTorch)

In [3]:
pip install optuna --quiet

You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Define an objective
import optuna, torch, torch.nn as nn, copy, time
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

def objective(trial):
    # hyper‑params to explore
    lr         = trial.suggest_loguniform("lr", 1e-4, 3e-3)
    dropout_p  = trial.suggest_float("dropout", 0.2, 0.5)
    weight_dec = trial.suggest_loguniform("wd", 1e-5, 1e-3)
    unfreeze   = trial.suggest_int("unfreeze", 50, 150, step=25)

    # model
    model = mobilenet_v2_ft(num_classes, unfreeze_from=unfreeze)
    model.classifier[0] = nn.Dropout(dropout_p)
    model.to(device)

    opt  = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_dec)
    crit = nn.CrossEntropyLoss()

    best_val, stale = 0, 0
    for epoch in range(1, 11):            # max 10 epochs each trial
        # train one epoch
        model.train()
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            opt.zero_grad(); out = model(X); loss = crit(out, y)
            loss.backward(); opt.step()

        # val acc
        model.eval(); correct = total = 0
        with torch.inference_mode():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)
                correct += (model(X).argmax(1) == y).sum().item()
                total   += y.size(0)
        val_acc = correct / total
        trial.report(val_acc, epoch)

        if trial.should_prune():   # early‑prune bad trials
            raise optuna.TrialPruned()

        if val_acc > best_val:
            best_val, stale = val_acc, 0
        else:
            stale += 1
            if stale >= 3: break   # our own patience

    return best_val


In [None]:
# Run the study
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=25, timeout=60*60)   # 1 hour budget
print("Best trial:", study.best_trial.params, "→", study.best_value)
study.trials_dataframe().to_csv("optuna_mobilenet.csv", index=False)
