Hyperparameter Tuning

Traditional ML — Grid / Random Search

In [1]:
from pathlib import Path
import numpy as np

import os

# Directory that holds the pre-extracted feature arrays.
# The original absolute path (no “..” component) stays as the default, so
# existing runs behave as before; set the FACE_ID_FEATURES environment variable
# to point the notebook at another location without editing this cell.
FEAT = Path(os.environ.get("FACE_ID_FEATURES",
                           r"C:\Users\상선\Face-ID-Project\features"))
if not FEAT.is_dir():
    # fail early with the resolved path instead of a bare np.load error
    raise FileNotFoundError(
        f"Feature directory not found: {FEAT} (set FACE_ID_FEATURES to override)"
    )

X_train = np.load(FEAT / "X_train.npy")
y_train = np.load(FEAT / "y_train.npy")
X_val   = np.load(FEAT / "X_val.npy")
y_val   = np.load(FEAT / "y_val.npy")

# every later cell indexes features and labels together, so the row counts must agree
assert X_train.shape[0] == y_train.shape[0], "X_train / y_train row counts differ"
assert X_val.shape[0] == y_val.shape[0], "X_val / y_val row counts differ"

print("shapes:", X_train.shape, y_train.shape)   # should print e.g. (7836, 1280) (7836,)


shapes: (7836, 1280) (7836,)


In [9]:
# Linear SVM (hinge-loss SGD) – manual candidate search
# Fast manual search for linear SVM via SGD (hinge)
import time, numpy as np, joblib
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score

# (Optional) down-cast both feature matrices to float32 to reduce memory/compute.
# copy=False lets NumPy hand back the input array when it is already float32.
X_train32, X_val32 = (
    features.astype(np.float32, copy=False) for features in (X_train, X_val)
)

def build_sgd(alpha, eta0, lr):
    """Assemble an unfitted scale-then-classify pipeline for one SGD configuration.

    Args:
        alpha: value forwarded to ``SGDClassifier(alpha=...)``.
        eta0: value forwarded to ``SGDClassifier(eta0=...)``.
        lr: schedule name forwarded to ``SGDClassifier(learning_rate=...)``.

    Returns:
        A ``Pipeline`` with a ``"scaler"`` step (``StandardScaler``) followed by
        a ``"clf"`` step (hinge-loss ``SGDClassifier`` with ``max_iter=2000``,
        ``tol=1e-3`` and ``random_state=42``).
    """
    # embeddings are dense, so centering them is fine and helps the optimiser
    standardise = StandardScaler()
    hinge_sgd = SGDClassifier(
        loss="hinge",
        alpha=alpha,
        learning_rate=lr,
        eta0=eta0,
        max_iter=2000,
        tol=1e-3,
        random_state=42,
    )
    return Pipeline(steps=[("scaler", standardise), ("clf", hinge_sgd)])

# Hand-picked configurations to evaluate, one tuple per run: (alpha, eta0, lr)
candidates = [
    (1e-6, 1e-3, "optimal"),
    (3e-6, 3e-3, "optimal"),
    (1e-5, 1e-3, "adaptive"),
    (3e-5, 3e-3, "adaptive"),
    (1e-4, 1e-2, "adaptive"),
    (1e-5, 1e-2, "invscaling"),
    (3e-5, 3e-3, "invscaling"),
    (1e-4, 1e-3, "optimal"),
]

# Best validation accuracy so far, with the fitted pipeline and config behind it.
best_acc = 0.0
best_model = None
best_cfg = None

for cfg in candidates:
    a, e, lr = cfg
    t0 = time.time()

    # fit on the training split, score on the validation split
    model = build_sgd(a, e, lr)
    model.fit(X_train32, y_train)
    val_pred = model.predict(X_val32)
    acc = accuracy_score(y_val, val_pred)

    elapsed = time.time() - t0
    print(f"alpha={a:.0e} eta0={e:.0e} lr={lr:10s} → val {acc:.3f}  ({elapsed:.1f}s)")

    # strict ">" keeps the earliest candidate when two accuracies tie
    if acc > best_acc:
        best_acc = acc
        best_model = model
        best_cfg = cfg

print(f"Best config: {best_cfg}, val‑acc: {best_acc:.3f}")
# persist the winning fitted pipeline (scaler + classifier)
joblib.dump(best_model, "svm_best.pkl")





alpha=1e-06 eta0=1e-03 lr=optimal    → val 0.393  (229.7s)
alpha=3e-06 eta0=3e-03 lr=optimal    → val 0.405  (336.9s)
alpha=1e-05 eta0=1e-03 lr=adaptive   → val 0.399  (621.4s)
alpha=3e-05 eta0=3e-03 lr=adaptive   → val 0.309  (689.9s)
alpha=1e-04 eta0=1e-02 lr=adaptive   → val 0.248  (909.9s)
alpha=1e-05 eta0=1e-02 lr=invscaling → val 0.226  (204.0s)
alpha=3e-05 eta0=3e-03 lr=invscaling → val 0.226  (432.2s)
alpha=1e-04 eta0=1e-03 lr=optimal    → val 0.387  (508.7s)
Best config: (3e-06, 0.003, 'optimal'), val‑acc: 0.405


['svm_best.pkl']

In [None]:
# Linear SVM – manual candidate search on standardised, PCA-reduced (256-d) features

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition   import IncrementalPCA
from sklearn.linear_model    import SGDClassifier
from sklearn.pipeline        import Pipeline
from sklearn.metrics         import accuracy_score
import numpy as np, time, joblib

# 1) Standardise using statistics fitted on the training split only, then keep
#    the transformed matrices as float32.
scaler = StandardScaler()
scaler.fit(X_train)
Xtr = scaler.transform(X_train).astype(np.float32, copy=False)
Xv = scaler.transform(X_val).astype(np.float32, copy=False)

#    Project onto 256 components; the PCA is fitted on the training split only
#    and processes the data in batches of 512 rows.
pca = IncrementalPCA(n_components=256, batch_size=512)
pca.fit(Xtr)
Xtr256, Xv256 = pca.transform(Xtr), pca.transform(Xv)

# 2) Candidate (alpha, eta0, learning_rate) settings, evaluated one at a time
candidates = [
    (1e-5, 1e-3, "adaptive"),
    (3e-5, 3e-3, "optimal"),
    (1e-4, 1e-2, "adaptive"),
    (1e-4, 1e-3, "invscaling"),
    (3e-5, 1e-2, "adaptive"),
]

best_acc, best_clf, best_cfg = 0, None, None
for cfg in candidates:
    a, e, lr = cfg
    t0 = time.time()

    # single-step pipeline: the inputs are already scaled and PCA-reduced
    hinge_sgd = SGDClassifier(
        loss="hinge",
        alpha=a,
        learning_rate=lr,
        eta0=e,
        max_iter=500,
        tol=1e-3,
        random_state=42,
    )
    clf = Pipeline(steps=[("clf", hinge_sgd)])
    clf.fit(Xtr256, y_train)

    val_pred = clf.predict(Xv256)
    acc = accuracy_score(y_val, val_pred)
    elapsed = time.time() - t0
    print(f"{cfg} → val {acc:.3f} ({elapsed:.1f}s)")

    # strict ">" keeps the earliest candidate when two accuracies tie
    if acc > best_acc:
        best_acc = acc
        best_clf = clf
        best_cfg = cfg

print("Best:", best_cfg, "val‑acc", best_acc)
# 3) Persist the preprocessing objects together with the winning classifier.
# NOTE(review): the earlier SGD search cell also writes "svm_best.pkl"; this call
# overwrites it with a classifier that expects inputs already passed through
# `scaler` and `pca` — confirm downstream loaders apply both.
joblib.dump(scaler, "scaler.pkl")
joblib.dump(pca,    "pca256.pkl")
joblib.dump(best_clf, "svm_best.pkl")


Fitting 3 folds for each of 20 candidates, totalling 60 fits




In [None]:
# K‑NN – GridSearch

# GridSearchCV was used below without being imported in any cell of this
# notebook, which raises NameError on a fresh kernel.
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Pipeline, StandardScaler and joblib come from the imports of the earlier cells.
knn_pipe = Pipeline([
    ("scaler", StandardScaler(with_mean=False)),   # cosine dist doesn't need centering
    ("clf", KNeighborsClassifier(metric="cosine"))
])

# 4 neighbourhood sizes x 2 weighting schemes = 8 configurations
param_grid = {
    "clf__n_neighbors": [3, 5, 7, 9],
    "clf__weights": ["uniform", "distance"]
}

# 3-fold cross-validated accuracy on the training split, all cores in parallel
gs_knn = GridSearchCV(knn_pipe, param_grid, cv=3, scoring="accuracy",
                      n_jobs=-1, verbose=1)
gs_knn.fit(X_train, y_train)
print("Best KNN:", gs_knn.best_params_, "→", gs_knn.best_score_)
# persist the pipeline selected by the search
joblib.dump(gs_knn.best_estimator_, "knn_best.pkl")



In [None]:
import pandas as pd
# Write `gs_log.cv_results_` to "logreg_grid_results.csv" as a table without the index column.
# NOTE(review): `gs_log` is not assigned in any cell shown in this notebook —
# presumably it came from a logistic-regression GridSearchCV cell that is no
# longer present; confirm it is defined first, otherwise this line raises NameError.
pd.DataFrame(gs_log.cv_results_).to_csv("logreg_grid_results.csv", index=False)

CNN — Optuna Bayesian Search (PyTorch)

In [3]:
pip install optuna --quiet

You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [None]:
# Define an objective
import optuna, torch, torch.nn as nn, copy, time
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

def objective(trial):
    """Train one sampled configuration and return its best validation accuracy.

    Hyper-parameters sampled from ``trial`` (names are the keys stored by Optuna):
        lr       – AdamW learning rate, log-uniform in [1e-4, 3e-3]
        dropout  – dropout probability, uniform in [0.2, 0.5]
        wd       – AdamW weight decay, log-uniform in [1e-5, 1e-3]
        unfreeze – integer forwarded as ``unfreeze_from``; 50..150 in steps of 25

    Relies on names defined outside this function: ``mobilenet_v2_ft``,
    ``num_classes``, ``device``, ``train_loader`` and ``val_loader``.

    Args:
        trial: the Optuna trial used for sampling, reporting and pruning.

    Returns:
        The highest validation accuracy (correct / total) observed over the
        epochs that were run (at most 10).

    Raises:
        optuna.TrialPruned: when ``trial.should_prune()`` is true after an
            epoch's validation accuracy has been reported.
    """
    # hyper-params to explore
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # parameter names and ranges are unchanged.
    lr         = trial.suggest_float("lr", 1e-4, 3e-3, log=True)
    dropout_p  = trial.suggest_float("dropout", 0.2, 0.5)
    weight_dec = trial.suggest_float("wd", 1e-5, 1e-3, log=True)
    unfreeze   = trial.suggest_int("unfreeze", 50, 150, step=25)

    # model
    model = mobilenet_v2_ft(num_classes, unfreeze_from=unfreeze)
    # assumes classifier[0] is the dropout slot of the head — TODO confirm against mobilenet_v2_ft
    model.classifier[0] = nn.Dropout(dropout_p)
    model.to(device)

    opt  = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_dec)
    crit = nn.CrossEntropyLoss()

    best_val, stale = 0, 0
    for epoch in range(1, 11):            # max 10 epochs each trial
        # train one epoch
        model.train()
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            opt.zero_grad()
            out = model(X)
            loss = crit(out, y)
            loss.backward()
            opt.step()

        # val acc
        model.eval()
        correct = total = 0
        with torch.inference_mode():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)
                correct += (model(X).argmax(1) == y).sum().item()
                total   += y.size(0)
        val_acc = correct / total
        trial.report(val_acc, epoch)

        if trial.should_prune():   # early-prune bad trials
            raise optuna.TrialPruned()

        if val_acc > best_val:
            best_val, stale = val_acc, 0
        else:
            stale += 1
            if stale >= 3:         # our own patience: 3 epochs without improvement
                break

    return best_val


In [None]:
# Run the study
# Seed the TPE sampler so the sequence of suggested hyper-parameters is
# repeatable (42 matches the random_state used by the scikit-learn searches).
# NOTE(review): training inside `objective` is not visibly seeded, so trial
# scores can still vary between runs.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
# budget: 25 trials or one hour, whichever is reached first
study.optimize(objective, n_trials=25, timeout=60*60)   # 1 hour budget
print("Best trial:", study.best_trial.params, "→", study.best_value)
# keep the per-trial table for later inspection
study.trials_dataframe().to_csv("optuna_mobilenet.csv", index=False)
