In [None]:
#! pip install optuna

In [1]:
# ============================================================
# NOTEBOOK 2 — Hyperparameter Tuning
# Models : RandomForest, SVM
# Methods : GridSearchCV, RandomizedSearchCV, Optuna
# ============================================================

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import optuna
#from optuna.integration import SklearnPipelineSampler

import warnings
warnings.filterwarnings("ignore")

data = load_breast_cancer()
X = data.data
y = data.target


* 1. Baseline model

In [2]:
pipe_svm = Pipeline([
    ("scaler", StandardScaler()),
    ("model", SVC())
])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y)

pipe_svm.fit(X_train, y_train)
pipe_svm.score(X_test, y_test)


0.9790209790209791

2. GridSearchCV sur SVM

In [3]:
grid_params = {
    "model__C": [0.1, 1, 10],
    "model__gamma": ["scale", "auto"],
    "model__kernel": ["rbf", "linear"]
}

grid = GridSearchCV(pipe_svm, grid_params, cv=5, n_jobs=-1)
grid.fit(X_train, y_train)

print("Best parameters:", grid.best_params_)
print("Test accuracy:", grid.score(X_test, y_test))


Best parameters: {'model__C': 0.1, 'model__gamma': 'scale', 'model__kernel': 'linear'}
Test accuracy: 0.986013986013986


3. RandomizedSearchCV

In [4]:
rand_params = {
    "model__C": np.logspace(-3, 3, 20),
    "model__gamma": np.logspace(-4, 1, 20),
    "model__kernel": ["rbf"]
}

rand = RandomizedSearchCV(
    pipe_svm, rand_params, n_iter=25, cv=5, n_jobs=-1, random_state=42)
rand.fit(X_train, y_train)

rand.best_params_


{'model__kernel': 'rbf',
 'model__gamma': 0.00379269019073225,
 'model__C': 12.742749857031322}

4. Tuning avec OPTUNA (optimisation bayésienne)

In [5]:
def objective(trial):
    C = trial.suggest_loguniform("model__C", 1e-3, 1e3)
    gamma = trial.suggest_loguniform("model__gamma", 1e-4, 1)
    kernel = trial.suggest_categorical("model__kernel", ["rbf", "linear"])

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("model", SVC(C=C, gamma=gamma, kernel=kernel))
    ])

    pipe.fit(X_train, y_train)
    return pipe.score(X_test, y_test)

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

study.best_params


[I 2025-11-20 11:03:19,571] A new study created in memory with name: no-name-36f1756f-4ee7-4d90-abfb-3e7e46df4a07
[I 2025-11-20 11:03:19,590] Trial 0 finished with value: 0.951048951048951 and parameters: {'model__C': 0.006747264895006682, 'model__gamma': 0.022148572992812577, 'model__kernel': 'linear'}. Best is trial 0 with value: 0.951048951048951.
[I 2025-11-20 11:03:19,603] Trial 1 finished with value: 0.986013986013986 and parameters: {'model__C': 232.5750683042674, 'model__gamma': 0.00019198483573759986, 'model__kernel': 'rbf'}. Best is trial 1 with value: 0.986013986013986.
[I 2025-11-20 11:03:19,665] Trial 2 finished with value: 0.6293706293706294 and parameters: {'model__C': 0.004638787253429455, 'model__gamma': 0.009440103084689073, 'model__kernel': 'rbf'}. Best is trial 1 with value: 0.986013986013986.
[I 2025-11-20 11:03:19,683] Trial 3 finished with value: 0.9790209790209791 and parameters: {'model__C': 12.54016909845471, 'model__gamma': 0.0012620071069625622, 'model__kern

{'model__C': 232.5750683042674,
 'model__gamma': 0.00019198483573759986,
 'model__kernel': 'rbf'}

- On définit les hyperparams directement dans trial.suggest_*.

In [None]:
import optuna

def objective(trial):

    C = trial.suggest_float("C", 1e-3, 1e3, log=True)
    gamma = trial.suggest_float("gamma", 1e-4, 1, log=True)
    kernel = trial.suggest_categorical("kernel", ["rbf", "linear"])

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("model", SVC(C=C, gamma=gamma, kernel=kernel))
    ])

    pipe.fit(X_train, y_train)
    return pipe.score(X_test, y_test)

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

study.best_params


### Question : Allez plus loin

1️⃣ Tuner un RandomForest :
      - n_estimators
      - max_depth
      - min_samples_split

2️⃣ Comparer les performances Grid vs Random vs Optuna.

3️⃣ Tester Optuna avec 200 trees.

4️⃣ Visualiser la courbe de convergence Optuna.