# Project : Optimization for Machine Learning

### Initial Setup

In [None]:
!pip install torch -q 
!pip install torchvision -q
!pip install optuna -q
!pip install matplotlib -q

Collecting matplotlib
  Downloading matplotlib-3.10.3-cp313-cp313-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.2-cp313-cp313-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.58.1-cp313-cp313-win_amd64.whl.metadata (108 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.8-cp313-cp313-win_amd64.whl.metadata (6.3 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)
Downloading matplotlib-3.10.3-cp313-cp313-win_amd64.whl (8.1 MB)
   ---------------------------------------- 0.0/8.1 MB ? eta -:--:--
   --- ------------------------------------ 0.8/8.1 MB 4.1 MB/s eta 0:00:02
   --------- ------------------------------ 1.8/8.1 MB 4.4 MB/s eta 0:00:02
   ----------- ---------------------

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import random
import optuna
import optuna.visualization as vis

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Code reproducibility
def set_seed(seed=1):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(1)

In [None]:
BATCH_SIZE = 128
EPOCHS = 5
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Dataset Preprocessing and CNN benchmark model

In [None]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
class CNN_model(nn.Module):
    def __init__(self):
        super(CNN_model, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, 256),
            nn.ReLU(),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

### Training and testing function

In [None]:
def train(model, optimizer, criterion, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")
    return avg_loss

In [None]:
def test(model):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return correct / total

### Comparison of optimizers

In [None]:
optimizers_list = {
    'AdaGrad': lambda model, trial: optim.Adagrad(
        model.parameters(),
        lr=trial.suggest_loguniform("lr", 1e-4, 1e-1)
    ),
    'RMSprop': lambda model, trial: optim.RMSprop(
        model.parameters(),
        lr=trial.suggest_loguniform("lr", 1e-5, 1e-2),
        momentum=trial.suggest_uniform("momentum", 0.5, 0.99)
    ),
    'Adam': lambda model, trial: optim.Adam(
        model.parameters(),
        lr=trial.suggest_loguniform("lr", 1e-5, 1e-2),
        betas=(
            trial.suggest_uniform("beta1", 0.8, 0.99),
            trial.suggest_uniform("beta2", 0.9, 0.999)
        )
    ),
    'AdamW': lambda model, trial: optim.AdamW(
        model.parameters(),
        lr=trial.suggest_loguniform("lr", 1e-5, 1e-2)
    ),
    'AMSGrad': lambda model, trial: optim.Adam(
        model.parameters(),
        lr=trial.suggest_loguniform("lr", 1e-5, 1e-2),
        amsgrad=True
    ),
}



In [None]:
def run_single_optuna(name, base_optimizer_fn):
    print(f"\n🔍 Optimisation pour : {name}")

    def objective(trial):
        model = CNN_model().to(DEVICE)
        optimizer = base_optimizer_fn(model, trial)
        criterion = nn.CrossEntropyLoss()
        train(model, optimizer, criterion, EPOCHS)
        acc = test(model)
        return acc

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=20)     

    print(f"\n✅ Résultats pour {name} :")
    print("Meilleurs hyperparamètres :", study.best_params)
    print(f"Meilleure accuracy : {study.best_value:.2%}")

    return study


In [None]:
optimizer_name = 'Adam'
study = run_single_optuna(optimizer_name, optimizers_list[optimizer_name])

### Visualisations

In [None]:
# Visualisations
vis.plot_optimization_history(study).show()
vis.plot_param_importances(study).show()
for param in study.best_params:
    vis.plot_slice(study, params=[param]).show()
vis.plot_contour(study, params=["lr", "beta1"]).show()
vis.plot_contour(study, params=["lr", "beta2"]).show()
vis.plot_contour(study, params=["beta1", "beta2"]).show()
vis.plot_parallel_coordinate(study).show()

[I 2025-06-04 23:36:17,690] A new study created in memory with name: no-name-407cd515-2fbf-43f3-ad1e-cc85cfd17733



🔍 Optimisation pour : Adam


  lr=trial.suggest_loguniform("lr", 1e-5, 1e-2),
  trial.suggest_uniform("beta1", 0.8, 0.99),
  trial.suggest_uniform("beta2", 0.9, 0.999)
[I 2025-06-04 23:52:44,877] Trial 0 finished with value: 0.6977 and parameters: {'lr': 0.0009512347683428825, 'beta1': 0.8819387015122417, 'beta2': 0.9192026628287014}. Best is trial 0 with value: 0.6977.
[I 2025-06-05 00:10:06,917] Trial 1 finished with value: 0.5902 and parameters: {'lr': 0.00015176781519646038, 'beta1': 0.9251779557501687, 'beta2': 0.9779132975670363}. Best is trial 0 with value: 0.6977.
[W 2025-06-05 07:18:51,862] Trial 2 failed with parameters: {'lr': 7.908070545741482e-05, 'beta1': 0.9324908662451702, 'beta2': 0.9898946276140834} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\yemal\Desktop\Centrale_Lille\S8 EPFL\Cours EPFL\Optimization for Machine Learning\Projet\.venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial

KeyboardInterrupt: 

In [None]:
#Saving graphics
fig = vis.plot_optimization_history(study)
fig.write_html("optimization_history.html")

# 2. Importance des hyperparamètres
fig = vis.plot_param_importances(study)
fig.write_html("param_importances.html")

# 3. Courbes slice par paramètre
for param in study.best_params:
    fig = vis.plot_slice(study, params=[param])
    fig.write_html(f"slice_{param}.html")

# 4. Courbes de contours
fig = vis.plot_contour(study, params=["lr", "beta1"])
fig.write_html("contour_lr_beta1.html")

fig = vis.plot_contour(study, params=["lr", "beta2"])
fig.write_html("contour_lr_beta2.html")

fig = vis.plot_contour(study, params=["beta1", "beta2"])
fig.write_html("contour_beta1_beta2.html")

# 5. Coordonnées parallèles
fig = vis.plot_parallel_coordinate(study)
fig.write_html("parallel_coordinates.html")