In [3]:
! pip3 install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Using cached alembic-1.17.2-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Collecting sqlalchemy>=1.4.2 (from optuna)
  Using cached sqlalchemy-2.0.45-py3-none-any.whl.metadata (9.5 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Using cached mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
Using cached alembic-1.17.2-py3-none-any.whl (248 kB)
Using cached sqlalchemy-2.0.45-py3-none-any.whl (1.9 MB)
Downloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Using cached mako-1.3.10-py3-none-any.whl (78 kB)
Installing collected packages: sqlalchemy, Mako, colorlog, alembic, optuna
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5/5[0m [optuna]━━━━[0m [32m4/5[0m [optuna]]
[1A[2KSuccessfully installed Mako-

In [4]:
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision import models

In [9]:
# Build model
def build_model():
    model = models.resnet18(weights=None)  # no auto-download
    state_dict = torch.load("../pytorch-dataloading/resnet18-5c106cde.pth",weights_only=False)
    model.load_state_dict(state_dict)
    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, 10)  # CIFAR-10 has 10 classes
    return model

In [10]:
# Data loaders
def get_dataloaders(batch_size):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
    ])
    trainset = datasets.CIFAR10(root='../learning-rate-scheduler-comparison/data', train=True, download=True, transform=transform)
    valset   = datasets.CIFAR10(root='../learning-rate-scheduler-comparison/data', train=False, download=True, transform=transform)

    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
    valloader   = DataLoader(valset, batch_size=batch_size, shuffle=False)
    return trainloader, valloader

In [11]:
# Optuna objective
def objective(trial):
    # Hyperparameters
    lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128])
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])
    scheduler_name = trial.suggest_categorical('scheduler', ['StepLR', 'ReduceLROnPlateau', 'CosineAnnealingLR'])

    # Model + Data
    model = build_model()
    trainloader, valloader = get_dataloaders(batch_size)
    criterion = nn.CrossEntropyLoss()
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    # Scheduler
    if scheduler_name == 'StepLR':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
    elif scheduler_name == 'ReduceLROnPlateau':
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2)
    else:
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-6)

    # Training loop (5 epochs)
    for epoch in range(5):
        model.train()
        for images, labels in trainloader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for images, labels in valloader:
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        val_acc = correct / total

        # Scheduler step
        if scheduler_name == 'ReduceLROnPlateau':
            scheduler.step(1.0 - val_acc)  # mimic loss
        else:
            scheduler.step()

    return val_acc

In [None]:
# Run Optuna study
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=20)  # try 20 trials first

print("Best hyperparameters:", study.best_params)

[I 2025-12-25 21:52:01,551] A new study created in memory with name: no-name-1222568c-2fe9-4e3c-a6ea-4783110d0ffd
  lr = trial.suggest_loguniform('lr', 1e-5, 1e-2)
