In [3]:
pip install optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.7-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m21.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.7-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.9/78.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import optuna

# Define a lightweight CNN model
class LightweightCNN(nn.Module):
    def __init__(self):
        super(LightweightCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.AvgPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 8 * 8, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

# Hyperparameters
EPOCHS = 50
WEIGHT_DECAY = 3e-4
MOMENTUM = 0.9
BATCH_SIZE = 128

# Data preparation
transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

train_dataset = datasets.CIFAR10(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10(root="./data", train=False, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Train and evaluate the model
def train_and_evaluate(trial):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = LightweightCNN().to(device)
    optimizer = optim.SGD(model.parameters(), lr=trial.suggest_loguniform("lr", 1e-4, 1e-1),
                          momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)
    scheduler = CosineAnnealingLR(optimizer, T_max=EPOCHS)
    criterion = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(EPOCHS):
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        scheduler.step()

    # Evaluation
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = correct / total * 100
    return accuracy

# Optuna for hyperparameter optimization
def objective(trial):
    accuracy = train_and_evaluate(trial)
    return accuracy

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10)

# Best hyperparameters
print("Best hyperparameters:", study.best_params)

# Save the model
device = "cuda" if torch.cuda.is_available() else "cpu"
best_model = LightweightCNN().to(device)
torch.save(best_model.state_dict(), "cifar10_lightweight_model.pth")

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:04<00:00, 35.1MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


[I 2024-12-05 17:05:32,887] A new study created in memory with name: no-name-e28d3984-0d2c-4200-bc4d-18021d8fa2ab
  optimizer = optim.SGD(model.parameters(), lr=trial.suggest_loguniform("lr", 1e-4, 1e-1),
[I 2024-12-05 17:23:22,524] Trial 0 finished with value: 64.78 and parameters: {'lr': 0.00011982071415109205}. Best is trial 0 with value: 64.78.
[I 2024-12-05 17:40:57,337] Trial 1 finished with value: 87.24 and parameters: {'lr': 0.01361360805117801}. Best is trial 1 with value: 87.24.
[I 2024-12-05 17:58:15,490] Trial 2 finished with value: 87.96000000000001 and parameters: {'lr': 0.032929516306217085}. Best is trial 2 with value: 87.96000000000001.
[I 2024-12-05 18:15:23,981] Trial 3 finished with value: 87.37 and parameters: {'lr': 0.013295270759179858}. Best is trial 2 with value: 87.96000000000001.
[I 2024-12-05 18:32:37,644] Trial 4 finished with value: 85.67 and parameters: {'lr': 0.003955999977669684}. Best is trial 2 with value: 87.96000000000001.
[I 2024-12-05 18:49:51,612

Best hyperparameters: {'lr': 0.0753167049478085}
