In [1]:
# Import necessary torch and torchvision libraries
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import optuna

device = "mps" if torch.backends.mps.is_available() else "cpu"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Download and load the CIFAR-10 dataset
train_data = CIFAR10(root='./data', 
                     train=True, 
                     download=True, 
                     transform=transforms.ToTensor())

test_data = CIFAR10(root='./data',
                    train=False,
                    download=True,
                    transform=transforms.ToTensor())

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [03:14<00:00, 877663.64it/s] 


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
# See classes
class_names = train_data.classes
print(class_names) # It is also idx to class -> class_names[1] = 'Trouser
# Class to index
cls_to_idx = train_data.class_to_idx
print(cls_to_idx)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}


In [4]:
# Create a DataLoader object to load data in batches
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=32,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                            batch_size=32,
                                            shuffle=False)

In [13]:
def get_scheduler(optimizer, scheduler_name):
    if scheduler_name == 'StepLR':
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1) # Each 5 epochs, the learning rate is multiplied by gamma
    elif scheduler_name == 'ExponentialLR':
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8) # The learning rate is multiplied by gamma every epoch
    else:  # CosineAnnealingLR
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)
    return scheduler

def suggest_hyperparameters(trial):
    params = {
        'scheduler_name': trial.suggest_categorical('scheduler_name', ['StepLR', 'ExponentialLR', 'CosineAnnealingLR']), # 
        'optimizer_name': trial.suggest_categorical('optimizer', ['SGD', 'Adam', 'RMSprop']),
        'lr': trial.suggest_float('lr', 5*1e-5, 5*1e-3),
        'momentum': 0.0,
        'scheduler_name': trial.suggest_categorical('scheduler', ['StepLR', 'ExponentialLR', 'CosineAnnealingLR']),
        'init_method': trial.suggest_categorical('init_method', ['xavier_uniform', 'he', 'trunc_normal'])
    }
    
    if params['optimizer_name'] == 'SGD':
        params['momentum'] = trial.suggest_float('momentum', 0.85, 0.99)
    return params

In [14]:
class MyCNN(nn.Module):
    def __init__(self, num_classes=10, init_method='xavier_uniform'):
        super(MyCNN, self).__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 64 x 16 x 16

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 128 x 8 x 8

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2), # output: 256 x 4 x 4

            nn.Flatten(), 
            nn.Linear(256*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes))
        self.init_weights(init_method)

    def forward(self, x):
        x = self.network(x)
        return x
    
    def init_weights(self, init_method):
        if init_method == 'xavier_uniform':
            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                    nn.init.xavier_uniform_(m.weight)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
        elif init_method == 'he':
            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                    nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
        elif init_method == 'trunc_normal':
            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                    nn.init.trunc_normal_(m.weight, mean=0.0, std=0.1)
                    if m.bias is not None:
                        nn.init.constant_(m.bias, 0)
                        

In [15]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy,
               device: torch.device = device):
    
    accuracy.reset()
    train_loss, train_acc = 0, 0
    model.to(device)
    
    for batch, (X, y) in enumerate(data_loader):
        
        X = X.to(device)
        y = y.to(device)
        
        # Training
        model.train()
        # Forward pass
        y_pred = model(X)
        # Calculate loss per batch
        loss = loss_fn(y_pred, y)
        train_loss += loss # accumulate loss per batch
        # Update accuracy
        accuracy.update(y_pred, y)
        # Zero the gradients
        optimizer.zero_grad()
        # Backward pass
        loss.backward()
        # Update weights
        optimizer.step()
    # Loss per epoch    
    train_loss = train_loss / len(data_loader)
    train_acc = accuracy.compute()
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc*100:.2f}%")
    return train_loss, train_acc

def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              accuracy,
              device: torch.device = device):
    
    
    accuracy.reset()
    ## Testing
    test_loss, test_acc = 0, 0
    # Set model to evaluation mode
    model.eval()
    # Turn off gradients
    with torch.inference_mode():
        for X, y in data_loader:
            # Move data to device
            X = X.to(device)
            y = y.to(device)
            # Forward pass
            test_pred = model(X)
            # Calculate loss per batch
            test_loss += loss_fn(test_pred, y)
            # Update accuracy
            accuracy.update(test_pred, y)
    # Loss per epoch        
    test_loss = test_loss / len(data_loader)
    # Calculate accuracy
    test_acc = accuracy.compute()
    # Print loss and accuracy per epoch
    print(f"Test loss: {test_loss:.5f}, Test acc: {test_acc*100:.2f}%\n")
    return test_loss, test_acc

In [16]:
# Import accuracy metric
from torchmetrics import Accuracy
accuracy = Accuracy(task="multiclass", num_classes=10).to(device)
# Setup loss function and optimizer
loss_fn = nn.CrossEntropyLoss()


In [17]:
def objective(trial):
    
    params = suggest_hyperparameters(trial)
    
    model = MyCNN(num_classes=10, init_method=params['init_method'])
    
    if params['optimizer_name'] == 'SGD':
        optimizer = getattr(optim, params['optimizer_name'])(model.parameters(), lr=params['lr'], momentum=params['momentum']) 
    else:
        optimizer = getattr(optim, params['optimizer_name'])(model.parameters(), lr=params['lr'])
        
    scheduler = get_scheduler(optimizer, params['scheduler_name']) # Her epoch geçişinde .step() metodu çağrılmalıdır
    
    print(f"Optimizer: {params['optimizer_name']}, Scheduler: {params['scheduler_name']}, Learning rate: {params['lr']}, Momentum: {params['momentum']}, Init method: {params['init_method']}")
    
    """
    model = MyCNN(num_classes=10, init_method='he')
    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
    """
    epochs = 4
    
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}\n-------------------------------")
        train_loss, train_acc = train_step(model, train_loader, loss_fn, optimizer, accuracy)
        test_loss, test_acc = test_step(model, test_loader, loss_fn, accuracy)
        scheduler.step()
    return test_loss

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

print('Best trial:')
trial = study.best_trial

print(f'Value: {trial.value}')
print('Params: ')
for key, value in trial.params.items():
    print(f'{key}: {value}')


[I 2024-03-19 13:57:21,710] A new study created in memory with name: no-name-da8de520-cc61-42b5-a48f-e460a40f9bf2


Optimizer: RMSprop, Scheduler: ExponentialLR, Learning rate: 0.0008694247141217115, Momentum: 0.0, Init method: xavier_uniform
Epoch 1
-------------------------------
Train loss: 2.09358 | Train accuracy: 36.38%
Test loss: 1.32284, Test acc: 52.23%

Epoch 2
-------------------------------
Train loss: 1.12299 | Train accuracy: 59.64%
Test loss: 0.98849, Test acc: 64.96%

Epoch 3
-------------------------------
Train loss: 0.82157 | Train accuracy: 70.79%
Test loss: 0.89357, Test acc: 68.84%

Epoch 4
-------------------------------
Train loss: 0.61263 | Train accuracy: 78.49%


[I 2024-03-19 13:59:34,548] Trial 0 finished with value: 0.7659769058227539 and parameters: {'scheduler_name': 'CosineAnnealingLR', 'optimizer': 'RMSprop', 'lr': 0.0008694247141217115, 'scheduler': 'ExponentialLR', 'init_method': 'xavier_uniform'}. Best is trial 0 with value: 0.7659769058227539.


Test loss: 0.76598, Test acc: 73.28%

Optimizer: RMSprop, Scheduler: CosineAnnealingLR, Learning rate: 0.0038758303225686947, Momentum: 0.0, Init method: he
Epoch 1
-------------------------------
Train loss: 51871.05859 | Train accuracy: 27.37%
Test loss: 2.30893, Test acc: 10.04%

Epoch 2
-------------------------------
Train loss: 2.30442 | Train accuracy: 10.00%
Test loss: 2.30280, Test acc: 10.00%

Epoch 3
-------------------------------
Train loss: 2.30335 | Train accuracy: 10.06%
Test loss: 2.30314, Test acc: 10.00%

Epoch 4
-------------------------------
Train loss: 2.30324 | Train accuracy: 9.97%


[I 2024-03-19 14:01:49,329] Trial 1 finished with value: 2.302717924118042 and parameters: {'scheduler_name': 'ExponentialLR', 'optimizer': 'RMSprop', 'lr': 0.0038758303225686947, 'scheduler': 'CosineAnnealingLR', 'init_method': 'he'}. Best is trial 0 with value: 0.7659769058227539.


Test loss: 2.30272, Test acc: 10.00%

Optimizer: SGD, Scheduler: CosineAnnealingLR, Learning rate: 0.006258877494227019, Momentum: 0.9616413678752236, Init method: he
Epoch 1
-------------------------------
Train loss: 1.52740 | Train accuracy: 44.20%
Test loss: 1.16375, Test acc: 58.52%

Epoch 2
-------------------------------
Train loss: 1.00926 | Train accuracy: 64.60%
Test loss: 0.93429, Test acc: 68.36%

Epoch 3
-------------------------------
Train loss: 0.78003 | Train accuracy: 72.87%
Test loss: 0.84293, Test acc: 71.44%

Epoch 4
-------------------------------
Train loss: 0.63761 | Train accuracy: 77.80%


[I 2024-03-19 14:03:54,906] Trial 2 finished with value: 0.7388409376144409 and parameters: {'scheduler_name': 'ExponentialLR', 'optimizer': 'SGD', 'lr': 0.006258877494227019, 'scheduler': 'CosineAnnealingLR', 'init_method': 'he', 'momentum': 0.9616413678752236}. Best is trial 2 with value: 0.7388409376144409.


Test loss: 0.73884, Test acc: 75.59%

Optimizer: RMSprop, Scheduler: CosineAnnealingLR, Learning rate: 0.007354517860952842, Momentum: 0.0, Init method: he
Epoch 1
-------------------------------
Train loss: 3460948.75000 | Train accuracy: 25.28%
Test loss: 1.90209, Test acc: 31.20%

Epoch 2
-------------------------------
Train loss: 2.33073 | Train accuracy: 14.99%
Test loss: 2.30315, Test acc: 10.00%

Epoch 3
-------------------------------
Train loss: 12.17007 | Train accuracy: 10.53%
Test loss: 2.30335, Test acc: 10.00%

Epoch 4
-------------------------------
Train loss: 5.50129 | Train accuracy: 9.77%


[I 2024-03-19 14:06:07,788] Trial 3 finished with value: 2.302915096282959 and parameters: {'scheduler_name': 'ExponentialLR', 'optimizer': 'RMSprop', 'lr': 0.007354517860952842, 'scheduler': 'CosineAnnealingLR', 'init_method': 'he'}. Best is trial 2 with value: 0.7388409376144409.


Test loss: 2.30292, Test acc: 10.00%

Optimizer: Adam, Scheduler: StepLR, Learning rate: 0.0006813271445328782, Momentum: 0.0, Init method: xavier_uniform
Epoch 1
-------------------------------
Train loss: 1.46616 | Train accuracy: 45.69%
Test loss: 1.14917, Test acc: 58.78%

Epoch 2
-------------------------------
Train loss: 0.92874 | Train accuracy: 67.09%
Test loss: 0.88005, Test acc: 69.90%

Epoch 3
-------------------------------
Train loss: 0.70330 | Train accuracy: 75.56%
Test loss: 0.72787, Test acc: 74.99%

Epoch 4
-------------------------------
Train loss: 0.55239 | Train accuracy: 80.82%


[I 2024-03-19 14:08:30,116] Trial 4 finished with value: 0.6918030977249146 and parameters: {'scheduler_name': 'StepLR', 'optimizer': 'Adam', 'lr': 0.0006813271445328782, 'scheduler': 'StepLR', 'init_method': 'xavier_uniform'}. Best is trial 4 with value: 0.6918030977249146.


Test loss: 0.69180, Test acc: 76.38%

Optimizer: Adam, Scheduler: CosineAnnealingLR, Learning rate: 0.007015541333737277, Momentum: 0.0, Init method: he
Epoch 1
-------------------------------
Train loss: 2.78493 | Train accuracy: 10.10%
Test loss: 2.32158, Test acc: 10.00%

Epoch 2
-------------------------------


[W 2024-03-19 14:09:21,630] Trial 5 failed with parameters: {'scheduler_name': 'StepLR', 'optimizer': 'Adam', 'lr': 0.007015541333737277, 'scheduler': 'CosineAnnealingLR', 'init_method': 'he'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/aliyilmaz/anaconda3/envs/pytorch_env/lib/python3.12/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/var/folders/yk/ytz7qk2s2fqgv5qxfnfhcvj80000gn/T/ipykernel_27249/1690675884.py", line 24, in objective
    train_loss, train_acc = train_step(model, train_loader, loss_fn, optimizer, accuracy)
                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/yk/ytz7qk2s2fqgv5qxfnfhcvj80000gn/T/ipykernel_27249/2208755361.py", line 31, in train_step
    optimizer.step()
  File "/Users/aliyilmaz/anaconda3/envs/pytorch_env/lib/python3.12/site-packages/torch/optim/l

KeyboardInterrupt: 