In [3]:
# Import necessary torch and torchvision libraries
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import optuna

# Pick the fastest available backend: a CUDA GPU first, then Apple-silicon MPS,
# and fall back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load both CIFAR-10 splits from ./data (downloading them when needed).
# ToTensor() is applied to every image as it is read from the dataset.
train_data = CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

test_data = CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Human-readable class names. The list position is the label index,
# so it doubles as an index-to-class lookup: class_names[1] == 'automobile'.
class_names = train_data.classes
print(class_names)
# Reverse lookup: class name -> label index
cls_to_idx = train_data.class_to_idx
print(cls_to_idx)

['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}


In [4]:
# Wrap both splits in DataLoaders that serve mini-batches of 32 samples.
# Only the training split is shuffled; the test split keeps its stored order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data, batch_size=32, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_data, batch_size=32, shuffle=False
)


In [13]:
def get_scheduler(optimizer, scheduler_name):
    """Build the learning-rate scheduler selected by ``scheduler_name``.

    Args:
        optimizer: The ``torch.optim`` optimizer whose learning rate is scheduled.
        scheduler_name: One of ``'StepLR'``, ``'ExponentialLR'`` or
            ``'CosineAnnealingLR'``.

    Returns:
        The scheduler instance bound to ``optimizer``.

    Raises:
        ValueError: If ``scheduler_name`` is not one of the supported names.
    """
    if scheduler_name == 'StepLR':
        # Every 5 scheduler steps the learning rate is multiplied by gamma.
        return optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    if scheduler_name == 'ExponentialLR':
        # The learning rate is multiplied by gamma on every scheduler step.
        return optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.8)
    if scheduler_name == 'CosineAnnealingLR':
        return optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    # A catch-all branch would silently map a misspelled name to
    # CosineAnnealingLR and mislabel the experiment, so fail loudly instead.
    raise ValueError(
        f"Unknown scheduler_name {scheduler_name!r}; expected 'StepLR', "
        "'ExponentialLR' or 'CosineAnnealingLR'"
    )

def suggest_hyperparameters(trial):
    """Sample one hyperparameter configuration from ``trial``.

    Args:
        trial: Object exposing ``suggest_categorical`` and ``suggest_float``
            (an Optuna trial in this notebook).

    Returns:
        dict with the keys ``scheduler_name``, ``optimizer_name``, ``lr``,
        ``momentum`` and ``init_method``. ``momentum`` is sampled only when the
        optimizer is SGD and is ``0.0`` otherwise.
    """
    scheduler_choice = trial.suggest_categorical(
        'scheduler_name', ['StepLR', 'ExponentialLR', 'CosineAnnealingLR'])
    optimizer_choice = trial.suggest_categorical(
        'optimizer', ['SGD', 'Adam', 'RMSprop'])
    # NOTE(review): lr is drawn uniformly on a linear scale over two orders of
    # magnitude; consider log-scale sampling if small rates should be explored.
    learning_rate = trial.suggest_float('lr', 5*1e-5, 5*1e-3)
    init_choice = trial.suggest_categorical(
        'init_method', ['xavier_uniform', 'he', 'trunc_normal'])

    # Momentum is part of the search space for SGD only.
    if optimizer_choice == 'SGD':
        momentum = trial.suggest_float('momentum', 0.85, 0.99)
    else:
        momentum = 0.0

    return {
        'scheduler_name': scheduler_choice,
        'optimizer_name': optimizer_choice,
        'lr': learning_rate,
        'momentum': momentum,
        'init_method': init_choice,
    }

In [1]:
class MyCNN(nn.Module):
    """Convolutional classifier: three conv stages followed by an MLP head.

    Each stage is Conv-ReLU-Conv-ReLU-MaxPool with 3x3 kernels and padding 1.
    The head flattens the feature map and feeds a 256*4*4 vector through
    Linear layers of 1024 and 512 units to ``num_classes`` outputs (the head
    size assumes 3x32x32 inputs).

    Args:
        num_classes: Number of output logits.
        init_method: ``'xavier_uniform'``, ``'he'`` or ``'trunc_normal'``.
            Any other value keeps PyTorch's default initialisation.
    """

    def __init__(self, num_classes=10, init_method='xavier_uniform'):
        super().__init__()

        # (in, mid, out) channels per stage; every MaxPool halves H and W:
        # 64 x 16 x 16 -> 128 x 8 x 8 -> 256 x 4 x 4
        stage_channels = [(3, 32, 64), (64, 128, 128), (128, 256, 256)]

        layers = []
        for c_in, c_mid, c_out in stage_channels:
            layers.append(nn.Conv2d(c_in, c_mid, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.Conv2d(c_mid, c_out, kernel_size=3, stride=1, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2, 2))

        # Classifier head
        layers.extend([
            nn.Flatten(),
            nn.Linear(256*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes),
        ])

        self.network = nn.Sequential(*layers)
        self.init_weights(init_method)

    def forward(self, x):
        """Return the logits produced by the layer stack for batch ``x``."""
        return self.network(x)

    def init_weights(self, init_method):
        """Re-initialise all Conv2d/Linear weights and zero their biases.

        An unrecognised ``init_method`` leaves every parameter untouched.
        """
        if init_method == 'xavier_uniform':
            def fill_weight(weight):
                nn.init.xavier_uniform_(weight)
        elif init_method == 'he':
            def fill_weight(weight):
                nn.init.kaiming_uniform_(weight, mode='fan_in', nonlinearity='relu')
        elif init_method == 'trunc_normal':
            def fill_weight(weight):
                nn.init.trunc_normal_(weight, mean=0.0, std=0.1)
        else:
            return

        for layer in self.modules():
            if not isinstance(layer, (nn.Conv2d, nn.Linear)):
                continue
            fill_weight(layer.weight)
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0)


NameError: name 'nn' is not defined

In [2]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy,
               device: torch.device = device):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: Network to optimise; it is moved to ``device``.
        data_loader: Iterable yielding ``(X, y)`` batches.
        loss_fn: Callable computing the loss from ``(predictions, targets)``.
        optimizer: Optimizer updating the parameters of ``model``.
        accuracy: Metric object exposing ``reset()``, ``update(preds, target)``
            and ``compute()``; it is reset at the start of the pass.
        device: Device the model and every batch are moved to.

    Returns:
        Tuple ``(train_loss, train_acc)``: the mean per-batch loss and the
        value returned by ``accuracy.compute()`` for this pass.
    """
    accuracy.reset()
    train_loss, train_acc = 0, 0
    model.to(device)
    # Training mode only needs to be set once per pass, not once per batch.
    model.train()

    for X, y in data_loader:
        X = X.to(device)
        y = y.to(device)

        # Forward pass
        y_pred = model(X)
        # Calculate loss per batch
        loss = loss_fn(y_pred, y)
        # Accumulate a detached copy: adding `loss` itself would keep every
        # batch's autograd history reachable from the running total.
        train_loss += loss.detach()
        # Update accuracy
        accuracy.update(y_pred, y)
        # Zero the gradients
        optimizer.zero_grad()
        # Backward pass
        loss.backward()
        # Update weights
        optimizer.step()
    # Loss per epoch
    train_loss = train_loss / len(data_loader)
    train_acc = accuracy.compute()
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc*100:.2f}%")
    return train_loss, train_acc

def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              accuracy,
              device: torch.device = device):
    """Evaluate ``model`` on one pass over ``data_loader`` without gradients.

    Args:
        model: Network to evaluate; it is moved to ``device`` and put in
            evaluation mode.
        data_loader: Iterable yielding ``(X, y)`` batches.
        loss_fn: Callable computing the loss from ``(predictions, targets)``.
        accuracy: Metric object exposing ``reset()``, ``update(preds, target)``
            and ``compute()``; it is reset at the start of the pass.
        device: Device the model and every batch are moved to.

    Returns:
        Tuple ``(test_loss, test_acc)``: the mean per-batch loss and the value
        returned by ``accuracy.compute()`` for this pass.
    """
    accuracy.reset()
    test_loss, test_acc = 0, 0
    # Keep the model on the same device as the batches so the function also
    # works when it is called on a model that has not been moved yet.
    model.to(device)
    # Set model to evaluation mode
    model.eval()
    # Turn off gradients
    with torch.inference_mode():
        for X, y in data_loader:
            # Move data to device
            X = X.to(device)
            y = y.to(device)
            # Forward pass
            test_pred = model(X)
            # Calculate loss per batch
            test_loss += loss_fn(test_pred, y)
            # Update accuracy
            accuracy.update(test_pred, y)
    # Loss per epoch
    test_loss = test_loss / len(data_loader)
    # Calculate accuracy
    test_acc = accuracy.compute()
    # Print loss and accuracy per epoch
    print(f"Test loss: {test_loss:.5f}, Test acc: {test_acc*100:.2f}%\n")
    return test_loss, test_acc

NameError: name 'device' is not defined

In [16]:
# Multiclass accuracy metric for the 10 classes, placed on the active device
from torchmetrics import Accuracy
accuracy = Accuracy(task="multiclass", num_classes=10).to(device)
# Loss function passed to both train_step and test_step
# (the optimizer is created per trial inside objective()).
loss_fn = nn.CrossEntropyLoss()

In [None]:
def objective(trial):
    """Optuna objective: train a freshly initialised ``MyCNN`` and score it.

    Samples a configuration with ``suggest_hyperparameters``, builds the
    model, optimizer and scheduler from it, then alternates ``train_step``
    and ``test_step`` for a fixed number of epochs, stepping the scheduler
    once per epoch.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        float: The test loss measured after the last epoch.
    """
    params = suggest_hyperparameters(trial)

    model = MyCNN(num_classes=10, init_method=params['init_method'])

    # Momentum is forwarded to SGD only; the other optimizers get lr alone.
    optimizer_cls = getattr(optim, params['optimizer_name'])
    optimizer_kwargs = {'lr': params['lr']}
    if params['optimizer_name'] == 'SGD':
        optimizer_kwargs['momentum'] = params['momentum']
    optimizer = optimizer_cls(model.parameters(), **optimizer_kwargs)

    scheduler = get_scheduler(optimizer, params['scheduler_name'])

    print(f"Optimizer: {params['optimizer_name']}, Scheduler: {params['scheduler_name']}, Learning rate: {params['lr']}, Momentum: {params['momentum']}, Init method: {params['init_method']}")

    epochs = 4

    for epoch in range(epochs):
        print(f"Epoch {epoch+1}\n-------------------------------")
        train_loss, train_acc = train_step(model, train_loader, loss_fn, optimizer, accuracy)
        test_loss, test_acc = test_step(model, test_loader, loss_fn, accuracy)
        scheduler.step()
    # Hand Optuna a plain Python float rather than a 0-dim device tensor.
    return float(test_loss)

# Run 10 trials; the study minimises the value returned by objective().
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

# Report the value and the sampled parameters of the best trial.
print('Best trial:')
trial = study.best_trial

print(f'Value: {trial.value}')
print('Params: ')
for key, value in trial.params.items():
    print(f'{key}: {value}')


Note that a lower loss does not necessarily mean a higher accuracy. Therefore, both the optimization and the analysis should be based on the accuracy value. When the model is overtrained (overfits), the loss can become very high while the accuracy may still converge to a higher value than expected.

In [None]:
# A notebook cell renders only its last bare expression, so each figure is
# shown explicitly to make all four plots appear.
optuna.visualization.plot_optimization_history(study).show()

optuna.visualization.plot_parallel_coordinate(study).show()

optuna.visualization.plot_param_importances(study).show()

optuna.visualization.plot_slice(study).show()


In [18]:
# The `study` variable is the Optuna study object that has already been optimized.
# Print the number, value and sampled parameters of every recorded trial
# (a trial without a recorded value prints `None`).
for trial in study.trials:
    print(f"Trial #{trial.number}, Value: {trial.value}")
    print("Params: ")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")


Trial #0, Value: 0.7659769058227539
Params: 
    scheduler_name: CosineAnnealingLR
    optimizer: RMSprop
    lr: 0.0008694247141217115
    scheduler: ExponentialLR
    init_method: xavier_uniform
Trial #1, Value: 2.302717924118042
Params: 
    scheduler_name: ExponentialLR
    optimizer: RMSprop
    lr: 0.0038758303225686947
    scheduler: CosineAnnealingLR
    init_method: he
Trial #2, Value: 0.7388409376144409
Params: 
    scheduler_name: ExponentialLR
    optimizer: SGD
    lr: 0.006258877494227019
    scheduler: CosineAnnealingLR
    init_method: he
    momentum: 0.9616413678752236
Trial #3, Value: 2.302915096282959
Params: 
    scheduler_name: ExponentialLR
    optimizer: RMSprop
    lr: 0.007354517860952842
    scheduler: CosineAnnealingLR
    init_method: he
Trial #4, Value: 0.6918030977249146
Params: 
    scheduler_name: StepLR
    optimizer: Adam
    lr: 0.0006813271445328782
    scheduler: StepLR
    init_method: xavier_uniform
Trial #5, Value: None
Params: 
    scheduler_na

In [22]:
# Collect the trials of the study into a single DataFrame (one row per trial)
df = study.trials_dataframe()

# Do not truncate long cell contents when the frame is printed
pd.set_option('display.max_colwidth', None)

print(df)


   number     value             datetime_start          datetime_complete  \
0       0  0.765977 2024-03-19 13:57:21.711175 2024-03-19 13:59:34.548201   
1       1  2.302718 2024-03-19 13:59:34.548774 2024-03-19 14:01:49.329503   
2       2  0.738841 2024-03-19 14:01:49.330536 2024-03-19 14:03:54.906295   
3       3  2.302915 2024-03-19 14:03:54.906965 2024-03-19 14:06:07.788106   
4       4  0.691803 2024-03-19 14:06:07.788751 2024-03-19 14:08:30.116511   
5       5       NaN 2024-03-19 14:08:30.117051 2024-03-19 14:09:21.629978   

                duration params_init_method  params_lr  params_momentum  \
0 0 days 00:02:12.837026     xavier_uniform   0.000869              NaN   
1 0 days 00:02:14.780729                 he   0.003876              NaN   
2 0 days 00:02:05.575759                 he   0.006259         0.961641   
3 0 days 00:02:12.881141                 he   0.007355              NaN   
4 0 days 00:02:22.327760     xavier_uniform   0.000681              NaN   
5 0 days 0

In [23]:
# Load the two saved result tables from disk
# (presumably exported from earlier optimisation runs; verify the source).
df1, df2 = (
    pd.read_csv(csv_path)
    for csv_path in ('hyperparameter_optimization_results1.csv',
                     'hyperparameter_optimization_results2.csv')
)

In [19]:
# Drop the completion timestamp by rebinding instead of mutating in place.
# errors='ignore' keeps the cell re-runnable once the column is already gone
# (e.g. after the trimmed frame has been written back to the CSV and reloaded).
df1 = df1.drop(columns=['datetime_complete'], errors='ignore')

In [22]:
# Write both frames back to the CSV files they were loaded from;
# index=False keeps the row index out of the files.
df1.to_csv('hyperparameter_optimization_results1.csv', index=False)
df2.to_csv('hyperparameter_optimization_results2.csv', index=False)

Unnamed: 0,value,duration,params_init_method,params_lr,params_momentum,params_optimizer,params_scheduler,params_scheduler_name,state
0,2.302589,0 days 00:02:01.987918,trunc_normal,0.000518,0.892873,SGD,StepLR,StepLR,COMPLETE
1,0.94296,0 days 00:02:04.031166,xavier_uniform,0.002231,0.910243,SGD,ExponentialLR,StepLR,COMPLETE
2,0.925347,0 days 00:02:07.265297,trunc_normal,0.002499,,RMSprop,StepLR,CosineAnnealingLR,COMPLETE
3,2.302594,0 days 00:02:09.839847,xavier_uniform,0.002081,,Adam,ExponentialLR,CosineAnnealingLR,COMPLETE
4,2.302613,0 days 00:02:09.941922,xavier_uniform,0.002995,,Adam,ExponentialLR,CosineAnnealingLR,COMPLETE
5,2.303319,0 days 00:02:03.659195,xavier_uniform,0.00471,,RMSprop,CosineAnnealingLR,ExponentialLR,COMPLETE
6,0.852053,0 days 00:02:02.687114,he,0.000379,0.909065,SGD,StepLR,ExponentialLR,COMPLETE
7,1.138596,0 days 00:02:13.593366,he,0.000904,,Adam,StepLR,CosineAnnealingLR,COMPLETE
8,1.035344,0 days 00:02:01.980483,he,0.004148,0.901259,SGD,ExponentialLR,StepLR,COMPLETE
9,0.84972,0 days 00:02:12.674130,xavier_uniform,0.000856,,Adam,ExponentialLR,StepLR,COMPLETE
