In [135]:
!pip install optuna
# Import necessary torch and torchvision libraries
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import optuna

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



In [136]:
##### Best Hyperparameters #####
# Fixed settings shared by every trial of the regularization comparison below.
# Alternative learning rate, currently disabled: LR = 0.001852
# Learning rate passed to optim.SGD.
LR = 0.003494286840772853
# Alternative momentum, currently disabled: MOMENTUM = 0.870915
# Momentum passed to optim.SGD.
MOMENTUM = 0.9421216112061177
# NOTE(review): not read by any code visible in this notebook; `objective`
# constructs a StepLR scheduler directly.
SCHEDULAR_NAME = "StepLR"
# NOTE(review): not read by any code visible in this notebook; MyCNN initializes
# its weights with Kaiming *uniform* by default -- confirm which one is intended.
INIT_METHOD = "kaiming_normal"

In [137]:
class MyCNN(nn.Module):
    """Small VGG-style CNN for 3-channel 32x32 images.

    The feature extractor is three stages of two 3x3 convolutions (each
    followed by ReLU) and a 2x2 max-pool; the classifier is three fully
    connected layers ending in ``num_classes`` logits.

    Args:
        num_classes: Number of output logits.
        init_method: Weight initialisation applied to every Conv2d / Linear
            layer: ``"kaiming_uniform"`` (default, the behaviour this class
            always had) or ``"kaiming_normal"``.

    Raises:
        ValueError: If ``init_method`` is not one of the supported names.
    """

    # Supported initialisers, keyed by the name accepted in `init_method`.
    _INITIALIZERS = {
        "kaiming_uniform": nn.init.kaiming_uniform_,
        "kaiming_normal": nn.init.kaiming_normal_,
    }

    def __init__(self, num_classes=10, init_method="kaiming_uniform"):
        super().__init__()
        self.network = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 64 x 16 x 16

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 128 x 8 x 8

            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # output: 256 x 4 x 4

            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes),
        )
        self.init_weights(init_method)

    def forward(self, x):
        """Return the class logits produced by the sequential network for `x`."""
        return self.network(x)

    def init_weights(self, init_method="kaiming_uniform"):
        """Re-initialise every Conv2d / Linear layer and zero its bias.

        Args:
            init_method: ``"kaiming_uniform"`` or ``"kaiming_normal"``; both
                are applied with ``mode='fan_in'`` and ``nonlinearity='relu'``.

        Raises:
            ValueError: If ``init_method`` is not a supported name.
        """
        try:
            initializer = self._INITIALIZERS[init_method]
        except KeyError:
            supported = ", ".join(sorted(self._INITIALIZERS))
            raise ValueError(
                f"Unknown init_method {init_method!r}; expected one of: {supported}"
            ) from None

        # self.modules() walks every (nested) sub-module, so one loop covers
        # both the convolutional and the fully connected layers.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                initializer(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


In [138]:
def train_step(model: torch.nn.Module,
               data_loader: torch.utils.data.DataLoader,
               loss_fn: torch.nn.Module,
               optimizer: torch.optim.Optimizer,
               accuracy,
               device: torch.device = device):
    """Train `model` for one pass over `data_loader` and report the results.

    Args:
        model: Network to optimise; it is moved to `device` and put in
            training mode.
        data_loader: Iterable yielding `(inputs, targets)` batches.
        loss_fn: Callable computing the loss from `(predictions, targets)`.
        optimizer: Optimizer stepped once per batch.
        accuracy: Metric object exposing `reset()`, `update(preds, target)`
            and `compute()`; it is reset at the start of the pass.
        device: Device the model and each batch are moved to.

    Returns:
        Tuple `(train_loss, train_acc)`: the mean of the per-batch losses
        (detached from autograd) and the value of `accuracy.compute()`.
    """
    accuracy.reset()
    model.to(device)
    # The mode does not change inside the loop, so set it once per pass.
    model.train()
    train_loss = 0

    for X, y in data_loader:
        X = X.to(device)
        y = y.to(device)

        # Forward pass and per-batch loss
        y_pred = model(X)
        loss = loss_fn(y_pred, y)
        # Accumulate a detached copy: summing the live loss tensor would chain
        # every batch's autograd history into the running total and hand the
        # caller a loss that still requires grad.
        train_loss += loss.detach()
        accuracy.update(y_pred, y)

        # Standard optimisation step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Average over the number of batches
    train_loss = train_loss / len(data_loader)
    train_acc = accuracy.compute()
    print(f"Train loss: {train_loss:.5f} | Train accuracy: {train_acc*100:.2f}%")
    return train_loss, train_acc

def test_step(model: torch.nn.Module,
              data_loader: torch.utils.data.DataLoader,
              loss_fn: torch.nn.Module,
              accuracy,
              device: torch.device = device):
    """Evaluate `model` on `data_loader` without computing gradients.

    Args:
        model: Network to evaluate; it is moved to `device` and put in
            evaluation mode.
        data_loader: Iterable yielding `(inputs, targets)` batches.
        loss_fn: Callable computing the loss from `(predictions, targets)`.
        accuracy: Metric object exposing `reset()`, `update(preds, target)`
            and `compute()`; it is reset at the start of the pass.
        device: Device the model and each batch are moved to.

    Returns:
        Tuple `(test_loss, test_acc)`: the mean of the per-batch losses and
        the value of `accuracy.compute()`.
    """
    accuracy.reset()
    # Mirror train_step: make sure the model lives on the same device as the
    # batches, so evaluation also works when no training pass ran first.
    model.to(device)
    model.eval()
    test_loss = 0

    # inference_mode disables autograd tracking for the whole evaluation pass
    with torch.inference_mode():
        for X, y in data_loader:
            X = X.to(device)
            y = y.to(device)

            test_pred = model(X)
            test_loss += loss_fn(test_pred, y)
            accuracy.update(test_pred, y)

    # Average over the number of batches
    test_loss = test_loss / len(data_loader)
    test_acc = accuracy.compute()
    print(f"Test loss: {test_loss:.5f}, Test acc: {test_acc*100:.2f}%\n")
    return test_loss, test_acc

# Data Augmentation

In [139]:
# Settings shared by every dataset / loader created in this cell.
DATA_ROOT = './data'
BATCH_SIZE = 32

# Augmentation pipeline: random horizontal flip (50% probability), random
# rotation within (-10, 10) degrees, then conversion to a tensor.
augmentation_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
])

# Plain CIFAR-10 splits (downloaded on first use), converted to tensors only.
train_data = CIFAR10(root=DATA_ROOT,
                     train=True,
                     download=True,
                     transform=transforms.ToTensor())
test_data = CIFAR10(root=DATA_ROOT,
                    train=False,
                    download=True,
                    transform=transforms.ToTensor())

# Same splits with the augmentation pipeline applied; these reuse the files
# fetched by the two calls above, so no download flag is passed.
augmented_train_data = CIFAR10(root=DATA_ROOT,
                               train=True,
                               transform=augmentation_transform)
# NOTE(review): this applies random augmentation to the *test* split as well;
# confirm that is intended before using it for evaluation.
augmented_test_data = CIFAR10(root=DATA_ROOT,
                              train=False,
                              transform=augmentation_transform)

# Class names and the class-name -> index mapping
class_names = train_data.classes
cls_to_idx = train_data.class_to_idx

# Batched loaders: training loaders shuffle, test loaders keep a fixed order.
no_augmentation_train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                                           batch_size=BATCH_SIZE,
                                                           shuffle=True)
no_augmentation_test_loader = torch.utils.data.DataLoader(dataset=test_data,
                                                          batch_size=BATCH_SIZE,
                                                          shuffle=False)

augmented_train_loader = torch.utils.data.DataLoader(dataset=augmented_train_data,
                                                     batch_size=BATCH_SIZE,
                                                     shuffle=True)
# Fixed: this loader previously wrapped augmented_train_data, so the
# "test" loader silently served the training split.
augmented_test_loader = torch.utils.data.DataLoader(dataset=augmented_test_data,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [140]:
# Import accuracy metric
from torchmetrics import Accuracy
# Multiclass accuracy metric for 10 classes, moved to `device`. The same instance
# is shared by train_step and test_step, which reset it at the start of each pass.
accuracy = Accuracy(task="multiclass", num_classes=10).to(device)
# Setup loss function (optimizers are created per trial inside `objective`)
loss_fn = nn.CrossEntropyLoss()


In [141]:
def objective(trial):
    """Optuna objective: train a fresh MyCNN under one regularization setting.

    The trial picks a `reg_method` label, which selects whether the augmented
    training loader is used and which SGD weight decay is applied. The model is
    trained for a fixed number of epochs with a StepLR schedule and evaluated
    on the non-augmented test loader after every epoch.

    Args:
        trial: Optuna trial used to sample the `reg_method` category.

    Returns:
        Test accuracy after the last epoch, as a Python float.
    """
    # reg_method label -> (train on augmented data?, SGD weight decay).
    # A weight decay of 0 is SGD's default, i.e. no weight decay.
    reg_configs = {
        "No_Regularization": (False, 0.0),
        "Weight Decay: 0.0005": (False, 0.0005),
        "Weight Decay: 0.0001": (False, 0.0001),
        "Data Augmentation": (True, 0.0),
        "Data Augmentation - WD: 0.0001": (True, 0.0001),
        "Data Augmentation - WD: 0.0005": (True, 0.0005),
    }
    reg_method = trial.suggest_categorical('reg_method', list(reg_configs))
    use_augmentation, weight_decay = reg_configs[reg_method]

    train_loader = augmented_train_loader if use_augmentation else no_augmentation_train_loader
    # Evaluation always uses the clean test split, whatever the training data.
    # NOTE(review): the study therefore selects the best setting on the test
    # set itself; consider a held-out validation split for model selection.
    test_loader = no_augmentation_test_loader

    # Move the model to its device before handing its parameters to the optimizer.
    model = MyCNN().to(device)
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM, weight_decay=weight_decay)
    # Multiply the learning rate by 0.2 every 5 epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.2)

    epochs = 15

    for epoch in range(epochs):
        print(f"Epoch {epoch+1}\n-------------------------------")
        train_step(model, train_loader, loss_fn, optimizer, accuracy)
        _, test_acc = test_step(model, test_loader, loss_fn, accuracy)
        scheduler.step()
    # Return a plain float rather than a (possibly GPU-resident) tensor.
    return float(test_acc)

# Grid of regularization settings; the labels must match the categories that
# `objective` suggests for 'reg_method'.
search_space = {
    'reg_method': [
        'No_Regularization',
        'Weight Decay: 0.0001',
        'Weight Decay: 0.0005',
        'Data Augmentation',
        'Data Augmentation - WD: 0.0001',
        'Data Augmentation - WD: 0.0005',
    ]
}

# Grid-sampled study that maximizes the value returned by `objective`;
# six trials, one per entry of the grid above.
grid_sampler = optuna.samplers.GridSampler(search_space)
study_augmentation = optuna.create_study(direction='maximize', sampler=grid_sampler)
study_augmentation.optimize(objective, n_trials=6)

# Report the best trial: its objective value and the parameters it used.
print('Best trial:')
trial_augmentation = study_augmentation.best_trial

print(f'Value: {trial_augmentation.value}')
print('Params: ')
for key, value in trial_augmentation.params.items():
    print(f'{key}: {value}')

[I 2024-03-22 22:34:45,483] A new study created in memory with name: no-name-2c1c469c-14f8-4f44-b6db-4c59b645b469


Epoch 1
-------------------------------
Train loss: 1.55512 | Train accuracy: 43.23%
Test loss: 1.25762, Test acc: 53.97%

Epoch 2
-------------------------------
Train loss: 1.05975 | Train accuracy: 62.21%
Test loss: 0.93838, Test acc: 67.01%

Epoch 3
-------------------------------
Train loss: 0.86162 | Train accuracy: 69.79%
Test loss: 0.79813, Test acc: 72.58%

Epoch 4
-------------------------------
Train loss: 0.73851 | Train accuracy: 74.37%
Test loss: 0.70851, Test acc: 75.88%

Epoch 5
-------------------------------
Train loss: 0.65195 | Train accuracy: 77.25%
Test loss: 0.65388, Test acc: 77.55%

Epoch 6
-------------------------------
Train loss: 0.46789 | Train accuracy: 83.76%
Test loss: 0.53937, Test acc: 81.58%

Epoch 7
-------------------------------
Train loss: 0.41625 | Train accuracy: 85.64%
Test loss: 0.54360, Test acc: 82.03%

Epoch 8
-------------------------------
Train loss: 0.38566 | Train accuracy: 86.40%
Test loss: 0.53733, Test acc: 82.42%

Epoch 9
--------

[I 2024-03-22 22:39:59,257] Trial 0 finished with value: 0.8357999920845032 and parameters: {'reg_method': 'Data Augmentation'}. Best is trial 0 with value: 0.8357999920845032.


Test loss: 0.54855, Test acc: 83.58%

Epoch 1
-------------------------------
Train loss: 1.54364 | Train accuracy: 43.66%
Test loss: 1.19505, Test acc: 57.56%

Epoch 2
-------------------------------
Train loss: 1.05487 | Train accuracy: 62.76%
Test loss: 0.88273, Test acc: 69.14%

Epoch 3
-------------------------------
Train loss: 0.84807 | Train accuracy: 70.25%
Test loss: 0.85398, Test acc: 69.99%

Epoch 4
-------------------------------
Train loss: 0.72865 | Train accuracy: 74.62%
Test loss: 0.70056, Test acc: 75.59%

Epoch 5
-------------------------------
Train loss: 0.64601 | Train accuracy: 77.38%
Test loss: 0.65774, Test acc: 77.64%

Epoch 6
-------------------------------
Train loss: 0.45529 | Train accuracy: 84.19%
Test loss: 0.55310, Test acc: 81.66%

Epoch 7
-------------------------------
Train loss: 0.41242 | Train accuracy: 85.61%
Test loss: 0.53517, Test acc: 81.95%

Epoch 8
-------------------------------
Train loss: 0.37494 | Train accuracy: 86.91%
Test loss: 0.531

[I 2024-03-22 22:45:14,654] Trial 1 finished with value: 0.833299994468689 and parameters: {'reg_method': 'Data Augmentation - WD: 0.0001'}. Best is trial 0 with value: 0.8357999920845032.


Test loss: 0.54522, Test acc: 83.33%

Epoch 1
-------------------------------
Train loss: 1.44081 | Train accuracy: 47.53%
Test loss: 1.05845, Test acc: 62.31%

Epoch 2
-------------------------------
Train loss: 0.92491 | Train accuracy: 67.51%
Test loss: 0.84129, Test acc: 70.47%

Epoch 3
-------------------------------
Train loss: 0.69769 | Train accuracy: 75.51%
Test loss: 0.73222, Test acc: 74.71%

Epoch 4
-------------------------------
Train loss: 0.54559 | Train accuracy: 80.94%
Test loss: 0.68618, Test acc: 77.50%

Epoch 5
-------------------------------
Train loss: 0.42096 | Train accuracy: 85.30%
Test loss: 0.66540, Test acc: 77.97%

Epoch 6
-------------------------------
Train loss: 0.14576 | Train accuracy: 95.11%
Test loss: 0.67320, Test acc: 81.07%

Epoch 7
-------------------------------
Train loss: 0.05846 | Train accuracy: 98.23%
Test loss: 0.82891, Test acc: 80.84%

Epoch 8
-------------------------------
Train loss: 0.01860 | Train accuracy: 99.67%
Test loss: 1.044

[I 2024-03-22 22:48:47,657] Trial 2 finished with value: 0.8101000189781189 and parameters: {'reg_method': 'Weight Decay: 0.0001'}. Best is trial 0 with value: 0.8357999920845032.


Test loss: 1.39326, Test acc: 81.01%

Epoch 1
-------------------------------
Train loss: 1.49244 | Train accuracy: 45.54%
Test loss: 1.18819, Test acc: 58.28%

Epoch 2
-------------------------------
Train loss: 0.94994 | Train accuracy: 66.56%
Test loss: 0.89372, Test acc: 68.40%

Epoch 3
-------------------------------
Train loss: 0.72643 | Train accuracy: 74.88%
Test loss: 0.78613, Test acc: 72.93%

Epoch 4
-------------------------------
Train loss: 0.57334 | Train accuracy: 80.11%
Test loss: 0.66350, Test acc: 77.29%

Epoch 5
-------------------------------
Train loss: 0.44945 | Train accuracy: 84.26%
Test loss: 0.67602, Test acc: 77.65%

Epoch 6
-------------------------------
Train loss: 0.17216 | Train accuracy: 94.22%
Test loss: 0.65002, Test acc: 81.36%

Epoch 7
-------------------------------
Train loss: 0.08251 | Train accuracy: 97.41%
Test loss: 0.76311, Test acc: 81.50%

Epoch 8
-------------------------------
Train loss: 0.03489 | Train accuracy: 99.15%
Test loss: 0.885

[I 2024-03-22 22:52:20,641] Trial 3 finished with value: 0.8138999938964844 and parameters: {'reg_method': 'Weight Decay: 0.0005'}. Best is trial 0 with value: 0.8357999920845032.


Test loss: 1.21580, Test acc: 81.39%

Epoch 1
-------------------------------
Train loss: 1.58794 | Train accuracy: 41.86%
Test loss: 1.16154, Test acc: 58.41%

Epoch 2
-------------------------------
Train loss: 1.07811 | Train accuracy: 61.67%
Test loss: 0.94549, Test acc: 66.96%

Epoch 3
-------------------------------
Train loss: 0.87095 | Train accuracy: 69.30%
Test loss: 0.79401, Test acc: 72.50%

Epoch 4
-------------------------------
Train loss: 0.74672 | Train accuracy: 73.94%
Test loss: 0.75230, Test acc: 74.42%

Epoch 5
-------------------------------
Train loss: 0.66344 | Train accuracy: 76.91%
Test loss: 0.67443, Test acc: 77.11%

Epoch 6
-------------------------------
Train loss: 0.47476 | Train accuracy: 83.42%
Test loss: 0.56208, Test acc: 81.43%

Epoch 7
-------------------------------
Train loss: 0.42816 | Train accuracy: 85.09%
Test loss: 0.54171, Test acc: 82.09%

Epoch 8
-------------------------------
Train loss: 0.39882 | Train accuracy: 86.28%
Test loss: 0.528

[I 2024-03-22 22:57:36,577] Trial 4 finished with value: 0.8378999829292297 and parameters: {'reg_method': 'Data Augmentation - WD: 0.0005'}. Best is trial 4 with value: 0.8378999829292297.


Test loss: 0.52361, Test acc: 83.79%

Epoch 1
-------------------------------
Train loss: 1.43381 | Train accuracy: 47.74%
Test loss: 1.09948, Test acc: 60.67%

Epoch 2
-------------------------------
Train loss: 0.92825 | Train accuracy: 67.11%
Test loss: 0.88050, Test acc: 69.24%

Epoch 3
-------------------------------
Train loss: 0.69776 | Train accuracy: 75.53%
Test loss: 0.71249, Test acc: 75.68%

Epoch 4
-------------------------------
Train loss: 0.55450 | Train accuracy: 80.67%
Test loss: 0.66122, Test acc: 77.66%

Epoch 5
-------------------------------
Train loss: 0.42038 | Train accuracy: 85.32%
Test loss: 0.67116, Test acc: 77.76%

Epoch 6
-------------------------------
Train loss: 0.14310 | Train accuracy: 95.28%
Test loss: 0.74441, Test acc: 80.21%

Epoch 7
-------------------------------
Train loss: 0.05187 | Train accuracy: 98.54%
Test loss: 0.90424, Test acc: 80.13%

Epoch 8
-------------------------------
Train loss: 0.01478 | Train accuracy: 99.75%
Test loss: 1.161

[I 2024-03-22 23:01:07,615] Trial 5 finished with value: 0.8043000102043152 and parameters: {'reg_method': 'No_Regularization'}. Best is trial 4 with value: 0.8378999829292297.


Test loss: 1.52378, Test acc: 80.43%

Best trial:
Value: 0.8378999829292297
Params: 
reg_method: Data Augmentation - WD: 0.0005


In [155]:
# Plot the optimization history of the regularization study.
optuna.visualization.plot_optimization_history(study_augmentation)

In [152]:
# Lift pandas' column-width limit so cell contents are printed in full
pd.set_option('display.max_colwidth', None)

# Tabulate the trials of the regularization study and print the table
df = study_augmentation.trials_dataframe()
print(df)


   number   value             datetime_start          datetime_complete  \
0       0  0.8358 2024-03-22 22:34:45.484816 2024-03-22 22:39:59.257620   
1       1  0.8333 2024-03-22 22:39:59.259245 2024-03-22 22:45:14.654607   
2       2  0.8101 2024-03-22 22:45:14.656133 2024-03-22 22:48:47.656748   
3       3  0.8139 2024-03-22 22:48:47.658216 2024-03-22 22:52:20.641499   
4       4  0.8379 2024-03-22 22:52:20.643028 2024-03-22 22:57:36.577275   
5       5  0.8043 2024-03-22 22:57:36.578889 2024-03-22 23:01:07.614869   

                duration               params_reg_method  \
0 0 days 00:05:13.772804               Data Augmentation   
1 0 days 00:05:15.395362  Data Augmentation - WD: 0.0001   
2 0 days 00:03:33.000615            Weight Decay: 0.0001   
3 0 days 00:03:32.983283            Weight Decay: 0.0005   
4 0 days 00:05:15.934247  Data Augmentation - WD: 0.0005   
5 0 days 00:03:31.035980               No_Regularization   

   system_attrs_grid_id  \
0                     0   

In [157]:
# Save the per-trial results table as CSV without the index column. Every column
# is written, because the datetime_complete drop below is disabled.
#df.drop(columns=['datetime_complete'], inplace=True)
df.to_csv('regularization_comparison.csv', index=False)