# Training Image Classification Model with SGD

|Item|Description|
|---|---|
|DeepLearning Framework|PyTorch|
|Dataset|CIFAR-100|
|Model Architecture|Simple CNN|
|Optimizer|SGD|


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import random
import numpy as np
import torch
import pprint
import matplotlib.pyplot as plt
import optuna

from data_loader.data_loader import DataLoader
from models.pytorch import simple_cnn

  from .autonotebook import tqdm as notebook_tqdm


## Set Random Seed

In [3]:
seed=42

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f5e7c1b3b50>

## Device Settings

In [4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

## Hyperparameters

In [5]:
epochs = 200
batch_size = 256

## Load Dataset and Normalize

In [6]:
dataset_dir = '/tmp/dataset'
dataloader = DataLoader(dataset_name='cifar100_pytorch', dataset_dir=dataset_dir)

Files already downloaded and verified
Files already downloaded and verified


## Training Model

In [7]:
def objective_lr(dataloader):
    def objective(trial):
        learning_rate = trial.suggest_float('learning_rate', 0.00001, 0.1, log=True)
        
        input_size = (batch_size, 3, 32, 32)
        num_classes = 100
        model = simple_cnn.SimpleCNN(device, input_size=input_size, num_classes=num_classes)
        model_dir = f'cifar-100_model-{trial}'
        train_result = model.train(dataloader.dataset.trainloader, epochs=epochs, lr=learning_rate, output_dir=model_dir)
    
        train_result = model.predict(dataloader.dataset.trainloader)
        train_predictions, train_labels = train_result

        train_eval_result = model.evaluate(train_labels, train_predictions)

        return train_eval_result['accuracy']

    return objective

study = optuna.create_study(direction='maximize')
#study.optimize(objective_lr(dataloader), n_trials=100)
study.optimize(objective_lr(dataloader), n_trials=3)

[I 2024-05-06 11:16:42,512] A new study created in memory with name: no-name-643b35fd-d5d7-425c-b6c4-15fef46680c0


Layer (type:depth-idx)                   Output Shape              Param #
Net                                      [256, 100]                --
├─Conv2d: 1-1                            [256, 64, 32, 32]         1,792
├─ReLU: 1-2                              [256, 64, 32, 32]         --
├─BatchNorm2d: 1-3                       [256, 64, 32, 32]         128
├─Conv2d: 1-4                            [256, 64, 32, 32]         36,928
├─ReLU: 1-5                              [256, 64, 32, 32]         --
├─BatchNorm2d: 1-6                       [256, 64, 32, 32]         128
├─MaxPool2d: 1-7                         [256, 64, 16, 16]         --
├─Dropout: 1-8                           [256, 64, 16, 16]         --
├─Conv2d: 1-9                            [256, 128, 16, 16]        73,856
├─ReLU: 1-10                             [256, 128, 16, 16]        --
├─BatchNorm2d: 1-11                      [256, 128, 16, 16]        256
├─Conv2d: 1-12                           [256, 128, 16, 16]        147,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-05-06 11:57:35,572] Trial 0 finished with value: 0.01 and parameters: {'learning_rate': 0.024242793427053037}. Best is trial 0 with value: 0.01.


Layer (type:depth-idx)                   Output Shape              Param #
Net                                      [256, 100]                --
├─Conv2d: 1-1                            [256, 64, 32, 32]         1,792
├─ReLU: 1-2                              [256, 64, 32, 32]         --
├─BatchNorm2d: 1-3                       [256, 64, 32, 32]         128
├─Conv2d: 1-4                            [256, 64, 32, 32]         36,928
├─ReLU: 1-5                              [256, 64, 32, 32]         --
├─BatchNorm2d: 1-6                       [256, 64, 32, 32]         128
├─MaxPool2d: 1-7                         [256, 64, 16, 16]         --
├─Dropout: 1-8                           [256, 64, 16, 16]         --
├─Conv2d: 1-9                            [256, 128, 16, 16]        73,856
├─ReLU: 1-10                             [256, 128, 16, 16]        --
├─BatchNorm2d: 1-11                      [256, 128, 16, 16]        256
├─Conv2d: 1-12                           [256, 128, 16, 16]        147,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-05-06 12:39:01,460] Trial 1 finished with value: 0.63478 and parameters: {'learning_rate': 3.3109434934771945e-05}. Best is trial 1 with value: 0.63478.


Layer (type:depth-idx)                   Output Shape              Param #
Net                                      [256, 100]                --
├─Conv2d: 1-1                            [256, 64, 32, 32]         1,792
├─ReLU: 1-2                              [256, 64, 32, 32]         --
├─BatchNorm2d: 1-3                       [256, 64, 32, 32]         128
├─Conv2d: 1-4                            [256, 64, 32, 32]         36,928
├─ReLU: 1-5                              [256, 64, 32, 32]         --
├─BatchNorm2d: 1-6                       [256, 64, 32, 32]         128
├─MaxPool2d: 1-7                         [256, 64, 16, 16]         --
├─Dropout: 1-8                           [256, 64, 16, 16]         --
├─Conv2d: 1-9                            [256, 128, 16, 16]        73,856
├─ReLU: 1-10                             [256, 128, 16, 16]        --
├─BatchNorm2d: 1-11                      [256, 128, 16, 16]        256
├─Conv2d: 1-12                           [256, 128, 16, 16]        147,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
[I 2024-05-06 13:20:47,567] Trial 2 finished with value: 0.64086 and parameters: {'learning_rate': 0.0005303739462539983}. Best is trial 2 with value: 0.64086.


In [8]:
study.best_params

{'learning_rate': 0.0005303739462539983}

In [9]:
#x = [epoch for epoch in range(epochs)]
#plt.plot(x, train_result['loss'])

## Test Model

In [10]:
train_result = model.predict(dataloader.dataset.trainloader)
train_predictions, train_labels = train_result

In [11]:
train_eval_result = model.evaluate(train_labels, train_predictions)
pprint.pprint(train_eval_result)

{'accuracy': 0.56602,
 'classification_report': {'0': {'f1-score': 0.7144060657118786,
                                 'precision': 0.6171761280931587,
                                 'recall': 0.848,
                                 'support': 500},
                           '1': {'f1-score': 0.6572379367720466,
                                 'precision': 0.5626780626780626,
                                 'recall': 0.79,
                                 'support': 500},
                           '10': {'f1-score': 0.4807511737089202,
                                  'precision': 0.45309734513274336,
                                  'recall': 0.512,
                                  'support': 500},
                           '11': {'f1-score': 0.4094754653130288,
                                  'precision': 0.3548387096774194,
                                  'recall': 0.484,
                                  'support': 500},
                           '12': {'f1-score': 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [12]:
test_result = model.predict(dataloader.dataset.testloader)
test_predictions, test_labels = test_result

In [13]:
test_eval_result = model.evaluate(test_labels, test_predictions)
pprint.pprint(test_eval_result)

{'accuracy': 0.444,
 'classification_report': {'0': {'f1-score': 0.5793650793650794,
                                 'precision': 0.48026315789473684,
                                 'recall': 0.73,
                                 'support': 100},
                           '1': {'f1-score': 0.49586776859504134,
                                 'precision': 0.4225352112676056,
                                 'recall': 0.6,
                                 'support': 100},
                           '10': {'f1-score': 0.3069767441860466,
                                  'precision': 0.28695652173913044,
                                  'recall': 0.33,
                                  'support': 100},
                           '11': {'f1-score': 0.29184549356223183,
                                  'precision': 0.2556390977443609,
                                  'recall': 0.34,
                                  'support': 100},
                           '12': {'f1-score': 0.5

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
