# Dependencies

In [1]:
import numpy as np

import torch
from torch import nn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import DataLoader, random_split

from torchvision.transforms import v2
from torchvision.datasets import CIFAR10

from torchsummary import summary

from torchmetrics import Accuracy, ConfusionMatrix

import matplotlib.pyplot as plt

from sklearn.metrics import classification_report

In [2]:
# seed every random number generator the notebook touches (torch CPU, torch CUDA, numpy)
random_state = 42
for seed_rng in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed):
    seed_rng(random_state)

In [3]:
# prefer the GPU when one is visible to torch, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

# Pre-Processing

## Load Dataset

In [4]:
# base pipeline: convert each sample to an image tensor, then to float32 (with value scaling)
transforms = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
])

# load the CIFAR-10 train / test splits; both share the same `transforms` object
dataset_kwargs = dict(root='./dataset', download=True, transform=transforms)
trainset = CIFAR10(train=True, **dataset_kwargs)
testset = CIFAR10(train=False, **dataset_kwargs)

# log: raw storage layout of each split
for split_name, split in (('trainset', trainset), ('testset', testset)):
    print(f"{split_name}:")
    for label, value in (
        (f"{split_name}.data.shape", split.data.shape),
        (f"{split_name}.data.dtype", split.data.dtype),
        (f"type({split_name}.data)", type(split.data)),
        (f"type({split_name}.targets)", type(split.targets)),
    ):
        # labels are left-padded to a common width so the colons line up
        print(f"    -> {label:<23}: {value}")
    print('-' * 50)

# log: label mapping and number of samples per class
train_counts = np.unique(trainset.targets, return_counts=True)[1]
test_counts = np.unique(testset.targets, return_counts=True)[1]
print(f"classes: {trainset.class_to_idx}")
print(f"trainset distribution: {train_counts}")
print(f"testset  distribution: {test_counts}")

Files already downloaded and verified
Files already downloaded and verified
trainset:
    -> trainset.data.shape    : (50000, 32, 32, 3)
    -> trainset.data.dtype    : uint8
    -> type(trainset.data)    : <class 'numpy.ndarray'>
    -> type(trainset.targets) : <class 'list'>
--------------------------------------------------
testset:
    -> testset.data.shape     : (10000, 32, 32, 3)
    -> testset.data.dtype     : uint8
    -> type(testset.data)     : <class 'numpy.ndarray'>
    -> type(testset.targets)  : <class 'list'>
--------------------------------------------------
classes: {'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
trainset distribution: [5000 5000 5000 5000 5000 5000 5000 5000 5000 5000]
testset  distribution: [1000 1000 1000 1000 1000 1000 1000 1000 1000 1000]


In [None]:
# Preview the first 32 training images in a 4x8 grid, each titled with its class name.
# NOTE: this cell reads `trainset.data` / `trainset.classes`, so it must run while `trainset`
# is still the full CIFAR10 dataset (i.e. before the split cell rebinds it to a subset).
fig, axs = plt.subplots(nrows=4, ncols=8, figsize=(12, 6), layout='compressed')
for idx, ax in enumerate(axs.flat):  # axs.flat walks the grid row by row, matching i * 8 + j
    # the images are (32, 32, 3) RGB arrays; imshow ignores `cmap` for RGB input, so none is passed
    ax.imshow(trainset.data[idx])
    ax.set_title(trainset.classes[trainset.targets[idx]])
    ax.axis('off')
plt.show()

## Split trainset into [trainset, validationset]

In [6]:
# hold out 10% of the training data for validation and keep the remaining 90% for training
validation_size = int(0.1 * len(trainset))
train_size = len(trainset) - validation_size
split_lengths = [train_size, validation_size]

# random split (rebinds `trainset` to the 90% subset)
trainset, validationset = random_split(trainset, split_lengths)

# log: size of each split plus the shape / label of its first sample
for split_name, split, trailer in (
    ('trainset', trainset, '\n'),
    ('validationset', validationset, '\n'),
    ('testset', testset, ''),
):
    first_image, first_label = split[0]
    print(f"{split_name}:")
    print(f"    -> len({split_name}) : {len(split)}")
    print(f"    -> {split_name}[0][0]: {first_image.shape}")
    print(f"    -> {split_name}[0][1]: {first_label}{trailer}")

trainset:
    -> len(trainset) : 45000
    -> trainset[0][0]: torch.Size([3, 32, 32])
    -> trainset[0][1]: 6

validationset:
    -> len(validationset) : 5000
    -> validationset[0][0]: torch.Size([3, 32, 32])
    -> validationset[0][1]: 7

testset:
    -> len(testset) : 10000
    -> testset[0][0]: torch.Size([3, 32, 32])
    -> testset[0][1]: 3


## Normalization

In [11]:
# Pull the whole training split through a loader whose single batch is the entire split.
# NOTE: run this before Normalize is appended to the shared `transforms` pipeline; afterwards
# the statistics would be computed on already-normalised images.
full_batch_loader = DataLoader(trainset, batch_size=len(trainset))
train_images, train_labels = next(iter(full_batch_loader))

# reduce over batch, height and width -> one mean / std value PER CHANNEL
train_mean = train_images.mean(dim=(0, 2, 3))  # recorded run: [0.4917, 0.4823, 0.4467]
train_std = train_images.std(dim=(0, 2, 3))    # recorded run: [0.2471, 0.2435, 0.2616]

# release the temporaries; the full-split batch is large
del full_batch_loader, train_images, train_labels

# log
print(f"train mean per channel: {train_mean}")
print(f"train std  per channel: {train_std}")

train mean per channel: tensor([0.4917, 0.4823, 0.4467])
train std  per channel: tensor([0.2471, 0.2435, 0.2616])


## Transform

In [None]:
# display the current transform pipeline (bare last expression, so the notebook renders its repr)
transforms

Compose(
      ToImage()
      ToDtype(scale=True)
)

In [None]:
# Add per-channel standardisation to the pipeline. `transforms` is the same object that was
# passed to both CIFAR10 datasets, so mutating it in place updates trainset.dataset,
# validationset.dataset and testset together.
# The guard keeps the cell idempotent: re-running it must not stack a second Normalize,
# which would normalise every image twice.
if not any(isinstance(step, v2.Normalize) for step in transforms.transforms):
    # pass plain Python floats rather than 0-dim tensors
    transforms.transforms.append(
        v2.Normalize(mean=train_mean.tolist(), std=train_std.tolist())
    )

# log
print(f"trainset.dataset.transforms: {trainset.dataset.transforms}")
print(f"validationset.dataset.transforms: {validationset.dataset.transforms}")
print(f"testset.transforms: {testset.transforms}")

trainset.dataset.transforms: StandardTransform
Transform: Compose(
                 ToImage()
                 ToDtype(scale=True)
                 Normalize(mean=[tensor(0.4917), tensor(0.4823), tensor(0.4467)], std=[tensor(0.2471), tensor(0.2435), tensor(0.2616)], inplace=False)
           )
validationset.dataset.transforms: StandardTransform
Transform: Compose(
                 ToImage()
                 ToDtype(scale=True)
                 Normalize(mean=[tensor(0.4917), tensor(0.4823), tensor(0.4467)], std=[tensor(0.2471), tensor(0.2435), tensor(0.2616)], inplace=False)
           )
testset.transforms: StandardTransform
Transform: Compose(
                 ToImage()
                 ToDtype(scale=True)
                 Normalize(mean=[tensor(0.4917), tensor(0.4823), tensor(0.4467)], std=[tensor(0.2471), tensor(0.2435), tensor(0.2616)], inplace=False)
           )


In [None]:
# log
print("before applying transform:")
print(f"    -> type(testset.data[0]) : {type(testset.data[0])}")
print(f"    -> testset.data[0].dtype : {testset.data[0].dtype}")
print(f"    -> testset.data[0].shape : {testset.data[0].shape}")
print('-' * 50)
print("after applying transform:")
print(f"    -> type(testset[0][0])   : {type(testset[0][0])}")
print(f"    -> testset[0][0].dtype   : {testset[0][0].dtype}")
print(f"    -> testset[0][0].shape   : {testset[0][0].shape}")

before applying transform:
    -> type(testset.data[0]) : <class 'numpy.ndarray'>
    -> testset.data[0].dtype : uint8
    -> testset.data[0].shape : (32, 32, 3)
--------------------------------------------------
after applying transform:
    -> type(testset[0][0])   : <class 'torchvision.tv_tensors._image.Image'>
    -> testset[0][0].dtype   : torch.float32
    -> testset[0][0].shape   : torch.Size([3, 32, 32])


## DataLoader

In [None]:
batch_size = 64

# settings shared by all three loaders
loader_kwargs = dict(batch_size=batch_size, num_workers=2)

# only the training loader shuffles; validation / test keep dataset order
trainloader = DataLoader(dataset=trainset, shuffle=True, **loader_kwargs)
validationloader = DataLoader(dataset=validationset, shuffle=False, **loader_kwargs)
testloader = DataLoader(dataset=testset, shuffle=False, **loader_kwargs)

In [None]:
# grab one batch from every loader to sanity-check shapes and dtypes
first_train_batch = next(iter(trainloader))
first_validation_batch = next(iter(validationloader))
first_test_batch = next(iter(testloader))

first_batches = (
    ('trainloader', first_train_batch),
    ('validationloader', first_validation_batch),
    ('testloader', first_test_batch),
)
for loader_name, (images, labels) in first_batches:
    print(
        f"{loader_name:<16} first batch     -> "
        f"x.shape: {images.shape} - y.shape: {labels.shape} - "
        f"x.dtype: {images.dtype} - y.dtype: {labels.dtype}"
    )

# Size of the final (possibly partial) batch of each loader.
# A remainder of 0 means the dataset divides evenly, so the last batch is a full one.
for loader_name, split in (
    ('trainloader', trainset),
    ('validationloader', validationset),
    ('testloader', testset),
):
    remainder = len(split) % batch_size
    print(f"{loader_name:<16} last batch-size -> {remainder or batch_size}")

trainloader      first batch     -> x.shape: torch.Size([64, 3, 32, 32]) - y.shape: torch.Size([64]) - x.dtype: torch.float32 - y.dtype: torch.int64
validationloader first batch     -> x.shape: torch.Size([64, 3, 32, 32]) - y.shape: torch.Size([64]) - x.dtype: torch.float32 - y.dtype: torch.int64
testloader       first batch     -> x.shape: torch.Size([64, 3, 32, 32]) - y.shape: torch.Size([64]) - x.dtype: torch.float32 - y.dtype: torch.int64
trainloader      last batch-size -> 8
validationloader last batch-size -> 8
testloader       last batch-size -> 16


# Network Structure

![alt text](resources/images/convolutional-neural-network.svg)

In [None]:
class CIFAR10Model(nn.Module):
    """Small CNN classifier: two conv stages, global average pooling, one linear layer.

    Args:
        in_channels: number of channels of the input images.
        output_dim: number of output logits (one per class).

    Shape walk-through for a 3x32x32 input (matches the recorded torchsummary output):
        3x32x32 -> conv 3x3 -> 32x30x30 -> max-pool 2 -> 32x15x15
                -> conv 3x3 -> 64x13x13 -> max-pool 2 -> 64x6x6
                -> adaptive average pool -> 64x1x1 -> flatten -> 64 -> linear -> output_dim
    """

    def __init__(self, in_channels, output_dim):
        super().__init__()

        def conv_stage(channels_in, channels_out):
            # Conv -> BatchNorm (per-channel standardisation) -> ReLU -> 2x2 max-pool
            return [
                nn.Conv2d(channels_in, channels_out, kernel_size=3),
                nn.BatchNorm2d(channels_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]

        # the stages are unpacked into one flat Sequential so layer indices stay 0..8
        self.feature_extractor = nn.Sequential(
            *conv_stage(in_channels, 32),
            *conv_stage(32, 64),
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
        )

        self.flatten = nn.Flatten(start_dim=1)

        self.classifier = nn.Sequential(
            nn.Linear(64, output_dim),
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images `x`."""
        features = self.flatten(self.feature_extractor(x))
        return self.classifier(features)

In [None]:
# derive the model dimensions from the data: channel count of one sample, number of classes
in_channels = trainset[0][0].size(0)
output_dim = len(trainset.dataset.classes)

# build the network and move it to the selected device (`.to` returns the same module)
model = CIFAR10Model(in_channels, output_dim).to(device)
model

CIFAR10Model(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (classifier): Sequential(
    (0): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [None]:
# the raw test data is stored as (N, H, W, C); torchsummary wants a channels-first (C, H, W) size
height, width, channels = testset.data.shape[1:]
summary(model, input_size=(channels, height, width), batch_size=batch_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [64, 32, 30, 30]             896
       BatchNorm2d-2           [64, 32, 30, 30]              64
              ReLU-3           [64, 32, 30, 30]               0
         MaxPool2d-4           [64, 32, 15, 15]               0
            Conv2d-5           [64, 64, 13, 13]          18,496
       BatchNorm2d-6           [64, 64, 13, 13]             128
              ReLU-7           [64, 64, 13, 13]               0
         MaxPool2d-8             [64, 64, 6, 6]               0
 AdaptiveAvgPool2d-9             [64, 64, 1, 1]               0
          Flatten-10                   [64, 64]               0
           Linear-11                   [64, 10]             650
Total params: 20,234
Trainable params: 20,234
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.75
Forward/ba

# Set up remaining Hyper-Parameters

In [None]:
# optimisation settings
num_epochs = 10
lr = 0.001

# loss function and the optimizer that updates all model parameters
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=lr)

# Train & Validation Loop

In [None]:
# per-epoch history, filled by the train / validation loop (four independent lists)
train_acc_per_epoch, train_loss_per_epoch = [], []
val_acc_per_epoch, val_loss_per_epoch = [], []

In [None]:
# one top-1 accuracy accumulator per split, both placed on the training device
accuracy_kwargs = dict(task='multiclass', num_classes=len(testset.classes), top_k=1)
train_acc = Accuracy(**accuracy_kwargs).to(device)
val_acc = Accuracy(**accuracy_kwargs).to(device)

In [None]:
# Train for `num_epochs` epochs; after each epoch evaluate on the validation split and append
# the epoch's loss / accuracy to the four history lists.
for epoch in range(num_epochs):

    # ---- train loop ----
    model.train()
    train_loss = 0

    for x, y in trainloader:

        # send data to GPU
        x, y_true = x.to(device), y.to(device)

        # clear gradients first so nothing left over from an interrupted run leaks into this step
        optimizer.zero_grad()

        # forward
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        # backward
        loss.backward()

        # update parameters
        optimizer.step()

        # log loss & accuracy
        # the loss is weighted by the batch size so that dividing by the dataset size below
        # gives a per-sample average even when the last batch is smaller
        train_loss += loss.item() * len(x)
        train_acc.update(y_pred, y_true)

    train_loss_per_epoch.append(train_loss / len(trainset))
    train_acc_per_epoch.append(train_acc.compute().item())
    train_acc.reset()  # start the next epoch with an empty accumulator

    # ---- validation loop ----
    model.eval()
    val_loss = 0

    with torch.no_grad():
        for x, y in validationloader:

            # send data to GPU
            x, y_true = x.to(device), y.to(device)

            # forward
            y_pred = model(x)
            loss = criterion(y_pred, y_true)

            # log loss & accuracy
            val_loss += loss.item() * len(x)
            val_acc.update(y_pred, y_true)

    val_loss_per_epoch.append(val_loss / len(validationset))
    val_acc_per_epoch.append(val_acc.compute().item())
    val_acc.reset()

    # log
    # index with [-1] (the value just appended) rather than [epoch]: if this cell is re-run
    # without clearing the history lists, [epoch] would print numbers from the earlier run
    print(
        f"epoch {epoch:>1}  ->  "
        f"train[loss: {train_loss_per_epoch[-1]:.5f} - acc: {train_acc_per_epoch[-1]:.2f}] | "
        f"validation[loss: {val_loss_per_epoch[-1]:.5f} - acc: {val_acc_per_epoch[-1]:.2f}]"
    )


epoch 0  ->  train[loss: 1.60772 - acc: 0.43] | validation[loss: 1.46902 - acc: 0.49]
epoch 1  ->  train[loss: 1.36909 - acc: 0.52] | validation[loss: 1.36441 - acc: 0.51]
epoch 2  ->  train[loss: 1.28374 - acc: 0.55] | validation[loss: 1.29317 - acc: 0.54]
epoch 3  ->  train[loss: 1.23066 - acc: 0.57] | validation[loss: 1.22654 - acc: 0.57]
epoch 4  ->  train[loss: 1.18546 - acc: 0.59] | validation[loss: 1.19977 - acc: 0.58]
epoch 5  ->  train[loss: 1.15092 - acc: 0.60] | validation[loss: 1.16889 - acc: 0.59]
epoch 6  ->  train[loss: 1.12153 - acc: 0.61] | validation[loss: 1.17079 - acc: 0.59]
epoch 7  ->  train[loss: 1.09683 - acc: 0.62] | validation[loss: 1.19746 - acc: 0.57]
epoch 8  ->  train[loss: 1.07993 - acc: 0.63] | validation[loss: 1.08246 - acc: 0.63]
epoch 9  ->  train[loss: 1.05732 - acc: 0.63] | validation[loss: 1.12181 - acc: 0.61]


## Model Analysis

In [None]:
# plot
fig, axs = plt.subplots(nrows= 1, ncols= 2, figsize= (10, 4), layout= 'compressed')

axs[0].plot(train_loss_per_epoch, label= "Train loss")
axs[0].plot(val_loss_per_epoch, label= "Validation loss")
axs[0].set(title= "Loss over time", xlabel= 'Epoch', ylabel= 'Loss')
axs[0].legend(loc= 'best', fancybox= True, shadow= True)

axs[1].plot(train_acc_per_epoch, label= "Train accuracy")
axs[1].plot(val_acc_per_epoch, label= "Validation accuracy")
axs[1].set(title= "Accuracy over time", xlabel= 'Epoch', ylabel= 'Accuracy')
axs[1].legend(loc= 'best', fancybox= True, shadow= True)

plt.show()

# Test Loop

In [None]:
# top-1 accuracy accumulator for the test split, placed on the training device
test_acc = Accuracy(
    task='multiclass',
    num_classes=len(testset.classes),
    top_k=1,
).to(device)

In [None]:
# Evaluate the trained model on the test split and collect per-sample predictions / targets
# for the metric cells that follow.
model.eval()
test_acc.reset()  # start clean so re-running this cell does not accumulate batches twice
test_loss = 0
predictions = []
targets = []

with torch.no_grad():
    for x, y in testloader:

        # send data to GPU
        x, y_true = x.to(device), y.to(device)

        # forward
        y_pred = model(x)
        loss = criterion(y_pred, y_true)

        # log loss & accuracy (loss weighted by batch size; averaged over the dataset below)
        test_loss += loss.item() * len(x)
        test_acc.update(y_pred, y_true)

        # store plain Python ints (class indices) instead of 0-dim tensors
        predictions.extend(y_pred.argmax(dim=1).tolist())
        targets.extend(y_true.tolist())

# log
print(f"test[loss: {test_loss / len(testset):.5f} - acc: {test_acc.compute().item():.2f}]")

test[loss: 1.11687 - acc: 0.60]


## Metrics
   - loss
   - accuracy
   - recall
   - precision
   - f1-score
   - ROC Curve
   - AUC (area under the ROC curve)
   - ...

In [None]:
# classification report
print(classification_report(targets, predictions))

              precision    recall  f1-score   support

           0       0.76      0.50      0.60      1000
           1       0.57      0.91      0.70      1000
           2       0.42      0.62      0.50      1000
           3       0.54      0.29      0.38      1000
           4       0.57      0.47      0.51      1000
           5       0.70      0.41      0.52      1000
           6       0.60      0.76      0.67      1000
           7       0.58      0.76      0.66      1000
           8       0.74      0.73      0.73      1000
           9       0.79      0.59      0.67      1000

    accuracy                           0.60     10000
   macro avg       0.63      0.60      0.59     10000
weighted avg       0.63      0.60      0.59     10000



In [None]:
# confusion matrix
metric = ConfusionMatrix(task= 'multiclass', num_classes= 10)
confusion_matrix = metric(torch.tensor(predictions), torch.tensor(targets))

# log
print(confusion_matrix)

# plot
fig, ax = plt.subplots(figsize= (8, 8))
metric.plot(ax= ax)
plt.show()

tensor([[496, 118, 132,   5,  18,   4,  21,  26, 145,  35],
        [ 11, 906,  16,   0,   2,   0,   6,   9,  17,  33],
        [ 46,  30, 618,  25,  73,  10, 103,  75,  12,   8],
        [  9,  64, 189, 295,  64, 104, 151,  99,  15,  10],
        [ 13,  19, 161,  25, 470,  18, 115, 153,  18,   8],
        [  4,  26, 151, 141,  62, 412,  61, 130,   8,   5],
        [  2,  19, 108,  25,  56,   5, 758,  22,   3,   2],
        [  5,  18,  73,  17,  57,  32,  22, 758,   2,  16],
        [ 55, 102,  29,   8,  13,   1,  19,  11, 726,  36],
        [  8, 298,  12,   4,  15,   2,  13,  27,  35, 586]])


# Prediction

In [None]:
def predict(model: nn.Module, data: np.ndarray, classes: list, transform: "v2.Compose | None" = None) -> np.ndarray:
    """Predict class labels for one sample or a batch of samples.

    Args:
        model: trained network; it is switched to eval mode as a side effect.
        data: a single sample (3 dimensions) or a batch of samples (leading batch dimension).
        classes: class names, indexed by the model's output index.
        transform: optional per-sample preprocessing whose outputs can be stacked into a
            batch tensor. When omitted, `data` is assumed to be model-ready already and is
            only converted to a tensor.

    Returns:
        The predicted class names, looked up in `classes`.
        NOTE(review): for a single-sample input NumPy may treat the one-element index
        tensor as a scalar index and return a scalar string instead of a length-1 array
        -- confirm before relying on the shape.
    """
    # add batch dimension to a single data (indexing with None works for arrays and tensors)
    if len(data.shape) == 3:
        data = data[None]

    # apply the transform sample by sample, or just convert when there is none
    # (previously a missing transform left a NumPy array that has no `.to` method)
    if transform is not None:
        batch = torch.stack([transform(sample) for sample in data])
    else:
        batch = torch.as_tensor(data)

    # predict
    model.eval()
    with torch.no_grad():

        # send data to wherever the model's parameters live, so the function does not depend
        # on a notebook-level `device` variable; a parameter-free model leaves data in place
        first_param = next(model.parameters(), None)
        if first_param is not None:
            batch = batch.to(first_param.device)

        # forward
        y_pred = model(batch).argmax(dim=1).cpu()

        # idx to labels
        y_pred = np.array(classes)[y_pred]

    return y_pred

In [None]:
# some raw data
raw_data = CIFAR10(root= './dataset', train= False, download= True, transform= None).data[:32]

# predict
y_pred = predict(model, data= raw_data, classes= testset.classes, transform= transforms)

# log
print(f"predictions:\n{y_pred}")

Files already downloaded and verified
predictions:
['frog' 'automobile' 'ship' 'airplane' 'frog' 'frog' 'automobile' 'deer'
 'cat' 'automobile' 'ship' 'truck' 'cat' 'horse' 'truck' 'frog' 'dog'
 'horse' 'truck' 'frog' 'horse' 'airplane' 'ship' 'automobile' 'deer'
 'bird' 'bird' 'bird' 'automobile' 'frog' 'frog' 'bird']


In [None]:
# plot
fig, axs = plt.subplots(nrows= 4, ncols= 8, figsize= (12, 6), layout= 'compressed')

for i in range(4):
    for j in range(8):
        axs[i, j].imshow(raw_data[i * 8 + j], cmap= 'gray')
        axs[i, j].set_title(predict(model, raw_data[i * 8 + j], testset.classes, transform= transforms))
        axs[i, j].axis('off')

plt.show()