<a href="https://colab.research.google.com/github/lucianoselimaj/MLDL_Labs/blob/main/Lab02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lab 02: Training a Custom Model


**Objective of this lab**: training a small custom model on the Tiny-ImageNet dataset.

## Dataset preparation

In [None]:
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip tiny-imagenet-200.zip -d tiny-imagenet

We need to adjust the format of the val split of the dataset to be used with ImageFolder.

In [None]:
import os  # built-in module for interacting with the operating system
import shutil  # high-level file operations (e.g. copying or removing files and directories)

# The val split is stored as one flat folder (val/images/) plus an annotation
# file mapping each filename to its class. ImageFolder needs one sub-directory
# per class instead, so each image is copied into val/<class>/.
val_dir = 'tiny-imagenet/tiny-imagenet-200/val'
val_images_dir = f'{val_dir}/images'

# Only restructure while the flat folder still exists: re-running this cell
# after a successful run is then a no-op instead of a FileNotFoundError.
if os.path.isdir(val_images_dir):
    with open(f'{val_dir}/val_annotations.txt') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines in the annotation file
            # Tab-separated fields: filename, class label, then columns that
            # are not needed here (collected and discarded by *_).
            fn, cls, *_ = line.split('\t')
            # One directory per class; exist_ok avoids an error when the
            # directory was already created for a previous image.
            os.makedirs(f'{val_dir}/{cls}', exist_ok=True)
            shutil.copyfile(f'{val_images_dir}/{fn}', f'{val_dir}/{cls}/{fn}')
    # Once every image has been copied into its class folder, delete the
    # original flat folder so ImageFolder does not see it as a class.
    shutil.rmtree(val_images_dir)
else:
    print(f'{val_images_dir} not found: val split already restructured '
          '(or dataset not extracted yet), skipping.')

After downloading, the validation images are all stored in a single flat folder: val/images/

We create one subfolder per class label and copy each image into the folder of its class:  <br>
val/n01443537/val_1.JPEG <br>
val/n01443537/val_2.JPEG <br>
... <br>
val/n01440764/val_3.JPEG <br>
... <br>


In the following step we load the dataset, applying the required transformations.

In [None]:
from torchvision.datasets import ImageFolder
import torchvision.transforms as T

# Per-channel mean / std handed to Normalize (one value per RGB channel).
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Preprocessing steps, applied in this order to every image.
_preprocess_steps = [
    # Resize to fit the input dimensions of the network.
    T.Resize((224, 224)),
    # Scale pixel values from 0–255 to 0–1 (float32), with shape [C,H,W].
    T.ToTensor(),
    T.Normalize(mean=_norm_mean, std=_norm_std),
]
transform = T.Compose(_preprocess_steps)

# ImageFolder expects the layout root/{classX}/x001.jpg; both splits share
# the same preprocessing pipeline.
tiny_imagenet_dataset_train = ImageFolder(
    root='tiny-imagenet/tiny-imagenet-200/train',
    transform=transform,
)
tiny_imagenet_dataset_val = ImageFolder(
    root='tiny-imagenet/tiny-imagenet-200/val',
    transform=transform,
)

In [None]:
# Report how many samples each split contains.
train_size = len(tiny_imagenet_dataset_train)
val_size = len(tiny_imagenet_dataset_val)
print(f"Length of train dataset: {train_size}")
print(f"Length of val dataset: {val_size}")

# The following code also checks the number of samples per class
# from collections import Counter

# class_counts = Counter([target for _, target in tiny_imagenet_dataset_val])
# for class_label, count in class_counts.items():
#     print(f"Class {class_label}: {count} entries")


Length of train dataset: 100000
Length of val dataset: 10000


In [None]:
import torch
# Loader settings shared by both splits so they cannot drift apart.
batch_size = 32
# Number of background worker processes that load and transform images while
# the GPU is busy. Previously only the train loader used workers, so the
# validation images were all loaded in the main process.
num_workers = 8

# Training data is reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    tiny_imagenet_dataset_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
)
# Validation order does not matter for the metrics, so no shuffling.
val_loader = torch.utils.data.DataLoader(
    tiny_imagenet_dataset_val,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
)



## Custom model definition

The CNN defined below is built from three main kinds of layers (plus pooling, which shrinks the feature maps): <br>


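1.   Convolutional layer (nn.Conv2d):
this layer slides small filters across the image to detect features like edges or corners
2.   Activation function (here ReLU): applied after each convolution to add non-linearity
3.   Fully connected layer (nn.Linear): maps the final features to one score per class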



In [None]:
from torch import nn

# Define the custom neural network
class CustomNet(nn.Module):
    """Small CNN classifier: three conv/ReLU/max-pool stages, a global max
    pool, and a linear classification head.

    The spatial dimensions are collapsed with a global (adaptive) max pool.
    Chaining fixed 2x2 pools down to 1x1 instead rounds odd sizes down
    (7 -> 3 -> 1), which silently drops the last row/column at each step so
    that only the top-left part of the feature map reaches the classifier,
    and it raises an error for inputs that do not shrink to exactly 1x1.
    The global pool looks at the whole feature map and accepts any input
    that survives the three 2x2 pools (height and width >= 8).

    Args:
        num_classes: Size of the output layer. Defaults to 200, the number
            of classes in Tiny-ImageNet.
    """

    def __init__(self, num_classes: int = 200):
        super().__init__()
        # 3 input channels (RGB) -> 64 feature maps. A 3x3 kernel with
        # padding=1 and stride=1 keeps the spatial size unchanged.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1, stride=1)
        # Takes the 64 channels from conv1 and applies 128 new filters.
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)

        # Not every tiny detail is needed, so pooling halves height and
        # width after each convolution (e.g. 224 -> 112).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Collapses whatever spatial size is left to 1x1 (max per channel).
        # It has no parameters, so the state_dict keys are unchanged.
        self.global_pool = nn.AdaptiveMaxPool2d(1)

        self.fc1 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Tensor of shape [B, 3, H, W] (batch, channel, height, width).

        Returns:
            Tensor of shape [B, num_classes] with unnormalized class scores.
        """
        # Shapes in the comments are for a 224x224 input.
        x = self.pool(self.conv1(x).relu())  # [B, 64, 112, 112]
        x = self.pool(self.conv2(x).relu())  # [B, 128, 56, 56]
        x = self.pool(self.conv3(x).relu())  # [B, 256, 28, 28]

        x = self.global_pool(x)              # [B, 256, 1, 1]
        x = x.flatten(1)                     # [B, 256]
        return self.fc1(x)                   # [B, num_classes]

What is missing in the code:


1.   Zero the gradients
2.   Forward pass
3.   Compute the Loss
4.   Backward pass (Backpropagation)
5.   Optimizer step (update weights)

In [None]:
def train(epoch, model, train_loader, criterion, optimizer):
    """Run one training epoch and print the mean batch loss and the accuracy.

    Args:
        epoch: Epoch number; only used in the printed summary.
        model: Network to train. It is switched to training mode.
        train_loader: Sized iterable of ``(inputs, targets)`` batches.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        Training accuracy of this epoch in percent (0.0 for an empty loader).
    """
    # Set the model to training mode.
    model.train()

    # Follow the model's device instead of hard-coding CUDA, so the same loop
    # runs on CPU-only machines. A model without parameters leaves the
    # batches where they are.
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, targets in train_loader:
        if device is not None:
            inputs, targets = inputs.to(device), targets.to(device)

        # Gradients accumulate by default, so clear the old ones first.
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        # Loss
        loss = criterion(outputs, targets)
        # Backward pass (backpropagation)
        loss.backward()
        # Update the weights
        optimizer.step()

        running_loss += loss.item()
        # The predicted class is the index of the highest score.
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    # Guard the divisions so an empty loader reports zeros instead of
    # raising ZeroDivisionError.
    num_batches = len(train_loader)
    train_loss = running_loss / num_batches if num_batches else 0.0
    train_accuracy = 100. * correct / total if total else 0.0
    print(f'Train Epoch: {epoch} Loss: {train_loss:.6f} Acc: {train_accuracy:.2f}%')
    return train_accuracy

Validation: <br>
In this step:


*   We don't do Backpropagation
*   We don't update the weights
*   We only evaluate how good the model is

In [None]:
# Validation loop
def validate(model, val_loader, criterion):
    """Evaluate the model on a validation set and print loss and accuracy.

    No gradients are computed and no weights are updated.

    Args:
        model: Network to evaluate. It is switched to evaluation mode.
        val_loader: Sized iterable of ``(inputs, targets)`` batches.
        criterion: Loss function called as ``criterion(outputs, targets)``.

    Returns:
        Validation accuracy in percent (0.0 for an empty loader).
    """
    # Set the model to evaluation mode.
    model.eval()

    # Follow the model's device instead of hard-coding CUDA, so evaluation
    # also runs on CPU-only machines. A model without parameters leaves the
    # batches where they are.
    first_param = next(model.parameters(), None)
    device = None if first_param is None else first_param.device

    val_loss = 0
    correct, total = 0, 0

    # Disable gradient tracking: this pass only measures the model.
    with torch.no_grad():
        for inputs, targets in val_loader:
            if device is not None:
                inputs, targets = inputs.to(device), targets.to(device)

            # Forward pass
            outputs = model(inputs)
            # Loss
            loss = criterion(outputs, targets)

            val_loss += loss.item()
            # The predicted class is the index of the highest score.
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    # Guard the divisions so an empty loader reports zeros instead of
    # raising ZeroDivisionError.
    num_batches = len(val_loader)
    val_loss = val_loss / num_batches if num_batches else 0.0
    val_accuracy = 100. * correct / total if total else 0.0

    print(f'Validation Loss: {val_loss:.6f} Acc: {val_accuracy:.2f}%')
    return val_accuracy

## Putting everything together

In [None]:
# Number of passes over the training set.
num_epochs = 10

# Network on the GPU, cross-entropy loss, and SGD with momentum.
model = CustomNet().cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Highest validation accuracy seen so far.
best_acc = 0

for epoch in range(1, num_epochs + 1):
    # One pass over the training data ...
    train(epoch, model, train_loader, criterion, optimizer)

    # ... followed by an evaluation on the validation split.
    val_accuracy = validate(model, val_loader, criterion)

    # Keep the best result across epochs.
    if val_accuracy > best_acc:
        best_acc = val_accuracy


print(f'Best validation accuracy: {best_acc:.2f}%')
