In [3]:
# Load pretrained ResNet18 Model
from torchvision import models
model = models.resnet18(weights="IMAGENET1K_V1")



We have loaded a ResNet-18 model that has already been trained on the ImageNet dataset. ImageNet has 1.2 million labelled images across 1000 categories. Because of this huge training, Resnet-18 has already learned general features such as edges, textures and simple shapes ect..

ref = "https://docs.pytorch.org/vision/stable/models/generated/torchvision.models.resnet18.html"

The following cells uses code adapted from the Transfer Learning section of: "https://learnopencv.com/image-classification-using-transfer-learning-in-pytorch/"

In [4]:
#Freeze all backbone layers 
for param in model.parameters():
    param.requires_grad = False

#replace final layer for 43 traffic sign classes 
model.fc = torch.nn.Linear(model.fc.in_features, 43)

NameError: name 'torch' is not defined

ref for torch.nn.Linear = "https://docs.pytorch.org/vision/stable/_modules/torchvision/models/resnet.html#ResNet18_Weights "

We freeze the backbone layers because ResNet 18 already knows how to see edges, colours, textures from being trained on ImageNet. We don't need to retrain all of that, it would take hours.

Then, we replace the final layer. The original ResNet 18 can classify 1000 ImageNet classes, but our dataset has 43 traffic sign classes. So we simply swap the final layer for a new one that outputs 43 categories instead of 1000.

In [None]:
import torch.nn as nn 
import torch.optim as optim

#loss function (CrossEntropy = Softmax + NLL combined)
loss_func = nn.CrossEntropyLoss()

#optimiser (only train final layer)
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

loss_func = "https://docs.pytorch.org/docs/stable/nn.html#loss-functions"

CrossEntropyLoss is the standard loss for multi-class classification, GTSRB has 43 classes, so this is the appropiate choice. Adam updates only the new classifier layer so ResNets learned ImageNet features stay frozen.

In [None]:
#Resnet-18 Training Loop 
import torch 
from torch.autograd import Variable

epochs = 10  

for epoch in range(epochs):
    model.train()

    running_loss = 0.0
    running_corrects = 0

    for images, labels in train_loader:

        #move to CPU (my mac has no GPU)
        images = Variable(images)
        labels = Variable(labels)

        #Zero the gradients of the final layer 
        optimizer.zero_grad()

        #Forward pass
        outputs = model(images)

        #Loss
        loss = loss_func(outputs, labels )

        #Backprop (only final layer updates)
        loss.backward()
        optimizer.step()

        #Track Statistics 
        # accumulate loss and correct predictions
        running_loss += loss.item() * images.size(0)
        running_corrects += (outputs.argmax(1) == labels).sum().item()

    # compute full-dataset train metrics
    train_loss = running_loss / len(train_loader.dataset)
    train_acc = running_corrects / len(train_loader.dataset)

    # VALIDATION 
    model.eval()
    val_running_loss = 0.0
    val_running_corrects = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = loss_func(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            val_running_corrects += (outputs.argmax(1) == labels).sum().item()

    val_loss = val_running_loss / len(val_loader.dataset)
    val_acc = val_running_corrects / len(val_loader.dataset)

    # print 4 values
    print(f"Epoch {epoch+1}")
    print(f"Train Loss {train_loss:.4f} | Acc {train_acc:.4f}")
    print(f"Val   Loss {val_loss:.4f} | Acc {val_acc:.4f}")

The training and validation accuraxy remain close throughout, showing that the model is learning generalisable features rather than memorising the training set. After around Epoch 6-7, improvements slow down, suggesting the model is approaching its performance plateau.

ref : training section = "https://learnopencv.com/image-classification-using-transfer-learning-in-pytorch/"

In [None]:
#create a file to store learned weights - inform leo of the new name 

torch.save(model.state_dict(), "resnet18_stage1_reduced.pth")


After running the baseline model, we are now going to fine tune. We start by unfreezing the last ResNet 18 Block (layer 4).

In [None]:
# Unfreeze the last ResNet block for fine-tuning
for name, param in model.named_parameters():
    if "layer4" in name:     # last residual block
        param.requires_grad = True
    else:
        param.requires_grad = False

Next we set up two different learning rates.

In [None]:
# two learning-rate optimizer 

classifier_params = list(model.fc.parameters())

layer4_params = [
    p for name, p in model.named_parameters()
    if name.startswith('layer4')
]


optimizer = torch.optim.Adam([
    {"params": model.fc.parameters(), "lr": 1e-3},      # classifier head
    {"params": model.layer4.parameters(), "lr": 1e-4}   # fine-tuned block
])

Next we create learning rate scheduler to automatically reduce the learning rate when the model stops improving. And we set up early- stopping variables that will stop the training early when the validation loss gets for for serveral epochs.

In [None]:
# create the learning rate scheduler 

from torch.optim.lr_scheduler import ReduceLROnPlateau

scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.2,
    patience=3,
    min_lr=1e-6
)

In [None]:
# early stopping variables

best_val_loss = float("inf")
patience = 5
wait = 0
best_model_state = None

Now we will run the fine tuning loop again but with the fine tuning:

In [None]:
#fine tuning applied onto resnet loop 

import copy

EPOCHS = 5


for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")

    model.train()
    running_loss = 0
    running_corrects = 0

    for images, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(images)
        loss = loss_func(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        running_corrects += (outputs.argmax(1) == labels).sum().item()

    train_loss = running_loss / len(train_loader.dataset)
    train_acc = running_corrects / len(train_loader.dataset)

    #  validation
    model.eval()
    val_running_loss = 0
    val_running_corrects = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = loss_func(outputs, labels)

            val_running_loss += loss.item() * images.size(0)
            val_running_corrects += (outputs.argmax(1) == labels).sum().item()

    val_loss = val_running_loss / len(val_loader.dataset)
    val_acc = val_running_corrects / len(val_loader.dataset)

    #scheduler step
    scheduler.step(val_loss)

    print(f"Train Loss {train_loss:.4f} | Acc {train_acc:.4f}")
    print(f"Val   Loss {val_loss:.4f} | Acc {val_acc:.4f}")

    # Early stopping 
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        torch.save(best_model_state, "resnet18_best_finetuned.pth")
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping triggered.")
            break

As we can see, fine- tuning produces a very rapid improvement in performance, with the validation accuracy starting above 95% and reaching ~97.7%. both training and validation losses decrease smoothly (see plots), showing stable optimisation without signs of overfitting. Compared to our baseline model, this fine-tund stage achieves a large improvemnet, confirming that updating the backbone layers in crucial for extracting more detailed traffic-sign features.

In [None]:
#manually creating the history as I had issues with running it again

history = {
    "train_loss": [0.3199, 0.1161, 0.0697, 0.0525, 0.0338],
    "train_acc":  [0.8968, 0.9640, 0.9783, 0.9841, 0.9894],
    "val_loss":   [0.1476, 0.1071, 0.0843, 0.0728, 0.0668],
    "val_acc":    [0.9513, 0.9671, 0.9725, 0.9753, 0.9768]
}

In [None]:
# Visualising Training Curves for ResNet18


import matplotlib.pyplot as plt

epochs_range = range(1, len(history["train_loss"]) + 1)

plt.figure(figsize=(14, 5))

# ---- Loss Curve ----
plt.subplot(1, 2, 1)
plt.plot(epochs_range, history["train_loss"], label="Train Loss")
plt.plot(epochs_range, history["val_loss"], label="Validation Loss")
plt.title("ResNet18 Training vs Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.grid(alpha=0.3)

# ---- Accuracy Curve ----
plt.subplot(1, 2, 2)
plt.plot(epochs_range, history["train_acc"], label="Train Accuracy")
plt.plot(epochs_range, history["val_acc"], label="Validation Accuracy")
plt.title("ResNet18 Training vs Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.grid(alpha=0.3)

plt.tight_layout()
plt.show()

Training vs Validation Loss Comments:

- Both the training loss and validation loss deacrease steadily across epochs, whcih indicates that the model is learning effectively.
- The validation loss decreases smoothly, suggesting the fine-tuned ResNet 18 generalises well to unseen images.

Training vs Validation Accuracy Comments:

- Both training and validation accuracy increase consistently over the 5 epochs.
- By the fifth epoch, validation accuracy tsabilises at ~0.977, indicating excellent generalisation without signs of overfitting.-

In [None]:
import torch
import torch.nn as nn
from torchvision import models

# Recreate architecture
model = models.resnet18(weights="IMAGENET1K_V1")

# Freeze backbone 
for p in model.parameters():
    p.requires_grad = False

# Replace classifier head
model.fc = nn.Linear(model.fc.in_features, 43)

# Load saved fine-tuned weights
model.load_state_dict(torch.load("resnet18_best_finetuned.pth"))
model.eval()

print("Restored model ready for evaluation!")

In [None]:
# classification and confusion matrix 

from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load best model state (ensures evaluation uses best checkpoint)
model.load_state_dict(torch.load("resnet18_best_finetuned.pth"))
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in val_loader:
        outputs = model(images)
        preds = outputs.argmax(1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print("\nClassification Report:")
print(classification_report(all_labels, all_preds, zero_division=0))

cm = confusion_matrix(all_labels, all_preds)

plt.figure(figsize=(12, 10))
sns.heatmap(cm, annot=False, cmap="Blues")
plt.title("ResNet18 â€“ Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

This confusion matric is strongly diagonal, indicating that for almost all classes, the predicted label matches the true labels. This shows a very strong classificiation performance across the 43 traffic sign classes in the GTSRB dataset.

The overall accuracy of 0.98 from the classification report show that performance is strong even for minority classes, suggesting the downsampled training strategy and fine-tuning approach were effective.

Next we do few shot learning.

To prepare we first create a function sample_few_shot that allows us to subsample the training dataset so that each class contains only k examples.

We then make a training functions train_classifier only that freezes entire ResNet18 backbones and trains only the final classifier layer. The function will return validation accuracy for each few-shot scenario, allowing us to quantify how performance changes as the number of examples per class decreases.

In [None]:
import numpy as np

def sample_few_shot(df, k):
    """
    Return a dataframe where each class has exactly k samples.
    """
    few_shot_df = df.groupby("ClassId").apply(
        lambda x: x.sample(k, replace=False)
    ).reset_index(drop=True)
    return few_shot_df

In [None]:
def train_classifier_only(model, train_loader, val_loader, epochs=3):
    """
    Train only the final classifier layer for few-shot experiments.
    """

    # Freeze backbone
    for name, param in model.named_parameters():
        if "fc" not in name:   # only classifier head trains
            param.requires_grad = False
            
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=1e-3)

    for epoch in range(epochs):
        model.train()
        running_corrects = 0
        running_loss = 0

        for imgs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # validation accuracy
        model.eval()
        val_corrects = 0

        with torch.no_grad():
            for imgs, labels in val_loader:
                outputs = model(imgs)
                preds = outputs.argmax(1)
                val_corrects += (preds == labels).sum().item()

    val_acc = val_corrects / len(val_loader.dataset)
    return val_acc


In [None]:
shot_sizes = [1, 5, 10, 20, 40]
results = {}

from torch.utils.data import DataLoader

for k in shot_sizes:
    print(f"\nRunning Few-Shot Learning with {k} samples per class...")

    # sample dataset
    few_df = sample_few_shot(train_df, k)

    # create dataset + loader
    few_train_set = GTSRBDataset(few_df, root_dir=root, transform=train_transform)
    few_train_loader = DataLoader(few_train_set, batch_size=32, shuffle=True)

    #reuse full validation set
    few_val_loader = val_loader

    # reload a fresh model for each experiment
    model_fs = models.resnet18(weights="IMAGENET1K_V1")
    model_fs.fc = nn.Linear(model_fs.fc.in_features, 43)

    # train classifier only
    acc = train_classifier_only(model_fs, few_train_loader, few_val_loader)

    results[k] = acc
    print(f"Validation Accuracy with {k} shots: {acc:.4f}")

There are a few quick observations from these results:

- Accuracy increases as you add more examples per class.
- Performance is extremely low with 1 or 5 shots, the model cannot generalised well when the classifier head is trained on such tiny data.
- The growth is smooth, not flat (see curve below). This clarifies a key point that more data per class = better accuracy, thus confirms the value and necessity of finetuning with more samples.

In [None]:
import matplotlib.pyplot as plt

shots = list(results.keys())
accs = [results[k] for k in shots]

plt.figure(figsize=(8,5))
plt.plot(shots, accs, marker='o')
plt.title("Few-Shot Learning Curve (ResNet18 Classifier Only)")
plt.xlabel("Shots per Class")
plt.ylabel("Validation Accuracy")
plt.grid(True)
plt.show()