<a href="https://colab.research.google.com/github/lucianoselimaj/MLDL_Labs/blob/main/LAB4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LAB 4: Transfer Learning and Visualizations


> **Note:** There may be more than one solution to each of the exercises, so don't worry too much about the *exact* right answer. Try to write some code that works first and then improve it if you can.



## Step 1: Install missing dependencies and import libraries

In [None]:
# Install torchinfo (for model summary), torchmetrics (for metrics), and wandb (for logging)
!pip install -q torchinfo torchmetrics wandb

In [None]:
# Import PyTorch core and neural network modules
import torch
import torch.nn as nn
import torch.optim as optim

# Import torchvision for pre-trained models and image utilities
import torchvision
from torchvision import transforms, datasets

# Additional utilities
import numpy as np
import matplotlib.pyplot as plt
import os
import wandb  # Weights & Biases: for tracking experiments

# Import for data loading and model summary
from torch.utils.data import DataLoader
from torchinfo import summary

# For timing training
from timeit import default_timer as timer

# For accuracy and confusion matrix metrics
from torchmetrics.functional import accuracy
from torchmetrics.functional.classification import multiclass_confusion_matrix

In [None]:
# Pick the compute device: "cuda" when PyTorch can see a GPU, "cpu" otherwise
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

## Step 2: Plot a confusion matrix of the predictions on the test set

### Get data

In [None]:
# Download the pizza_steak_sushi dataset (from GitHub)
!wget https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi.zip
# Create data directory (if it doesn't already exist)
!mkdir -p data
# Unzip the dataset into the data/ folder (only unzip if not done already)
!unzip -n pizza_steak_sushi.zip -d data/pizza_steak_sushi

### Prepare data

In [None]:
# Preprocessing applied to every image before it reaches the model
channel_mean = [0.485, 0.456, 0.406]  # per colour channel mean used for normalization
channel_std = [0.229, 0.224, 0.225]   # per colour channel standard deviation used for normalization

preprocessing_steps = [
    transforms.Resize((224, 224)),  # resize every image to 224x224 (other models may need other sizes)
    transforms.ToTensor(),          # convert to a tensor with values between 0 & 1
    transforms.Normalize(mean=channel_mean, std=channel_std),  # normalize each colour channel
]
simple_transform = transforms.Compose(preprocessing_steps)

In [None]:
# -------------------------------
# LOAD DATASETS
# -------------------------------
# Root folder of the extracted dataset; it holds a train/ and a test/ split
data_path = "data/pizza_steak_sushi"

# Build one ImageFolder per split (ImageFolder expects one subfolder per class),
# applying the same preprocessing pipeline to both
train_dset, test_dset = (
    datasets.ImageFolder(f"{data_path}/{split}", transform=simple_transform)
    for split in ("train", "test")
)

# Class names as discovered by ImageFolder (pizza, steak, sushi)
class_names = train_dset.classes

# -------------------------------
# CREATE DATA LOADERS
# -------------------------------
# Settings shared by both loaders; only the training loader shuffles
loader_kwargs = dict(batch_size=32, num_workers=2)
train_dataloader = DataLoader(train_dset, shuffle=True, **loader_kwargs)
test_dataloader = DataLoader(test_dset, shuffle=False, **loader_kwargs)

### Get and prepare a pretrained model

In [None]:
# Setup the model with pretrained weights and send it to the target device.
# The `weights=` argument replaces the deprecated `pretrained=True` flag;
# `DEFAULT` selects the recommended pretrained weights for EfficientNet-B0.
model_0 = torchvision.models.efficientnet_b0(
    weights=torchvision.models.EfficientNet_B0_Weights.DEFAULT
).to(device)
#model_0 # uncomment to output (it's very long)

In [None]:
# Freeze the feature extractor: Module.requires_grad_(False) switches off gradient
# tracking for every parameter under model_0.features in a single call
model_0.features.requires_grad_(False)

In [None]:
# Seed the CPU and CUDA random generators before the new layer is created
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# One output unit per class
output_shape = len(class_names)

# Recreate the classifier layer and send it to the target device
classifier_layers = [
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=output_shape, bias=True),
]
model_0.classifier = nn.Sequential(*classifier_layers).to(device)

### Train model

In [None]:
# Cross-entropy loss on the model outputs, optimized with Adam at a learning rate of 1e-3
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model_0.parameters(), lr=1e-3)

In [None]:
# -------------------------------
# INIT WANDB
# -------------------------------
# Start a Weights & Biases run to track metrics.
# The run is created under the "transfer-learning-pizza-steak-sushi" project;
# no run name is passed here, so naming is left to wandb.
wandb.init(project="transfer-learning-pizza-steak-sushi")

# -------------------------------
# TRAINING FUNCTION
# -------------------------------
def train(model, optimizer, dataloader, loss_fn):
    """Run one training pass over `dataloader` and update `model` in place.

    Batches are moved to the module-level `device` before the forward pass.

    Args:
        model: Network to train; called as `model(X)` and expected to return
            per-class scores with the class dimension at index 1.
        optimizer: Optimizer that updates the model's parameters.
        dataloader: Iterable of `(X, y)` batches.
        loss_fn: Callable computing the loss from `(y_pred, y)`.

    Returns:
        Tuple `(mean_loss, mean_accuracy)` of Python floats, each averaged
        over the number of batches.
    """
    model.train()  # Set model to training mode
    train_loss, train_acc = 0.0, 0.0  # Initialize accumulators

    for X, y in dataloader:
        X, y = X.to(device), y.to(device)  # Move data to GPU/CPU
        y_pred = model(X)                  # Forward pass
        loss = loss_fn(y_pred, y)          # Calculate loss

        optimizer.zero_grad()              # Reset gradients
        loss.backward()                    # Backpropagation
        optimizer.step()                   # Update weights

        train_loss += loss.item()          # Add loss
        # Take the class count from the model output instead of hard-coding 3,
        # and accumulate a plain float so the result is not a (CUDA) tensor
        train_acc += accuracy(y_pred.softmax(dim=1), y, task='multiclass',
                              num_classes=y_pred.shape[1]).item()

    return train_loss / len(dataloader), train_acc / len(dataloader)

# -------------------------------
# TESTING FUNCTION
# -------------------------------
def test(model, dataloader, loss_fn):
    """Evaluate `model` on `dataloader` without updating any weights.

    Batches are moved to the module-level `device` before the forward pass,
    and gradient tracking is disabled for the whole loop.

    Args:
        model: Network to evaluate; called as `model(X)` and expected to
            return per-class scores with the class dimension at index 1.
        dataloader: Iterable of `(X, y)` batches.
        loss_fn: Callable computing the loss from `(y_pred, y)`.

    Returns:
        Tuple `(mean_loss, mean_accuracy)` of Python floats, each averaged
        over the number of batches.
    """
    model.eval()  # Set model to eval mode
    test_loss, test_acc = 0.0, 0.0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            y_pred = model(X)
            loss = loss_fn(y_pred, y)

            test_loss += loss.item()
            # Take the class count from the model output instead of hard-coding 3,
            # and accumulate a plain float so the result is not a (CUDA) tensor
            test_acc += accuracy(y_pred.softmax(dim=1), y, task='multiclass',
                                 num_classes=y_pred.shape[1]).item()

    return test_loss / len(dataloader), test_acc / len(dataloader)


In [None]:
# Set the random seeds
torch.manual_seed(42)
torch.cuda.manual_seed(42)

# Start the timer
from timeit import default_timer as timer
start_time = timer()

# Train for a handful of epochs (the same count used for the 20% experiment below);
# only the small classifier head is trainable, so this is enough to fit it
num_epochs = 5
for epoch in range(num_epochs):
    # Both helpers return (loss, accuracy); unpack them so the accuracy
    # is reported on its own instead of printing the whole tuple
    train_loss, train_acc = train(model_0, optimizer, train_dataloader, loss_fn)
    test_loss, test_acc = test(model_0, test_dataloader, loss_fn)

    # float() accepts plain numbers as well as single-element tensors
    train_acc, test_acc = float(train_acc), float(test_acc)

    # Send this epoch's metrics to the active Weights & Biases run
    wandb.log({
        "train_loss": train_loss,
        "train_acc": train_acc,
        "test_loss": test_loss,
        "test_acc": test_acc
    })

    print(f"Test accuracy: {test_acc:.4f}")  # You should get values around 90% accuracy on the test set


# End the timer and print out how long it took
end_time = timer()
print(f"[INFO] Total training time: {end_time-start_time:.3f} seconds")

### Make a confusion matrix with the test preds and the truth labels

HINT: Look at the torchmetrics.functional.classification.multiclass_confusion_matrix from the torchmetrics library

In [None]:
# Evaluate predictions for all test data
all_preds = []
all_labels = []
model_0.eval()  # Inference only: put the model in eval mode

with torch.no_grad():
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        preds = model_0(X).argmax(dim=1)  # Convert logits to predicted class
        # Keep results on the CPU so they can be concatenated afterwards
        all_preds.append(preds.cpu())
        all_labels.append(y.cpu())

# Combine all batches into single tensors
all_preds = torch.cat(all_preds)
all_labels = torch.cat(all_labels)

# Compute the confusion matrix; the class count comes from class_names
# rather than a hard-coded 3, so it follows the dataset
cm = multiclass_confusion_matrix(preds=all_preds, target=all_labels, num_classes=len(class_names))
print("Confusion Matrix:", cm)

## Step 3: Get the "most wrong" of the predictions on the test dataset and plot the 5 "most wrong" images. You can do this by:
* Predicting across all of the test dataset, storing the labels and predicted probabilities.
* Sort the predictions by *wrong prediction* and then by *descending predicted probabilities*. This will give you the wrong predictions with the *highest* prediction probabilities, in other words, the "most wrong".
* Plot the top 5 "most wrong" images, why do you think the model got these wrong?

You'll want to:
* Create a DataFrame with sample, label, prediction, pred prob
* Sort DataFrame by correct (does label == prediction)
* Sort DataFrame by pred prob (descending)
* Plot the top 5 "most wrong" image predictions

In [None]:
import pandas as pd

# One record per test image, collected batch by batch
wrong_preds = []

# Inference only: eval mode, and no gradient tracking inside the loop
model_0.eval()

with torch.no_grad():
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)

        # Raw model outputs (logits) turned into per-class probabilities
        y_logits = model_0(X)
        y_probs = y_logits.softmax(dim=1)

        # Winning class index and the probability assigned to it
        y_pred_labels = y_probs.argmax(dim=1)
        y_pred_probs = y_probs.max(dim=1).values

        # Move everything to the CPU and store one dictionary per image
        batch_rows = zip(X.cpu(), y.cpu(), y_pred_labels.cpu(), y_pred_probs.cpu())
        wrong_preds.extend(
            {
                "image": img,
                "label": true.item(),
                "pred": pred.item(),
                "prob": prob.item(),
                "correct": int(true == pred),
            }
            for img, true, pred, prob in batch_rows
        )

# Tabulate every prediction
wrong_df = pd.DataFrame(wrong_preds)

# Keep the misclassified rows only
is_wrong = wrong_df["correct"] == 0
wrong_only = wrong_df.loc[is_wrong]

# The "most wrong" rows are the misclassifications made with the highest confidence
by_confidence = wrong_only.sort_values(by="prob", ascending=False)
most_wrong = by_confidence.head(5)

In [None]:
# Plot the 5 most wrong predictions
# Per-channel statistics passed to transforms.Normalize, needed here to undo it for display
norm_std = np.array([0.229, 0.224, 0.225])
norm_mean = np.array([0.485, 0.456, 0.406])

plt.figure(figsize=(12, 8))
for position, record in enumerate(most_wrong.itertuples(), start=1):
    # Tensor laid out as [C, H, W] -> array laid out as [H, W, C]
    pixels = record.image.permute(1, 2, 0).numpy()
    # Undo the normalization, then clamp into the displayable 0..1 range
    pixels = np.clip(pixels * norm_std + norm_mean, 0, 1)

    plt.subplot(1, 5, position)
    plt.imshow(pixels)
    plt.axis('off')
    true_name = class_names[record.label]
    pred_name = class_names[record.pred]
    plt.title(f"Label: {true_name}\nPred: {pred_name}\nProb: {record.prob:.2f}")
plt.suptitle("Top 5 Most Wrong Predictions", fontsize=16)
plt.tight_layout()
plt.show()


## Step 4: Train the model from section 4 above with more data, say 20% of the images from Food101 of Pizza, Steak and Sushi images.
* You can find the [20% Pizza, Steak, Sushi dataset](https://github.com/mrdbourke/pytorch-deep-learning/blob/main/data/pizza_steak_sushi_20_percent.zip) on the course GitHub. It was created with the notebook [`extras/04_custom_data_creation.ipynb`](https://github.com/mrdbourke/pytorch-deep-learning/blob/main/extras/04_custom_data_creation.ipynb).


### Get 20% data

In [None]:
#Download the 20% subset of pizza, steak, sushi from GitHub
!wget https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip
#Create a data directory (if it doesn't already exist)
!mkdir -p data
#Unzip the dataset into the data/ directory
!unzip -n pizza_steak_sushi_20_percent.zip -d data/pizza_steak_sushi_20_percent

### Create DataLoaders

In [None]:
# Preprocessing applied to every image before it reaches the model
channel_mean = [0.485, 0.456, 0.406]  # per colour channel mean used for normalization
channel_std = [0.229, 0.224, 0.225]   # per colour channel standard deviation used for normalization

preprocessing_steps = [
    transforms.Resize((224, 224)),  # resize every image to 224x224 (other models may need other sizes)
    transforms.ToTensor(),          # convert to a tensor with values between 0 & 1
    transforms.Normalize(mean=channel_mean, std=channel_std),  # normalize each colour channel
]
simple_transform = transforms.Compose(preprocessing_steps)

In [None]:
# Create training and testing DataLoaders as well as get a list of class names

# Point at the folder the 20% archive was extracted into, not the smaller dataset
data_path = "data/pizza_steak_sushi_20_percent"
# ImageFolder is reached through the already-imported `datasets` module
train_dset_20p = datasets.ImageFolder(f"{data_path}/train", transform=simple_transform)
test_dset_20p = datasets.ImageFolder(f"{data_path}/test", transform=simple_transform)
# Take the class names from the dataset so their order matches the label indices
# (os.listdir returns entries in arbitrary order)
class_names = train_dset_20p.classes  # 'pizza', 'steak', 'sushi'
print(f"Class names: {class_names}")


# Only the training loader shuffles its samples
train_dataloader_20p = DataLoader(train_dset_20p, batch_size=32, num_workers=2, shuffle=True)
test_dataloader_20p = DataLoader(test_dset_20p, batch_size=32, num_workers=2)

# train_dataloader, test_dataloader, class_names

### Get a pretrained model

In [None]:
import torchvision.models as models
import torch.nn as nn
import torch

# Load EfficientNetB0 pretrained on ImageNet.
# The `weights=` argument replaces the deprecated `pretrained=True` flag;
# `DEFAULT` selects the recommended pretrained weights for EfficientNet-B0.
model_20p = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT).to(device)

# Freeze all feature extraction layers so only the new classifier is trained
for param in model_20p.features.parameters():
    param.requires_grad = False

# Replace the classifier layer so it has one output per class
model_20p.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=1280, out_features=len(class_names))  # 3 output classes
).to(device)

### Train a model with 20% of the data

In [None]:
import wandb

# Define loss function
loss_fn = nn.CrossEntropyLoss()
# Define optimizer (trainable parameters only): parameters with requires_grad=False
# are filtered out so the optimizer matches what this comment promises
trainable_params = [p for p in model_20p.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=1e-3)
# Initialize wandb run
wandb.init(project="transfer-learning-pizza-steak-sushi", name="efficientnet-20percent")



# ✅ Training function
def train(model, dataloader, loss_fn, optimizer):
    """Run one training pass over `dataloader` and update `model` in place.

    Args:
        model: Network to train; `model(X)` must return per-class scores
            with the class dimension at index 1.
        dataloader: Iterable of `(X, y)` batches.
        loss_fn: Callable computing the loss from `(preds, y)`. The per-sample
            average assumes it returns the mean loss of the batch (the default
            reduction of `nn.CrossEntropyLoss`).
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        Tuple `(mean_loss, accuracy)` of Python floats, both averaged over the
        number of samples seen. `(0.0, 0.0)` when the dataloader yields nothing.
    """
    model.train()

    # Follow the device the model lives on instead of relying on a global;
    # a model without parameters falls back to the CPU
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss, total_correct, total_seen = 0.0, 0, 0
    for X, y in dataloader:
        X, y = X.to(target_device), y.to(target_device)
        preds = model(X)
        loss = loss_fn(preds, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = y.size(0)
        # Weight the batch mean by its size so a smaller final batch
        # does not count as much as a full one
        total_loss += loss.item() * batch_size
        total_correct += (preds.argmax(dim=1) == y).sum().item()
        total_seen += batch_size

    if total_seen == 0:
        return 0.0, 0.0
    return total_loss / total_seen, total_correct / total_seen

# ✅ Testing function
def test(model, dataloader, loss_fn):
    """Evaluate `model` on `dataloader` without updating any weights.

    Args:
        model: Network to evaluate; `model(X)` must return per-class scores
            with the class dimension at index 1.
        dataloader: Iterable of `(X, y)` batches.
        loss_fn: Callable computing the loss from `(preds, y)`. The per-sample
            average assumes it returns the mean loss of the batch (the default
            reduction of `nn.CrossEntropyLoss`).

    Returns:
        Tuple `(mean_loss, accuracy)` of Python floats, both averaged over the
        number of samples seen. `(0.0, 0.0)` when the dataloader yields nothing.
    """
    model.eval()

    # Follow the device the model lives on instead of relying on a global;
    # a model without parameters falls back to the CPU
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    total_loss, total_correct, total_seen = 0.0, 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(target_device), y.to(target_device)
            preds = model(X)
            loss = loss_fn(preds, y)

            batch_size = y.size(0)
            # Weight the batch mean by its size so a smaller final batch
            # does not count as much as a full one
            total_loss += loss.item() * batch_size
            total_correct += (preds.argmax(dim=1) == y).sum().item()
            total_seen += batch_size

    if total_seen == 0:
        return 0.0, 0.0
    return total_loss / total_seen, total_correct / total_seen


# Train for 5 epochs, evaluating on the test split after every epoch
epochs = 5
for epoch in range(epochs):
    train_loss, train_acc = train(model_20p, train_dataloader_20p, loss_fn, optimizer)
    test_loss, test_acc = test(model_20p, test_dataloader_20p, loss_fn)

    # Gather this epoch's metrics under the "_20p" keys and send them to wandb
    epoch_metrics = dict(
        train_loss_20p=train_loss,
        train_acc_20p=train_acc,
        test_loss_20p=test_loss,
        test_acc_20p=test_acc,
    )
    wandb.log(epoch_metrics)

    # One progress line per epoch (epoch numbers shown 1-based)
    print(f"Epoch {epoch+1}/{epochs} | Train Acc: {train_acc:.4f} | Test Acc: {test_acc:.4f}")

## Step 5: Try a different model from [`torchvision.models`](https://pytorch.org/vision/stable/models.html) on the Pizza, Steak, Sushi data, how does this model perform?
* You'll have to change the size of the classifier layer to suit our problem.
* You may want to try an EfficientNet with a higher number than our B0, perhaps `torchvision.models.efficientnet_b2()`?
  * **Note:** Depending on the model you use you will have to prepare/transform the data in a certain way.