# Requirements

In [None]:
import os
import urllib.request
import zipfile
import torch
import torch.nn as nn
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, Subset, random_split
import torch.optim as optim

from sklearn.metrics import confusion_matrix, classification_report

# Constants

In [None]:
# Where the dataset archive is fetched from and where it is stored/unpacked locally.
DATA_URL = "https://proai-datasets.s3.eu-west-3.amazonaws.com/dataset_food_classification.zip"
ZIP_PATH = "dataset_food_classification.zip"
RAW_DATA_DIR = "data/raw"

# DataLoader settings.
BATCH_SIZE = 32
NUM_WORKERS = 2

# Seed for every torch-driven source of randomness in this notebook
# (shuffling, random augmentations, initialisation of the new classifier head).
SEED = 42

# Actually apply the seed: without this call SEED is never used and runs are
# not repeatable. The trailing semicolon hides the returned Generator repr.
torch.manual_seed(SEED);

# Dataset download & unzip

In [None]:
# Download the dataset archive once; later runs reuse the local copy.
if not os.path.exists(ZIP_PATH):
    print("Downloading dataset...")
    # Download into a temporary file and rename it only on success, so an
    # interrupted transfer never leaves a truncated archive at ZIP_PATH
    # (the existence check above would otherwise trust it on the next run).
    partial_path = ZIP_PATH + ".part"
    try:
        urllib.request.urlretrieve(DATA_URL, partial_path)
        os.replace(partial_path, ZIP_PATH)
    finally:
        # Only present here if the download or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Dataset downloaded.")
else:
    print("Dataset zip already exists, skipping download.")

Downloading dataset...
Dataset downloaded.


In [None]:
# Unpack the archive only when the target directory is not there yet.
if os.path.exists(RAW_DATA_DIR):
    print("Raw data directory already exists, skipping extraction.")
else:
    print("Extracting dataset...")
    with zipfile.ZipFile(ZIP_PATH, mode="r") as archive:
        # Every archive member is written below RAW_DATA_DIR.
        archive.extractall(path=RAW_DATA_DIR)
    print("Dataset extracted.")

Extracting dataset...
Dataset extracted.


**The extracted archive already contains separate `train`, `val` and `test` folders, so no manual dataset split is needed.**

# Transformations & augmentations

In [None]:
# Training-time pipeline: random augmentations followed by tensor conversion
# and normalisation. The steps run in the order listed.
train_transforms = transforms.Compose([
    # Random crop of the image, resized to 224x224.
    transforms.RandomResizedCrop(size=224),
    # Mirror the image horizontally half of the time.
    transforms.RandomHorizontalFlip(p=0.5),
    # Mild random changes of brightness, contrast, saturation and hue.
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    # PIL image -> float tensor with values scaled from [0, 255] to [0, 1].
    transforms.ToTensor(),
    # Channel-wise normalisation with the ImageNet statistics, so inputs match
    # the distribution the pretrained backbone was trained on.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# Deterministic pipeline shared by validation and test: no augmentation,
# only resizing, cropping, tensor conversion and normalisation.
val_test_transforms = transforms.Compose([
    # Rescale so that the shorter image side is 256 px.
    transforms.Resize(size=256),
    # Keep the central 224x224 region.
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    # Same ImageNet statistics as used for training.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create train, val & test from folders

In [None]:
# NOTE: this rebinds the RAW_DATA_DIR constant defined earlier ("data/raw") to
# the "dataset" sub-folder below it; the train/val/test paths are built from
# this value. Any cell re-run after this point sees the new value.
RAW_DATA_DIR = "data/raw/dataset"

In [None]:
# One directory per split, all located directly under RAW_DATA_DIR.
train_dir, val_dir, test_dir = (
    os.path.join(RAW_DATA_DIR, split_name)
    for split_name in ("train", "val", "test")
)

# Store transformations
The transforms are stored in each dataset and applied on the fly every time an image is loaded.

In [None]:
# Build one ImageFolder per split. Training images get the augmenting
# pipeline, validation and test images the deterministic one.
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
val_dataset   = datasets.ImageFolder(root=val_dir,   transform=val_test_transforms)
test_dataset  = datasets.ImageFolder(root=test_dir,  transform=val_test_transforms)

# ImageFolder assigns label indices independently for each root from its class
# sub-folders. If the splits disagree on those folders, the same index would
# mean different classes in different splits, so fail loudly instead.
if not (train_dataset.classes == val_dataset.classes == test_dataset.classes):
    raise ValueError(
        "train/val/test folders do not contain the same classes; "
        "label indices would not match across splits."
    )

print(f"Found {len(train_dataset)} training images in {len(train_dataset.classes)} classes")
print(f"Found {len(val_dataset)} validation images")
print(f"Found {len(test_dataset)} test images")

Found 8960 training images in 14 classes
Found 2240 validation images
Found 2800 test images


# DataLoader

In [None]:
def _build_loader(dataset, shuffle):
    """Create a DataLoader with the notebook-wide batch and worker settings.

    Args:
        dataset: dataset to iterate over.
        shuffle: whether the sample order is reshuffled at every epoch.

    Returns:
        A DataLoader using BATCH_SIZE and NUM_WORKERS.
    """
    return DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        num_workers=NUM_WORKERS,
        # Page-locked host memory only speeds up CPU->GPU copies; requesting it
        # without CUDA just triggers a warning, so enable it conditionally.
        pin_memory=torch.cuda.is_available(),
    )


# Training data is reshuffled every epoch; evaluation data keeps a fixed order.
train_loader = _build_loader(train_dataset, shuffle=True)
val_loader = _build_loader(val_dataset, shuffle=False)
test_loader = _build_loader(test_dataset, shuffle=False)

# Sanity check

In [None]:
# Fetch a single batch from the training loader and print the tensor shapes,
# as a quick check that loading and the transform pipeline work end to end.
if __name__ == "__main__":
    # The iterator is a temporary: it is not kept in any variable after this line.
    imgs, labels = next(iter(train_loader))
    print(f"Batch shape: images {imgs.shape}, labels {labels.shape}")

Batch shape: images torch.Size([32, 3, 224, 224]), labels torch.Size([32])


# Model preparation: ResNet-50

In [None]:
# Number of classes found in the training split (14 for this dataset);
# used as out_features of the new classifier layer.
num_classes = len(train_dataset.classes)

In [None]:
# Load a ResNet-50 pretrained on ImageNet.
# `pretrained=True` is deprecated in torchvision; the explicit weights enum
# below selects the same checkpoint (resnet50-0676ba61.pth).
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 204MB/s]


In [None]:
# Freeze every parameter of the network so that none of the pretrained
# weights is updated during the initial training phase.
# (Assigning the result keeps the cell from echoing the model repr.)
model = model.requires_grad_(False)

In [None]:
# Swap the final fully-connected layer for a fresh one with num_classes
# outputs; its input width is taken from the layer being replaced
# (2048 for ResNet-50). The new layer's parameters are trainable by default.
model.fc = nn.Linear(model.fc.in_features, num_classes)

In [None]:
# Select the device: CUDA if available, otherwise CPU, then move the model there.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model = model.to(device)

In [None]:
# Summarise the setup and list the parameters the optimizer will update.
print(f"Using device: {device}")
print(f"Model architecture: {type(model).__name__}")
print("Trainable parameters:")
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue  # frozen parameter, not reported
    print(f"  {name} : {param.shape}")

Using device: cuda
Model architecture: ResNet
Trainable parameters:
  fc.weight : torch.Size([14, 2048])
  fc.bias : torch.Size([14])


**For now we choose not to unfreeze layer4 of ResNet-50; let's first look at training time and performance.**

# Loss, optimizer and LR scheduler

In [None]:
# Multi-class classification loss.
criterion = nn.CrossEntropyLoss()

# Adam over the parameters that are still trainable (requires_grad=True),
# i.e. the new fc layer while the backbone is frozen.
optimizer = optim.Adam(
    [p for p in model.parameters() if p.requires_grad],
    lr=1e-4,            # starting learning rate
    weight_decay=1e-5,  # L2 penalty
)

# Multiply the learning rate by 0.1 once the monitored value (the validation
# loss passed to scheduler.step) has stopped decreasing for more than
# `patience` = 3 consecutive epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", factor=0.1, patience=3
)

print("Criterion, optimizer and scheduler ready.")

Criterion, optimizer and scheduler ready.


# Training

In [None]:
def train_one_epoch(model, dataloader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        model: network to optimise; switched to training mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable applied to ``(outputs, labels)``.
        optimizer: optimizer stepped once per batch.
        device: device the batches are moved to.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted average loss
        and the fraction of correctly classified samples.
    """
    model.train()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    for batch_inputs, batch_labels in dataloader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)

        # Clear old gradients, then forward, backward and parameter update.
        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by the batch size (assumes the criterion
        # returns a per-batch mean) so the final division gives a per-sample
        # average even when the last batch is smaller.
        loss_sum += batch_loss.item() * batch_inputs.size(0)
        predicted = logits.max(dim=1).indices
        n_correct += (predicted == batch_labels).sum().item()
        n_seen += batch_labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen

In [None]:
def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: network to evaluate; switched to evaluation mode.
        dataloader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable applied to ``(outputs, labels)``.
        device: device the batches are moved to.

    Returns:
        Tuple ``(val_loss, val_acc)``: the sample-weighted average loss and
        the fraction of correctly classified samples.
    """
    model.eval()
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_inputs, batch_labels in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_inputs)
            batch_loss = criterion(logits, batch_labels)

            # Weight by batch size (assumes a per-batch mean loss) so the
            # result is a per-sample average.
            loss_sum += batch_loss.item() * batch_inputs.size(0)
            predicted = logits.max(dim=1).indices
            n_correct += (predicted == batch_labels).sum().item()
            n_seen += batch_labels.size(0)

    return loss_sum / n_seen, n_correct / n_seen

In [None]:
%%time

# Train the classifier head, keeping the checkpoint with the highest
# validation accuracy seen so far.
num_epochs = 10
best_val_acc = 0.0

for epoch in range(1, num_epochs + 1):
    # One pass over the training set, then one over the validation set.
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc = validate(model, val_loader, criterion, device)

    # ReduceLROnPlateau is driven by the validation loss.
    scheduler.step(val_loss)

    # Report the learning rate in effect after the scheduler update.
    current_lr = scheduler.get_last_lr()[0]
    print(
        f"Epoch {epoch}/{num_epochs} | "
        f"lr={current_lr:.2e} | "
        f"Train loss {train_loss:.4f}, acc {train_acc:.4f} | "
        f"Val loss {val_loss:.4f}, acc {val_acc:.4f}"
    )

    # Nothing to save unless validation accuracy strictly improved.
    if val_acc <= best_val_acc:
        continue
    best_val_acc = val_acc
    torch.save(model.state_dict(), "best_model.pth")
    print(f"  Saved new best model (val_acc={best_val_acc:.4f})")

Epoch 1/10 | lr=1.00e-04 | Train loss 2.2233, acc 0.3895 | Val loss 1.7252, acc 0.6848
  Saved new best model (val_acc=0.6848)
Epoch 2/10 | lr=1.00e-04 | Train loss 1.6941, acc 0.5984 | Val loss 1.3159, acc 0.7281
  Saved new best model (val_acc=0.7281)
Epoch 3/10 | lr=1.00e-04 | Train loss 1.4294, acc 0.6459 | Val loss 1.0982, acc 0.7598
  Saved new best model (val_acc=0.7598)
Epoch 4/10 | lr=1.00e-04 | Train loss 1.2844, acc 0.6627 | Val loss 0.9815, acc 0.7549
Epoch 5/10 | lr=1.00e-04 | Train loss 1.1998, acc 0.6731 | Val loss 0.9103, acc 0.7665
  Saved new best model (val_acc=0.7665)
Epoch 6/10 | lr=1.00e-04 | Train loss 1.1183, acc 0.6874 | Val loss 0.8418, acc 0.7741
  Saved new best model (val_acc=0.7741)
Epoch 7/10 | lr=1.00e-04 | Train loss 1.0831, acc 0.6882 | Val loss 0.8117, acc 0.7750
  Saved new best model (val_acc=0.7750)
Epoch 8/10 | lr=1.00e-04 | Train loss 1.0510, acc 0.6944 | Val loss 0.7793, acc 0.7799
  Saved new best model (val_acc=0.7799)
Epoch 9/10 | lr=1.00e-04

**It's a good baseline, considering that we have trained only fc, so we can go further.**

# Classifier modifications
**The model shows underfitting**, so Dropout isn't the best choice to improve performance.
We didn’t employ k-fold cross-validation due to its computationally expensive nature. Let's try to unfreeze layer4.

## Unfreezing + load best checkpoint

In [None]:
# Make the last residual stage (layer4) trainable again; the rest of the
# backbone stays frozen and the fc head is already trainable.
model.layer4.requires_grad_(True)

# Restore the best weights saved so far and continue fine-tuning from them.
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs and is the safe way to load a checkpoint.
checkpoint = torch.load("best_model.pth", map_location=device, weights_only=True)
model.load_state_dict(checkpoint)

# Re-measure validation accuracy of the restored weights so the fine-tuning
# loop only overwrites the checkpoint on a real improvement.
_, best_val_acc = validate(model, val_loader, criterion, device)
print(f"Starting from best_val_acc = {best_val_acc:.4f}")

Starting from best_val_acc = 0.7884


## Optimizer, scheduler recreation

In [None]:
# Recreate optimizer and scheduler so that they cover the parameters that are
# trainable now (the fc head plus the newly unfrozen layer4).
# `optim` comes from the import cell at the top of the notebook.

# Materialise the selection as a list: a bare filter() object would be
# consumed by the optimizer and leave an exhausted iterator behind under
# this name.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(
    trainable_params,
    lr=1e-5,            # lower LR for fine-tuning
    weight_decay=1e-5
)
# Same plateau policy as before: LR x0.1 once the validation loss has not
# improved for more than 3 consecutive epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=3
)

print("Optimizer and scheduler recreated.")

Optimizer and scheduler recreated.


## Fine-tuning loop with conditional saving

In [None]:
# Fine-tuning loop starting from existing best_val_acc
%%time
num_epochs = 10  # o più se serve
for epoch in range(1, num_epochs + 1):
    # 1) Training step
    train_loss, train_acc = train_one_epoch(
        model, train_loader, criterion, optimizer, device
    )
    # 2) Validation step
    val_loss, val_acc = validate(
        model, val_loader, criterion, device
    )

    # 3) Scheduler step
    scheduler.step(val_loss)

    # 4) Log metrics
    current_lr = scheduler.get_last_lr()[0]
    print(
        f"Epoch {epoch}/{num_epochs} | "
        f"lr={current_lr:.2e} | "
        f"Train loss {train_loss:.4f}, acc {train_acc:.4f} | "
        f"Val  loss {val_loss:.4f}, acc {val_acc:.4f}"
    )

    # 5) Save only if genuinely improved over the previous best
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pth")
        print(f"  ▶ New best model saved (val_acc={best_val_acc:.4f})")
    else:
        print(f"  (no improvement over best_val_acc={best_val_acc:.4f})")

Epoch 1/10 | lr=1.00e-05 | Train loss 0.8751, acc 0.7323 | Val  loss 0.5669, acc 0.8272
  ▶ New best model saved (val_acc=0.8272)
Epoch 2/10 | lr=1.00e-05 | Train loss 0.7407, acc 0.7631 | Val  loss 0.5107, acc 0.8397
  ▶ New best model saved (val_acc=0.8397)
Epoch 3/10 | lr=1.00e-05 | Train loss 0.6694, acc 0.7882 | Val  loss 0.4701, acc 0.8509
  ▶ New best model saved (val_acc=0.8509)
Epoch 4/10 | lr=1.00e-05 | Train loss 0.6316, acc 0.7905 | Val  loss 0.4483, acc 0.8509
  (no improvement over best_val_acc=0.8509)
Epoch 5/10 | lr=1.00e-05 | Train loss 0.5872, acc 0.8087 | Val  loss 0.4324, acc 0.8603
  ▶ New best model saved (val_acc=0.8603)
Epoch 6/10 | lr=1.00e-05 | Train loss 0.5463, acc 0.8260 | Val  loss 0.4194, acc 0.8634
  ▶ New best model saved (val_acc=0.8634)
Epoch 7/10 | lr=1.00e-05 | Train loss 0.5124, acc 0.8357 | Val  loss 0.3977, acc 0.8705
  ▶ New best model saved (val_acc=0.8705)
Epoch 8/10 | lr=1.00e-05 | Train loss 0.4932, acc 0.8387 | Val  loss 0.3932, acc 0.8728


**Validation accuracy rose dramatically from ~0.7884 (before) to ~0.8763 (after), confirming that unfreezing high-level features yields a significant boost.**

# Final evaluation

In [None]:
# 1) Re-instantiate the architecture without pretrained weights and rebuild
#    the head (`weights=None` replaces the deprecated `pretrained=False`).
eval_model = models.resnet50(weights=None)
in_features = eval_model.fc.in_features
eval_model.fc = nn.Linear(in_features, num_classes)

# 2) Load the best checkpoint. weights_only=True restricts unpickling to
#    tensors and plain containers, which is all a state_dict needs.
checkpoint = torch.load("best_model.pth", map_location=device, weights_only=True)
eval_model.load_state_dict(checkpoint)

# 3) Move to device and switch to eval mode
eval_model = eval_model.to(device)
eval_model.eval()

# 4) Compute test loss & accuracy using the existing validate()
test_loss, test_acc = validate(eval_model, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

# 5) Collect per-sample predictions for the confusion matrix and the report
y_true, y_pred = [], []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = eval_model(inputs)
        preds = outputs.argmax(dim=1)
        y_true.extend(labels.cpu().tolist())
        y_pred.extend(preds.cpu().tolist())

# confusion_matrix and classification_report come from the import cell at the
# top of the notebook. Rows of the matrix are true classes, columns predicted.
cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:\n", cm)

report = classification_report(
    y_true, y_pred,
    target_names=test_dataset.classes,
    digits=4,
    zero_division=0
)
print("\nClassification Report:\n", report)



Test Loss: 0.3723, Test Accuracy: 0.8850
Confusion Matrix:
 [[186   3   1   1   1   1   5   2   0   0   0   0   0   0]
 [  5 176   1   1   0   5   6   3   0   0   3   0   0   0]
 [  2   2 193   0   0   0   1   2   0   0   0   0   0   0]
 [  2   1   1 185   4   3   3   1   0   0   0   0   0   0]
 [  0   0   1   0 184   3  10   1   0   0   0   1   0   0]
 [  2   0   5   1   4 171   8   6   1   2   0   0   0   0]
 [  4   8   5   3  12   4 150  13   1   0   0   0   0   0]
 [  3   0   1   3   0   3  12 177   0   0   0   0   0   1]
 [  0   0   0   0   0   0   0   0 164  13   0  11   9   3]
 [  0   0   0   0   0   0   0   1  13 174   3   8   0   1]
 [  0   0   0   0   0   0   0   0   5   1 178   3  12   1]
 [  0   0   0   0   0   0   0   0   4   7   1 183   1   4]
 [  0   1   0   0   0   1   0   0  10   3   9   2 172   2]
 [  0   0   0   0   0   0   0   0   3   3   0   7   2 185]]

Classification Report:
                 precision    recall  f1-score   support

  Baked Potato     0.9118    0.

**Strengths**:
- Excellent performance (F1 > 0.93) on Donut, Fries, Sushi  
- Solid precision/recall (>0.90) on Baked Potato, Cheesecake, Chicken Curry  

**Weaknesses**:
- Taco (F1 ≃ 0.76) and Apple Pie (F1 ≃ 0.82) remain challenging  
- Sandwich (F1 ≃ 0.87) and Hot Dog (F1 ≃ 0.91) show some confusion  

**Confusion matrix insights**:
- A handful of Taco → Taquito and Taquito → Taco swaps  
- Some Apple Pie misclassified as Ice Cream or Cheesecake  

**Possible improvements**:
- Focused data augmentation for Taco/Apple Pie