# Fitting EfficientNet-B0 Model

In [37]:
from fontTools.ufoLib.glifLib import layerInfoVersion3ValueData
from pandas.io.sas.sas_constants import dataset_length

!python -m pip install pandas numpy matplotlib seaborn tqdm



The below specifies the packages we need to download

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import os
from PIL import Image, ImageEnhance

This chunk extracts the data from the zip file, and saves it into a "raw data" file on my laptop.

In [38]:
os.chdir("C:/Projects/DST_Project2")
print("Current directory:", os.getcwd())

Current directory: C:\Projects\DST_Project2


In [11]:
import zipfile

# 1. Change working directory
os.chdir("C:/Projects/DST_Project2")
print("Current directory:", os.getcwd())

# 2. Path to ZIP file
zip_path = "C:/Users/Nevaf/PycharmProjects/DataScienceToolbox-Project2/Neva/traffic_raw.zip"  # <-- full path including .zip

# 3. Extract ZIP
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall("C:/Neva/data")  # Extract to folder
print("ZIP extracted successfully!")

Current directory: C:\Users\Nevaf\PycharmProjects\DataScienceToolbox-Project2\Neva
ZIP extracted successfully!


# EfficientNet-B0 Model Fitting

STEP 1: INSTALL DEPENDENCIES

In [None]:
""""EfficientNet-B0 model fitting and fine tuning.

Here we implement transfer learning using EfficientNet-B0 for classifying traffic signs from the German Traffic Sign Recognition Benchmark (GTSRB)

The following script shows:
- Dataset preparation
- Random 50% class-balanced downsampling
- Data augmentation and preprocessing
- Transfer learning with pretrained model EfficientNet-B0
- Fine-tuning of final layers
- Visualisation of model accuracy

The code is annotated with markdown and NumPy docstrings
"""

In [39]:
import os
import copy 
import numpy as np
from typing import Tuple, Dict, List
from PIL import Image
from pathlib import Path

import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset , random_split
from torchvision import transforms
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm #installs a progress bar for epochs


We've installed and imported all necessary libraries and functions.

In [40]:
class Config:
    """Configuration parameters for model training."""

    # Device configuration
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    IMG_SIZE = 224 # necessary for EfficientNet
    BATCH_SIZE = 32
    NUM_CLASSES = 43
    EPOCHS = 10
    FINE_TUNE_EPOCHS = 5
    LEARNING_RATE = 1e-3
    FINE_TUNE_LR = 1e-4

    SEED = 42
    TRAIN_SPLIT = 0.8
    PATIENCE = 5
    torch.manual_seed(SEED)

    #Paths
    DATA_DIR = Path("data/raw/Train")
    MODEL_SAVE_PATH = Path("models/efficientnet_best.pth")

Prepare the dataset after EDA. We select 50% of the images per class to fit a model to, and we create a custom dataset class.

In [41]:
print(os.getcwd())  # confirms current working directory


C:\Projects\DST_Project2


In [42]:
class TrafficSignDataset(Dataset):
    """
    Custom dataset for our model.

    Parameters:
    ----------
    filepaths : List[str]
    List of paths to image files.
    transform : torchvision.transforms.Compose
    Image transformations applied to the images.

    Attributes:
    ------------
    filepaths : List[str]
    List of paths to image files.
    transform : torchvision.transforms.Compose
    Image transformations applied to the images.
    """

    def __init__(self, filepaths: List[str], transform=None):
        self.filepaths = filepaths
        self.transform = transform

    def __len__(self) -> int:
        return len(self.filepaths)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int] :
        """
    Take a single sample from the dataset.

    Parameters:
    ------------
    idx : int
    Index of the sample.

    Returns:
    ----------
    Tuple[torch.Tensor, int]
    The image tensor and its corresponding class index.
    """
        path = self.filepaths[idx]
        img = Image.open(path).convert("RGB")
        # Extract label from the directory structure
        label = int(os.path.basename(os.path.dirname(path)))
        
        if self.transform:
            img = self.transform(img)
        else:
            # Use default transformation
            img = transforms.ToTensor()(img)

        return img, label


Now we apply transformations/ perform data augmentation. This dataset is already quite varied but

In [43]:
def prepare_data(config: Config) -> Tuple[DataLoader, DataLoader]:
    """
    Prepare the train and validation data loaders.

    Parameters
    ----------
    config : Config
        Configuration object with hyperparameters

    Returns
    -------
    Tuple[DataLoader, DataLoader]
        Training and validation data loaders
    """
    # Set random seed for reproducibility
    torch.manual_seed(config.SEED)
    np.random.seed(config.SEED)

    # Collect all image paths
    all_filepaths = []
    classes = sorted([d for d in os.listdir(config.DATA_DIR)
                     if os.path.isdir(config.DATA_DIR / d)])

    for class_id in classes:
        class_dir = config.DATA_DIR / class_id
        files = [str(class_dir / f) for f in os.listdir(class_dir)
                if f.lower().endswith(('.png', '.jpg', '.jpeg', '.ppm'))]

        # Sample 50% of images per class
        sample_size = len(files) // 2
        sampled_files = np.random.choice(files, sample_size, replace=False)
        all_filepaths.extend(sampled_files)

    print(f"Total images after sampling: {len(all_filepaths)}")

    # Define transforms
    train_transforms = transforms.Compose([
        transforms.Resize((config.IMG_SIZE, config.IMG_SIZE)),
        transforms.RandomRotation(10),
        transforms.RandomAffine(0, translate=(0.1, 0.1)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])  # ImageNet stats
    ])

    val_transforms = transforms.Compose([
        transforms.Resize((config.IMG_SIZE, config.IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])

    # Create dataset and split
    full_dataset = TrafficSignDataset(all_filepaths)

    train_size = int(config.TRAIN_SPLIT * len(full_dataset))
    val_size = len(full_dataset) - train_size

    train_dataset, val_dataset = random_split(
        full_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(config.SEED)
    )

    # Apply transforms
    train_dataset.dataset.transform = train_transforms
    val_dataset.dataset.transform = val_transforms

    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.BATCH_SIZE,
        shuffle=True,
        num_workers=0,  # Set to 0 for Windows compatibility
        pin_memory=True if torch.cuda.is_available() else False
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=config.BATCH_SIZE,
        shuffle=False,
        num_workers=0,
        pin_memory=True if torch.cuda.is_available() else False
    )

    return train_loader, val_loader


Now we start setting up the model.
Remove classifier → keep global average pooling
reeze base model
Your custom classifier head


# Model Building

In [44]:
def build_model(config: Config, freeze_base: bool = True) -> nn.Module:
    """
    Build EfficientNet-B0 model with custom classifier.

    Parameters
    ----------
    config : Config
        Configuration object
    freeze_base : bool, default=True
        Whether to freeze the base model weights

    Returns
    -------
    nn.Module
        Complete model ready for training
    """
    # Load pre-trained EfficientNet
    weights = EfficientNet_B0_Weights.IMAGENET1K_V1
    base_model = efficientnet_b0(weights=weights)

    # Remove original classifier
    base_model.classifier = nn.Identity()

    # Freeze base model if specified
    if freeze_base:
        for param in base_model.parameters():
            param.requires_grad = False

    # Build complete model with custom classifier
    model = nn.Sequential(
        base_model,
        nn.Linear(1280, 256),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(256, config.NUM_CLASSES)
    )

    return model.to(config.DEVICE)


In [67]:
def train_epoch(model: nn.Module,
                train_loader: DataLoader,
                optimizer: torch.optim.Optimizer,
                device: torch.device,
                criterion: nn.Module = nn.CrossEntropyLoss()) -> Tuple[float, float]:
    """
    Train model for one epoch.

    Parameters
    ----------
    model : nn.Module
        Model to train
    train_loader : DataLoader
        Training data loader
    criterion : nn.Module
        Loss function
    optimizer : torch.optim.Optimizer
        Optimizer
    device : torch.device
        Device to train on

    Returns
    -------
    Tuple[float, float]
        Average loss and accuracy for the epoch
    """
    model.train()
    running_loss = 0.0
    running_corrects = 0

    for images, labels in tqdm(train_loader, desc="Training"):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * images.size(0)
        running_corrects += (outputs.argmax(1) == labels).sum().item()


    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = running_corrects / len(train_loader.dataset)

    return epoch_loss, epoch_acc


def validate(model: nn.Module,
            val_loader: DataLoader,
            device: torch.device,
            criterion: nn.Module = nn.CrossEntropyLoss()) -> Tuple[float, float]:
    """
    Validate model.

    Parameters
    ----------
    model : nn.Module
        Model to validate
    val_loader : DataLoader
        Validation data loader
    criterion : nn.Module
        Loss function
    device : torch.device
        Device to run validation on

    Returns
    -------
    Tuple[float, float]
        Validation loss and accuracy
    """
    model.eval()
    running_loss = 0.0
    running_corrects = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * images.size(0)
            running_corrects += (outputs.argmax(1) == labels).sum().item()

    val_loss = running_loss / len(val_loader.dataset)
    val_acc = running_corrects / len(val_loader.dataset)

    return val_loss, val_acc


def train_model(model: nn.Module,
               train_loader: DataLoader,
               val_loader: DataLoader,
               config: Config,
               optimizer: torch.optim.Optimizer = None) -> Dict:
    """
    Complete training loop with early stopping.

    Parameters
    ----------
    model : nn.Module
        Model to train
    train_loader : DataLoader
        Training data loader
    val_loader : DataLoader
        Validation data loader
    config : Config
        Configuration object

    Returns
    -------
    Dict
        Training history with losses and accuracies
    """
    device = config.DEVICE
    criterion = nn.CrossEntropyLoss()

    if optimizer is None:
        optimizer = Adam(model.parameters(), lr=config.LEARNING_RATE)

    scheduler = ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=0.2,
        patience=3,
        min_lr=1e-6
    )

    best_val_loss = float('inf')
    best_model_state = None
    patience_counter = 0
    history = {'train_loss': [], 'train_acc': [],
               'val_loss': [], 'val_acc': []}

    for epoch in range(Config.EPOCHS):
        print(f"\nEpoch {epoch+1}/{config.EPOCHS}")

        # Train
        train_loss, train_acc = train_epoch(
            model, train_loader, optimizer, config.DEVICE, criterion
        )

        # Validate
        val_loss, val_acc = validate(
            model, val_loader, config.DEVICE, criterion
        )

        # Update scheduler
        scheduler.step(val_loss)

        # Store history
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        print(f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f}")
        print(f"Val   Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model_state = copy.deepcopy(model.state_dict())
            patience_counter = 0

            # Save best model
            config.MODEL_SAVE_PATH.parent.mkdir(parents=True, exist_ok=True)
            torch.save(best_model_state, config.MODEL_SAVE_PATH)
        else:
            patience_counter += 1
            if patience_counter >= config.PATIENCE:
                print(f"\nEarly stopping triggered after {epoch+1} epochs")
                break

    # Load best model
    model.load_state_dict(best_model_state)
    return history


In [60]:
def main():
    """Main execution function."""
    # Initialize configuration
    config = Config()

    print(f"Using device: {config.DEVICE}")
    print(f"Number of classes: {config.NUM_CLASSES}")

    # Prepare data
    print("\nPreparing data...")
    train_loader, val_loader = prepare_data(config)

    # Build model
    print("\nBuilding model...")
    model = build_model(config, freeze_base=True)

    # Train model
    print("\nStarting training...")
    history = train_model(model, train_loader, val_loader, config)

    print("\nTraining complete!")
    print(f"Best model saved to: {config.MODEL_SAVE_PATH}")

    return model, history


if __name__ == "__main__":
    model, history = main()

Using device: cpu
Number of classes: 43

Preparing data...
Total images after sampling: 19604

Building model...

Starting training...

Epoch 1/5


Training:   0%|          | 0/491 [00:04<?, ?it/s]


KeyboardInterrupt: 

In [68]:
def load_model_simple(model_path='models/efficientnet_best.pth', num_classes=43):
    """
    Load the saved model - simplest approach.

    Parameters
    ----------
    model_path : str
        Path to saved model weights
    num_classes : int
        Number of output classes (43 for traffic signs)

    Returns
    -------
    torch.nn.Module
        Loaded model ready for inference
    """
    # Recreate the exact same model architecture
    weights = EfficientNet_B0_Weights.IMAGENET1K_V1
    base_model = efficientnet_b0(weights=weights)
    base_model.classifier = nn.Identity()

    model = nn.Sequential(
        base_model,
        nn.Linear(1280, 256),
        nn.ReLU(),
        nn.Dropout(0.4),
        nn.Linear(256, num_classes)
    )

    # Load the saved weights
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.eval()  # Set to evaluation mode

    print(f"✓ Model loaded from {model_path}")
    print(f"✓ Device: {device}")

    return model

model = load_model_simple(
    model_path="C:/Projects/DST_Project2/models/efficientnet_best.pth",
    num_classes=43
)



✓ Model loaded from C:/Projects/DST_Project2/models/efficientnet_best.pth
✓ Device: cpu


In [56]:
train_loader, val_loader = prepare_data(Config)


Total images after sampling: 19604


In [62]:
Config.EPOCHS = 2

In [69]:
def progressive_fine_tuning(model, train_loader, val_loader, Config):
    """
    Implement progressive fine-tuning strategy.
    Already trained classifier only
    Stage 1: Unfreeze last 2 blocks + classifier
    Stage 2: Full fine-tuning
    """

    # Unfreeze last 2 blocks
    print("Fine-tuning last 2 blocks")
    for name, param in model[0].named_parameters():
        if name.startswith("features.6") or name.startswith("features.7"):
            param.requires_grad = True

    # Use lower learning rate
    optimizer = Adam([
        {'params': model[0].features[6].parameters(), 'lr': 3e-4},
        {'params': model[0].features[7].parameters(), 'lr': 3e-4},
        {'params': model[1].parameters(), 'lr': 1e-3},
        {'params': model[3].parameters(), 'lr': 1e-3}
    ])
    history_finetune = train_model(model, train_loader, val_loader,
                                  Config, optimizer = optimizer)

    return history_finetune

progressive_fine_tuning(model, train_loader, val_loader, Config)


Fine-tuning last 2 blocks

Epoch 1/2


Training:  28%|██▊       | 137/491 [27:47<1:11:48, 12.17s/it]


KeyboardInterrupt: 

#classifier = layers 1 and 4 of the Sequential
#backbone fine-tuned layers = features.6 and features.7
different learing rate for classifier and backbone
       # fast learning for classifier

    # slow learning for fine-tuned blocks


Fine tuning loop

In [74]:
classifier_params = list(model[1].parameters()) + list(model[4].parameters())

backbone_params = [
    p for name, p in model.named_parameters()
    if name.startswith("features.6") or name.startswith("features.7")
]

optimizer = torch.optim.Adam([
    {"params": classifier_params, "lr": 1e-3},
    {"params": backbone_params, "lr": 1e-4}
])

In [75]:
for name, _ in model[0].named_parameters():
    print(name)
EPOCHS = 5
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")

    # ---- Train ----
    model.train()
    running_loss = 0.0
    running_corrects = 0

    for images, labels in tqdm(train_loader):
        optimizer.zero_grad()
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        loss.backward()
        optimizer.step()

        # FIX: multiply by batch size
        running_loss += loss.item() * images.size(0)
        running_corrects += (outputs.argmax(1) == labels).sum().item()

    # FIX: divide by dataset size (not by batches)
    train_loss = running_loss / len(train_loader.dataset)
    train_acc = running_corrects / len(train_loader.dataset)

    # ---- Validation ----
    model.eval()
    val_running_loss = 0.0
    val_running_corrects = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels)

            # FIX: multiply by batch size
            val_running_loss += loss.item() * images.size(0)
            val_running_corrects += (outputs.argmax(1) == labels).sum().item()

    # FIX: divide by dataset size
    val_loss = val_running_loss / len(val_loader.dataset)
    val_acc = val_running_corrects / len(val_loader.dataset)

    # scheduler takes average val_loss
    scheduler.step(val_loss)

    print(f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f}")
    print(f"Val   Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

    # ---- Early stopping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        torch.save(best_model_state, "efficientnet_best.pth")
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print("Early stopping triggered.")
            break


features.0.0.weight
features.0.1.weight
features.0.1.bias
features.1.0.block.0.0.weight
features.1.0.block.0.1.weight
features.1.0.block.0.1.bias
features.1.0.block.1.fc1.weight
features.1.0.block.1.fc1.bias
features.1.0.block.1.fc2.weight
features.1.0.block.1.fc2.bias
features.1.0.block.2.0.weight
features.1.0.block.2.1.weight
features.1.0.block.2.1.bias
features.2.0.block.0.0.weight
features.2.0.block.0.1.weight
features.2.0.block.0.1.bias
features.2.0.block.1.0.weight
features.2.0.block.1.1.weight
features.2.0.block.1.1.bias
features.2.0.block.2.fc1.weight
features.2.0.block.2.fc1.bias
features.2.0.block.2.fc2.weight
features.2.0.block.2.fc2.bias
features.2.0.block.3.0.weight
features.2.0.block.3.1.weight
features.2.0.block.3.1.bias
features.2.1.block.0.0.weight
features.2.1.block.0.1.weight
features.2.1.block.0.1.bias
features.2.1.block.1.0.weight
features.2.1.block.1.1.weight
features.2.1.block.1.1.bias
features.2.1.block.2.fc1.weight
features.2.1.block.2.fc1.bias
features.2.1.blo

100%|██████████| 491/491 [54:14<00:00,  6.63s/it]  


Train Loss: 0.1759 Acc: 0.9397
Val   Loss: 0.1220 Acc: 0.9668


NameError: name 'best_val_loss' is not defined

In [None]:
torch.save(model.state_dict(), "efficientnet_finetuned.pth")


In [None]:
train_losses = [3.1255, 3.0977, 3.0825, 3.0588, 3.0453]
val_losses   = [3.0977, 3.0821, 3.0709, 3.0400, 3.0357]
train_accs   = [0.6766, 0.7041, 0.7190, 0.7424, 0.7556]
val_accs     = [0.7031, 0.7195, 0.7302, 0.7615, 0.7654]

In [None]:
epochs = [1,2,3,4,5]

plt.figure(figsize=(8,5))
plt.plot(train_losses, label="Train Loss", marker='o')
plt.plot(val_losses, label="Val Loss", marker='o')
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training vs Validation Loss")
plt.xticks(epochs)
plt.legend()
plt.grid(True)
plt.show()

plt.figure(figsize=(8,5))
plt.plot(train_accs, label="Train Accuracy", marker='o')
plt.plot(val_accs, label="Val Accuracy", marker='o')
plt.xlim(1, 5)
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Training vs Validation Accuracy")
plt.xticks(epochs)
plt.legend()
plt.grid(True)
plt.show()

want to shift these along so they start at epoch 1 rather than 0

In [None]:
!python -m pip install scikit-learn
from sklearn.metrics import classification_report

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in val_loader:
        outputs = model(images)
        preds = outputs.argmax(1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(classification_report(all_labels, all_preds))


In [None]:
from sklearn.metrics import classification_report

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in val_loader:
        outputs = model(images)
        preds = outputs.argmax(1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(classification_report(
    all_labels, 
    all_preds,
    zero_division = 0))



above gives classification report: precision, recall, f1 per class. now we do confusion matrix heat map

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

cm = confusion_matrix(all_labels, all_preds)

plt.figure(figsize=(12,10))
sns.heatmap(cm, annot=False, cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()


now time for few shot learning. shows how accuracy drops under small data conditions

In [None]:
import os
os.getcwd()
