# German Traffic Sign Classification with EfficientNet_B0

## Team members
Ketiyape Samarasekara Kasunki Samarasekara (2304486) \
Subramaniyamge Ruwani Rangika Weerasinghe (2305168)

# STEP 1: Import Required Libraries

In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import GTSRB
from torchvision.models import efficientnet_b0
from torch.utils.data import DataLoader
import time
import os
import copy
import numpy as np


# STEP 2: Load the GTSRB Dataset

In [None]:

# Channel statistics of ImageNet. The pretrained EfficientNet_B0 weights used
# later in this notebook expect inputs normalised with these values, so the
# same normalisation is applied here.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 64

# Data transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Load GTSRB dataset
train_set = GTSRB(root='./data', split='train', transform=transform, download=True)
test_set = GTSRB(root='./data', split='test', transform=transform, download=True)

train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

# Determine number of classes from the dataset.
# torchvision's GTSRB stores (image_path, class_index) pairs in the private
# `_samples` list; reading labels from it avoids decoding and resizing every
# training image just to count classes. Because the attribute is private,
# fall back to iterating the dataset (slow) if it is not available.
_train_samples = getattr(train_set, "_samples", None)
if _train_samples is not None:
    all_labels = [label for _, label in _train_samples]
else:
    all_labels = [label for _, label in train_set]
num_classes = len(np.unique(all_labels))

print("Number of classes:", num_classes)



Number of classes: 43


# STEP 3: Load Pretrained Model (EfficientNet_B0)

In [None]:
# Build the ImageNet-pretrained EfficientNet_B0 backbone.
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

weights = EfficientNet_B0_Weights.DEFAULT  # or .IMAGENET1K_V1 if preferred
model = efficientnet_b0(weights=weights)

# Swap the final classifier layer for a fresh linear head sized to the
# number of traffic-sign classes; the rest of the network keeps its weights.
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(in_features=head_in_features, out_features=num_classes)
model = model.to(device)


# Evaluation function
def evaluate(model, loader, device=None):
    """Return the top-1 accuracy (in percent) of ``model`` over ``loader``.

    Args:
        model: Module called on every input batch. It is switched to eval
            mode by this function and left in that mode.
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        device: Device the batches are moved to before the forward pass.
            When ``None`` (default) the device of the model's first parameter
            is used, falling back to CPU for parameter-free models. This keeps
            the function correct even after the model has been moved, instead
            of depending on a module-level ``device`` variable.

    Returns:
        float: ``100 * correct / total``.

    Raises:
        ValueError: If ``loader`` yields no samples (accuracy is undefined).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # inference only: no autograd bookkeeping needed
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Index of the largest logit along the class dimension.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("loader yielded no samples; accuracy is undefined")
    return 100 * correct / total

# Baseline: score the network on the test split before any fine-tuning
# (the replaced classifier head is still untrained at this point).
acc_pretrain = evaluate(model, loader=test_loader)
print(
    "Test Accuracy of pre-trained model without any fine tuning: {:.2f}%".format(
        acc_pretrain
    )
)


Test Accuracy of pre-trained model without any fine tuning: 1.88%


# STEP 4: Train the Model (Fine-Tuning)

In [None]:
# Fine-tune the model
# Number of passes over the training set. Kept in one place so the loop bound
# and the progress message cannot drift apart.
NUM_EPOCHS = 1

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

for epoch in range(NUM_EPOCHS):
    # Enter training mode at the start of every epoch so the loop stays
    # correct if an evaluation pass (which switches to eval mode) is ever
    # run between epochs.
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Reported loss is the mean of the per-batch mean losses.
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} | loss {running_loss/len(train_loader):.4f}")

acc_finetune = evaluate(model, test_loader)
print("Test accuracy of pre-trained model after fine-tuning and before quantization : {:.2f}%".format(acc_finetune))

# Save model temporarily
torch.save(model.state_dict(), "efficientnet_b0_gtsrb.pth")


Epoch 1/1 | loss 0.9075
Test accuracy of pre-trained model after fine-tuning and before quantization : 96.53%


# STEP 5: Quantize the Model (INT8)

In [None]:
# Step 5: Quantization
import os

# Put the fine-tuned network on the CPU in inference mode before quantizing.
model.cpu()
model.eval()

# Dynamic quantization: only the nn.Linear modules are converted to INT8;
# every other layer is left as it is.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    {torch.nn.Linear},
    dtype=torch.qint8,
)

# Write both state dicts to disk so their file sizes can be compared.
torch.save(model.state_dict(), "efficientnet_b0_fp32.pth")
torch.save(quantized_model.state_dict(), "efficientnet_b0_int8.pth")

# Model size comparison (bytes on disk divided by 1e6)
fp32_size = os.path.getsize("efficientnet_b0_fp32.pth") / 1e6
int8_size = os.path.getsize("efficientnet_b0_int8.pth") / 1e6
print(
    f"Model size before quantization: {fp32_size:.2f} MB",
    f"Model size after quantization: {int8_size:.2f} MB",
    f"Memory saving: {(fp32_size - int8_size) / fp32_size * 100:.2f}%",
    sep="\n",
)


Model size before quantization: 16.55 MB
Model size after quantization: 16.38 MB
Memory saving: 0.99%


# STEP 6: Accuracy After Quantization

In [None]:
# Quantize the model
# Quantize a CPU-resident deep copy rather than `model` itself.
model_int8 = copy.deepcopy(model)
model_int8 = model_int8.to('cpu')  # move to CPU for quantization
model_int8.eval()

quantized_model = torch.quantization.quantize_dynamic(
    model_int8,
    qconfig_spec={nn.Linear},
    dtype=torch.qint8,
)

# Define evaluate function
def evaluate(model, loader, device=torch.device("cpu")):
    """Compute top-1 accuracy, as a percentage, of ``model`` on ``loader``.

    Args:
        model: Module applied to each batch; put into eval mode here.
        loader: Iterable of ``(inputs, labels)`` tensor batches.
        device: Device both tensors of each batch are moved to (CPU by default).

    Returns:
        float: ``100 * correct / total`` over all batches.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            # Predicted class = position of the largest score in each row.
            predictions = model(batch_inputs).max(dim=1).indices
            n_seen += batch_labels.shape[0]
            n_correct += predictions.eq(batch_labels).sum().item()
    return 100 * n_correct / n_seen

# Set device
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The FP32 model is scored on `device`; the quantized model is scored on the CPU.
model.to(device)

# Evaluate original and quantized models
acc_original = evaluate(model, test_loader, device)
acc_quant = evaluate(quantized_model, test_loader, device=torch.device("cpu"))

# Print results
print(
    f"Test accuracy before quantization: {acc_original:.2f}%",
    f"Test accuracy after quantization: {acc_quant:.2f}%",
    f"Accuracy drop after quantization: {acc_original - acc_quant:.2f}%",
    sep="\n",
)


Test accuracy before quantization: 96.53%
Test accuracy after quantization: 96.52%
Accuracy drop after quantization: 0.02%


# STEP 7: Latency Benchmarking

In [None]:

def benchmark_latency(model, loader, n=10, warmup=1):
    """Measure the mean forward-pass latency of ``model`` in milliseconds.

    Up to ``n`` input batches are taken from ``loader`` before timing starts,
    so data loading is excluded from the measurement. Each batch is then
    passed through the model once with autograd disabled.

    Args:
        model: Module to benchmark; switched to eval mode here.
        loader: Iterable yielding ``(inputs, labels)`` batches; labels are ignored.
        n: Maximum number of batches to time. Must be positive.
        warmup: Number of untimed forward passes run first (on the first
            batch) so one-off start-up costs do not skew the first timing.

    Returns:
        Mean per-batch latency in milliseconds (``np.mean`` of the timings).

    Raises:
        ValueError: If ``n`` is not positive or ``loader`` yields no batches.
    """
    if n <= 0:
        raise ValueError("n must be a positive integer")

    model.eval()

    # Run inputs on whatever device the model lives on (CPU if it has no parameters).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    inputs_list = []
    for inputs, _ in loader:
        inputs_list.append(inputs.to(device))
        if len(inputs_list) == n:
            break
    if not inputs_list:
        raise ValueError("loader yielded no batches to benchmark")

    def _sync():
        # CUDA kernels launch asynchronously; wait for them so the clock
        # covers the actual computation. No-op on CPU.
        if device.type == "cuda":
            torch.cuda.synchronize(device)

    timings = []
    with torch.no_grad():
        for _ in range(warmup):
            model(inputs_list[0])
        _sync()
        for inputs in inputs_list:
            # perf_counter is monotonic and high-resolution, unlike time.time().
            start = time.perf_counter()
            model(inputs)
            _sync()
            timings.append((time.perf_counter() - start) * 1000)  # convert to ms
    return np.mean(timings)

# Both networks are benchmarked on the CPU; the FP32 model is moved there first.
model.to("cpu")
latency_before = benchmark_latency(model, test_loader)
latency_after = benchmark_latency(quantized_model, test_loader)

print(
    "Average inference latency (ms) before quantization: {:.2f}".format(latency_before),
    "Average inference latency (ms) after quantization: {:.2f}".format(latency_after),
    sep="\n",
)


Average inference latency (ms) before quantization: 4842.15
Average inference latency (ms) after quantization: 4902.52
