In [33]:
# import all required libraries for this evaluation script
import os
import numpy as np
import pandas as pd
from pathlib import Path
import kagglehub
import random
import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
import math
from typing import List, Tuple
from collections import Counter
import seaborn as sns
import torch
from torchvision import transforms
from torchvision.models import EfficientNet_B0_Weights
from torchvision.models import efficientnet_b0
from torchvision.models import ResNet50_Weights
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import time

In [34]:
# Download latest version of the dataset and follow procedure in initial file
path = kagglehub.dataset_download("meowmeowmeowmeowmeow/gtsrb-german-traffic-sign")

test_df_path =  os.path.join(path, "Test.csv")
test_img_path =  os.path.join(path, "Test")
test_df = pd.read_csv(test_df_path)

class_names = {
    0:"Speed Limit (20Km/hr)", 1:"Speed Limit (30Km/hr)", 
    2:"Speed Limit (50Km/hr)", 3: "Speed Limit (60Km/hr)", 
    4: "Speed Limit (70Km/hr)", 5: "Speed Limit (80Km/hr)",
    6: "End of Speed Limit (80Km/hr)", 7: "Speed Limit (100Km/hr)", 
    8: "Speed Limit (120Km/hr)", 9: "No Passing", 
    10: "No Passing for trucks over 3.5 tons", 11: "Right of way", 
    12: "Priotity Road", 13: "Yeild right of way",
    14: "Stop", 15: "Prohibited for all vehicles",
    16: "Trucks and tractors over 3.5 tons prohibited", 17: "Entery prohibited",
    18: "Danger", 19: "Single curve left",
    20: "Single curve right", 21: "Double curve",
    22: "Rough road", 23: "Slippery road",
    24: "Road narrows", 25: "Construction side ahead",
    26: "Signal lights ahead", 27: "Pedestrian crosswalk ahead",
    28: "Children", 29: "Bicycle crossing",
    30: "Unexpected ice danger", 31: "Wild animal crossing",
    32: "End of restrection", 33: "Mandatory direction of travel right",
    34: "Mandatory direction of travel left", 35: "Mandatory direction of travel ahead",
    36: "Straight or right", 37: "Straight or left",
    38: "Keep right", 39: "Keep left",
    40: "Traffic circle", 41: "End of no passing zone cars",
    42: "End of no passing zone vehicle over 3.5 tons"
}

test_df["ClassName"] = test_df['ClassId'].map(class_names)

In [35]:
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std  = [0.229, 0.224, 0.225]

#test transform (no augmentation)
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean, imagenet_std)
])

In [36]:
class GTSRBDataset(Dataset):
    def __init__(self, df, root_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.root_dir = Path(root_dir)      # <-- convert to Path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = self.root_dir / row["Path"]   # Path object
        img = Image.open(img_path).convert("RGB")
        label = int(row["ClassId"])

        if self.transform:
            img = self.transform(img)
        return img, label

test_dataset = GTSRBDataset(test_df, root_dir=path, transform=test_transforms)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

print(f"Test loader: {len(test_loader)} batches")

Test loader: 198 batches


In [37]:
# Load the saved weights
NUM_CLASSES = 43

# Load EfficientNet base model
weights = EfficientNet_B0_Weights.IMAGENET1K_V1
base_model = efficientnet_b0(weights=weights)

# Remove classifier → keep global avg pooling only
base_model.classifier = nn.Identity()

# Freeze base model
for param in base_model.parameters():
    param.requires_grad = False

# Build the same custom head Neva used
model = nn.Sequential(
    base_model,                # (0)
    nn.Linear(1280, 256),      # (2)
    nn.ReLU(),                 # (3)
    nn.Dropout(0.4),           # (4)
    nn.Linear(256, NUM_CLASSES),  # (5)
    nn.Softmax(dim=1)          # (6)
)

# Load the saved weights
state_dict = torch.load("Neva/efficientnet_best.pth", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()

Sequential(
  (0): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (scale_activati

In [38]:
model_resnet18 = models.resnet18(weights=None)
model_resnet18.fc = torch.nn.Linear(512, 43)   # example for 43 traffic signs
state_dict_resnet18 = torch.load("Gracie/resnet18_traffic_signs.pth", map_location="cpu")
model_resnet18.load_state_dict(state_dict_resnet18)
model_resnet18.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [39]:
class_names = test_loader.dataset.class_names if hasattr(test_loader.dataset, 'class_names') else class_names
print(class_names)

{0: 'Speed Limit (20Km/hr)', 1: 'Speed Limit (30Km/hr)', 2: 'Speed Limit (50Km/hr)', 3: 'Speed Limit (60Km/hr)', 4: 'Speed Limit (70Km/hr)', 5: 'Speed Limit (80Km/hr)', 6: 'End of Speed Limit (80Km/hr)', 7: 'Speed Limit (100Km/hr)', 8: 'Speed Limit (120Km/hr)', 9: 'No Passing', 10: 'No Passing for trucks over 3.5 tons', 11: 'Right of way', 12: 'Priotity Road', 13: 'Yeild right of way', 14: 'Stop', 15: 'Prohibited for all vehicles', 16: 'Trucks and tractors over 3.5 tons prohibited', 17: 'Entery prohibited', 18: 'Danger', 19: 'Single curve left', 20: 'Single curve right', 21: 'Double curve', 22: 'Rough road', 23: 'Slippery road', 24: 'Road narrows', 25: 'Construction side ahead', 26: 'Signal lights ahead', 27: 'Pedestrian crosswalk ahead', 28: 'Children', 29: 'Bicycle crossing', 30: 'Unexpected ice danger', 31: 'Wild animal crossing', 32: 'End of restrection', 33: 'Mandatory direction of travel right', 34: 'Mandatory direction of travel left', 35: 'Mandatory direction of travel ahead', 

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)            
model_resnet18.to(device)
criterion = nn.CrossEntropyLoss()

# Initialize metrics
correct_eff = correct_res = top5_correct_eff = top5_correct_res = total = 0
running_loss_eff = running_loss_res = 0.0
all_labels, all_pred_eff, all_pred_res = [], [], []

model.eval()
model_resnet18.eval()

with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        batch_size = labels.size(0)
        
        # EfficientNet
        outputs_eff = model(images)
        _, pred_eff = torch.max(outputs_eff, 1)
        _, top5_eff = torch.topk(outputs_eff, 5, dim=1)
        correct_eff += (pred_eff == labels).sum().item()
        top5_correct_eff += (top5_eff == labels.unsqueeze(1)).any(dim=1).sum().item()
        running_loss_eff += criterion(outputs_eff, labels).item() * batch_size
        
        # ResNet18
        outputs_res = model_resnet18(images)
        _, pred_res = torch.max(outputs_res, 1)
        _, top5_res = torch.topk(outputs_res, 5, dim=1)
        correct_res += (pred_res == labels).sum().item()
        top5_correct_res += (top5_res == labels.unsqueeze(1)).any(dim=1).sum().item()
        running_loss_res += criterion(outputs_res, labels).item() * batch_size
        
        # Store predictions
        total += batch_size
        all_labels.append(labels.cpu().numpy())
        all_pred_eff.append(pred_eff.cpu().numpy())
        all_pred_res.append(pred_res.cpu().numpy())

# Calculate final metrics
accuracy_eff = 100 * correct_eff / total
top5_accuracy_eff = 100 * top5_correct_eff / total
avg_loss_eff = running_loss_eff / total

accuracy_res = 100 * correct_res / total
top5_accuracy_res = 100 * top5_correct_res / total
avg_loss_res = running_loss_res / total

print(f"Test Accuracy for EfficientNet: {accuracy_eff:.2f}%  | Top-5: {top5_accuracy_eff:.2f}% | Loss: {avg_loss_eff:.4f}")
print(f"Test Accuracy for ResNet18:     {accuracy_res:.2f}%  | Top-5: {top5_accuracy_res:.2f}% | Loss: {avg_loss_res:.4f}")

# Concatenate for further analysis
y_true = np.concatenate(all_labels)
y_pred_eff = np.concatenate(all_pred_eff)
y_pred_res = np.concatenate(all_pred_res)

In [None]:
def classification_report_model(model, test_loader, device):
    model.to(device).eval()
    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(device))
            all_preds.extend(outputs.argmax(1).cpu().numpy())
            all_labels.extend(labels.numpy())
    
    report_dict = classification_report(all_labels, all_preds, zero_division=0, output_dict=True)
    df_metrics = pd.DataFrame(report_dict).transpose()
    
    return df_metrics, all_labels, all_preds

In [None]:
df_effnet, true_effnet, pred_effnet = classification_report_model(model, test_loader, device)
cm = confusion_matrix(true_effnet, pred_effnet)

In [None]:
print(df_effnet)

In [None]:
df_resnet, true_resnet, pred_resnet = classification_report_model(model_resnet18, test_loader, device)
cm_resnet = confusion_matrix(true_resnet, pred_resnet)

In [None]:
print(df_resnet)

In [None]:
plt.figure(figsize=(12,10))
sns.heatmap(cm, annot=False, cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix for Efficient Net Model")
plt.show()

In [None]:
plt.figure(figsize=(12,10))
sns.heatmap(cm_resnet, annot=False, cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix for ResNet18 Model")
plt.show()

In [None]:
precision_eff = df_effnet.loc["weighted avg", "precision"]
recall_eff = df_effnet.loc["weighted avg", "recall"]
f1_eff = df_effnet.loc["weighted avg", "f1-score"]

precision_res = df_resnet.loc["weighted avg", "precision"]
recall_res = df_resnet.loc["weighted avg", "recall"]
f1_res = df_resnet.loc["weighted avg", "f1-score"]

print(f"EfficientNet - Macro Precision: {precision_eff:.4f}, Macro Recall: {recall_eff:.4f}, Macro F1-Score: {f1_eff:.4f}\n")
print(f"ResNet18   - Macro Precision: {precision_res:.4f}, Macro Recall: {recall_res:.4f}, Macro F1-Score: {f1_res:.4f}")

In [None]:
worst_classes_eff= df_effnet.iloc[:-3].sort_values("recall").head(5)
worst_classes_res= df_resnet.iloc[:-3].sort_values("recall").head(5)

print(worst_classes_eff, "\n")
print(worst_classes_res)

In [None]:
def show_misclassified(model, loader, device, class_names, max_images=16):
    model.eval()
    misclassified = []
    
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(1)
            
            for img, true, pred in zip(images[preds != labels], 
                                       labels[preds != labels], 
                                       preds[preds != labels]):
                misclassified.append((img.cpu(), true.item(), pred.item()))
                if len(misclassified) >= max_images:
                    break
            if len(misclassified) >= max_images:
                break
    
    # Setup plot
    n = len(misclassified)
    cols = 4
    rows = (n + cols - 1) // cols
    fig, axes = plt.subplots(rows, cols, figsize=(3*cols, 3*rows))
    axes = axes.flatten() if n > 1 else [axes]
    
    # Denormalization constants
    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    
    # Plot each misclassified image
    for i, (img, true, pred) in enumerate(misclassified):
        img = ((img * std + mean).permute(1, 2, 0).clamp(0, 1))
        axes[i].imshow(img)
        axes[i].axis("off")
        axes[i].set_title(f"T: {class_names[true]}\nP: {class_names[pred]}", 
                          fontsize=10)
    
    # Hide unused subplots
    for i in range(n, len(axes)):
        axes[i].axis("off")
    
    plt.tight_layout()
    plt.show()

# Test it
show_misclassified(model_resnet18, test_loader, device, class_names)

In [None]:
show_misclassified(model, test_loader, device, class_names)

In [None]:
both_correct = (y_pred_eff == y_true) & (y_pred_res == y_true)
eff_only     = (y_pred_eff == y_true) & (y_pred_res != y_true)
res_only     = (y_pred_eff != y_true) & (y_pred_res == y_true)
both_wrong   = (y_pred_eff != y_true) & (y_pred_res != y_true)

print(f"Samples both correct:    {both_correct.sum()}")
print(f"EfficientNet only right: {eff_only.sum()}")
print(f"ResNet18 only right:     {res_only.sum()}")
print(f"Both wrong:              {both_wrong.sum()}")

In [None]:
import time

def measure_latency(model, dataloader, n_batches=10):
    model.eval()
    model.to(device)
    times = []

    with torch.no_grad():
        for i, (images, _) in enumerate(dataloader):
            if i >= n_batches:
                break
            images = images.to(device)

            torch.cuda.synchronize() if device.type == "cuda" else None
            t0 = time.perf_counter()
            _ = model(images)
            torch.cuda.synchronize() if device.type == "cuda" else None
            t1 = time.perf_counter()

            times.append((t1 - t0) / images.size(0))  # seconds / image

    return np.mean(times), np.std(times)

mean_eff, std_eff = measure_latency(model, test_loader)
mean_res, std_res = measure_latency(model_resnet18, test_loader)

print(f"EfficientNet: {mean_eff*1000:.2f} ± {std_eff*1000:.2f} ms / image")
print(f"ResNet18:     {mean_res*1000:.2f} ± {std_res*1000:.2f} ms / image")