# DETR Model Evaluation

In [1]:
# Standard library imports
import os
import random
import time  # For measuring time

# Third-party imports
# NumPy
import numpy as np

# Matplotlib
import matplotlib.pyplot as plt

# PIL (Python Imaging Library)
from PIL import Image

# PyTorch imports
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import StepLR

# torchvision imports
from torchvision.transforms import functional as F

# Transformers imports
from transformers import DetrForObjectDetection, DetrConfig, DetrImageProcessor

# pycocotools imports
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

# timm (PyTorch Image Models) import
import timm

# Collections
from collections import defaultdict

  from .autonotebook import tqdm as notebook_tqdm
2024-12-15 13:15:09.963532: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734268509.981451  382692 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734268509.986930  382692 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-15 13:15:10.007979: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Data Loading

In [2]:
# Export folder that holds the COCO-format annotation file
data_folder = "Self Driving Car/export/"
annotation_file = os.path.join(data_folder, "_annotations.coco.json")

# Parse the annotation JSON; the COCO constructor also builds its lookup index
coco = COCO(annotation_file)

# Quick sanity check: image count and the top-level sections of the JSON
total_images = len(coco.getImgIds())
print(f"Total images in the dataset: {total_images}")
print("Keys in the COCO annotations:", [key for key in coco.dataset.keys()])

loading annotations into memory...
Done (t=0.76s)
creating index...
index created!
Total images in the dataset: 29800
Keys in the COCO annotations: ['info', 'licenses', 'categories', 'images', 'annotations']


#### **Inspect the annotation file to make sure it is suitable for DETR**

In [3]:
# Divider line reused between the sections of the report printed below
separator = "--------------------------------------------------------"

print(separator)
# Top-level sections of the annotation JSON
print("Keys in the COCO annotations:", list(coco.dataset.keys()))

print(separator)
print(separator)
# Dataset-wide counts: images, annotations, categories
num_images = len(coco.getImgIds())
print(f"Total images in the dataset: {num_images}")

num_annotations = len(coco.getAnnIds())
print(f"Total annotations in the dataset: {num_annotations}")

num_categories = len(coco.getCatIds())
print(f"Total categories in the dataset: {num_categories}")

# Category names, in the order returned by the COCO API
categories = coco.loadCats(coco.getCatIds())
print("Categories:", [cat['name'] for cat in categories])

print(separator)
print(separator)

# Collect, per image id, the category id of every annotation on that image
img_to_anns = defaultdict(list)
for ann in coco.loadAnns(coco.getAnnIds()):
    img_to_anns[ann['image_id']].append(ann['category_id'])

# Number of distinct categories on each image (0 for images without annotations)
classes_per_image = [len(set(img_to_anns[img_id])) for img_id in coco.getImgIds()]

# Summary statistics over all images
max_classes, min_classes = max(classes_per_image), min(classes_per_image)
avg_classes = np.mean(classes_per_image)

print(f"Max classes per image: {max_classes}")
print(f"Min classes per image: {min_classes}")
print(f"Average classes per image: {avg_classes:.2f}")

print(separator)
print(separator)
# Show the raw record of the first image ...
sample_image_id = coco.getImgIds()[0]
sample_image = coco.loadImgs(sample_image_id)[0]
print("Sample image entry:")
print(sample_image)

# ... and the first annotation attached to it
sample_ann_id = coco.getAnnIds(imgIds=sample_image_id)[0]
sample_annotation = coco.loadAnns(sample_ann_id)[0]
print("Sample annotation entry:")
print(sample_annotation)


print(separator)
print(separator)




--------------------------------------------------------
Keys in the COCO annotations: ['info', 'licenses', 'categories', 'images', 'annotations']
--------------------------------------------------------
--------------------------------------------------------
Total images in the dataset: 29800
Total annotations in the dataset: 194539
Total categories in the dataset: 12
Categories: ['obstacles', 'biker', 'car', 'pedestrian', 'trafficLight', 'trafficLight-Green', 'trafficLight-GreenLeft', 'trafficLight-Red', 'trafficLight-RedLeft', 'trafficLight-Yellow', 'trafficLight-YellowLeft', 'truck']
--------------------------------------------------------
--------------------------------------------------------
Max classes per image: 7
Min classes per image: 0
Average classes per image: 1.88
--------------------------------------------------------
--------------------------------------------------------
Sample image entry:
{'id': 0, 'license': 1, 'file_name': '1478897026627294725_jpg.rf.6828a4e82

In [4]:
# Seed the global RNG first so the shuffled order (and thus the split) is reproducible
random.seed(42)
all_image_ids = coco.getImgIds()
random.shuffle(all_image_ids)

# Number of images assigned to each split
num_train = 25330
num_val = 2235
num_test = 2235

# Consecutive, non-overlapping slices of the shuffled id list
train_ids = all_image_ids[:num_train]
val_ids = all_image_ids[num_train:][:num_val]
test_ids = all_image_ids[num_train + num_val:][:num_test]

print(f"Training images: {len(train_ids)}, Validation images: {len(val_ids)}, Testing images: {len(test_ids)}")

Training images: 25330, Validation images: 2235, Testing images: 2235


### Create the Dataset Class

In [5]:
# Define the custom COCO dataset
class COCODataset(Dataset):
    """Map-style dataset that pairs images with their COCO annotations.

    Every item is read from disk, combined with the annotations the COCO API
    returns for it, and passed through the supplied processor.

    Args:
        image_dir: Directory that the images' ``file_name`` entries are joined onto.
        coco: COCO API object; ``loadImgs``, ``getAnnIds`` and ``loadAnns`` are used.
        processor: Callable invoked as
            ``processor(images=..., annotations=..., return_tensors="pt")``.
        image_ids: Sequence of image ids that make up this dataset.
    """

    def __init__(self, image_dir, coco, processor, image_ids):
        self.image_dir, self.coco = image_dir, coco
        self.processor, self.image_ids = processor, image_ids

    def __len__(self):
        """Return the number of image ids in this dataset."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``(pixel_values, labels)`` for the image id stored at position ``idx``."""
        image_id = self.image_ids[idx]
        file_name = self.coco.loadImgs(image_id)[0]["file_name"]
        image = Image.open(os.path.join(self.image_dir, file_name)).convert("RGB")

        # Rebuild each annotation with only the fields handed to the processor.
        # The area is recomputed from the [x_min, y_min, width, height] box, and
        # the original category ids are kept as-is.
        records = self.coco.loadAnns(self.coco.getAnnIds(imgIds=image_id))
        annotations = [
            {
                "bbox": record["bbox"],
                "category_id": record["category_id"],
                "area": record["bbox"][2] * record["bbox"][3],
                "iscrowd": record.get("iscrowd", 0),
            }
            for record in records
        ]
        target = {"image_id": image_id, "annotations": annotations}

        # The processor returns batched outputs; strip the batch dimension of one
        encoding = self.processor(images=image, annotations=target, return_tensors="pt")
        return encoding["pixel_values"].squeeze(0), encoding["labels"][0]

### Prepare Datasets

In [6]:
# The images are read from the same folder that holds the annotation file
image_dir = data_folder

# Image processor published with the pre-trained DETR checkpoint
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")

# One dataset per split; all three share the same COCO index and processor
train_dataset, val_dataset, test_dataset = (
    COCODataset(image_dir, coco, processor, split_ids)
    for split_ids in (train_ids, val_ids, test_ids)
)

print(f"Datasets prepared: {len(train_dataset)} training, {len(val_dataset)} validation, {len(test_dataset)} testing.")

Datasets prepared: 25330 training, 2235 validation, 2235 testing.


### Create DataLoaders

In [7]:
def _collate_batch(samples):
    """Regroup a list of (pixel_values, labels) samples into (all_pixel_values, all_labels) tuples."""
    return tuple(zip(*samples))


# Only the training loader shuffles; validation and test keep dataset order
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=_collate_batch)
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=_collate_batch)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=_collate_batch)

print("DataLoaders ready.")

DataLoaders ready.


In [8]:
# Pull a single batch from the training loader and show its first sample
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    pixel_values, targets = first_batch
    print(f"Pixel values shape: {pixel_values[0].shape}")
    print(f"Target example: {targets[0]}")

The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


Pixel values shape: torch.Size([3, 800, 800])
Target example: {'size': tensor([800, 800]), 'image_id': tensor([24621]), 'class_labels': tensor([2, 2, 2, 2, 2]), 'boxes': tensor([[0.4223, 0.4834, 0.0281, 0.0450],
        [0.4980, 0.4851, 0.0312, 0.0483],
        [0.5384, 0.4879, 0.0417, 0.0617],
        [0.5366, 0.4854, 0.0302, 0.0333],
        [0.6904, 0.5429, 0.1542, 0.1483]]), 'area': tensor([  810.0000,   966.6666,  1644.4445,   644.4445, 14635.5557]), 'iscrowd': tensor([0, 0, 0, 0, 0]), 'orig_size': tensor([512, 512])}


### Model Loading

In [9]:
# Locations of the fine-tuned model and of the processor saved with it
model_path = "DETR_BackboneTrained_30epochs/detr-finetuned"
processor_path = "DETR_BackboneTrained_30epochs/detr-processor"

model = DetrForObjectDetection.from_pretrained(model_path)
# NOTE(review): this rebinds `processor`; dataset objects constructed earlier keep
# the processor instance they were given at construction time.
processor = DetrImageProcessor.from_pretrained(processor_path)

# Inference mode, then move the weights to the GPU when one is available
model.eval()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

DetrForObjectDetection(
  (model): DetrModel(
    (backbone): DetrConvModel(
      (conv_encoder): DetrConvEncoder(
        (model): FeatureListNet(
          (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
          (bn1): DetrFrozenBatchNorm2d()
          (act1): ReLU(inplace=True)
          (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
          (layer1): Sequential(
            (0): Bottleneck(
              (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn1): DetrFrozenBatchNorm2d()
              (act1): ReLU(inplace=True)
              (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
              (bn2): DetrFrozenBatchNorm2d()
              (drop_block): Identity()
              (act2): ReLU(inplace=True)
              (aa): Identity()
              (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      

### Testing the Model

In [10]:
# Helper Functions

## *FOR CALCULATING MEAN IOU OF TEST PREDICTIONS*

def calculate_mean_iou(test_predictions, test_ground_truths):
    """Average, over ground-truth boxes, of the best IoU reached by any prediction.

    Samples are paired by position. Within a sample, every ground-truth box is
    scored with the highest IoU it achieves against any predicted box of that
    sample; class labels are not taken into account. Samples that have no
    predicted boxes are skipped, so their ground-truth boxes do not contribute
    to (and do not lower) the mean.

    NOTE(review): relies on a module-level ``calculate_iou(box_a, box_b)``
    helper that is not defined in the cells visible here — confirm it exists
    before this function is called.

    Args:
        test_predictions: Iterable of dicts with a "boxes" entry (list, NumPy
            array or tensor of boxes).
        test_ground_truths: Iterable of dicts with a "boxes" entry, aligned
            with ``test_predictions``.

    Returns:
        The mean of the per-ground-truth best IoUs, or 0.0 when none were computed.
    """
    all_ious = []

    for pred_sample, gt_sample in zip(test_predictions, test_ground_truths):
        pred_boxes = pred_sample['boxes']
        gt_boxes = gt_sample['boxes']

        # len() behaves the same for lists, NumPy arrays and torch tensors.
        # `.size` is an int only on NumPy arrays: it is a method on tensors
        # (so `.size == 0` is never true) and does not exist on lists.
        if len(pred_boxes) == 0:
            continue

        # Only the maximum IoU matters, so the predictions need no ordering by score.
        for gt_box in gt_boxes:
            all_ious.append(max(calculate_iou(gt_box, pred_box) for pred_box in pred_boxes))

    return sum(all_ious) / len(all_ious) if all_ious else 0.0

##*FOR CALCULATING MEAN IOU OF TEST PREDICTIONS PER CLASS*

from collections import defaultdict

def calculate_mean_iou_per_class(test_predictions, test_ground_truths):
    """Per-class average of the best IoU reached for each ground-truth box.

    Samples are paired by position. Within a sample, each ground-truth box is
    compared only against predictions carrying the same class label and scored
    with the highest IoU among them. Ground-truth boxes whose class has no
    prediction in that sample are skipped, so a class appears in the result
    only if at least one same-class comparison took place.

    NOTE(review): relies on a module-level ``calculate_iou(box_a, box_b)``
    helper that is not defined in the cells visible here — confirm it exists
    before this function is called.

    Args:
        test_predictions: Iterable of dicts with "boxes" and "labels" entries.
        test_ground_truths: Iterable of dicts with "boxes" and "labels"
            entries, aligned with ``test_predictions``.

    Returns:
        Dict mapping class id (plain Python scalar) to the mean best IoU of
        that class.
    """
    def _class_key(label):
        # Tensor labels hash by object identity, so two tensors holding the same
        # class id would end up in different dict buckets and never be matched.
        # Unwrap anything exposing .item() (tensors, NumPy scalars) to a scalar.
        return label.item() if hasattr(label, "item") else label

    iou_per_class = defaultdict(list)

    for pred_sample, gt_sample in zip(test_predictions, test_ground_truths):
        # Group ground-truth boxes by class
        gt_boxes_by_class = defaultdict(list)
        for gt_box, gt_label in zip(gt_sample['boxes'], gt_sample['labels']):
            gt_boxes_by_class[_class_key(gt_label)].append(gt_box)

        # Group predicted boxes by class
        pred_boxes_by_class = defaultdict(list)
        for pred_box, pred_label in zip(pred_sample['boxes'], pred_sample['labels']):
            pred_boxes_by_class[_class_key(pred_label)].append(pred_box)

        for class_id, gt_boxes_class in gt_boxes_by_class.items():
            pred_boxes_class = pred_boxes_by_class.get(class_id)
            if not pred_boxes_class:  # No predictions for this class
                continue

            # Only the maximum IoU matters, so no ordering by score is needed.
            for gt_box in gt_boxes_class:
                iou_per_class[class_id].append(
                    max(calculate_iou(gt_box, pred_box) for pred_box in pred_boxes_class)
                )

    # Every list in iou_per_class has at least one entry by construction.
    return {class_id: sum(ious) / len(ious) for class_id, ious in iou_per_class.items()}

In [12]:
import torch

def calculate_mean_iou_with_libraries(predictions, ground_truths):
    """
    Calculate the mean best-match IoU between predicted and ground-truth boxes.

    Samples are paired by position. For each sample a pairwise IoU matrix is
    computed with tensor broadcasting, every ground-truth box is scored with
    its highest IoU against any predicted box, and all scores are averaged.
    Samples where either side has no boxes are skipped.

    Both sides must use the [x_min, y_min, x_max, y_max] layout on the same
    coordinate scale.
    NOTE(review): a layout or scale mismatch between the two inputs (for
    example normalized center-format targets compared with absolute
    corner-format predictions) does not raise; it just produces IoUs near
    zero — confirm how the caller prepares both lists.

    Args:
        predictions (list of dict): Dictionaries containing "boxes" (tensor,
            ndarray, or nested list with at least 4 values per box).
        ground_truths (list of dict): Dictionaries containing "boxes" in the
            same format, aligned with ``predictions``.

    Returns:
        float: The mean best IoU over all scored ground-truth boxes, or 0.0
        when nothing was scored.
    """
    def _as_box_tensor(boxes):
        """Coerce a box container into a floating-point tensor with one box per row."""
        if isinstance(boxes, (list, tuple)) and boxes and all(torch.is_tensor(b) for b in boxes):
            boxes = torch.stack(list(boxes))
        else:
            boxes = torch.as_tensor(boxes)
        if not boxes.is_floating_point():
            boxes = boxes.float()
        # Give empty inputs a well-defined (0, 4) shape so slicing stays valid.
        return boxes.reshape(0, 4) if boxes.numel() == 0 else boxes

    def _pairwise_iou(gt_boxes, pred_boxes):
        """Return the (num_gt, num_pred) IoU matrix for two sets of corner-format boxes."""
        # Corners of the overlap rectangle for every (gt, pred) pair
        top_left = torch.max(gt_boxes[:, None, :2], pred_boxes[None, :, :2])
        bottom_right = torch.min(gt_boxes[:, None, 2:4], pred_boxes[None, :, 2:4])
        overlap = (bottom_right - top_left).clamp(min=0)
        intersection = overlap[..., 0] * overlap[..., 1]

        gt_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])
        pred_area = (pred_boxes[:, 2] - pred_boxes[:, 0]) * (pred_boxes[:, 3] - pred_boxes[:, 1])
        union = gt_area[:, None] + pred_area[None, :] - intersection

        # Pairs with a non-positive union score 0 instead of dividing by zero.
        valid = union > 0
        safe_union = torch.where(valid, union, torch.ones_like(union))
        return torch.where(valid, intersection / safe_union, torch.zeros_like(intersection))

    all_ious = []

    for pred_sample, gt_sample in zip(predictions, ground_truths):
        pred_boxes = _as_box_tensor(pred_sample["boxes"])
        gt_boxes = _as_box_tensor(gt_sample["boxes"])

        if pred_boxes.numel() == 0 or gt_boxes.numel() == 0:
            continue

        # Keep both operands on one device before broadcasting.
        iou_matrix = _pairwise_iou(gt_boxes, pred_boxes.to(gt_boxes.device))
        # Best prediction for each ground-truth box
        all_ious.extend(iou_matrix.max(dim=1).values.tolist())

    # Compute the mean IoU
    return sum(all_ious) / len(all_ious) if all_ious else 0.0

# NOTE(review): `test_predictions` and `test_ground_truths` are not created in any
# cell visible above; they must already exist in the kernel for this call to run.
mean_iou = calculate_mean_iou_with_libraries(
    predictions=test_predictions,
    ground_truths=test_ground_truths,
)
print(f"Mean IoU with Libraries: {mean_iou:.4f}")


Mean IoU with Libraries: 0.0000
