Road Sign Classifier
Spencer Kasbohm

In [None]:
from datasets import load_dataset

# Login using e.g. `huggingface-cli login` to access this dataset
ds = load_dataset("ThankGod/mapillary_traffic_sign_dataset")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

val_mtsd_new/train-00000-of-00007.parque(…):   0%|          | 0.00/448M [00:00<?, ?B/s]

val_mtsd_new/train-00001-of-00007.parque(…):   0%|          | 0.00/454M [00:00<?, ?B/s]

val_mtsd_new/train-00002-of-00007.parque(…):   0%|          | 0.00/466M [00:00<?, ?B/s]

val_mtsd_new/train-00003-of-00007.parque(…):   0%|          | 0.00/451M [00:00<?, ?B/s]

val_mtsd_new/train-00004-of-00007.parque(…):   0%|          | 0.00/473M [00:00<?, ?B/s]

val_mtsd_new/train-00005-of-00007.parque(…):   0%|          | 0.00/463M [00:00<?, ?B/s]

val_mtsd_new/train-00006-of-00007.parque(…):   0%|          | 0.00/460M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3813 [00:00<?, ? examples/s]

In [None]:
import random
import matplotlib.pyplot as plt

# Ensure the dataset 'ds' is loaded. If not, this will raise an error.
# The dataset 'ds' is expected to be a Hugging Face dataset object.

if 'ds' in locals():
    print(f"Dataset contains {len(ds['train'])} training samples.")

    # Display 5 random images from the 'train' split
    num_samples = 5
    for i in range(num_samples):
        random_idx = random.randint(0, len(ds['train']) - 1)
        sample = ds['train'][random_idx]

        image = sample['image']

        # Accessing category information from the 'objects' dictionary
        # Check if 'objects' key exists and if 'category' list is not empty
        if 'objects' in sample and sample['objects'] and 'category' in sample['objects'] and len(sample['objects']['category']) > 0:
            label = sample['objects']['category'][0] # Taking the first category as the label
        else:
            label = "No specific label found" t

        plt.figure(figsize=(4, 4))
        plt.imshow(image)
        plt.title(f"Label: {label}")
        plt.axis('off')
        plt.show()
else:
    print("Dataset 'ds' not found. Please run the previous cell to load the dataset.")

SyntaxError: invalid syntax (ipython-input-560013328.py, line 23)

# Task
Preprocess the "ThankGod/mapillary_traffic_sign_dataset" by defining a class mapping to broader categories, creating a custom transformation function to resize images, scale bounding boxes, apply the class mapping, convert images to PyTorch tensors, and normalize them, then apply this transformation to the dataset and verify the transformed data.

## Define Class Mapping

### Subtask:
Define a mapping from the dataset's granular classes to broader 'super classes' to simplify the problem and reduce the number of output channels.


**Reasoning**:
First, I need to understand the range and meaning of the granular class IDs to define a meaningful mapping. I will iterate through a subset of the training data to collect all unique category IDs present in the dataset, which will then inform the creation of the `class_mapping` dictionary.



In [None]:
import random

# Initialize an empty set to store unique categories
unique_categories = set()

# Iterate through a portion of the dataset to collect unique categories
# Adjust the number of samples if the dataset is very large or very small
num_samples_to_check = min(len(ds['train']), 500) # Check up to 500 samples or all if less

print(f"Checking {num_samples_to_check} samples for unique categories...")

for i in range(num_samples_to_check):
    sample_idx = random.randint(0, len(ds['train']) - 1) # Get a random index
    sample = ds['train'][sample_idx]

    if 'objects' in sample and sample['objects'] and 'category' in sample['objects']:
        for category_id in sample['objects']['category']:
            unique_categories.add(category_id)

# Convert to a sorted list for better readability
sorted_unique_categories = sorted(list(unique_categories))
print(f"Found {len(sorted_unique_categories)} unique categories: {sorted_unique_categories}")

# It's helpful to also see the mapping from integer IDs to string labels if available.
# Assuming the dataset has a feature `label_names` for categories, let's try to access it.
if 'objects' in ds['train'].features and 'category' in ds['train'].features['objects']:
    if hasattr(ds['train'].features['objects']['category'], 'names'):
        category_names = ds['train'].features['objects']['category'].names
        print("\nCategory ID to Name Mapping:")
        for cat_id in sorted_unique_categories:
            if cat_id < len(category_names):
                print(f"  ID {cat_id}: {category_names[cat_id]}")
            else:
                print(f"  ID {cat_id}: (Name not found)")
    else:
        print("\nCategory names attribute not found in features.")
else:
    print("\n'objects' or 'category' feature not found in the dataset's features for direct name access.")


Checking 500 samples for unique categories...
Found 260 unique categories: [0, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 43, 44, 46, 47, 49, 50, 51, 52, 54, 56, 57, 60, 62, 63, 65, 66, 67, 70, 71, 72, 73, 75, 76, 78, 79, 81, 82, 83, 84, 86, 87, 88, 89, 91, 93, 94, 96, 97, 98, 100, 101, 102, 103, 108, 109, 111, 112, 113, 114, 115, 117, 118, 119, 121, 122, 126, 129, 130, 131, 133, 135, 136, 137, 138, 139, 141, 142, 143, 144, 145, 146, 147, 148, 150, 154, 155, 156, 157, 158, 159, 161, 162, 163, 164, 165, 166, 167, 169, 171, 172, 173, 174, 177, 179, 180, 182, 183, 186, 187, 188, 189, 190, 191, 192, 194, 196, 199, 200, 201, 203, 205, 206, 208, 210, 211, 213, 214, 215, 216, 217, 219, 220, 221, 222, 224, 226, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 241, 242, 244, 245, 247, 250, 254, 255, 257, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 270, 272, 273, 274, 275, 278, 280, 282, 285, 287, 288, 289, 291, 292, 29

In [None]:
num_super_classes = 20
class_mapping = {}

# Ensure sorted_unique_categories is available from the previous execution
if 'sorted_unique_categories' not in locals() or not sorted_unique_categories:
    print("Error: 'sorted_unique_categories' not found. Please run the previous cell.")
else:
    num_granular_categories = len(sorted_unique_categories)
    if num_granular_categories == 0:
        print("No unique categories found to map.")
    else:
        # Calculate how many granular categories fall into each super class
        # We distribute them as evenly as possible
        categories_per_super_class = num_granular_categories // num_super_classes
        remainder = num_granular_categories % num_super_classes

        current_super_class_id = 0
        current_category_index = 0

        for i in range(num_super_classes):
            # Determine how many categories to assign to this super class
            count_for_this_super_class = categories_per_super_class
            if i < remainder:
                count_for_this_super_class += 1

            # Assign categories to the current super class
            for _ in range(count_for_this_super_class):
                if current_category_index < num_granular_categories:
                    original_cat_id = sorted_unique_categories[current_category_index]
                    class_mapping[original_cat_id] = current_super_class_id
                    current_category_index += 1
                else:
                    break # Should not happen if logic is correct

            if current_category_index < num_granular_categories: # Only increment if there are more categories to map
                current_super_class_id += 1

        print(f"Successfully created a mapping from {num_granular_categories} granular categories to {len(set(class_mapping.values()))} super classes.")
        print("Example mappings (first 5 and last 5 entries, if available):")
        mapped_items = list(class_mapping.items())
        for i, (original_id, super_id) in enumerate(mapped_items):
            if i < 5 or i >= len(mapped_items) - 5:
                print(f"  Original ID {original_id} -> Super Class ID {super_id}")
            elif i == 5 and len(mapped_items) > 10: # Only print ellipsis if there are enough items in between
                print("  ...")

Successfully created a mapping from 260 granular categories to 20 super classes.
Example mappings (first 5 and last 5 entries, if available):
  Original ID 0 -> Super Class ID 0
  Original ID 2 -> Super Class ID 0
  Original ID 3 -> Super Class ID 0
  Original ID 4 -> Super Class ID 0
  Original ID 5 -> Super Class ID 0
  ...
  Original ID 377 -> Super Class ID 19
  Original ID 379 -> Super Class ID 19
  Original ID 385 -> Super Class ID 19
  Original ID 386 -> Super Class ID 19
  Original ID 387 -> Super Class ID 19


## Create Custom Transformation Function

### Subtask:
Develop a custom Python function to apply transformations to each sample. This function will be responsible for: 1. Ensuring the image is in PIL 'RGB' format. 2. Resizing the image to meet 'min_size' (e.g., 800) and 'max_size' (e.g., 1333) constraints while preserving aspect ratio. 3. Calculating the scaling factor and applying it to the bounding box coordinates. 4. Applying the defined class mapping to the annotation labels. 5. Converting the image to a PyTorch tensor (0.0-1.0 scaling). 6. Normalizing the tensor using ImageNet mean and standard deviation. 7. Formatting annotations (bounding boxes, labels) into PyTorch tensors suitable for torchvision's Faster R-CNN.


In [None]:
import torch
import torchvision.transforms.functional as F
from PIL import Image

# Define min_size and max_size for image resizing
min_size = 800
max_size = 1333

print(f"min_size set to: {min_size}")
print(f"max_size set to: {max_size}")

min_size set to: 800
max_size set to: 1333


**Reasoning**:
Now I will define the `transform_sample` function as specified in the instructions, incorporating image resizing, bounding box scaling and clipping, class mapping, conversion to PyTorch tensor, and normalization. I also need to make sure to define IMAGENET_MEAN and IMAGENET_STD since they were not defined earlier.



## Apply Transformations to Dataset

### Subtask:
Apply the custom transformation function to the entire dataset using `ds.with_transform` for lazy application.


In [None]:
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

def transform_sample(sample):
    # Safely extract the PIL Image object, handling potential list wrapping by datasets.with_transform
    image_input = sample['image']
    if isinstance(image_input, list):
        if len(image_input) > 0:
            image = image_input[0]
        else:
            # If the image list is empty, we cannot proceed. Return empty tensors.
            # This case should ideally be handled during data loading or cleaning.
            print("Warning: Empty image list encountered. Returning empty tensors.")
            return {'pixel_values': [torch.tensor([])], 'labels': [{}]}
    elif isinstance(image_input, Image.Image):
        image = image_input
    else:
        raise TypeError(f"Unexpected image type: {type(image_input)}. Expected PIL Image or list thereof.")

    original_width, original_height = image.size

    # 2. Ensure image is in PIL 'RGB' format
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # 3. Implement image resizing
    # Calculate scale factor
    min_original_size = float(min(original_width, original_height))
    max_original_size = float(max(original_width, original_height))
    scale_factor = min_size / min_original_size

    # Check if the longer side after scaling exceeds max_size
    if max_original_size * scale_factor > max_size:
        scale_factor = max_size / max_original_size

    # New dimensions
    new_width = int(original_width * scale_factor)
    new_height = int(original_height * scale_factor)

    # Resize the image
    image = F.resize(image, (new_height, new_width), interpolation=Image.BILINEAR)

    # Safely extract objects dictionary, handling potential list wrapping
    objects_input = sample.get('objects', {})
    if isinstance(objects_input, list):
        if len(objects_input) > 0:
            objects = objects_input[0]
        else:
            objects = {}
    elif isinstance(objects_input, dict):
        objects = objects_input
    else:
        objects = {}

    boxes = objects.get('bbox', [])
    labels = objects.get('category', [])

    target = {}

    if boxes and labels:
        # Convert bounding boxes to torch.Tensor
        boxes = torch.tensor(boxes, dtype=torch.float32)
        # Bounding box format: [x_min, y_min, width, height] -> [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2] # x_max
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3] # y_max

        # Scale bounding box coordinates
        boxes *= scale_factor

        # Clip bounding box values to be within the new image dimensions
        boxes[:, 0].clamp_(min=0, max=new_width)
        boxes[:, 1].clamp_(min=0, max=new_height)
        boxes[:, 2].clamp_(min=0, max=new_width)
        boxes[:, 3].clamp_(min=0, max=new_height)

        # Ensure xmin <= xmax and ymin <= ymax (handle cases where width/height became 0 or negative)
        keep = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        boxes = boxes[keep]
        labels_list = [labels[i] for i in range(len(labels))] # Ensure labels is a list for filtering
        labels = [labels_list[i] for i, k in enumerate(keep.tolist()) if k] # Filter labels based on 'keep'

        # Apply class mapping to labels
        mapped_labels = [class_mapping.get(label, -1) for label in labels] # Use -1 for unknown, or handle appropriately
        mapped_labels = [lbl for lbl in mapped_labels if lbl != -1] # Filter out labels that couldn't be mapped

        if mapped_labels:
            labels = torch.tensor(mapped_labels, dtype=torch.int64)
        else:
            labels = torch.tensor([], dtype=torch.int64)

        target['boxes'] = boxes
        target['labels'] = labels
    else:
        target['boxes'] = torch.tensor([], dtype=torch.float32).reshape(0,4)
        target['labels'] = torch.tensor([], dtype=torch.int64)


    # Convert image to PyTorch tensor (0.0-1.0 scaling)
    image_tensor = F.to_tensor(image)

    # Normalize the image tensor
    image_tensor = F.normalize(image_tensor, mean=IMAGENET_MEAN, std=IMAGENET_STD)

    # Add original image ID
    if 'image_id' in sample:
        image_id_val = sample['image_id']
        if isinstance(image_id_val, list) and len(image_id_val) > 0:
            target['image_id'] = torch.tensor([image_id_val[0]])
        else:
            target['image_id'] = torch.tensor([image_id_val])

    if 'area' in objects:
        original_areas = torch.tensor(objects['area'], dtype=torch.float32)
        scaled_areas = original_areas * (scale_factor ** 2)
        if 'keep' in locals() and len(scaled_areas) == len(keep): # Apply same filtering as for boxes if 'keep' was created
            target['area'] = scaled_areas[keep]
        elif 'keep' in locals() and boxes.numel() == 0: # If boxes are empty due to filtering, areas should also be empty
            target['area'] = torch.tensor([], dtype=torch.float32)
        else: # No filtering happened, or 'keep' not defined (e.g. no boxes initially)
            target['area'] = scaled_areas

    # The function should return a dictionary with the transformed data, typically 'pixel_values' for the image
    # and 'labels' for the annotation target dictionary, for consistency with Hugging Face models.
    # Wrap the outputs in lists for datasets.with_transform
    return {'pixel_values': [image_tensor], 'labels': [target]}

print("Redefined 'transform_sample' function to robustly handle input formats and return single-element lists.")

Redefined 'transform_sample' function to robustly handle input formats and return single-element lists.


In [None]:
transformed_ds = ds.with_transform(transform_sample)

print("Transformation applied to the dataset splits using `ds.with_transform`.")
print(f"Example of a transformed training sample: {transformed_ds['train'][0]}")

Transformation applied to the dataset splits using `ds.with_transform`.
Example of a transformed training sample: {'pixel_values': tensor([[[-1.4500, -1.4329, -1.4500,  ..., -1.2959, -1.2959, -1.2959],
         [-1.4500, -1.4158, -1.4158,  ..., -1.3302, -1.3302, -1.2959],
         [-1.3815, -1.3815, -1.3815,  ..., -1.3302, -1.3302, -1.2959],
         ...,
         [-1.4158, -1.4329, -1.4672,  ..., -1.5357, -1.5699, -1.5870],
         [-1.3815, -1.3815, -1.4500,  ..., -1.5185, -1.5357, -1.5528],
         [-1.4158, -1.4158, -1.4843,  ..., -1.5528, -1.5699, -1.5699]],

        [[-0.4951, -0.4601, -0.4776,  ..., -0.2850, -0.2850, -0.2850],
         [-0.5126, -0.4776, -0.4776,  ..., -0.3200, -0.3200, -0.2850],
         [-0.4776, -0.4776, -0.4776,  ..., -0.3200, -0.3200, -0.2850],
         ...,
         [-1.0903, -1.1078, -1.1429,  ..., -1.3004, -1.3354, -1.3529],
         [-1.0553, -1.0553, -1.1253,  ..., -1.2829, -1.3004, -1.3179],
         [-1.0903, -1.0903, -1.1604,  ..., -1.3179, -1.335

# Task
Load a pre-trained Faster R-CNN model, specifically `fasterrcnn_resnet50_fpn_v2` from `torchvision.models.detection`, and configure its classification head to accommodate `num_super_classes` (20) plus one background class.

## Load Pre-trained Faster R-CNN Model

### Subtask:
Import `fasterrcnn_resnet50_fpn_v2` from `torchvision.models.detection`, instantiate it with pre-trained weights, and adjust the classification head for the number of `num_super_classes` (20 + 1 for background).


**Reasoning**:
To load the pre-trained Faster R-CNN model and modify its classification head, I need to import the model, its weights, and `torch.nn` for the new classification layers, then adjust the model's box predictor to match the desired number of super classes plus one for the background.



In [None]:
import torch
import torch.nn as nn
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights

# 1. Load the pre-trained Faster R-CNN model
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn_v2(weights=weights)

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Define the number of output classes. It's num_super_classes + 1 (for background).
# 'num_super_classes' was defined in a previous cell.
num_classes = num_super_classes + 1

# 2. Replace the pre-trained head with a new one
# Create a new classifier with the correct number of output classes
model.roi_heads.box_predictor = nn.Sequential(
    nn.Linear(in_features, num_classes), # For classification scores
    nn.Linear(in_features, num_classes * 4) # For bounding box regression, 4 for each class
)

print(f"Loaded Faster R-CNN model with ResNet50 FPN backbone using pre-trained weights.")
print(f"Original classifier input features: {in_features}")
print(f"New number of output classes (including background): {num_classes}")
print(f"Model's classification head adjusted.")

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth


100%|██████████| 167M/167M [00:00<00:00, 187MB/s]


Loaded Faster R-CNN model with ResNet50 FPN backbone using pre-trained weights.
Original classifier input features: 1024
New number of output classes (including background): 21
Model's classification head adjusted.


**Reasoning**:
The previous code incorrectly replaced `model.roi_heads.box_predictor` with an `nn.Sequential` module. To properly modify the classification head, I need to replace `model.roi_heads.box_predictor.cls_score` and `model.roi_heads.box_predictor.bbox_pred` individually, as specified in the instructions, while retaining the overall structure of the `FastRCNNPredictor` expected by the Faster R-CNN model.



In [None]:
import torch
import torch.nn as nn
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights

# 1. Load the pre-trained Faster R-CNN model
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
model = fasterrcnn_resnet50_fpn_v2(weights=weights)

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Define the number of output classes. It's num_super_classes + 1 (for background).
# 'num_super_classes' was defined in a previous cell.
num_classes = num_super_classes + 1

# 2. Replace the pre-trained head with a new one
# Create new classifiers for classification scores and bounding box regression
model.roi_heads.box_predictor.cls_score = nn.Linear(in_features, num_classes)
model.roi_heads.box_predictor.bbox_pred = nn.Linear(in_features, num_classes * 4)

print(f"Loaded Faster R-CNN model with ResNet50 FPN backbone using pre-trained weights.")
print(f"Original classifier input features: {in_features}")
print(f"New number of output classes (including background): {num_classes}")
print(f"Model's classification head adjusted.")

Loaded Faster R-CNN model with ResNet50 FPN backbone using pre-trained weights.
Original classifier input features: 1024
New number of output classes (including background): 21
Model's classification head adjusted.


## Define Optimizer and Learning Rate Scheduler

### Subtask:
Define an optimizer (e.g., SGD) and a learning rate scheduler (e.g., StepLR or CosineAnnealingLR) for training the model's parameters.


**Reasoning**:
Following the instructions, I will import `torch.optim` and `torch.optim.lr_scheduler`, then define an SGD optimizer and a StepLR scheduler for the model's parameters.



In [None]:
import torch.optim as optim
from torch.optim import lr_scheduler

# 1. Instantiate an optimizer (e.g., SGD)
learning_rate = 0.005
momentum = 0.9
weight_decay = 0.0005

optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum, weight_decay=weight_decay)

print(f"Optimizer (SGD) initialized with learning_rate={learning_rate}, momentum={momentum}, weight_decay={weight_decay}.")

# 2. Instantiate a learning rate scheduler (e.g., StepLR)
step_size = 3
gamma = 0.1

scheduler = lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

print(f"Learning rate scheduler (StepLR) initialized with step_size={step_size}, gamma={gamma}.")


Optimizer (SGD) initialized with learning_rate=0.005, momentum=0.9, weight_decay=0.0005.
Learning rate scheduler (StepLR) initialized with step_size=3, gamma=0.1.


## Setup DataLoaders

### Subtask:
Create `DataLoader` instances for the training and validation splits of the `transformed_ds`. Implement a custom `collate_fn` to handle the variable-sized outputs of object detection datasets.


**Reasoning**:
First, I will define a custom `collate_fn` to correctly handle the batching of variable-sized images and their corresponding labels for object detection tasks. This function will be used by the `DataLoader` to prepare batches.



In [None]:
import torch
from torch.utils.data import DataLoader

def collate_fn(batch):
    pixel_values = []
    labels = []
    for sample in batch:
        # 'pixel_values' and 'labels' are expected to be lists of single elements from transform_sample
        pixel_values.append(sample['pixel_values'][0])
        labels.append(sample['labels'][0])

    # No need to stack pixel_values into a single tensor if the model expects a list of tensors
    # For Faster R-CNN, the model typically expects a list of image tensors and a list of target dicts.

    return pixel_values, labels

print("Defined custom `collate_fn` for handling variable-sized outputs.")

Defined custom `collate_fn` for handling variable-sized outputs.


**Reasoning**:
Now that the `collate_fn` is defined, I will create `DataLoader` instances for the training and validation splits of the `transformed_ds` using this custom function and appropriate batch sizes.



In [None]:
batch_size = 2

# Create DataLoader for the training split
train_dataloader = DataLoader(
    transformed_ds['train'],
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn
)

# Create DataLoader for the validation split
# It's good practice not to shuffle the validation set for consistent evaluation
val_dataloader = DataLoader(
    transformed_ds['validation'],
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn
)

print(f"Train DataLoader created with batch size {batch_size} and shuffle=True.")
print(f"Validation DataLoader created with batch size {batch_size} and shuffle=False.")

# Optionally, verify one batch
for images, targets in train_dataloader:
    print(f"Example train batch - Number of images: {len(images)}")
    print(f"Example train batch - Image tensor shape: {images[0].shape}")
    print(f"Example train batch - Number of targets: {len(targets)}")
    print(f"Example train batch - First target keys: {targets[0].keys()}")
    break

KeyError: 'validation'

**Reasoning**:
The previous code failed because the dataset does not have a split named 'validation'. I will change the validation DataLoader to use the 'test' split, which is a common alternative name for the validation or evaluation set in many datasets.



In [None]:
batch_size = 2

# Create DataLoader for the training split
train_dataloader = DataLoader(
    transformed_ds['train'],
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn
)

# Create DataLoader for the validation split, using 'test' split instead of 'validation'
# It's good practice not to shuffle the validation set for consistent evaluation
val_dataloader = DataLoader(
    transformed_ds['test'],
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn
)

print(f"Train DataLoader created with batch size {batch_size} and shuffle=True.")
print(f"Validation DataLoader created with batch size {batch_size} and shuffle=False.")

# Optionally, verify one batch
for images, targets in train_dataloader:
    print(f"Example train batch - Number of images: {len(images)}")
    print(f"Example train batch - Image tensor shape: {images[0].shape}")
    print(f"Example train batch - Number of targets: {len(targets)}")
    print(f"Example train batch - First target keys: {targets[0].keys()}")
    break

KeyError: 'test'

**Reasoning**:
The previous attempts to access 'validation' and 'test' splits resulted in `KeyError`s, indicating they are not present in the `transformed_ds` object. I will first inspect the available splits in `transformed_ds` to determine the correct split names or if the dataset needs to be split for validation.



In [None]:
print(f"Available splits in transformed_ds: {transformed_ds.keys()}")

# Assuming the intention is to use a validation split, if only 'train' is available,
# we will need to create one by splitting the 'train' set.
# This part will be handled in the next step based on the output of transformed_ds.keys().


Available splits in transformed_ds: dict_keys(['train'])


**Reasoning**:
Since the `transformed_ds` only contains a 'train' split, I need to split this training data into training and validation sets to create a dedicated validation split, which is essential for proper model evaluation during training.



In [None]:
train_test_split_ratio = 0.8

# Split the 'train' dataset into training and validation sets
splits = transformed_ds['train'].train_test_split(test_size=1 - train_test_split_ratio)

# Assign the new splits to the transformed_ds object (or new variables)
train_dataset = splits['train']
val_dataset = splits['test'] # 'train_test_split' uses 'test' for the split portion

print(f"Original training dataset split into:")
print(f"  Training set: {len(train_dataset)} samples")
print(f"  Validation set: {len(val_dataset)} samples")


Original training dataset split into:
  Training set: 3050 samples
  Validation set: 763 samples


**Reasoning**:
Now that the training data has been split into `train_dataset` and `val_dataset`, I will create `DataLoader` instances for both, using the previously defined `collate_fn` and an appropriate batch size. This will complete the subtask of setting up DataLoaders.



In [None]:
batch_size = 2

# Create DataLoader for the training split using the new train_dataset
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn
)

# Create DataLoader for the validation split using the new val_dataset
val_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn
)

print(f"Train DataLoader created with batch size {batch_size} for {len(train_dataset)} samples.")
print(f"Validation DataLoader created with batch size {batch_size} for {len(val_dataset)} samples.")

# Optionally, verify one batch from the training dataloader
for images, targets in train_dataloader:
    print(f"\nExample train batch verification:")
    print(f"  Number of images in batch: {len(images)}")
    print(f"  Shape of first image tensor: {images[0].shape}")
    print(f"  Number of target dictionaries in batch: {len(targets)}")
    if targets:
        print(f"  Keys in first target dictionary: {targets[0].keys()}")
    else:
        print("  No targets in the first batch (e.g., empty batch).")
    break

# Optionally, verify one batch from the validation dataloader
for images, targets in val_dataloader:
    print(f"\nExample validation batch verification:")
    print(f"  Number of images in batch: {len(images)}")
    print(f"  Shape of first image tensor: {images[0].shape}")
    print(f"  Number of target dictionaries in batch: {len(targets)}")
    if targets:
        print(f"  Keys in first target dictionary: {targets[0].keys()}")
    else:
        print("  No targets in the first batch (e.g., empty batch).")
    break

Train DataLoader created with batch size 2 for 3050 samples.
Validation DataLoader created with batch size 2 for 763 samples.


KeyError: 0

**Reasoning**:
The error `KeyError: 0` in `collate_fn` occurs because `sample['labels']` is a dictionary, not a list of dictionaries as previously assumed by the `collate_fn`. This is evident from the output of the `transformed_ds` in a previous cell, which shows `'pixel_values': tensor(...)` and `'labels': {'boxes': ...}` (i.e., unwrapped tensors and dictionaries). Therefore, the `collate_fn` needs to be updated to append `sample['pixel_values']` and `sample['labels']` directly, without attempting to access index `[0]`, to match the actual structure of the data provided by `transformed_ds`.



In [None]:
import torch
from torch.utils.data import DataLoader

def collate_fn(batch):
    pixel_values = []
    labels = []
    for sample in batch:
        # Based on the actual output of transformed_ds, 'pixel_values' is a tensor and 'labels' is a dict directly,
        # not wrapped in single-element lists. Adjusting collate_fn accordingly.
        pixel_values.append(sample['pixel_values'])
        labels.append(sample['labels'])

    # For Faster R-CNN, the model typically expects a list of image tensors and a list of target dicts.
    # So, no need to stack pixel_values into a single tensor here.

    return pixel_values, labels

print("Defined custom `collate_fn` for handling variable-sized outputs based on observed data structure.")

Defined custom `collate_fn` for handling variable-sized outputs based on observed data structure.


**Reasoning**:
Now that the `collate_fn` has been correctly redefined to match the actual output structure of `transformed_ds`, I will recreate the `DataLoader` instances for both training and validation splits. I will also include the verification loops to confirm that the `DataLoader`s are functioning as expected without errors, which will complete the subtask.



In [None]:
batch_size = 2

# Create DataLoader for the training split using the new train_dataset
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn
)

# Create DataLoader for the validation split using the new val_dataset
val_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn
)

print(f"Train DataLoader created with batch size {batch_size} for {len(train_dataset)} samples.")
print(f"Validation DataLoader created with batch size {batch_size} for {len(val_dataset)} samples.")

# Optionally, verify one batch from the training dataloader
for images, targets in train_dataloader:
    print(f"\nExample train batch verification:")
    print(f"  Number of images in batch: {len(images)}")
    print(f"  Shape of first image tensor: {images[0].shape}")
    print(f"  Number of target dictionaries in batch: {len(targets)}")
    if targets:
        print(f"  Keys in first target dictionary: {targets[0].keys()}")
    else:
        print("  No targets in the first batch (e.g., empty batch).")
    break

# Optionally, verify one batch from the validation dataloader
for images, targets in val_dataloader:
    print(f"\nExample validation batch verification:")
    print(f"  Number of images in batch: {len(images)}")
    print(f"  Shape of first image tensor: {images[0].shape}")
    print(f"  Number of target dictionaries in batch: {len(targets)}")
    if targets:
        print(f"  Keys in first target dictionary: {targets[0].keys()}")
    else:
        print("  No targets in the first batch (e.g., empty batch).")
    break

Train DataLoader created with batch size 2 for 3050 samples.
Validation DataLoader created with batch size 2 for 763 samples.

Example train batch verification:
  Number of images in batch: 1
  Shape of first image tensor: torch.Size([3, 749, 1333])
  Number of target dictionaries in batch: 1
  Keys in first target dictionary: dict_keys(['boxes', 'labels', 'image_id', 'area'])

Example validation batch verification:
  Number of images in batch: 1
  Shape of first image tensor: torch.Size([3, 800, 1066])
  Number of target dictionaries in batch: 1
  Keys in first target dictionary: dict_keys(['boxes', 'labels', 'image_id', 'area'])


## Define Training Loop

### Subtask:
Implement the training loop to iterate over epochs and batches, perform forward and backward passes, calculate and log losses, and update model weights using the optimizer and scheduler.


**Reasoning**:
I need to initialize a device for training (GPU if available, otherwise CPU) and move the model to this device to prepare for the training loop.



In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

print(f"Model moved to device: {device}")

Model moved to device: cpu


**Reasoning**:
Now that the model is on the correct device, I will implement the training loop as per the instructions, iterating through epochs and batches, performing forward and backward passes, calculating and logging losses, and updating model weights and the learning rate scheduler.



# Task
Implement the core training loop for the Faster R-CNN model over multiple epochs, including iterating through batches, moving data to the appropriate device, performing forward and backward passes, calculating and accumulating losses, updating model weights using the `optimizer`, and adjusting the learning rate with the `scheduler` at each epoch. Then, define a function to evaluate the model on the validation dataset after each training epoch to monitor performance.

## Implement Training Loop

### Subtask:
Implement the core training loop, including iterating over epochs, fetching batches from the DataLoader, performing forward and backward passes, calculating and aggregating losses, updating the model's weights using the optimizer, and adjusting the learning rate with the scheduler.


**Reasoning**:
I will implement the training loop as described in the instructions, initializing epochs, setting the model to training mode, and then iterating through batches to perform forward/backward passes, update weights, and adjust the learning rate.



In [None]:
num_epochs = 10

# Set the model to training mode
model.train()

print(f"Starting training for {num_epochs} epochs...")

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    total_loss_epoch = 0

    # Iterate over the training data batches
    for i, (images, targets) in enumerate(train_dataloader):
        # Move images and targets to the device
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Zero out the gradients
        optimizer.zero_grad()

        # Perform a forward pass
        # In training mode, Faster R-CNN returns a dictionary of losses
        loss_dict = model(images, targets)

        # Calculate total loss for the batch
        losses = sum(loss for loss in loss_dict.values())

        # Perform a backward pass
        losses.backward()

        # Update model weights
        optimizer.step()

        total_loss_epoch += losses.item()

        if i % 10 == 0: # Print loss every 10 batches
            print(f"  Batch {i}/{len(train_dataloader)}, Loss: {losses.item():.4f}")

    # Adjust the learning rate
    scheduler.step()

    # Print average loss for the epoch
    avg_loss_epoch = total_loss_epoch / len(train_dataloader)
    print(f"Epoch {epoch+1} finished. Average Loss: {avg_loss_epoch:.4f}")

print("Training complete.")

Starting training for 10 epochs...

Epoch 1/10
  Batch 0/1525, Loss: 0.8599
  Batch 10/1525, Loss: 0.2579


IndexError: index is out of bounds for dimension with size 0

**Reasoning**:
The `IndexError` occurs because some samples in a batch contain bounding boxes but no corresponding labels (e.g., if labels were filtered out during preprocessing), leading to an inconsistency in the target dictionary that the Faster R-CNN model cannot handle during training. To fix this within the training loop, I will filter out such inconsistent samples from the batch before feeding them to the model, ensuring that only samples with valid (non-empty and consistent) boxes and labels, or entirely empty targets, are processed.



In [None]:
num_epochs = 10

# Set the model to training mode
model.train()

print(f"Starting training for {num_epochs} epochs...")

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    total_loss_epoch = 0

    # Iterate over the training data batches
    for i, (images_batch, targets_batch) in enumerate(train_dataloader):
        # Filter out problematic samples in the batch where boxes are present but labels are missing/empty.
        # This ensures consistency for the Faster R-CNN model.
        filtered_images = []
        filtered_targets = []
        for img, tgt in zip(images_batch, targets_batch):
            # A sample is valid if it has labels, or if it has neither labels nor boxes.
            if tgt['labels'].numel() > 0 or (tgt['labels'].numel() == 0 and tgt['boxes'].numel() == 0):
                filtered_images.append(img)
                filtered_targets.append(tgt)
            # Samples with non-empty boxes but empty labels are skipped.

        if not filtered_images: # Skip batch if no valid samples remain after filtering
            print(f"  Batch {i}/{len(train_dataloader)} skipped (no valid targets after filtering).")
            continue

        # Move images and targets to the device
        images = [image.to(device) for image in filtered_images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in filtered_targets]

        # Zero out the gradients
        optimizer.zero_grad()

        # Perform a forward pass
        # In training mode, Faster R-CNN returns a dictionary of losses
        loss_dict = model(images, targets)

        # Calculate total loss for the batch
        losses = sum(loss for loss in loss_dict.values())

        # Perform a backward pass
        losses.backward()

        # Update model weights
        optimizer.step()

        total_loss_epoch += losses.item()

        if i % 10 == 0: # Print loss every 10 batches
            print(f"  Batch {i}/{len(train_dataloader)}, Loss: {losses.item():.4f}")

    # Adjust the learning rate
    scheduler.step()

    # Print average loss for the epoch
    # Avoid division by zero if all batches were skipped in an epoch
    if len(train_dataloader) > 0 and total_loss_epoch > 0: # Check total_loss_epoch > 0 to avoid division by zero from skipped batches
        avg_loss_epoch = total_loss_epoch / len(train_dataloader)
        print(f"Epoch {epoch+1} finished. Average Loss: {avg_loss_epoch:.4f}")
    else:
        print(f"Epoch {epoch+1} finished. No valid loss recorded.")

print("Training complete.")

Starting training for 10 epochs...

Epoch 1/10
  Batch 0/1525, Loss: 0.6503


IndexError: index 2 is out of bounds for dimension 0 with size 2

**Reasoning**:
The `IndexError` occurs because `gt_labels_in_image` (derived from `targets[image_id]['labels']`) has fewer elements than the indices `clamped_matched_idxs_in_image` tries to access. This happens when the `boxes` and `labels` tensors in the `target` dictionary passed to the model are not synchronized after filtering operations within the `transform_sample` function. Specifically, labels might be filtered further by the `class_mapping` (removing entries mapped to -1) without simultaneously filtering the corresponding bounding boxes, leading to a mismatch in tensor lengths. To resolve this, I will modify the `transform_sample` function to ensure `boxes`, `labels`, and `area` tensors are always kept in strict one-to-one correspondence throughout all filtering (geometric and class-mapping) and mapping steps.



In [None]:
import torch
import torchvision.transforms.functional as F
from PIL import Image

# Define min_size and max_size for image resizing
min_size = 800
max_size = 1333

print(f"min_size set to: {min_size}")
print(f"max_size set to: {max_size}")

IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

def transform_sample(sample):
    # Safely extract the PIL Image object, handling potential list wrapping by datasets.with_transform
    image_input = sample['image']
    if isinstance(image_input, list):
        if len(image_input) > 0:
            image = image_input[0]
        else:
            # If the image list is empty, we cannot proceed. Return empty tensors.
            print("Warning: Empty image list encountered. Returning empty tensors.")
            return {'pixel_values': torch.tensor([]), 'labels': {}}
    elif isinstance(image_input, Image.Image):
        image = image_input
    else:
        raise TypeError(f"Unexpected image type: {type(image_input)}. Expected PIL Image or list thereof.")

    original_width, original_height = image.size

    # 2. Ensure image is in PIL 'RGB' format
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # 3. Implement image resizing
    # Calculate scale factor
    min_original_size = float(min(original_width, original_height))
    max_original_size = float(max(original_width, original_height))
    scale_factor = min_size / min_original_size

    # Check if the longer side after scaling exceeds max_size
    if max_original_size * scale_factor > max_size:
        scale_factor = max_size / max_original_size

    # New dimensions
    new_width = int(original_width * scale_factor)
    new_height = int(original_height * scale_factor)

    # Resize the image
    image = F.resize(image, (new_height, new_width), interpolation=Image.BILINEAR)

    # Safely extract objects dictionary, handling potential list wrapping
    objects_input = sample.get('objects', {})
    if isinstance(objects_input, list):
        if len(objects_input) > 0:
            objects = objects_input[0]
        else:
            objects = {}
    elif isinstance(objects_input, dict):
        objects = objects_input
    else:
        objects = {}

    boxes = objects.get('bbox', [])
    labels = objects.get('category', [])

    target = {}

    if boxes and labels:
        # Convert bounding boxes to torch.Tensor
        boxes = torch.tensor(boxes, dtype=torch.float32)
        # Bounding box format: [x_min, y_min, width, height] -> [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2] # x_max
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3] # y_max

        # Scale bounding box coordinates
        boxes *= scale_factor

        # Clip bounding box values to be within the new image dimensions
        boxes[:, 0].clamp_(min=0, max=new_width)
        boxes[:, 1].clamp_(min=0, max=new_height)
        boxes[:, 2].clamp_(min=0, max=new_width)
        boxes[:, 3].clamp_(min=0, max=new_height)

        # Step 1: Filter out geometrically invalid boxes (width/height <= 0) and keep corresponding original labels and areas
        valid_bbox_mask = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        boxes_filtered_geometric = boxes[valid_bbox_mask]
        labels_filtered_geometric = [labels[i] for i, k in enumerate(valid_bbox_mask.tolist()) if k]

        areas_filtered_geometric = torch.tensor([])
        if 'area' in objects:
            original_areas = torch.tensor(objects['area'], dtype=torch.float32)
            scaled_areas = original_areas * (scale_factor ** 2)
            areas_filtered_geometric = scaled_areas[valid_bbox_mask]

        # Step 2: Apply class mapping and filter out unmapped labels, ensuring boxes and areas remain in sync
        final_boxes_list = []
        final_labels_list = []
        final_areas_list = []

        for idx, original_label in enumerate(labels_filtered_geometric):
            mapped_label = class_mapping.get(original_label, -1)
            if mapped_label != -1: # Only include if successfully mapped
                final_labels_list.append(mapped_label)
                final_boxes_list.append(boxes_filtered_geometric[idx])
                if areas_filtered_geometric.numel() > 0:
                    final_areas_list.append(areas_filtered_geometric[idx])

        if final_labels_list:
            target['boxes'] = torch.stack(final_boxes_list)
            target['labels'] = torch.tensor(final_labels_list, dtype=torch.int64)
            if final_areas_list:
                target['area'] = torch.tensor(final_areas_list, dtype=torch.float32)
            else:
                target['area'] = torch.tensor([], dtype=torch.float32) # No area if no valid labels after mapping
        else: # No valid boxes/labels after all filtering
            target['boxes'] = torch.tensor([], dtype=torch.float32).reshape(0,4)
            target['labels'] = torch.tensor([], dtype=torch.int64)
            target['area'] = torch.tensor([], dtype=torch.float32)

    else: # Case where there are no initial boxes or labels
        target['boxes'] = torch.tensor([], dtype=torch.float32).reshape(0,4)
        target['labels'] = torch.tensor([], dtype=torch.int64)
        target['area'] = torch.tensor([], dtype=torch.float32) # No area if no boxes

    # Convert image to PyTorch tensor (0.0-1.0 scaling)
    image_tensor = F.to_tensor(image)

    # Normalize the image tensor
    image_tensor = F.normalize(image_tensor, mean=IMAGENET_MEAN, std=IMAGENET_STD)

    # Add original image ID
    if 'image_id' in sample:
        image_id_val = sample['image_id']
        if isinstance(image_id_val, list) and len(image_id_val) > 0:
            target['image_id'] = torch.tensor([image_id_val[0]])
        else:
            target['image_id'] = torch.tensor([image_id_val])

    # The function should return a dictionary with the transformed data, typically 'pixel_values' for the image
    # and 'labels' for the annotation target dictionary, for consistency with Hugging Face models.
    return {'pixel_values': image_tensor, 'labels': target}

print("Redefined 'transform_sample' function to robustly handle input formats, ensure box/label synchronization, and return unwrapped tensors/dicts.")

min_size set to: 800
max_size set to: 1333
Redefined 'transform_sample' function to robustly handle input formats, ensure box/label synchronization, and return unwrapped tensors/dicts.


**Reasoning**:
The `transform_sample` function was redefined in the previous step. To apply this updated function to the dataset, I need to re-create the `transformed_ds` object using `ds.with_transform(transform_sample)`.



In [None]:
transformed_ds = ds.with_transform(transform_sample)

print("Transformation re-applied to the dataset splits using `ds.with_transform` with the updated transform_sample function.")
# Optionally, verify one sample to ensure the new structure is applied
# (Already done in previous steps, but can be re-added if needed for debugging)

Transformation re-applied to the dataset splits using `ds.with_transform` with the updated transform_sample function.


**Reasoning**:
Since the `transformed_ds` object has been re-created with the updated `transform_sample` function, I must now re-split the 'train' dataset into new training and validation sets to ensure they reflect the latest transformations. This prepares the data for proper loading into the DataLoaders.



In [None]:
train_test_split_ratio = 0.8

# Split the 'train' dataset into training and validation sets
splits = transformed_ds['train'].train_test_split(test_size=1 - train_test_split_ratio)

# Assign the new splits to the train_dataset and val_dataset variables
train_dataset = splits['train']
val_dataset = splits['test'] # 'train_test_split' uses 'test' for the split portion

print(f"Original training dataset re-split into:")
print(f"  Training set: {len(train_dataset)} samples")
print(f"  Validation set: {len(val_dataset)} samples")

Original training dataset re-split into:
  Training set: 3050 samples
  Validation set: 763 samples


**Reasoning**:
Now that the `transformed_ds` has been re-split into `train_dataset` and `val_dataset` reflecting the updated `transform_sample` function, I need to recreate the `DataLoader` instances using these new datasets. This is essential to ensure that the training and validation loops operate on the correctly processed data.



In [None]:
batch_size = 2

# Create DataLoader for the training split using the new train_dataset
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn
)

# Create DataLoader for the validation split using the new val_dataset
val_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn
)

print(f"Train DataLoader created with batch size {batch_size} for {len(train_dataset)} samples.")
print(f"Validation DataLoader created with batch size {batch_size} for {len(val_dataset)} samples.")

# Optionally, verify one batch from the training dataloader
for images, targets in train_dataloader:
    print(f"\nExample train batch verification:")
    print(f"  Number of images in batch: {len(images)}")
    print(f"  Shape of first image tensor: {images[0].shape}")
    print(f"  Number of target dictionaries in batch: {len(targets)}")
    if targets:
        print(f"  Keys in first target dictionary: {targets[0].keys()}")
    else:
        print("  No targets in the first batch (e.g., empty batch).")
    break

# Optionally, verify one batch from the validation dataloader
for images, targets in val_dataloader:
    print(f"\nExample validation batch verification:")
    print(f"  Number of images in batch: {len(images)}")
    print(f"  Shape of first image tensor: {images[0].shape}")
    print(f"  Number of target dictionaries in batch: {len(targets)}")
    if targets:
        print(f"  Keys in first target dictionary: {targets[0].keys()}")
    else:
        print("  No targets in the first batch (e.g., empty batch).")
    break

Train DataLoader created with batch size 2 for 3050 samples.
Validation DataLoader created with batch size 2 for 763 samples.


KeyError: 0

**Reasoning**:
The persistent `KeyError: 0` within `datasets.arrow_dataset.py` occurs because the `DataLoader`'s fetcher, when interacting with a dataset created via `ds.with_transform` and then `train_test_split`, attempts to use the dataset's `__getitems__` method. This method incorrectly interprets the single-sample dictionary output of `transform_sample` (which returns a `torch.Tensor` for `pixel_values` and a `dict` for `labels`) as a batch to be further unpacked, leading to indexing a dictionary with an integer key. To avoid this internal `datasets` library misinterpretation, I will switch from `ds.with_transform` to `ds.map(transform_sample, batched=False, remove_columns=ds['train'].column_names)`. This will eagerly apply the `transform_sample` function to all data, directly creating a new dataset with the desired structure for each element, thus bypassing the problematic `__getitems__` behavior during DataLoader instantiation.



In [None]:
transformed_ds = ds.map(
    transform_sample,
    batched=False,
    remove_columns=ds['train'].column_names
)

print("Transformation applied to the dataset splits using `ds.map` for eager processing.")


Map:   0%|          | 0/3813 [00:00<?, ? examples/s]

**Reasoning**:
Since the `transformed_ds` object has been re-created using `ds.map`, I must now re-split this updated dataset into new training and validation sets to ensure the `DataLoader`s are built upon the correct, eagerly transformed data. This is crucial before attempting to verify the DataLoaders again.



In [None]:
train_test_split_ratio = 0.8

# Split the 'train' dataset into training and validation sets
splits = transformed_ds['train'].train_test_split(test_size=1 - train_test_split_ratio)

# Assign the new splits to the train_dataset and val_dataset variables
train_dataset = splits['train']
val_dataset = splits['test'] # 'train_test_split' uses 'test' for the split portion

print(f"Original training dataset re-split into:")
print(f"  Training set: {len(train_dataset)} samples")
print(f"  Validation set: {len(val_dataset)} samples")

NameError: name 'transformed_ds' is not defined

**Reasoning**:
The previous attempt to re-split the `transformed_ds` failed because the `transformed_ds` variable was not defined in the kernel, likely due to an incomplete or unexecuted `ds.map` command in the previous step. I will re-execute the `ds.map` command to ensure `transformed_ds` is properly created and available for splitting.



# Task
Re-apply `ds.map` to ensure `transformed_ds` is properly created and fully populated with transformed data. Re-split `transformed_ds['train']` into new `train_dataset` and `val_dataset` variables to ensure they reflect the fully transformed data, and recreate the `train_dataloader` and `val_dataloader` instances using the newly split datasets.

## Re-apply `ds.map`

### Subtask:
Re-execute the `ds.map` command to ensure `transformed_ds` is properly created and fully populated with transformed data.


**Reasoning**:
Re-executing the `ds.map` command will ensure that the `transformed_ds` object is correctly initialized and populated with the transformed data, resolving the `NameError` from the previous step.



In [None]:
transformed_ds = ds.map(
    transform_sample,
    batched=False,
    remove_columns=ds['train'].column_names
)

print("Transformation applied to the dataset splits using `ds.map` for eager processing.")

NameError: name 'transform_sample' is not defined

**Reasoning**:
The `NameError` for `transform_sample` indicates that the function was not defined in the current kernel session. I need to re-execute the cell that defines `transform_sample`, along with its dependencies (`min_size`, `max_size`, `IMAGENET_MEAN`, `IMAGENET_STD`, and imports), before attempting to use it with `ds.map`.



In [None]:
import torch
import torchvision.transforms.functional as F
from PIL import Image

# Define min_size and max_size for image resizing
min_size = 800
max_size = 1333

print(f"min_size set to: {min_size}")
print(f"max_size set to: {max_size}")

IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

def transform_sample(sample):
    # Safely extract the PIL Image object, handling potential list wrapping by datasets.with_transform
    image_input = sample['image']
    if isinstance(image_input, list):
        if len(image_input) > 0:
            image = image_input[0]
        else:
            # If the image list is empty, we cannot proceed. Return empty tensors.
            print("Warning: Empty image list encountered. Returning empty tensors.")
            return {'pixel_values': torch.tensor([]), 'labels': {}}
    elif isinstance(image_input, Image.Image):
        image = image_input
    else:
        raise TypeError(f"Unexpected image type: {type(image_input)}. Expected PIL Image or list thereof.")

    original_width, original_height = image.size

    # 2. Ensure image is in PIL 'RGB' format
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # 3. Implement image resizing
    # Calculate scale factor
    min_original_size = float(min(original_width, original_height))
    max_original_size = float(max(original_width, original_height))
    scale_factor = min_size / min_original_size

    # Check if the longer side after scaling exceeds max_size
    if max_original_size * scale_factor > max_size:
        scale_factor = max_size / max_original_size

    # New dimensions
    new_width = int(original_width * scale_factor)
    new_height = int(original_height * scale_factor)

    # Resize the image
    image = F.resize(image, (new_height, new_width), interpolation=Image.BILINEAR)

    # Safely extract objects dictionary, handling potential list wrapping
    objects_input = sample.get('objects', {})
    if isinstance(objects_input, list):
        if len(objects_input) > 0:
            objects = objects_input[0]
        else:
            objects = {}
    elif isinstance(objects_input, dict):
        objects = objects_input
    else:
        objects = {}

    boxes = objects.get('bbox', [])
    labels = objects.get('category', [])

    target = {}

    if boxes and labels:
        # Convert bounding boxes to torch.Tensor
        boxes = torch.tensor(boxes, dtype=torch.float32)
        # Bounding box format: [x_min, y_min, width, height] -> [x_min, y_min, x_max, y_max]
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2] # x_max
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3] # y_max

        # Scale bounding box coordinates
        boxes *= scale_factor

        # Clip bounding box values to be within the new image dimensions
        boxes[:, 0].clamp_(min=0, max=new_width)
        boxes[:, 1].clamp_(min=0, max=new_height)
        boxes[:, 2].clamp_(min=0, max=new_width)
        boxes[:, 3].clamp_(min=0, max=new_height)

        # Step 1: Filter out geometrically invalid boxes (width/height <= 0) and keep corresponding original labels and areas
        valid_bbox_mask = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        boxes_filtered_geometric = boxes[valid_bbox_mask]
        labels_filtered_geometric = [labels[i] for i, k in enumerate(valid_bbox_mask.tolist()) if k]

        areas_filtered_geometric = torch.tensor([])
        if 'area' in objects:
            original_areas = torch.tensor(objects['area'], dtype=torch.float32)
            scaled_areas = original_areas * (scale_factor ** 2)
            areas_filtered_geometric = scaled_areas[valid_bbox_mask]

        # Step 2: Apply class mapping and filter out unmapped labels, ensuring boxes and areas remain in sync
        final_boxes_list = []
        final_labels_list = []
        final_areas_list = []

        for idx, original_label in enumerate(labels_filtered_geometric):
            mapped_label = class_mapping.get(original_label, -1)
            if mapped_label != -1: # Only include if successfully mapped
                final_labels_list.append(mapped_label)
                final_boxes_list.append(boxes_filtered_geometric[idx])
                if areas_filtered_geometric.numel() > 0:
                    final_areas_list.append(areas_filtered_geometric[idx])

        if final_labels_list:
            target['boxes'] = torch.stack(final_boxes_list)
            target['labels'] = torch.tensor(final_labels_list, dtype=torch.int64)
            if final_areas_list:
                target['area'] = torch.tensor(final_areas_list, dtype=torch.float32)
            else:
                target['area'] = torch.tensor([], dtype=torch.float32) # No area if no valid labels after mapping
        else: # No valid boxes/labels after all filtering
            target['boxes'] = torch.tensor([], dtype=torch.float32).reshape(0,4)
            target['labels'] = torch.tensor([], dtype=torch.int64)
            target['area'] = torch.tensor([], dtype=torch.float32) # No area if no boxes

    else: # Case where there are no initial boxes or labels
        target['boxes'] = torch.tensor([], dtype=torch.float32).reshape(0,4)
        target['labels'] = torch.tensor([], dtype=torch.int64)
        target['area'] = torch.tensor([], dtype=torch.float32) # No area if no boxes

    # Convert image to PyTorch tensor (0.0-1.0 scaling)
    image_tensor = F.to_tensor(image)

    # Normalize the image tensor
    image_tensor = F.normalize(image_tensor, mean=IMAGENET_MEAN, std=IMAGENET_STD)

    # Add original image ID
    if 'image_id' in sample:
        image_id_val = sample['image_id']
        if isinstance(image_id_val, list) and len(image_id_val) > 0:
            target['image_id'] = torch.tensor([image_id_val[0]])
        else:
            target['image_id'] = torch.tensor([image_id_val])

    # The function should return a dictionary with the transformed data, typically 'pixel_values' for the image
    # and 'labels' for the annotation target dictionary, for consistency with Hugging Face models.
    return {'pixel_values': image_tensor, 'labels': target}

print("Redefined 'transform_sample' function to robustly handle input formats, ensure box/label synchronization, and return unwrapped tensors/dicts.")

transformed_ds = ds.map(
    transform_sample,
    batched=False,
    remove_columns=ds['train'].column_names
)

print("Transformation applied to the dataset splits using `ds.map` for eager processing.")

min_size set to: 800
max_size set to: 1333
Redefined 'transform_sample' function to robustly handle input formats, ensure box/label synchronization, and return unwrapped tensors/dicts.


Map:   0%|          | 0/3813 [00:00<?, ? examples/s]

NameError: name 'class_mapping' is not defined

**Reasoning**:
The `NameError: name 'class_mapping' is not defined` indicates that the `class_mapping` dictionary was not available in the kernel's global scope when `transform_sample` was executed. I need to re-execute the cell that defines `class_mapping` first, then redefine `transform_sample`, and finally re-apply `ds.map`.



In [None]:
num_super_classes = 20
class_mapping = {}

# Ensure sorted_unique_categories is available from the previous execution
if 'sorted_unique_categories' not in locals() or not sorted_unique_categories:
    print("Error: 'sorted_unique_categories' not found. Please run the previous cell.")
else:
    num_granular_categories = len(sorted_unique_categories)
    if num_granular_categories == 0:
        print("No unique categories found to map.")
    else:
        # Calculate how many granular categories fall into each super class
        # We distribute them as evenly as possible
        categories_per_super_class = num_granular_categories // num_super_classes
        remainder = num_granular_categories % num_super_classes

        current_super_class_id = 0
        current_category_index = 0

        for i in range(num_super_classes):
            # Determine how many categories to assign to this super class
            count_for_this_super_class = categories_per_super_class
            if i < remainder:
                count_for_this_super_class += 1

            # Assign categories to the current super class
            for _ in range(count_for_this_super_class):
                if current_category_index < num_granular_categories:
                    original_cat_id = sorted_unique_categories[current_category_index]
                    class_mapping[original_cat_id] = current_super_class_id
                    current_category_index += 1
                else:
                    break # Should not happen if logic is correct

            if current_category_index < num_granular_categories: # Only increment if there are more categories to map
                current_super_class_id += 1

        print(f"Successfully created a mapping from {num_granular_categories} granular categories to {len(set(class_mapping.values()))} super classes.")
        print("Example mappings (first 5 and last 5 entries, if available):")
        mapped_items = list(class_mapping.items())
        for i, (original_id, super_id) in enumerate(mapped_items):
            if i < 5 or i >= len(mapped_items) - 5:
                print(f"  Original ID {original_id} -> Super Class ID {super_id}")
            elif i == 5 and len(mapped_items) > 10: # Only print ellipsis if there are enough items in between
                print("  ...")

Error: 'sorted_unique_categories' not found. Please run the previous cell.


**Reasoning**:
The previous execution failed because `sorted_unique_categories` was not defined. I need to re-execute the cell that identifies unique categories from the dataset to ensure `sorted_unique_categories` is populated before `class_mapping` can be created.



In [None]:
import random

# Initialize an empty set to store unique categories
unique_categories = set()

# Iterate through a portion of the dataset to collect unique categories
# Adjust the number of samples if the dataset is very large or very small
num_samples_to_check = min(len(ds['train']), 500) # Check up to 500 samples or all if less

print(f"Checking {num_samples_to_check} samples for unique categories...")

for i in range(num_samples_to_check):
    sample_idx = random.randint(0, len(ds['train']) - 1) # Get a random index
    sample = ds['train'][sample_idx]

    if 'objects' in sample and sample['objects'] and 'category' in sample['objects']:
        for category_id in sample['objects']['category']:
            unique_categories.add(category_id)

# Convert to a sorted list for better readability
sorted_unique_categories = sorted(list(unique_categories))
print(f"Found {len(sorted_unique_categories)} unique categories: {sorted_unique_categories}")

# It's helpful to also see the mapping from integer IDs to string labels if available.
# Assuming the dataset has a feature `label_names` for categories, let's try to access it.
if 'objects' in ds['train'].features and 'category' in ds['train'].features['objects']:
    if hasattr(ds['train'].features['objects']['category'], 'names'):
        category_names = ds['train'].features['objects']['category'].names
        print("\nCategory ID to Name Mapping:")
        for cat_id in sorted_unique_categories:
            if cat_id < len(category_names):
                print(f"  ID {cat_id}: {category_names[cat_id]}")
            else:
                print(f"  ID {cat_id}: (Name not found)")
    else:
        print("\nCategory names attribute not found in features.")
else:
    print("\n'objects' or 'category' feature not found in the dataset's features for direct name access.")

Checking 500 samples for unique categories...
Found 265 unique categories: [0, 1, 2, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15, 16, 18, 21, 22, 25, 27, 28, 30, 31, 32, 36, 37, 38, 39, 41, 43, 44, 45, 46, 47, 48, 49, 50, 53, 54, 55, 56, 59, 61, 62, 63, 64, 66, 67, 70, 71, 72, 73, 75, 76, 78, 80, 82, 84, 85, 86, 87, 88, 89, 90, 91, 93, 94, 96, 97, 98, 99, 100, 102, 103, 104, 106, 108, 109, 110, 112, 114, 115, 118, 121, 122, 124, 126, 128, 131, 133, 135, 137, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 158, 159, 161, 163, 164, 165, 166, 167, 173, 174, 175, 179, 180, 181, 182, 183, 184, 185, 187, 188, 189, 190, 191, 192, 194, 196, 197, 200, 201, 203, 205, 206, 207, 208, 209, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 224, 225, 226, 229, 232, 233, 234, 235, 236, 238, 241, 242, 243, 244, 246, 247, 248, 250, 253, 255, 256, 258, 259, 260, 262, 263, 264, 265, 266, 267, 272, 273, 276, 277, 278, 280, 283, 284, 285, 286, 287, 289, 292, 294, 2

**Reasoning**:
Now that `sorted_unique_categories` is populated, I will re-execute the cell that defines `class_mapping` using the available `sorted_unique_categories` and `num_super_classes` variables.



In [None]:
num_super_classes = 20
class_mapping = {}

# Ensure sorted_unique_categories is available from the previous execution
if 'sorted_unique_categories' not in locals() or not sorted_unique_categories:
    print("Error: 'sorted_unique_categories' not found. Please run the previous cell.")
else:
    num_granular_categories = len(sorted_unique_categories)
    if num_granular_categories == 0:
        print("No unique categories found to map.")
    else:
        # Calculate how many granular categories fall into each super class
        # We distribute them as evenly as possible
        categories_per_super_class = num_granular_categories // num_super_classes
        remainder = num_granular_categories % num_super_classes

        current_super_class_id = 0
        current_category_index = 0

        for i in range(num_super_classes):
            # Determine how many categories to assign to this super class
            count_for_this_super_class = categories_per_super_class
            if i < remainder:
                count_for_this_super_class += 1

            # Assign categories to the current super class
            for _ in range(count_for_this_super_class):
                if current_category_index < num_granular_categories:
                    original_cat_id = sorted_unique_categories[current_category_index]
                    class_mapping[original_cat_id] = current_super_class_id
                    current_category_index += 1
                else:
                    break # Should not happen if logic is correct

            if current_category_index < num_granular_categories: # Only increment if there are more categories to map
                current_super_class_id += 1

        print(f"Successfully created a mapping from {num_granular_categories} granular categories to {len(set(class_mapping.values()))} super classes.")
        print("Example mappings (first 5 and last 5 entries, if available):")
        mapped_items = list(class_mapping.items())
        for i, (original_id, super_id) in enumerate(mapped_items):
            if i < 5 or i >= len(mapped_items) - 5:
                print(f"  Original ID {original_id} -> Super Class ID {super_id}")
            elif i == 5 and len(mapped_items) > 10: # Only print ellipsis if there are enough items in between
                print("  ...")


Successfully created a mapping from 265 granular categories to 20 super classes.
Example mappings (first 5 and last 5 entries, if available):
  Original ID 0 -> Super Class ID 0
  Original ID 1 -> Super Class ID 0
  Original ID 2 -> Super Class ID 0
  Original ID 4 -> Super Class ID 0
  Original ID 5 -> Super Class ID 0
  ...
  Original ID 382 -> Super Class ID 19
  Original ID 385 -> Super Class ID 19
  Original ID 386 -> Super Class ID 19
  Original ID 387 -> Super Class ID 19
  Original ID 388 -> Super Class ID 19
