In [24]:
!pip install -U albumentations

In [28]:
import torch
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import numpy as np
import os

In [33]:
json_path =  "C:/Users/megha/Downloads/instances_val2017.json-20241115T150718Z-001/instances_val2017.json/instances_val2017.json"
images_dir = "C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017"

In [30]:
 class CocoDataset(torch.utils.data.Dataset):  # Inheriting from Dataset
    def __init__(self, image_metadata, annotation_data, category_mapping, image_dir, transform=None):
        """
        Initializes the dataset with images, annotations, and other required data.
        
        Parameters:
            image_metadata (list): A list of image metadata (e.g., file names and image IDs).
            annotation_data (list): A list of annotations (bounding boxes and category IDs).
            category_mapping (dict): A mapping of category IDs to class names.
            image_dir (str): Directory where images are stored.
            transform (callable, optional): Transformation to apply to images and annotations.
        """
        self.image_metadata = image_metadata
        self.annotation_data = annotation_data
        self.category_mapping = category_mapping
        self.image_dir = image_dir
        self.transform = transform
        self.image_id_to_annotations = self._group_annotations_by_image()

    def _group_annotations_by_image(self):
        """
        Groups annotations by image ID for efficient retrieval during data loading.

        Returns:
            dict: A dictionary mapping image IDs to their annotations.
        """
        image_id_to_annotations = {}
        for annotation in self.annotation_data:
            image_id = annotation['image_id']
            if image_id not in image_id_to_annotations:
                image_id_to_annotations[image_id] = []
            image_id_to_annotations[image_id].append(annotation)
        return image_id_to_annotations

    def __len__(self):
        """
        Returns the number of images in the dataset.

        Returns:
            int: Total number of images.
        """
        return len(self.image_metadata)

    def __getitem__(self, idx):
        """
        Retrieves an image and its corresponding annotations.

        Parameters:
            idx (int): Index of the image and its annotations.

        Returns:
            tuple: A tuple containing the image and its target annotations (bounding boxes and labels).
        """
        # Get image information
        image_info = self.image_metadata[idx]
        img_path = os.path.join(self.image_dir, image_info['file_name'])
        
        # Print the image path to check if it is correct
        print(f"Attempting to load image from: {img_path}")
        
        # Read the image
        image = cv2.imread(img_path)
        
        # Check if the image is loaded correctly
        if image is None:
            raise FileNotFoundError(f"Image not found or failed to load at path: {img_path}")
        
        # Convert the image to RGB format
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        # Retrieve annotations for the image
        image_id = image_info['id']
        annotations = self.image_id_to_annotations.get(image_id, [])
        
        # Initialize lists for bounding boxes and labels
        bounding_boxes = []
        category_labels = []
        for annotation in annotations:
            x, y, width, height = annotation['bbox']
            bounding_boxes.append([x, y, x + width, y + height])  # Convert to (x1, y1, x2, y2)
            category_labels.append(annotation['category_id'])
        
        # Convert bounding boxes and labels to numpy arrays for transformation
        bounding_boxes = np.array(bounding_boxes)
        category_labels = np.array(category_labels)
        
        # Apply transformations if provided
        if self.transform:
            transformed = self.transform(image=image, bboxes=bounding_boxes, labels=category_labels)
            image = transformed['image']
            bounding_boxes = transformed['bboxes']
            category_labels = transformed['labels']
        
        # Convert to PyTorch tensors
        bounding_boxes = torch.tensor(bounding_boxes, dtype=torch.float32)
        category_labels = torch.tensor(category_labels, dtype=torch.int64)
        target = {"boxes": bounding_boxes, "labels": category_labels}
        
        return image, target


In [31]:
import json

In [34]:
# Augmentation and Preprocessing Pipeline
transform_pipeline = A.Compose([
    A.Resize(416, 416),  # Resize images to 416x416
    A.RandomBrightnessContrast(p=0.2),  # Adjust brightness and contrast
    A.GaussianBlur(p=0.2),  # Apply Gaussian blur
    A.HorizontalFlip(p=0.5),  # Apply horizontal flip with 50% probability
    A.Rotate(limit=20, p=0.5),  # Apply random rotation with a limit of 20 degrees
    A.ColorJitter(p=0.3),  # Randomly change image color
    A.ToGray(p=0.1),  # Randomly convert some images to grayscale
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # Normalize based on ImageNet values
    ToTensorV2()  # Convert the image to PyTorch tensor
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

# Load COCO dataset (images, annotations, category_mapping) from JSON
annotations_json_path = "C:/Users/megha/Downloads/instances_val2017.json-20241115T150718Z-001/instances_val2017.json/instances_val2017.json"
 #  your file path
image_directory =  "C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017"  #  your image directory path

# Load annotations (COCO format)
with open(annotations_json_path, 'r') as f:
    coco_data = json.load(f)

# Extract images, annotations, and categories
image_list = coco_data['images']  # List of image data
annotation_list = coco_data['annotations']  # List of annotations
category_list = coco_data['categories']  # List of categories

# Create a mapping of category IDs to category names
category_id_mapping = {category['id']: category['name'] for category in category_list}

# Initialize the custom dataset
custom_dataset = CocoDataset(
    image_metadata=image_list,
    annotation_data=annotation_list,  
    category_mapping=category_id_mapping,
    image_dir=image_directory,
    transform=transform_pipeline
)

# Improved collate_fn to handle dynamic batching and padding for images of varying sizes
def dynamic_collate_fn(batch):
    img_batch, target_batch = zip(*batch)

    # Find the maximum height and width of images in the batch
    max_img_height = max([img.shape[1] for img in img_batch])  # Maximum height
    max_img_width = max([img.shape[2] for img in img_batch])   # Maximum width

    padded_images = []
    for img in img_batch:
        # Create a tensor of zeros (padding) and copy the image into the top-left corner
        padded_img = torch.zeros((3, max_img_height, max_img_width), dtype=torch.float32)
        padded_img[:, :img.shape[1], :img.shape[2]] = img
        padded_images.append(padded_img)

    # Stack the padded images into a single tensor
    img_batch = torch.stack(padded_images, dim=0)

    return img_batch, target_batch

# Set up DataLoader with the dynamic collate function
data_loader = DataLoader(custom_dataset, batch_size=8, shuffle=True, collate_fn=dynamic_collate_fn)

# Test the data loading process
for batch_idx, (images, targets) in enumerate(data_loader):
    if batch_idx >= 5:  # Limit the number of batches to test
        break
    print(f"Batch {batch_idx} - Image batch size: {images.size()}")
    print(f"Batch {batch_idx} - Number of targets: {len(targets)}")
    print(f"Sample target: {targets[0]}")


Attempting to load image from: C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017\000000380706.jpg
Attempting to load image from: C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017\000000215072.jpg
Attempting to load image from: C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017\000000570736.jpg
Attempting to load image from: C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017\000000375493.jpg
Attempting to load image from: C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017\000000373382.jpg
Attempting to load image from: C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017\000000479099.jpg
Attempting to load image from: C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017\000000430048.jpg
Attempting to load image from: C:/Users/megha/Downloads/val2017-20241115T150951Z-001/val2017\000000100723.jpg
Batch 0 - Image batch size: torch.Size([8, 3, 416, 416])
Batch 0 - Number of targets: 8
Sample target: {'boxes': tensor(