In [1]:
!pip install albumentations
!pip install torch torchvision



In [7]:
# Make Google Drive available on the Colab filesystem so the dataset files can be read.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [8]:
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import os
import numpy as np
import json

In [10]:
# Dataset locations on the mounted Drive:
#   images_dir - folder holding the image files
#   json_path  - COCO-format annotation file for those images
images_dir = '/content/drive/My Drive/val2017/'
json_path = '/content/drive/My Drive/instances_val2017/instances_val2017.json'

In [None]:
# Augmentation and preprocessing pipeline.
# The steps are applied in the order listed; bounding boxes are passed through the
# same pipeline as the image so they stay aligned with it.
augmentation_steps = [
    # Fixed 416x416 output size.
    A.Resize(416, 416),
    # Photometric jitter: brightness/contrast, applied with p=0.2.
    A.RandomBrightnessContrast(p=0.2),
    # Gaussian blur, applied with p=0.2.
    A.GaussianBlur(p=0.2),
    # Left-right mirror, applied with p=0.5.
    A.HorizontalFlip(p=0.5),
    # Rotation limited to 20 degrees, applied with p=0.5.
    A.Rotate(limit=20, p=0.5),
    # Colour jitter, applied with p=0.3.
    A.ColorJitter(p=0.3),
    # Grayscale conversion, applied with p=0.1.
    A.ToGray(p=0.1),
    # Per-channel normalisation (these values match the commonly used ImageNet statistics).
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    # Final conversion of the image to a PyTorch tensor.
    ToTensorV2(),
]

# Boxes are supplied as [x_min, y_min, x_max, y_max]; the 'labels' field travels with them.
bbox_settings = A.BboxParams(format='pascal_voc', label_fields=['labels'])

transform = A.Compose(augmentation_steps, bbox_params=bbox_settings)

# Load Dataset and file
# Reuse the paths configured earlier in the notebook (`json_path`, `images_dir`)
# instead of repeating the literals, so the dataset location has one source of truth.
ann_file = json_path
img_dir = images_dir

# Load Annotation (explicit encoding so the result does not depend on the platform default)
with open(ann_file, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

# Top-level sections of the COCO annotation file.
images = coco_data['images']            # list of image records
annotations = coco_data['annotations']  # list of annotation records
categories = coco_data['categories']    # list of category records

# Map each category id to its human-readable name.
category_mapping = {category['id']: category['name'] for category in categories}

# Define the CocoDataset class
class CocoDataset(Dataset):
    """Object-detection dataset built from COCO-style image and annotation records.

    Each item is a pair ``(image, target)``. ``target`` is a dict with
    ``"boxes"`` (float32 tensor of shape ``(N, 4)`` in
    ``[x_min, y_min, x_max, y_max]`` order) and ``"labels"`` (int64 tensor of
    shape ``(N,)`` holding the raw ``category_id`` values). An image without
    usable annotations yields ``N == 0``.

    Args:
        images: Sequence of image records; each needs ``'id'`` and ``'file_name'``.
        annotations: Sequence of annotation records; each needs ``'image_id'``,
            ``'bbox'`` (``[x, y, width, height]``) and ``'category_id'``.
        category_mapping: Mapping from category id to category name. It is stored
            on the instance but not used when building items.
        img_dir: Directory that contains the image files.
        transform: Optional callable invoked as
            ``transform(image=..., bboxes=..., labels=...)`` and expected to
            return a mapping with ``'image'``, ``'bboxes'`` and ``'labels'``.
    """

    def __init__(self, images, annotations, category_mapping, img_dir, transform=None):
        self.images = images
        self.annotations = annotations
        self.category_mapping = category_mapping
        self.img_dir = img_dir
        self.transform = transform
        # Index annotations once so item lookup does not scan the full list.
        self.image_id_to_annotations = self._group_annotations_by_image()

    def _group_annotations_by_image(self):
        """Return a dict mapping each ``image_id`` to the list of its annotations."""
        grouped = {}
        for ann in self.annotations:
            grouped.setdefault(ann['image_id'], []).append(ann)
        return grouped

    def __len__(self):
        """Return the number of image records (images without annotations included)."""
        return len(self.images)

    @staticmethod
    def _to_pascal_voc(annotations, img_width, img_height):
        """Convert COCO ``[x, y, w, h]`` boxes to clipped corner-format boxes.

        Boxes are clipped to ``[0, img_width] x [0, img_height]``; boxes that have
        no area after clipping are dropped together with their label, because
        bounding-box aware transforms reject degenerate or out-of-image boxes.

        Returns:
            ``(boxes, labels)``: a list of ``[x_min, y_min, x_max, y_max]`` lists
            and the list of matching ``category_id`` values.
        """
        max_x = float(img_width)
        max_y = float(img_height)
        boxes, labels = [], []
        for ann in annotations:
            x, y, width, height = ann['bbox']
            x_min = min(max(float(x), 0.0), max_x)
            y_min = min(max(float(y), 0.0), max_y)
            x_max = min(max(float(x) + float(width), 0.0), max_x)
            y_max = min(max(float(y) + float(height), 0.0), max_y)
            if x_max <= x_min or y_max <= y_min:
                continue  # zero-area (or fully outside) box: nothing to learn from it
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(ann['category_id'])
        return boxes, labels

    @staticmethod
    def _build_target(boxes, labels):
        """Pack boxes and labels into the target dict of tensors.

        An empty box collection becomes a ``(0, 4)`` tensor rather than ``(0,)``
        so every target has the same box layout.
        """
        box_tensor = torch.tensor(np.asarray(boxes), dtype=torch.float32)
        if box_tensor.numel() == 0:
            box_tensor = box_tensor.reshape(0, 4)
        label_tensor = torch.tensor(np.asarray(labels), dtype=torch.int64)
        return {"boxes": box_tensor, "labels": label_tensor}

    def __getitem__(self, idx):
        """Load image ``idx`` and its annotations, applying ``transform`` if set.

        Returns:
            ``(image, target)``. ``image`` is ``transformed['image']`` when a
            transform is configured, otherwise the RGB array read from disk.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be decoded.
        """
        image_info = self.images[idx]
        img_path = os.path.join(self.img_dir, image_info['file_name'])  # Use file_name for path
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image (missing or unreadable file): {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB
        img_height, img_width = image.shape[:2]

        # Get annotations
        annotations = self.image_id_to_annotations.get(image_info['id'], [])
        boxes, labels = self._to_pascal_voc(annotations, img_width, img_height)

        # Apply transformations
        if self.transform:
            transformed = self.transform(
                image=image,
                # Explicit (N, 4) layout keeps the empty case well-formed.
                bboxes=np.array(boxes, dtype=np.float64).reshape(-1, 4),
                labels=np.array(labels, dtype=np.int64),
            )
            image = transformed['image']
            boxes = transformed['bboxes']
            labels = transformed['labels']

        # Convert to PyTorch tensors
        target = self._build_target(boxes, labels)

        return image, target

# Create dataset and data loader
def collate_detection_batch(batch):
    """Regroup a list of ``(image, target)`` samples into ``(images, targets)`` tuples.

    Targets hold a different number of boxes per image, so they are kept as a
    tuple of dicts. A named function (unlike a lambda) can also be pickled if
    the loader is later configured with worker processes.
    """
    return tuple(zip(*batch))


dataset = CocoDataset(images, annotations, category_mapping, img_dir, transform=transform)
data_loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=collate_detection_batch)

# Test data loading
# Only a few batches are previewed; set to None to iterate over the whole dataset.
MAX_PREVIEW_BATCHES = 3
# The loop variables are deliberately NOT called `images`/`targets`: reusing `images`
# would overwrite the COCO image list defined above and break later re-use of it.
for batch_index, (batch_images, batch_targets) in enumerate(data_loader, start=1):
    print(f"Images batch size: {len(batch_images)}")
    print(f"Target batch size: {len(batch_targets)}")
    print("Sample target:", batch_targets[0])  # Print sample target (bounding boxes and labels)
    # Stop after printing so no extra batch is loaded just to be discarded.
    if MAX_PREVIEW_BATCHES is not None and batch_index >= MAX_PREVIEW_BATCHES:
        break

Images batch size: 16
Target batch size: 16
Sample target: {'boxes': tensor([[357.4941, 142.0979, 361.4134, 147.7481],
        [116.1947, 265.3900, 120.4049, 271.9315],
        [368.2184,  90.8651, 403.8568, 189.3316],
        [385.2670, 141.1566, 409.9744, 158.1829]]), 'labels': tensor([37, 37,  1, 43])}
Images batch size: 16
Target batch size: 16
Sample target: {'boxes': tensor([[139.0350, 107.8580, 149.7145, 156.0341],
        [141.5700, 125.9204, 200.2000, 198.8324],
        [323.1540, 238.1917, 352.8850, 317.1245],
        [137.2215,   6.0890, 332.8065, 411.8497]]), 'labels': tensor([44, 34,  1,  1])}
Images batch size: 16
Target batch size: 16
Sample target: {'boxes': tensor([[202.3723, 155.5125, 236.0897, 195.7540],
        [ 58.7940, 183.6705,  91.0632, 208.5915],
        [  7.2800, 137.4620, 404.9877, 274.8265],
        [213.6471, 278.2715, 410.2363, 412.6850],
        [363.0766,   5.5315, 409.4295,  99.1705],
        [  2.9256, 281.8985, 206.3768, 413.0620],
        [255.1985