In [2]:
!pip install -U albumentations



In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset paths used further down in
# this notebook all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import torch
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import numpy as np
import os

In [5]:
# Location of the COCO val2017 instance-annotation JSON on the mounted Drive.
# NOTE(review): the parent folder is itself named "instances_val2017.json" —
# presumably a side effect of how the file was uploaded/extracted; confirm.
json_path = '/content/drive/MyDrive/Colab_Notebooks/instances_val2017.json/instances_val2017.json'
# Directory that holds the val2017 image files.
images_dir = '/content/drive/MyDrive/Colab_Notebooks/val2017/'

In [10]:
import json

In [None]:
# Augmentation + preprocessing pipeline (transforms run in the order listed).
# Bounding boxes are transformed together with the image; they are declared in
# 'pascal_voc' format and their class ids travel in the 'labels' field.
transform = A.Compose(
    [
        # Bring every image to a fixed 416x416 resolution
        A.Resize(416, 416),
        # Brightness/contrast jitter, applied with probability 0.2
        A.RandomBrightnessContrast(p=0.2),
        # Gaussian blur, applied with probability 0.2
        A.GaussianBlur(p=0.2),
        # Left-right mirror, applied with probability 0.5
        A.HorizontalFlip(p=0.5),
        # Random rotation (limit=20), applied with probability 0.5
        A.Rotate(limit=20, p=0.5),
        # Colour jitter, applied with probability 0.3
        A.ColorJitter(p=0.3),
        # Grayscale conversion, applied with probability 0.1
        A.ToGray(p=0.1),
        # Per-channel normalization; these mean/std values match the commonly
        # used ImageNet statistics
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        # Final step: hand back a PyTorch tensor instead of a NumPy array
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
)

# Load the COCO annotation file and build the dataset.
# The Drive locations are already declared once as `json_path` / `images_dir`
# in the configuration cell; reuse them here instead of repeating the literals
# so the two copies cannot drift apart.
annotations_file = json_path
img_dir = images_dir

# Parse the COCO-style annotation JSON. The encoding is given explicitly so
# decoding does not depend on the platform's default text encoding.
with open(annotations_file, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

# Pull out the three top-level sections of the annotation file
images = coco_data['images']            # list of image records
annotations = coco_data['annotations']  # list of annotation records
categories = coco_data['categories']    # list of category records

# Lookup table: category id -> category name
category_mapping = {category['id']: category['name'] for category in categories}

# Build the dataset. `CocoDataset` is not defined in this cell, so it must
# already exist in the kernel when this runs.
dataset = CocoDataset(images=images, annotations=annotations, category_mapping=category_mapping, img_dir=img_dir, transform=transform)

# Improved collate_fn handling dynamic batching and padding for varying image sizes
def collate_fn(batch):
    """Collate ``(image, target)`` samples into one zero-padded image batch.

    Each image is written into the top-left corner of a zero-filled canvas
    whose height and width are the largest found in the batch, so samples with
    different spatial sizes can share a single tensor. Targets are passed
    through unchanged, one entry per sample.

    Args:
        batch: Iterable of ``(image, target)`` pairs. Each image is indexed as
            ``(channels, height, width)`` and must be assignable into a
            3-channel float32 canvas.

    Returns:
        tuple: ``(images, targets)`` where ``images`` is a float32 tensor of
        shape ``(batch_size, 3, max_height, max_width)`` and ``targets`` is a
        tuple of the per-sample targets in batch order.

    Raises:
        ValueError: If ``batch`` contains no samples.
    """
    samples = list(batch)
    if not samples:
        # Fail with a clear message instead of an opaque unpacking error.
        raise ValueError("collate_fn received an empty batch")

    images, targets = zip(*samples)

    # Canvas size: the largest height / width present in this batch
    max_height = max(img.shape[1] for img in images)
    max_width = max(img.shape[2] for img in images)

    # Allocate the whole batch once and fill each slot in place; this avoids
    # creating one padded tensor per sample and copying them again when stacking.
    padded_images = torch.zeros(
        (len(images), 3, max_height, max_width), dtype=torch.float32
    )
    for index, img in enumerate(images):
        padded_images[index, :, :img.shape[1], :img.shape[2]] = img

    return padded_images, targets

# Wrap the dataset in a shuffled loader that assembles batches via collate_fn
data_loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=collate_fn,
)

# Smoke test: print a short summary for the first few batches, then stop.
# NOTE: this loop rebinds the notebook-level name `images` (previously the list
# of COCO image records) to the current image batch tensor.
for idx, (images, targets) in enumerate(data_loader):
    if idx < 5:  # number of batches to inspect
        print(f"Batch of images size: {images.size()}")
        print(f"Batch of targets length: {len(targets)}")
        print("Sample target:", targets[0])
    else:
        break

Batch of images size: torch.Size([8, 3, 416, 416])
Batch of targets length: 8
Sample target: {'boxes': tensor([[  0.0000,   0.0000, 293.3548,  25.5915],
        [ 39.5258,   0.0000, 395.4639, 416.0000],
        [ 41.4800,  81.0893, 136.4931, 240.2232],
        [330.4786, 123.0851, 382.6027, 162.3719],
        [138.6789, 380.1142, 416.0000, 416.0000]]), 'labels': tensor([ 1,  1, 43, 37,  1])}
Batch of images size: torch.Size([8, 3, 416, 416])
Batch of targets length: 8
Sample target: {'boxes': tensor([[ 75.9525,  16.8285, 245.3913, 401.9795],
        [262.9494, 173.8685, 300.1944, 220.4995]]), 'labels': tensor([25, 25])}
Batch of images size: torch.Size([8, 3, 416, 416])
Batch of targets length: 8
Sample target: {'boxes': tensor([[ 13.7363,   1.9305, 375.9309, 411.3916],
        [368.2349,   3.3629, 416.0000, 129.2714],
        [207.8752, 225.8232, 273.5533, 282.0206],
        [139.1770, 195.9559, 206.0947, 271.2096],
        [  0.7238,   3.2383, 142.1888, 408.4398]]), 'labels': tensor(