In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install -U albumentations

Collecting albumentations
  Downloading albumentations-1.4.21-py3-none-any.whl.metadata (31 kB)
Collecting albucore==0.0.20 (from albumentations)
  Downloading albucore-0.0.20-py3-none-any.whl.metadata (5.3 kB)
Collecting simsimd>=5.9.2 (from albucore==0.0.20->albumentations)
  Downloading simsimd-6.0.5-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.7/57.7 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Downloading albumentations-1.4.21-py3-none-any.whl (227 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.9/227.9 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading albucore-0.0.20-py3-none-any.whl (12 kB)
Downloading simsimd-6.0.5-cp310-cp310-manylinux_2_28_x86_64.whl (605 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m605.1/605.1 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: simsimd, albucore, albumentations
  Attempting uni

In [3]:
import torch
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
import numpy as np
import os

In [4]:
class CocoDataset(torch.utils.data.Dataset):  # Inherit from Dataset
    def __init__(self, images, annotations, category_mapping, img_dir, transform=None):
        self.images = images
        self.annotations = annotations
        self.category_mapping = category_mapping
        self.img_dir = img_dir
        self.transform = transform
        self.image_id_to_annotations = self._group_annotations_by_image()

    def _group_annotations_by_image(self):
        image_id_to_annotations = {}
        for ann in self.annotations:
            image_id = ann['image_id']
            if image_id not in image_id_to_annotations:
                image_id_to_annotations[image_id] = []
            image_id_to_annotations[image_id].append(ann)
        return image_id_to_annotations

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_info = self.images[idx]
        img_path = os.path.join(self.img_dir, image_info['file_name'])
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB

        # Get annotations
        image_id = image_info['id']
        annotations = self.image_id_to_annotations.get(image_id, [])

        boxes = []
        labels = []
        for ann in annotations:
            x, y, width, height = ann['bbox']
            boxes.append([x, y, x + width, y + height])
            labels.append(ann['category_id'])

        # Convert boxes and labels to numpy arrays for Albumentations
        boxes = np.array(boxes)
        labels = np.array(labels)

        # Apply transformations
        if self.transform:
            transformed = self.transform(image=image, bboxes=boxes, labels=labels)
            image = transformed['image']
            boxes = transformed['bboxes']
            labels = transformed['labels']

        # Convert to PyTorch tensors
        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)
        target = {"boxes": boxes, "labels": labels}

        return image, target

In [5]:
import json

In [40]:
import os
import json
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2

annotation_file = '/content/drive/MyDrive/Colab Notebooks/instances_val2017.json'
img_dir = '/content/drive/MyDrive/Colab Notebooks/val2017/'

# Load annotations
with open(annotation_file, 'r') as f:
    coco_data = json.load(f)

images = coco_data['images']
annotations = coco_data['annotations']
categories = coco_data['categories']
category_mapping = {cat['id']: cat['name'] for cat in categories}
print(f"Loaded {len(annotations)} annotations and {len(categories)} categories.")

# Augmentation and Preprocessing Pipeline
transform = A.Compose([
    A.Resize(416, 416),
    A.RandomBrightnessContrast(p=0.2),
    A.GaussianBlur(p=0.2),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=20, p=0.5),
    A.ColorJitter(p=0.3),
    A.ToGray(p=0.1),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

# Initialize Dataset and DataLoader
dataset = CocoDataset(images, annotations, category_mapping, img_dir, transform=transform)
data_loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))

# Test DataLoader
for images_batch, targets_batch in data_loader:
    print(f"Images batch size: {len(images_batch)}")
    print(f"Target batch size: {len(targets_batch)}")
    print("Sample target:", targets_batch[0])
    break

Loaded 36781 annotations and 80 categories.
Images batch size: 16
Target batch size: 16
Sample target: {'boxes': tensor([[  0.0000,  29.7090,  21.5380, 284.8102],
        [  0.0000,   5.3859,  10.0085,  75.0518],
        [228.9655,  87.8621, 338.6541, 248.2668],
        [112.6221,  59.0153, 233.2758, 242.0753],
        [  0.0000,  48.1224, 145.0379, 303.2236],
        [ 66.8842, 182.9871, 244.2605, 416.0000],
        [ 80.4507, 173.2080, 204.8816, 284.9275],
        [270.2061, 193.3108, 362.2168, 265.6737],
        [214.7188, 156.0565, 416.0000, 416.0000],
        [ 46.7985, 285.1402, 103.2587, 317.2188],
        [  6.0799, 287.2697, 103.3571, 393.9852],
        [ 33.9609,  15.2013,  75.8408,  84.8672],
        [352.5294, 104.7198, 416.0000, 197.7580],
        [  0.0000, 323.1266,  55.7543, 390.1275],
        [174.2113,  44.9631, 200.7932,  73.0731],
        [183.3839,  49.8341, 258.7166, 176.8713],
        [354.2028, 245.4057, 390.5701, 303.6892],
        [115.1521,  77.5213, 145.1959

In [44]:
import os
import json
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
from torch.nn.functional import pad

annotation_file = '/content/drive/MyDrive/Colab Notebooks/instances_val2017.json'
img_dir = '/content/drive/MyDrive/Colab Notebooks/val2017/'

# Load annotations
with open(annotation_file, 'r') as f:
    coco_data = json.load(f)

images = coco_data['images']
annotations = coco_data['annotations']
categories = coco_data['categories']
category_mapping = {cat['id']: cat['name'] for cat in categories}
print(f"Loaded {len(annotations)} annotations and {len(categories)} categories.")

# Augmentation and Preprocessing Pipeline
transform = A.Compose([
    A.Resize(416, 416),  # Resize the image to 416x416
    A.RandomBrightnessContrast(p=0.2),
    A.GaussianBlur(p=0.2),
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=20, p=0.5),
    A.ColorJitter(p=0.3),
    A.ToGray(p=0.1),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2()  # Don't include PadIfNeeded here
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']))

# Collate function for dynamic batching
def collate_fn(batch):
    images, targets = zip(*batch)  # Unpack batch into images and targets

    # Pad images to the same size manually
    max_height = max(image.shape[1] for image in images)
    max_width = max(image.shape[2] for image in images)

    padded_images = []
    for image in images:
        _, h, w = image.shape
        pad_height = max_height - h
        pad_width = max_width - w
        # Manually pad (left, right, top, bottom) to match the largest image in the batch
        padded_image = pad(image, (0, pad_width, 0, pad_height))  # Pad left, right, top, bottom
        padded_images.append(padded_image)

    # Stack padded images into a tensor
    padded_images = torch.stack(padded_images)

    # Print padded images and targets
    print(f"Padded Images: {padded_images.shape}")

    return padded_images, targets

# Initialize Dataset and DataLoader
dataset = CocoDataset(images, annotations, category_mapping, img_dir, transform=transform)
data_loader = DataLoader(dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)

# Test DataLoader
for images_batch, targets_batch in data_loader:
    print(f"Images Batch Size: {images_batch.shape}")
    print(f"Target Batch Size: {len(targets_batch)}")
    print("Sample Target:", targets_batch[0])
    break


Loaded 36781 annotations and 80 categories.
Padded Images: torch.Size([16, 3, 416, 416])
Images Batch Size: torch.Size([16, 3, 416, 416])
Target Batch Size: 16
Sample Target: {'boxes': tensor([[159.3368, 149.9225, 365.7877, 395.6160],
        [ 53.3006, 112.9960, 125.1118, 192.1465],
        [188.1742, 109.7980, 263.5024, 167.4985],
        [113.8398, 119.4830, 125.9593, 125.9375],
        [133.1297, 119.9770, 148.9806, 126.4640],
        [123.9037, 148.2650, 136.8806, 172.1330],
        [106.9909, 140.4715, 121.0882, 148.8630],
        [393.5925, 147.5305, 407.5923, 173.0950],
        [146.3209, 145.2100, 191.0385, 171.8990],
        [ 32.1791, 142.8700,  58.3374, 174.8175],
        [ 35.5207, 145.3335,  49.3452, 152.0675],
        [  0.0000, 115.1735,  37.5082, 168.8635],
        [ 95.3683, 113.6850, 159.0737, 168.9480],
        [188.3399, 147.0365, 201.5018, 165.5485]]), 'labels': tensor([19,  1,  1,  1,  1,  1,  1,  1,  3,  3,  3,  6,  6, 84])}
