In [1]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import numpy as np
import json
from tqdm import tqdm

# Helper function to convert RGB to segment ID
def rgb_to_segment_id(rgb_image):
    """
    Decode a COCO panoptic RGB mask into a 2-D map of segment ids.

    COCO panoptic PNGs store each segment id in the three colour channels
    with red as the LEAST significant byte:

        id = R + 256 * G + 256**2 * B

    The ids produced here are the ones listed under ``segments_info[*]['id']``
    in the panoptic annotations JSON.

    Args:
        rgb_image (PIL.Image.Image or numpy.ndarray): RGB mask whose array
            form has shape (height, width, 3).

    Returns:
        numpy.ndarray: uint32 array of shape (height, width) with one segment
        id per pixel.
    """
    # Widen to uint32 first so the multiplications cannot overflow uint8.
    rgb = np.asarray(rgb_image, dtype=np.uint32)
    segment_id = rgb[:, :, 0] + \
                 rgb[:, :, 1] * 256 + \
                 rgb[:, :, 2] * 256 * 256
    return segment_id

class COCOPanopticDataset(Dataset):
    """
    Semantic-segmentation view of a COCO panoptic split.

    Each item is an ``(image, mask)`` pair. ``mask`` holds one class index per
    pixel, built from the panoptic PNG and the ``segments_info`` entries of the
    annotations JSON.

    Label convention: a pixel's class index is the ``category_id`` of the
    segment that covers it. Pixels not covered by any listed segment keep
    index 0, so index 0 always means "unlabeled" and never collides with a real
    category. ``num_classes`` must therefore be larger than the largest
    ``category_id``; the default of 201 leaves room for ids 1..200.
    """

    def __init__(self, images_dir, panoptic_dir, annotations_file, transform=None, target_transform=None, num_classes=201):
        """
        Args:
            images_dir (str): Directory with all the images (.jpg).
            panoptic_dir (str): Directory with all the panoptic segmentation masks (.png).
            annotations_file (str): Path to the COCO panoptic annotations JSON file.
            transform (callable, optional): Optional transform to be applied on an image.
            target_transform (callable, optional): Optional transform to be applied on the mask.
            num_classes (int): Number of segmentation classes, counting index 0 (unlabeled).
        """
        self.images_dir = images_dir
        self.panoptic_dir = panoptic_dir
        self.transform = transform
        self.target_transform = target_transform
        self.num_classes = num_classes

        # Load the panoptic annotations JSON
        with open(annotations_file, 'r') as f:
            self.annotations = json.load(f)

        # Create a mapping from image_id to annotation
        self.image_id_to_ann = {ann['image_id']: ann for ann in self.annotations['annotations']}

        # Extract all image entries
        self.images = self.annotations['images']

    def __len__(self):
        """Return the number of image entries listed in the annotations file."""
        return len(self.images)

    def _build_class_mask(self, segment_id, segments_info):
        """
        Turn a per-pixel segment-id map into a per-pixel class-index mask.

        Args:
            segment_id (numpy.ndarray): 2-D array of segment ids.
            segments_info (list[dict]): Entries with 'id' and 'category_id' keys.

        Returns:
            numpy.ndarray: int64 array with the same shape as ``segment_id``.

        Raises:
            ValueError: If a category_id does not fit in ``num_classes``.
        """
        # Zero-initialised: anything not claimed by a segment stays "unlabeled".
        mask = np.zeros(segment_id.shape, dtype=np.int64)
        for segment in segments_info:
            cat_id = segment['category_id']
            if not 0 < cat_id < self.num_classes:
                raise ValueError(
                    f"category_id {cat_id} is outside the valid range "
                    f"1..{self.num_classes - 1} for num_classes={self.num_classes}"
                )
            # Use the category id itself as the label so that index 0 stays
            # reserved for unlabeled pixels.
            mask[segment_id == segment['id']] = cat_id
        return mask

    def __getitem__(self, idx):
        """
        Load one sample.

        Args:
            idx (int): Position in ``self.images``.

        Returns:
            tuple: ``(image, mask)`` after the optional transforms. Without
            transforms, ``image`` is an RGB PIL image and ``mask`` is an int64
            numpy array of shape (height, width).

        Raises:
            ValueError: If the image has no annotation, if the panoptic mask
                and the image differ in size, or if a category id does not fit
                in ``num_classes``.
        """
        # Get image information
        img_info = self.images[idx]
        img_id = img_info['id']

        # Get corresponding panoptic annotation
        ann = self.image_id_to_ann.get(img_id)
        if ann is None:
            raise ValueError(f"No annotation found for image_id {img_id}")

        img_path = os.path.join(self.images_dir, img_info['file_name'])
        panoptic_path = os.path.join(self.panoptic_dir, ann['file_name'])

        # convert() returns an independent, fully loaded copy, so the file
        # handles can be released as soon as the with-blocks exit.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')
        with Image.open(panoptic_path) as panoptic_file:
            segment_id = rgb_to_segment_id(panoptic_file.convert('RGB'))

        # PIL reports (width, height); numpy shapes are (height, width).
        expected_shape = (image.size[1], image.size[0])
        if tuple(segment_id.shape) != expected_shape:
            raise ValueError(
                f"Panoptic mask {panoptic_path} has shape {tuple(segment_id.shape)} "
                f"but image {img_path} has shape {expected_shape}"
            )

        # Assign class indices based on segments_info
        mask = self._build_class_mask(segment_id, ann['segments_info'])

        # Apply transforms
        if self.transform:
            image = self.transform(image)

        if self.target_transform:
            mask = self.target_transform(mask)

        return image, mask


In [2]:
from torchvision import transforms

# Target spatial size shared by the image and mask pipelines
H = W = 256  # Adjust based on your model's requirements

# Image pipeline: resize to H x W, convert to a tensor, then normalise each
# channel with the ImageNet statistics.
image_transforms = transforms.Compose(
    [
        transforms.Resize(size=(H, W)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],  # ImageNet mean
            std=[0.229, 0.224, 0.225],   # ImageNet std
        ),
    ]
)

# Mask transformations
def mask_transform(mask):
    """
    Transforms the mask by resizing it to (H, W) and converting it to a tensor.

    Args:
        mask (numpy.ndarray): 2-D integer segmentation mask of class indices.
            Values must fit in a signed 32-bit integer.

    Returns:
        torch.Tensor: int64 tensor of shape (H, W) holding the resized mask.
    """
    # PIL has no image mode for 64-bit integers, so Image.fromarray raises
    # TypeError on an int64 array. 32-bit integers map to PIL mode "I".
    mask = Image.fromarray(np.asarray(mask).astype(np.int32))
    mask = transforms.Resize((H, W), interpolation=Image.NEAREST)(mask)  # Resize with nearest neighbor to preserve labels
    # Back to int64, the dtype expected for class-index targets.
    mask = np.array(mask, dtype=np.int64)
    return torch.from_numpy(mask)


In [3]:
# Paths: everything is derived from one dataset root. Set the
# COCO_PANOPTIC_ROOT environment variable to point at your copy of the data;
# the fallback below is the original machine-specific location.
data_root = os.environ.get(
    'COCO_PANOPTIC_ROOT',
    'C:/Users/gades/MLProject/Datasets/COCOPanoptic/COCOPanoptic',
)
images_dir = f'{data_root}/images/train2017'  # Directory containing .jpg images
panoptic_dir = f'{data_root}/panoptic_train2017'  # Directory containing .png masks
annotations_file = f'{data_root}/annotations/panoptic_train2017.json'  # Path to panoptic_train2017.json

# Initialize the dataset
dataset = COCOPanopticDataset(
    images_dir=images_dir,
    panoptic_dir=panoptic_dir,
    annotations_file=annotations_file,
    transform=image_transforms,
    target_transform=mask_transform,
    num_classes=201
)


# Initialize the DataLoader
batch_size = 8  # Adjust based on your GPU memory
# On Windows, DataLoader workers are spawned as fresh processes that must
# re-import the dataset class. A class defined in a notebook cell lives in
# __main__ and cannot be resolved by those workers, so load in the main
# process there. Move the dataset into a .py module to use workers on Windows.
num_workers = 0 if os.name == 'nt' else 4
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
