# Visualize dataloader

This notebook creates the dataloader and lets the user inspect its items by clicking on pixels of the displayed image/mask pair to print the category assigned to them.

In [76]:
import os
import cv2
import torch
import json
import time
import numpy as np
from tqdm import tqdm  # Import tqdm for progress bar

from torch.utils.data import Dataset
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

## Create Dataset and Dataloader

In [87]:
class COCOSegmentationDataset(Dataset):
    """Dataset of PNG images paired with same-named PNG segmentation masks.

    Every item is zero-padded (centred) onto a fixed-size canvas so that items
    of different sizes can be stacked into one batch. Image and mask values are
    both divided by 255, so a mask pixel stored as integer ``k`` is returned
    as ``k / 255``.
    """

    def __init__(self, image_dir, mask_dir, transform=None, max_items=100, image_size=(650, 700)):
        """
        Args:
            image_dir (str): Path to the directory containing images.
            mask_dir (str): Path to the directory containing instance masks.
            transform (callable, optional): Optional transform applied to the
                image tensor (never to the mask).
            max_items (int, optional): Keep only the first ``max_items`` files
                (sorted by name). ``None`` keeps every file.
            image_size (tuple): ``(height, width)`` of the padded output.
        """
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.image_size_h, self.image_size_w = image_size
        self.image_files = sorted(f for f in os.listdir(image_dir) if f.endswith(".png"))[:max_items]
        self.mask_files = sorted(f for f in os.listdir(mask_dir) if f.endswith(".png"))[:max_items]

    def __len__(self):
        """
        Return the number of files in the image dataset (each image corresponds to one mask)
        """
        return len(self.image_files)

    @staticmethod
    def _read_scaled(path):
        """Read the file at ``path`` as stored on disk and divide its values by 255."""
        pixels = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        if pixels is None:
            # cv2.imread signals failure by returning None instead of raising
            raise FileNotFoundError(f"Could not read image file: {path}")
        return pixels / 255

    @staticmethod
    def _pad_to_size(array, target_h, target_w):
        """Zero-pad the two leading (H, W) axes so ``array`` sits centred on a target_h x target_w canvas."""
        height, width = array.shape[:2]
        if height > target_h or width > target_w:
            raise ValueError(
                f"Input of size {height}x{width} does not fit into the "
                f"{target_h}x{target_w} canvas"
            )
        top = (target_h - height) // 2
        left = (target_w - width) // 2
        pad_width = [(top, target_h - height - top), (left, target_w - width - left)]
        pad_width += [(0, 0)] * (array.ndim - 2)  # never pad the channel axis
        return np.pad(array, pad_width, mode='constant', constant_values=0)

    def __getitem__(self, idx):
        """Load, scale and pad the image/mask pair at position ``idx``.

        Returns:
            tuple: ``(image_tensor, mask_tensor)``, float32 tensors of shape
            ``(C, H, W)`` and ``(H, W)`` respectively.

        Raises:
            FileNotFoundError: If the image or its mask cannot be read.
            ValueError: If the mask is not single-channel, or if either array
                is larger than the target canvas.
        """
        file_name = self.image_files[idx]  # image and mask share the same filename
        image_path = os.path.join(self.image_dir, file_name)
        mask_path = os.path.join(self.mask_dir, file_name)

        image = self._read_scaled(image_path)
        if image.ndim == 2:
            # Grayscale file: replicate the single channel so every item has a channel axis
            image = np.stack([image] * 3, axis=-1)
        image_padded = self._pad_to_size(image, self.image_size_h, self.image_size_w)

        mask = self._read_scaled(mask_path)
        if mask.ndim != 2:
            raise ValueError(f"Expected a single-channel mask, got shape {mask.shape}: {mask_path}")
        # Pad (rather than interpolate) so the stored mask values are never blended
        mask_padded = self._pad_to_size(mask, self.image_size_h, self.image_size_w)

        # Mask values are original_value / 255 at this point
        mask_tensor = torch.tensor(mask_padded, dtype=torch.float32)

        # In tensors, channels must be the first dimension
        image_tensor = torch.tensor(image_padded, dtype=torch.float32).permute(2, 0, 1)
        if self.transform is not None:
            image_tensor = self.transform(image_tensor)

        return image_tensor, mask_tensor

In [88]:
# Root of the COCO data on disk; set the COCO_ROOT environment variable to
# point the notebook at another location without editing the cell.
COCO_ROOT = os.environ.get("COCO_ROOT", "/home/maver02/Development/Datasets/COCO")

# Preprocessed images and their masks, split into validation and training sets
image_val_dir = f"{COCO_ROOT}/preprocess_coco_2_v1/val/images"
image_train_dir = f"{COCO_ROOT}/preprocess_coco_2_v1/train/images"

mask_val_dir = f"{COCO_ROOT}/preprocess_coco_2_v1/val/masks"
mask_train_dir = f"{COCO_ROOT}/preprocess_coco_2_v1/train/masks"

# Annotation files (these are JSON files despite the ``_dir`` suffix)
instances_val_dir = f"{COCO_ROOT}/annotations/instances_val2017.json"
instances_train_dir = f"{COCO_ROOT}/annotations/instances_train2017.json"  # previously pointed at the val file

In [89]:
# Only the validation split is loaded; it also stands in for the training
# data for now because it is smaller.
test_data = COCOSegmentationDataset(image_dir=image_val_dir, mask_dir=mask_val_dir)
train_data = test_data

# Small batches to avoid running out of memory
batch_size = 2
test_dataloader = DataLoader(dataset=test_data, shuffle=True, batch_size=batch_size)
train_dataloader = DataLoader(dataset=train_data, shuffle=True, batch_size=batch_size)

## Visualise data in Dataloader

In [90]:
# Read the instance annotations of the validation split
with open(instances_val_dir, 'r') as annotations_file:
    val_instances_json = json.load(annotations_file)

# Lookup table from category id (the pixel value stored in the masks) to the
# category name; id 0 is labelled 'unknown'.
categories_dict = {0: 'unknown'}
categories_dict.update(
    (entry['id'], entry['name']) for entry in val_instances_json['categories']
)

In [95]:
# Draw one batch from a fresh iterator over the dataloader
batch = next(iter(test_dataloader))
test_images, test_masks = batch

# Keep only the first sample of the batch; the image goes from
# channels-first (C, H, W) to channels-last (H, W, C) for OpenCV
image_np = test_images[0].permute(1, 2, 0).numpy()
mask_np = test_masks[0].numpy()

# Replicate the single-channel mask into 3 channels so it can sit next to the image
mask_np_3ch = cv2.cvtColor(mask_np, cv2.COLOR_GRAY2BGR)

# Image on the left, mask on the right
img_combined = np.concatenate((image_np, mask_np_3ch), axis=1)

# Mouse callback function
def get_pixel_value(event, x, y, flags, param):
    """Print the mask value and category name of the pixel that was left-clicked.

    The displayed picture is the image and its mask side by side, so a click
    on either half is mapped to the same mask position; clicking the image
    therefore reports the category of the object shown there.

    Args:
        event: OpenCV mouse event code; only left-button presses are handled.
        x, y: Click position (column, row) inside the displayed picture.
        flags, param: Unused; required by the OpenCV callback signature.
    """
    if event != cv2.EVENT_LBUTTONDOWN:  # only react to a left mouse button click
        return

    mask_h, mask_w = mask_np.shape
    if not (0 <= y < mask_h and 0 <= x < img_combined.shape[1]):
        return  # click outside of the displayed data

    # Arrays are indexed (row, column), i.e. (y, x); fold the x coordinate
    # back onto the mask so both halves address the same pixel.
    pixel_value = mask_np[y, x % mask_w]

    # The mask stores category_id / 255 as float; round instead of truncating,
    # otherwise floating point error can yield category_id - 1.
    category_id = int(round(float(pixel_value) * 255))

    # Not every id has a name; fall back instead of raising inside the GUI callback
    category_name = categories_dict.get(category_id, 'unknown')
    print(f"Pixel value: {pixel_value}. Category: {category_name}") # print the category assigned to that pixel value

# Ensure the window is created before setting the callback
cv2.namedWindow("Image")

try:
    # Set the mouse callback function
    cv2.setMouseCallback("Image", get_pixel_value)

    # Show result
    cv2.imshow("Image", img_combined)
    cv2.waitKey(15000) # set timer (15 s), close earlier by pressing any key
finally:
    # Always tear the window down, even if showing fails or the cell is interrupted
    cv2.destroyAllWindows()

Pixel value: 0.0. Category: unknown
Pixel value: 0.003921568859368563. Category: person
Pixel value: 0.003921568859368563. Category: person
Pixel value: 0.1725490242242813. Category: bottle
Pixel value: 0.1725490242242813. Category: bottle
Pixel value: 0.003921568859368563. Category: person
