In [3]:
import random
import os
import cv2
import numpy as np
from pathlib import Path
from PIL import Image
import torch
from tqdm import tqdm
import xml.etree.ElementTree as ET

In [2]:
example_background = torch.load('/home/gridsan/manderson/ovdsat/run/coop_prototypes/boxes/no_bg/dior_N5-1/prototypes_remoteclip-14.pt')

In [10]:
example_background['label_names'][:10]

['bg_class_1',
 'bg_class_2',
 'bg_class_3',
 'bg_class_4',
 'bg_class_5',
 'bg_class_6',
 'bg_class_7',
 'bg_class_8',
 'bg_class_9',
 'bg_class_10']

In [7]:
example_background['prototypes'].shape

torch.Size([200, 1024])

## Generate crops for few shot training sets
* Run coco_to_seg first to create images with masks (if not done already!)
* Crop images for each N shot and M split and save to separate folder
* Extract 10 random background crops per image and save in separate folder

In [7]:
def apply_rectangular_mask_and_crop(image: Image.Image, mask: Image.Image) -> Image.Image:
    """Crops the image to the rectangular mask bounding box only (no masking)."""
    mask_array = np.array(mask.convert("L"))

    # Get non-zero mask coordinates
    ys, xs = np.where(mask_array > 0)
    if xs.size == 0 or ys.size == 0:
        return Image.new("RGB", (1, 1))  # Blank image if no mask

    x_min, x_max = xs.min(), xs.max()
    y_min, y_max = ys.min(), ys.max()

    # Crop the image directly to the mask's bounding rectangle
    return image.crop((x_min, y_min, x_max + 1, y_max + 1))

def process_directory(input_dir: str, output_dir: str):
    input_path = Path(input_dir)
    output_path = Path(output_dir)

    for root, _, files in os.walk(input_path):
        for file in files:
            if file.endswith(".jpg") and not file.endswith(".mask.jpg"):
                image_file = Path(root) / file
                mask_file = image_file.with_name(f"{image_file.stem}.mask.jpg")

                if not mask_file.exists():
                    print(f"[!] Mask not found for {image_file.name}, skipping.")
                    continue

                try:
                    image = Image.open(image_file)
                    mask = Image.open(mask_file)
                    masked_cropped = apply_rectangular_mask_and_crop(image, mask)

                    # Determine output path
                    relative_path = image_file.relative_to(input_path)
                    output_image_path = output_path / relative_path
                    output_image_path.parent.mkdir(parents=True, exist_ok=True)

                    masked_cropped.save(output_image_path)
                    print(f"Saved: {output_image_path}")
                except Exception as e:
                    print(f"Error processing {image_file}: {e}")

In [13]:
# For few-shot train

for N in [5, 10, 30]:
    for M in [1, 2, 3, 4, 5]:
        print(f'================ Processing train N={N}, M={M} ================')
        input_root = f'/home/gridsan/manderson/ovdsat/data/init_data/dior_N{N}-{M}'
        output_root = f'/home/gridsan/manderson/ovdsat/data/cropped_data/dior_N{N}-{M}'
        process_directory(input_root, output_root)
        print()

Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/ship/11728.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/ship/03398.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/ship/01572.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/ship/12329.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/ship/22752.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/harbor/00364.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/harbor/15432.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/harbor/12862.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/harbor/08505.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/Expressway-toll-station/14865.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5-1/Expressway-toll-station/03962.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior_N5

In [26]:
# For val

for M in [1, 2, 3, 4, 5]:
    print(f'================ Processing val M={M} ================')
    input_root = f'/home/gridsan/manderson/ovdsat/data/init_data/dior_val-{M}'
    output_root = f'/home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-{M}'
    process_directory(input_root, output_root)
    print()

Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/05669.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/10468.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/07966.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/13722.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/07678.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/05575.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/18789.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/04490.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/12626.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/ship/06429.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-3/harbor/19417.j

In [None]:
# For test

for M in [1]:#, 2, 3, 4, 5]:
    print(f'================ Processing test M={M} ================')
    input_root = f'/home/gridsan/manderson/ovdsat/data/init_data/dior_test-{M}'
    output_root = f'/home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-{M}'
    process_directory(input_root, output_root)
    print()

Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/12175.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/17292.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/06433.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/00995.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/20232.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/06848.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/04461.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/14030.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/19324.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-1/ship/04535.jpg
Saved: /home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior

Extract 10 background crops per sample

In [27]:
def generate_background_boxes_from_mask(mask_array, num_b, min_size=30, max_size=200, max_iter=100):
    """
    Generate boxes that do NOT intersect with non-zero mask areas.
    """
    h, w = mask_array.shape
    boxes = []

    for _ in range(num_b):
        valid_box = False
        count = 0
        while not valid_box and count < max_iter:
            count += 1
            width = random.randint(min_size, min(max_size, w))
            height = random.randint(min_size, min(max_size, h))
            x = random.randint(0, w - width)
            y = random.randint(0, h - height)
            x2 = x + width
            y2 = y + height

            # Check if the box intersects with foreground mask
            if np.any(mask_array[y:y2, x:x2] > 0):
                continue  # intersecting: skip
            boxes.append((x, y, x2, y2))
            valid_box = True

    return boxes

def generate_background_crops_from_masks(input_dir, output_root, num_b=10, min_size=30, max_size=200):
    input_path = Path(input_dir)
    background_dir = Path(output_root) / "background"
    background_dir.mkdir(parents=True, exist_ok=True)

    for root, _, files in os.walk(input_path):
        for file in tqdm(files):
            if file.endswith(".jpg") and not file.endswith(".mask.jpg"):
                image_file = Path(root) / file
                mask_file = image_file.with_name(f"{image_file.stem}.mask.jpg")

                if not mask_file.exists():
                    print(f"[!] No mask found for {image_file.name}, skipping.")
                    continue

                try:
                    image = Image.open(image_file).convert("RGB")
                    mask = Image.open(mask_file).convert("L")
                except Exception as e:
                    print(f"[✗] Error loading {image_file.name}: {e}")
                    continue

                mask_array = np.array(mask)
                boxes = generate_background_boxes_from_mask(mask_array, num_b, min_size, max_size)

                for i, (x1, y1, x2, y2) in enumerate(boxes):
                    crop = image.crop((x1, y1, x2, y2))
                    crop_filename = f"{image_file.stem}_background{i}.jpg"
                    crop.save(background_dir / crop_filename)
                    #print(f"[✓] Saved: {background_dir / crop_filename}")

In [20]:
# For train

for N in [5, 10, 30]:
    for M in [1, 2, 3, 4, 5]:
        print(f'================ Processing train N={N}, M={M} ================')
        input_root = f'/home/gridsan/manderson/ovdsat/data/init_data/dior_N{N}-{M}'
        output_root = f'/home/gridsan/manderson/ovdsat/data/cropped_data/dior_N{N}-{M}'
        generate_background_crops_from_masks(input_root, output_root)
        print()



0it [00:00, ?it/s]
100%|██████████| 20/20 [00:00<00:00, 50.65it/s]
100%|██████████| 20/20 [00:00<00:00, 56.49it/s]
100%|██████████| 20/20 [00:00<00:00, 65.28it/s]
100%|██████████| 20/20 [00:00<00:00, 63.82it/s]
100%|██████████| 20/20 [00:00<00:00, 65.06it/s]
100%|██████████| 20/20 [00:00<00:00, 56.70it/s]
100%|██████████| 20/20 [00:00<00:00, 47.93it/s]
100%|██████████| 20/20 [00:00<00:00, 63.33it/s]
100%|██████████| 20/20 [00:00<00:00, 67.74it/s]
100%|██████████| 20/20 [00:00<00:00, 49.59it/s]
100%|██████████| 20/20 [00:00<00:00, 49.39it/s]
100%|██████████| 20/20 [00:00<00:00, 27.21it/s]
100%|██████████| 20/20 [00:03<00:00,  5.01it/s]
100%|██████████| 2/2 [00:00<00:00, 3135.93it/s]


[!] No mask found for 19192.mask-checkpoint.jpg, skipping.
[!] No mask found for 19192-checkpoint.jpg, skipping.


100%|██████████| 20/20 [00:03<00:00,  5.49it/s]
100%|██████████| 20/20 [00:02<00:00,  7.29it/s]
100%|██████████| 20/20 [00:09<00:00,  2.03it/s]
100%|██████████| 20/20 [00:03<00:00,  6.58it/s]
100%|██████████| 20/20 [00:04<00:00,  4.54it/s]
100%|██████████| 20/20 [00:04<00:00,  4.37it/s]
100%|██████████| 20/20 [00:06<00:00,  3.03it/s]





0it [00:00, ?it/s]
100%|██████████| 20/20 [00:03<00:00,  5.94it/s]
100%|██████████| 20/20 [00:06<00:00,  2.93it/s]
100%|██████████| 20/20 [00:05<00:00,  3.61it/s]
100%|██████████| 20/20 [00:04<00:00,  4.01it/s]
100%|██████████| 20/20 [00:03<00:00,  5.41it/s]
100%|██████████| 20/20 [00:05<00:00,  3.40it/s]
100%|██████████| 20/20 [00:04<00:00,  4.24it/s]
100%|██████████| 20/20 [00:03<00:00,  5.45it/s]
100%|██████████| 20/20 [00:04<00:00,  4.45it/s]
100%|██████████| 18/18 [00:08<00:00,  2.05it/s]
100%|██████████| 20/20 [00:03<00:00,  5.69it/s]
100%|██████████| 18/18 [00:03<00:00,  5.03it/s]
100%|██████████| 18/18 [00:03<00:00,  5.08it/s]
100%|██████████| 20/20 [00:04<00:00,  4.94it/s]
100%|██████████| 20/20 [00:03<00:00,  5.80it/s]
100%|██████████| 20/20 [00:04<00:00,  4.57it/s]
100%|██████████| 20/20 [00:03<00:00,  6.22it/s]
100%|██████████| 20/20 [00:03<00:00,  5.38it/s]
100%|██████████| 20/20 [00:02<00:00,  7.29it/s]
100%|██████████| 20/20 [00:02<00:00,  6.83it/s]





0it [00:00, ?it/s]
100%|██████████| 20/20 [00:03<00:00,  6.04it/s]
100%|██████████| 20/20 [00:02<00:00,  8.01it/s]
100%|██████████| 20/20 [00:02<00:00,  6.88it/s]
100%|██████████| 20/20 [00:03<00:00,  6.48it/s]
100%|██████████| 20/20 [00:03<00:00,  5.60it/s]
100%|██████████| 20/20 [00:02<00:00,  8.90it/s]
100%|██████████| 20/20 [00:03<00:00,  6.02it/s]
100%|██████████| 20/20 [00:02<00:00,  7.32it/s]
100%|██████████| 20/20 [00:05<00:00,  3.82it/s]
100%|██████████| 20/20 [00:03<00:00,  5.96it/s]
100%|██████████| 20/20 [00:05<00:00,  3.55it/s]
100%|██████████| 20/20 [00:04<00:00,  4.76it/s]
100%|██████████| 20/20 [00:03<00:00,  6.59it/s]
100%|██████████| 20/20 [00:03<00:00,  5.96it/s]
100%|██████████| 20/20 [00:03<00:00,  6.57it/s]
100%|██████████| 20/20 [00:02<00:00,  7.09it/s]
100%|██████████| 20/20 [00:03<00:00,  6.50it/s]
100%|██████████| 20/20 [00:02<00:00,  6.67it/s]
100%|██████████| 20/20 [00:03<00:00,  5.59it/s]
100%|██████████| 20/20 [00:03<00:00,  6.14it/s]





0it [00:00, ?it/s]
100%|██████████| 20/20 [00:03<00:00,  6.08it/s]
100%|██████████| 20/20 [00:03<00:00,  6.62it/s]
100%|██████████| 20/20 [00:03<00:00,  5.42it/s]
100%|██████████| 20/20 [00:03<00:00,  6.57it/s]
100%|██████████| 20/20 [00:02<00:00,  8.38it/s]
100%|██████████| 18/18 [00:02<00:00,  7.50it/s]
100%|██████████| 20/20 [00:03<00:00,  6.63it/s]
100%|██████████| 20/20 [00:02<00:00,  9.49it/s]
100%|██████████| 20/20 [00:03<00:00,  6.58it/s]
100%|██████████| 20/20 [00:02<00:00,  7.82it/s]
100%|██████████| 20/20 [00:03<00:00,  5.88it/s]
100%|██████████| 20/20 [00:03<00:00,  6.46it/s]
100%|██████████| 20/20 [00:02<00:00,  7.51it/s]
100%|██████████| 20/20 [00:03<00:00,  5.38it/s]
100%|██████████| 20/20 [00:02<00:00,  7.13it/s]
100%|██████████| 20/20 [00:02<00:00,  6.91it/s]
100%|██████████| 20/20 [00:03<00:00,  5.73it/s]
100%|██████████| 20/20 [00:04<00:00,  4.25it/s]
100%|██████████| 20/20 [00:02<00:00,  7.27it/s]
100%|██████████| 20/20 [00:02<00:00,  7.46it/s]





0it [00:00, ?it/s]
100%|██████████| 20/20 [00:03<00:00,  5.66it/s]
100%|██████████| 20/20 [00:02<00:00,  7.34it/s]
100%|██████████| 20/20 [00:03<00:00,  6.09it/s]
100%|██████████| 20/20 [00:03<00:00,  6.18it/s]
100%|██████████| 20/20 [00:03<00:00,  6.03it/s]
100%|██████████| 20/20 [00:02<00:00,  6.97it/s]
100%|██████████| 20/20 [00:03<00:00,  6.36it/s]
100%|██████████| 20/20 [00:03<00:00,  5.47it/s]
100%|██████████| 20/20 [00:02<00:00,  7.73it/s]
100%|██████████| 20/20 [00:03<00:00,  5.28it/s]
100%|██████████| 18/18 [00:02<00:00,  6.24it/s]
100%|██████████| 20/20 [00:03<00:00,  6.04it/s]
100%|██████████| 20/20 [00:02<00:00,  7.44it/s]
100%|██████████| 20/20 [00:03<00:00,  6.08it/s]
100%|██████████| 20/20 [00:02<00:00,  7.52it/s]
100%|██████████| 20/20 [00:03<00:00,  6.00it/s]
100%|██████████| 20/20 [00:02<00:00,  7.99it/s]
100%|██████████| 20/20 [00:02<00:00,  7.44it/s]
100%|██████████| 20/20 [00:03<00:00,  6.44it/s]
100%|██████████| 20/20 [00:03<00:00,  5.35it/s]





0it [00:00, ?it/s]
100%|██████████| 58/58 [00:08<00:00,  7.03it/s]
100%|██████████| 60/60 [00:07<00:00,  7.60it/s]
100%|██████████| 60/60 [00:09<00:00,  6.67it/s]
100%|██████████| 60/60 [00:08<00:00,  6.81it/s]
100%|██████████| 60/60 [00:09<00:00,  6.46it/s]
100%|██████████| 60/60 [00:08<00:00,  7.10it/s]
100%|██████████| 60/60 [00:08<00:00,  7.15it/s]
100%|██████████| 60/60 [00:08<00:00,  7.06it/s]
100%|██████████| 60/60 [00:07<00:00,  7.73it/s]
100%|██████████| 60/60 [00:07<00:00,  8.21it/s]
100%|██████████| 60/60 [00:09<00:00,  6.39it/s]
100%|██████████| 58/58 [00:07<00:00,  7.35it/s]
100%|██████████| 60/60 [00:09<00:00,  6.48it/s]
100%|██████████| 60/60 [00:09<00:00,  6.59it/s]
100%|██████████| 60/60 [00:08<00:00,  7.04it/s]
100%|██████████| 60/60 [00:07<00:00,  7.65it/s]
100%|██████████| 60/60 [00:08<00:00,  7.09it/s]
100%|██████████| 60/60 [00:06<00:00,  9.19it/s]
100%|██████████| 60/60 [00:07<00:00,  7.58it/s]
100%|██████████| 60/60 [00:07<00:00,  7.61it/s]





0it [00:00, ?it/s]
100%|██████████| 60/60 [00:07<00:00,  8.24it/s]
100%|██████████| 2/2 [00:00<00:00, 2276.42it/s]


[!] No mask found for 02842-checkpoint.jpg, skipping.
[!] No mask found for 11588-checkpoint.jpg, skipping.


100%|██████████| 60/60 [00:07<00:00,  7.93it/s]
100%|██████████| 60/60 [00:08<00:00,  7.45it/s]
100%|██████████| 60/60 [00:08<00:00,  6.71it/s]
100%|██████████| 60/60 [00:12<00:00,  4.80it/s]
100%|██████████| 60/60 [00:11<00:00,  5.13it/s]
100%|██████████| 2/2 [00:00<00:00, 4495.50it/s]


[!] No mask found for 01819-checkpoint.jpg, skipping.
[!] No mask found for 07175-checkpoint.jpg, skipping.


100%|██████████| 60/60 [00:10<00:00,  5.89it/s]
100%|██████████| 60/60 [00:11<00:00,  5.16it/s]
100%|██████████| 60/60 [00:13<00:00,  4.32it/s]
100%|██████████| 58/58 [00:08<00:00,  6.93it/s]
100%|██████████| 60/60 [00:12<00:00,  4.62it/s]
100%|██████████| 58/58 [00:09<00:00,  6.18it/s]
100%|██████████| 60/60 [00:09<00:00,  6.42it/s]
100%|██████████| 7/7 [00:00<00:00, 5151.80it/s]


[!] No mask found for 02024.mask-checkpoint.jpg, skipping.
[!] No mask found for 16213.mask-checkpoint.jpg, skipping.
[!] No mask found for 03577-checkpoint.jpg, skipping.
[!] No mask found for 16213-checkpoint.jpg, skipping.
[!] No mask found for 02064-checkpoint.jpg, skipping.
[!] No mask found for 19451-checkpoint.jpg, skipping.
[!] No mask found for 02024-checkpoint.jpg, skipping.


100%|██████████| 60/60 [00:10<00:00,  5.80it/s]
100%|██████████| 58/58 [00:10<00:00,  5.58it/s]
100%|██████████| 60/60 [00:10<00:00,  5.73it/s]
100%|██████████| 58/58 [00:07<00:00,  7.63it/s]
100%|██████████| 60/60 [00:10<00:00,  5.61it/s]
100%|██████████| 60/60 [00:08<00:00,  7.32it/s]
100%|██████████| 60/60 [00:10<00:00,  5.88it/s]





0it [00:00, ?it/s]
100%|██████████| 60/60 [00:12<00:00,  4.99it/s]
100%|██████████| 56/56 [00:08<00:00,  6.32it/s]
100%|██████████| 60/60 [00:09<00:00,  6.45it/s]
100%|██████████| 60/60 [00:10<00:00,  5.54it/s]
100%|██████████| 60/60 [00:07<00:00,  8.04it/s]
100%|██████████| 60/60 [00:07<00:00,  8.35it/s]
100%|██████████| 60/60 [00:07<00:00,  8.26it/s]
100%|██████████| 60/60 [00:07<00:00,  8.01it/s]
100%|██████████| 60/60 [00:08<00:00,  7.31it/s]
100%|██████████| 58/58 [00:07<00:00,  7.37it/s]
100%|██████████| 60/60 [00:06<00:00,  9.64it/s]
100%|██████████| 58/58 [00:07<00:00,  7.58it/s]
100%|██████████| 60/60 [00:08<00:00,  6.77it/s]
100%|██████████| 60/60 [00:07<00:00,  8.11it/s]
100%|██████████| 60/60 [00:07<00:00,  7.84it/s]
100%|██████████| 60/60 [00:06<00:00,  8.62it/s]
100%|██████████| 60/60 [00:07<00:00,  7.60it/s]
100%|██████████| 60/60 [00:06<00:00,  9.27it/s]
100%|██████████| 60/60 [00:07<00:00,  7.57it/s]
100%|██████████| 60/60 [00:07<00:00,  8.36it/s]





0it [00:00, ?it/s]
100%|██████████| 60/60 [00:07<00:00,  7.60it/s]
100%|██████████| 58/58 [00:06<00:00,  9.10it/s]
100%|██████████| 60/60 [00:05<00:00, 10.08it/s]
100%|██████████| 60/60 [00:07<00:00,  7.58it/s]
100%|██████████| 60/60 [00:07<00:00,  8.40it/s]
100%|██████████| 60/60 [00:06<00:00,  8.70it/s]
100%|██████████| 60/60 [00:07<00:00,  8.26it/s]
100%|██████████| 60/60 [00:06<00:00,  8.96it/s]
100%|██████████| 60/60 [00:06<00:00,  9.17it/s]
100%|██████████| 58/58 [00:06<00:00,  8.44it/s]
100%|██████████| 58/58 [00:06<00:00,  8.35it/s]
100%|██████████| 58/58 [00:06<00:00,  8.47it/s]
100%|██████████| 60/60 [00:06<00:00,  9.01it/s]
100%|██████████| 60/60 [00:06<00:00,  8.76it/s]
100%|██████████| 60/60 [00:06<00:00,  8.77it/s]
100%|██████████| 58/58 [00:06<00:00,  9.11it/s]
100%|██████████| 60/60 [00:05<00:00, 10.10it/s]
100%|██████████| 60/60 [00:07<00:00,  8.51it/s]
100%|██████████| 60/60 [00:06<00:00,  8.80it/s]
100%|██████████| 60/60 [00:06<00:00,  8.68it/s]





0it [00:00, ?it/s]
100%|██████████| 60/60 [00:06<00:00,  8.63it/s]
100%|██████████| 56/56 [00:05<00:00,  9.49it/s]
100%|██████████| 60/60 [00:06<00:00,  8.68it/s]
100%|██████████| 60/60 [00:06<00:00,  9.08it/s]
100%|██████████| 60/60 [00:06<00:00,  9.43it/s]
100%|██████████| 60/60 [00:06<00:00,  9.04it/s]
100%|██████████| 60/60 [00:06<00:00,  9.28it/s]
100%|██████████| 60/60 [00:07<00:00,  7.55it/s]
100%|██████████| 60/60 [00:06<00:00,  9.33it/s]
100%|██████████| 60/60 [00:07<00:00,  7.79it/s]
100%|██████████| 60/60 [00:06<00:00,  9.68it/s]
100%|██████████| 58/58 [00:06<00:00,  9.17it/s]
100%|██████████| 58/58 [00:06<00:00,  9.18it/s]
100%|██████████| 60/60 [00:07<00:00,  8.23it/s]
100%|██████████| 60/60 [00:06<00:00,  9.86it/s]
100%|██████████| 58/58 [00:05<00:00, 10.60it/s]
100%|██████████| 60/60 [00:05<00:00, 10.38it/s]
100%|██████████| 60/60 [00:06<00:00,  8.86it/s]
100%|██████████| 60/60 [00:06<00:00,  8.90it/s]
100%|██████████| 60/60 [00:05<00:00, 11.41it/s]







In [28]:
# For val

for M in [1, 2, 3, 4, 5]:
    print(f'================ Processing val M={M} ================')
    input_root = f'/home/gridsan/manderson/ovdsat/data/init_data/dior_val-{M}'
    output_root = f'/home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior_val-{M}'
    generate_background_crops_from_masks(input_root, output_root)
    print()



0it [00:00, ?it/s]
100%|██████████| 20/20 [00:01<00:00, 17.50it/s]
100%|██████████| 20/20 [00:01<00:00, 18.77it/s]
100%|██████████| 20/20 [00:00<00:00, 20.85it/s]
100%|██████████| 20/20 [00:00<00:00, 21.31it/s]
100%|██████████| 20/20 [00:00<00:00, 21.33it/s]
100%|██████████| 20/20 [00:00<00:00, 24.93it/s]
100%|██████████| 20/20 [00:00<00:00, 24.83it/s]
100%|██████████| 20/20 [00:00<00:00, 21.48it/s]
100%|██████████| 20/20 [00:00<00:00, 26.31it/s]
100%|██████████| 20/20 [00:00<00:00, 27.43it/s]
100%|██████████| 20/20 [00:00<00:00, 22.54it/s]
100%|██████████| 20/20 [00:01<00:00, 19.60it/s]
100%|██████████| 20/20 [00:00<00:00, 25.16it/s]
100%|██████████| 20/20 [00:00<00:00, 32.03it/s]
100%|██████████| 20/20 [00:00<00:00, 28.10it/s]
100%|██████████| 20/20 [00:00<00:00, 30.92it/s]
100%|██████████| 20/20 [00:00<00:00, 20.90it/s]
100%|██████████| 20/20 [00:00<00:00, 33.01it/s]
100%|██████████| 20/20 [00:00<00:00, 25.64it/s]
100%|██████████| 20/20 [00:00<00:00, 23.36it/s]





0it [00:00, ?it/s]
100%|██████████| 20/20 [00:00<00:00, 35.78it/s]
100%|██████████| 20/20 [00:00<00:00, 33.67it/s]
100%|██████████| 20/20 [00:00<00:00, 24.89it/s]
100%|██████████| 20/20 [00:00<00:00, 30.32it/s]
100%|██████████| 20/20 [00:00<00:00, 30.12it/s]
100%|██████████| 20/20 [00:00<00:00, 37.62it/s]
100%|██████████| 20/20 [00:00<00:00, 32.23it/s]
100%|██████████| 20/20 [00:00<00:00, 28.49it/s]
100%|██████████| 20/20 [00:00<00:00, 33.30it/s]
100%|██████████| 20/20 [00:00<00:00, 29.34it/s]
100%|██████████| 20/20 [00:00<00:00, 29.44it/s]
100%|██████████| 20/20 [00:00<00:00, 28.02it/s]
100%|██████████| 20/20 [00:00<00:00, 33.82it/s]
100%|██████████| 20/20 [00:00<00:00, 31.03it/s]
100%|██████████| 20/20 [00:00<00:00, 39.83it/s]
100%|██████████| 20/20 [00:00<00:00, 22.82it/s]
100%|██████████| 20/20 [00:00<00:00, 35.24it/s]
100%|██████████| 20/20 [00:00<00:00, 34.25it/s]
100%|██████████| 20/20 [00:00<00:00, 34.83it/s]
100%|██████████| 20/20 [00:00<00:00, 32.34it/s]





0it [00:00, ?it/s]
100%|██████████| 20/20 [00:00<00:00, 22.32it/s]
100%|██████████| 20/20 [00:00<00:00, 31.99it/s]
100%|██████████| 20/20 [00:00<00:00, 48.09it/s]
100%|██████████| 20/20 [00:00<00:00, 34.34it/s]
100%|██████████| 20/20 [00:00<00:00, 25.53it/s]
100%|██████████| 20/20 [00:00<00:00, 32.40it/s]
100%|██████████| 20/20 [00:00<00:00, 26.50it/s]
100%|██████████| 20/20 [00:00<00:00, 23.23it/s]
100%|██████████| 20/20 [00:00<00:00, 32.19it/s]
100%|██████████| 20/20 [00:00<00:00, 28.54it/s]
100%|██████████| 20/20 [00:00<00:00, 32.81it/s]
100%|██████████| 20/20 [00:00<00:00, 32.89it/s]
100%|██████████| 20/20 [00:00<00:00, 32.17it/s]
100%|██████████| 20/20 [00:00<00:00, 36.04it/s]
100%|██████████| 20/20 [00:00<00:00, 32.19it/s]
100%|██████████| 20/20 [00:00<00:00, 41.54it/s]
100%|██████████| 20/20 [00:00<00:00, 40.02it/s]
100%|██████████| 20/20 [00:00<00:00, 30.26it/s]
100%|██████████| 20/20 [00:00<00:00, 31.20it/s]
100%|██████████| 20/20 [00:00<00:00, 30.17it/s]





0it [00:00, ?it/s]
100%|██████████| 20/20 [00:00<00:00, 37.31it/s]
100%|██████████| 20/20 [00:01<00:00, 14.40it/s]
100%|██████████| 20/20 [00:00<00:00, 35.93it/s]
100%|██████████| 20/20 [00:00<00:00, 35.89it/s]
100%|██████████| 20/20 [00:00<00:00, 31.93it/s]
100%|██████████| 20/20 [00:00<00:00, 36.11it/s]
100%|██████████| 20/20 [00:00<00:00, 40.66it/s]
100%|██████████| 20/20 [00:00<00:00, 31.32it/s]
100%|██████████| 20/20 [00:00<00:00, 38.85it/s]
100%|██████████| 20/20 [00:00<00:00, 37.34it/s]
100%|██████████| 20/20 [00:00<00:00, 34.02it/s]
100%|██████████| 20/20 [00:00<00:00, 29.10it/s]
100%|██████████| 20/20 [00:00<00:00, 39.05it/s]
100%|██████████| 20/20 [00:00<00:00, 26.40it/s]
100%|██████████| 20/20 [00:00<00:00, 40.93it/s]
100%|██████████| 20/20 [00:00<00:00, 48.22it/s]
100%|██████████| 20/20 [00:00<00:00, 35.59it/s]
100%|██████████| 20/20 [00:00<00:00, 34.32it/s]
100%|██████████| 20/20 [00:00<00:00, 33.94it/s]
100%|██████████| 20/20 [00:00<00:00, 32.37it/s]





0it [00:00, ?it/s]
100%|██████████| 20/20 [00:00<00:00, 34.98it/s]
100%|██████████| 20/20 [00:00<00:00, 38.61it/s]
100%|██████████| 20/20 [00:00<00:00, 43.72it/s]
100%|██████████| 20/20 [00:00<00:00, 34.68it/s]
100%|██████████| 20/20 [00:00<00:00, 29.74it/s]
100%|██████████| 20/20 [00:00<00:00, 35.02it/s]
100%|██████████| 20/20 [00:00<00:00, 36.73it/s]
100%|██████████| 20/20 [00:00<00:00, 37.24it/s]
100%|██████████| 20/20 [00:00<00:00, 23.94it/s]
100%|██████████| 20/20 [00:00<00:00, 35.52it/s]
100%|██████████| 20/20 [00:00<00:00, 37.59it/s]
100%|██████████| 20/20 [00:00<00:00, 30.45it/s]
100%|██████████| 20/20 [00:00<00:00, 41.34it/s]
100%|██████████| 20/20 [00:00<00:00, 38.85it/s]
100%|██████████| 20/20 [00:00<00:00, 34.28it/s]
100%|██████████| 20/20 [00:00<00:00, 40.74it/s]
100%|██████████| 20/20 [00:00<00:00, 35.16it/s]
100%|██████████| 20/20 [00:00<00:00, 34.10it/s]
100%|██████████| 20/20 [00:00<00:00, 42.76it/s]
100%|██████████| 20/20 [00:00<00:00, 35.16it/s]







In [30]:
# For test

for M in [1, 2, 3, 4, 5]:
    print(f'================ Processing test M={M} ================')
    input_root = f'/home/gridsan/manderson/ovdsat/data/init_data/dior_test-{M}'
    output_root = f'/home/gridsan/manderson/ovdsat/data/cropped_data/dior/test/dior_test-{M}'
    generate_background_crops_from_masks(input_root, output_root, num_b=1) # just get one crop per image
    print()



0it [00:00, ?it/s]
100%|██████████| 200/200 [00:01<00:00, 100.44it/s]
100%|██████████| 200/200 [00:02<00:00, 80.86it/s] 
100%|██████████| 200/200 [00:02<00:00, 96.10it/s] 
100%|██████████| 200/200 [00:02<00:00, 74.39it/s]
100%|██████████| 200/200 [00:02<00:00, 77.31it/s] 
100%|██████████| 200/200 [00:02<00:00, 86.00it/s] 
100%|██████████| 200/200 [00:02<00:00, 99.19it/s] 
100%|██████████| 200/200 [00:02<00:00, 83.35it/s]
100%|██████████| 200/200 [00:02<00:00, 79.50it/s] 
100%|██████████| 200/200 [00:02<00:00, 81.99it/s] 
100%|██████████| 200/200 [00:02<00:00, 86.38it/s] 
100%|██████████| 200/200 [00:01<00:00, 105.83it/s]
100%|██████████| 200/200 [00:01<00:00, 105.87it/s]
100%|██████████| 200/200 [00:01<00:00, 100.34it/s]
100%|██████████| 200/200 [00:01<00:00, 137.92it/s]
100%|██████████| 200/200 [00:01<00:00, 115.17it/s]
100%|██████████| 200/200 [00:01<00:00, 118.50it/s]
100%|██████████| 200/200 [00:01<00:00, 104.61it/s]
100%|██████████| 200/200 [00:01<00:00, 106.40it/s]
100%|█████████




0it [00:00, ?it/s]
100%|██████████| 200/200 [00:01<00:00, 132.68it/s]
100%|██████████| 200/200 [00:01<00:00, 104.01it/s]
100%|██████████| 200/200 [00:01<00:00, 105.39it/s]
100%|██████████| 200/200 [00:02<00:00, 89.39it/s] 
100%|██████████| 200/200 [00:02<00:00, 87.06it/s] 
100%|██████████| 200/200 [00:02<00:00, 95.43it/s] 
100%|██████████| 200/200 [00:02<00:00, 90.99it/s] 
100%|██████████| 200/200 [00:02<00:00, 98.78it/s] 
100%|██████████| 200/200 [00:02<00:00, 96.03it/s] 
100%|██████████| 200/200 [00:02<00:00, 92.82it/s] 
100%|██████████| 200/200 [00:01<00:00, 107.54it/s]
100%|██████████| 200/200 [00:02<00:00, 94.34it/s] 
100%|██████████| 200/200 [00:01<00:00, 102.99it/s]
100%|██████████| 200/200 [00:02<00:00, 89.03it/s] 
100%|██████████| 200/200 [00:01<00:00, 107.63it/s]
100%|██████████| 200/200 [00:01<00:00, 118.85it/s]
100%|██████████| 200/200 [00:01<00:00, 114.09it/s]
100%|██████████| 200/200 [00:02<00:00, 99.52it/s] 
100%|██████████| 200/200 [00:01<00:00, 107.44it/s]
100%|███████




0it [00:00, ?it/s]
100%|██████████| 200/200 [00:02<00:00, 94.48it/s] 
100%|██████████| 200/200 [00:01<00:00, 101.17it/s]
100%|██████████| 200/200 [00:01<00:00, 105.92it/s]
100%|██████████| 200/200 [00:01<00:00, 101.46it/s]
100%|██████████| 200/200 [00:02<00:00, 76.53it/s]
100%|██████████| 200/200 [00:01<00:00, 103.48it/s]
100%|██████████| 200/200 [00:01<00:00, 100.60it/s]
100%|██████████| 200/200 [00:02<00:00, 78.23it/s] 
100%|██████████| 200/200 [00:02<00:00, 87.66it/s] 
100%|██████████| 200/200 [00:02<00:00, 85.58it/s] 
100%|██████████| 200/200 [00:02<00:00, 95.32it/s] 
100%|██████████| 200/200 [00:01<00:00, 111.33it/s]
100%|██████████| 200/200 [00:01<00:00, 104.28it/s]
100%|██████████| 200/200 [00:02<00:00, 99.71it/s] 
100%|██████████| 200/200 [00:01<00:00, 121.03it/s]
100%|██████████| 200/200 [00:01<00:00, 120.48it/s]
100%|██████████| 200/200 [00:01<00:00, 102.74it/s]
100%|██████████| 200/200 [00:02<00:00, 84.33it/s] 
100%|██████████| 200/200 [00:01<00:00, 105.96it/s]
100%|████████




0it [00:00, ?it/s]
100%|██████████| 200/200 [00:01<00:00, 103.28it/s]
100%|██████████| 200/200 [00:01<00:00, 105.00it/s]
100%|██████████| 200/200 [00:01<00:00, 102.66it/s]
100%|██████████| 200/200 [00:01<00:00, 101.54it/s]
100%|██████████| 200/200 [00:01<00:00, 102.89it/s]
100%|██████████| 200/200 [00:02<00:00, 86.25it/s]
100%|██████████| 200/200 [00:02<00:00, 99.32it/s] 
100%|██████████| 200/200 [00:02<00:00, 99.12it/s] 
100%|██████████| 200/200 [00:02<00:00, 90.05it/s] 
100%|██████████| 200/200 [00:02<00:00, 86.93it/s] 
100%|██████████| 200/200 [00:02<00:00, 91.88it/s] 
100%|██████████| 200/200 [00:02<00:00, 97.33it/s] 
100%|██████████| 200/200 [00:02<00:00, 96.35it/s] 
100%|██████████| 200/200 [00:01<00:00, 107.95it/s]
100%|██████████| 200/200 [00:01<00:00, 120.17it/s]
100%|██████████| 200/200 [00:01<00:00, 115.66it/s]
100%|██████████| 200/200 [00:02<00:00, 93.55it/s] 
100%|██████████| 200/200 [00:02<00:00, 91.81it/s] 
100%|██████████| 200/200 [00:02<00:00, 95.70it/s] 
100%|████████




0it [00:00, ?it/s]
100%|██████████| 200/200 [00:02<00:00, 91.84it/s] 
100%|██████████| 200/200 [00:02<00:00, 89.07it/s] 
100%|██████████| 200/200 [00:01<00:00, 114.62it/s]
100%|██████████| 200/200 [00:02<00:00, 84.11it/s] 
100%|██████████| 200/200 [00:01<00:00, 119.67it/s]
100%|██████████| 200/200 [00:01<00:00, 113.73it/s]
100%|██████████| 200/200 [00:01<00:00, 100.23it/s]
100%|██████████| 200/200 [00:02<00:00, 77.53it/s] 
100%|██████████| 200/200 [00:02<00:00, 81.68it/s] 
100%|██████████| 200/200 [00:02<00:00, 95.49it/s] 
100%|██████████| 200/200 [00:02<00:00, 89.51it/s]
100%|██████████| 200/200 [00:02<00:00, 85.61it/s] 
100%|██████████| 200/200 [00:02<00:00, 93.11it/s] 
100%|██████████| 200/200 [00:02<00:00, 92.88it/s] 
100%|██████████| 200/200 [00:01<00:00, 108.87it/s]
100%|██████████| 200/200 [00:01<00:00, 116.59it/s]
100%|██████████| 200/200 [00:02<00:00, 99.72it/s] 
100%|██████████| 200/200 [00:02<00:00, 83.96it/s] 
100%|██████████| 200/200 [00:02<00:00, 98.23it/s] 
100%|████████







## Create images + masks (init_data)

In [5]:
import os
import json
import cv2
import numpy as np
from PIL import Image
from pycocotools.coco import COCO

def coco_to_seg(annotation_file, image_directory, save_path, subset_size=None):
    '''
    Convert COCO annotations to segmentation masks in class directories for prototype initialization.

    Args:
        annotation_file (str): Path to the COCO annotation file.
        image_directory (str): Path to the directory containing the images.
        save_path (str): Path to the directory where the segmentation masks will be saved.
    '''
    # Create the output directory if it doesn't exist
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Load the COCO annotation file
    coco = COCO(annotation_file)

    # Get the list of category IDs
    category_ids = coco.getCatIds()


    # Loop through each category
    for category_id in category_ids:
        # Get the category information
        category_info = coco.loadCats(category_id)[0]
        category_name = category_info['name']
        category_directory = os.path.join(save_path, category_name)

        # Create a directory for the category
        if not os.path.exists(category_directory):
            os.makedirs(category_directory)

        # Get the image IDs containing the selected category
        image_ids = coco.getImgIds(catIds=category_id)
        
        # Subsample images if subset_size is given
        if subset_size is not None and subset_size < len(image_ids):
            image_ids = random.sample(image_ids, subset_size)

        for image_id in image_ids:
            # Load the image and annotations
            image_info = coco.loadImgs(image_id)[0]
            image_filename = image_info['file_name']
            image = cv2.imread(os.path.join(image_directory, image_filename))
            annotations = coco.loadAnns(coco.getAnnIds(imgIds=image_id, catIds=category_id))

            # Create a mask for the selected category
            mask = np.zeros(image.shape[:2], dtype=np.uint8)
            for annotation in annotations:
                bbox = list(map(int, annotation['bbox']))
                x, y, w, h = bbox
                mask[y:y + h, x:x + w] = 255

            # Save the image and mask
            image_filename_without_ext = os.path.splitext(image_filename)[0]
            mask_filename = f"{image_filename_without_ext}.mask{os.path.splitext(image_filename)[1]}"
            cv2.imwrite(os.path.join(category_directory, image_filename), image)
            cv2.imwrite(os.path.join(category_directory, mask_filename), mask)

        print(f"Processed category: {category_name}")

    print("Processing complete.")

In [24]:
# For val
subset_size = 10
for M in [1, 2, 3, 4, 5]:
    annotation_file=f'/home/gridsan/manderson/ovdsat/data/dior/train_coco_finetune_val-{M}.json'
    image_directory='/home/gridsan/manderson/ovdsat/data/dior/JPEGImages'
    save_path=f'/home/gridsan/manderson/ovdsat/data/init_data/dior_val-{M}'
    coco_to_seg(annotation_file, image_directory, save_path, subset_size)

loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
Processed category: groundtrackfield
Processed category: baseballfield
Processed category: bridge
Processed category: Expressway-toll-station
Processed category: vehicle
Processed category: airplane
Processed category: airport
Processed category: tenniscourt
Processed category: trainstation
Processed category: storagetank
Processed category: stadium
Processed category: windmill
Processed category: ship
Processed category: golffield
Processed category: overpass
Processed category: chimney
Processed category: dam
Processed category: basketballcourt
Processed category: harbor
Processed category: Expressway-Service-area
Processing complete.


In [6]:
# For test
subset_size = 100
for M in [1]:#, 2, 3, 4, 5]:
    annotation_file=f'/home/gridsan/manderson/ovdsat/data/dior/val_coco-{M}.json'
    image_directory='/home/gridsan/manderson/ovdsat/data/dior/JPEGImages'
    save_path=f'/home/gridsan/manderson/ovdsat/data/init_data/dior_test-{M}'
    coco_to_seg(annotation_file, image_directory, save_path, subset_size)

loading annotations into memory...
Done (t=0.48s)
creating index...
index created!
Processed category: groundtrackfield
Processed category: baseballfield
Processed category: bridge
Processed category: Expressway-toll-station
Processed category: vehicle
Processed category: airplane
Processed category: airport
Processed category: tenniscourt
Processed category: trainstation
Processed category: storagetank
Processed category: stadium
Processed category: windmill
Processed category: ship
Processed category: golffield
Processed category: overpass
Processed category: chimney
Processed category: dam
Processed category: basketballcourt
Processed category: harbor
Processed category: Expressway-Service-area
Processing complete.


## From annotations file (don't use)
* Do the same but load from a .json file (gives image path)
* Select a subset of N images (or else it will take too long)

In [9]:
import json
import random
from pathlib import Path
from PIL import Image
import numpy as np
import xml.etree.ElementTree as ET

def apply_rectangular_mask_and_crop(image: Image.Image, mask: Image.Image) -> Image.Image:
    """Crops the image to the rectangular mask bounding box only (no masking)."""
    mask_array = np.array(mask.convert("L"))
    ys, xs = np.where(mask_array > 0)

    if xs.size == 0 or ys.size == 0:
        return Image.new("RGB", (1, 1))  # Empty image if no mask

    x_min, x_max = xs.min(), xs.max()
    y_min, y_max = ys.min(), ys.max()
    return image.crop((x_min, y_min, x_max + 1, y_max + 1))

def load_annotations(xml_path: Path):
    """Parse a Pascal VOC-style XML file and return object entries (optional)."""
    try:
        tree = ET.parse(xml_path)
        root = tree.getroot()
        objects = []
        for obj in root.findall("object"):
            name = obj.find("name").text
            bbox = obj.find("bndbox")
            box = {
                "name": name,
                "bbox": (
                    int(bbox.find("xmin").text),
                    int(bbox.find("ymin").text),
                    int(bbox.find("xmax").text),
                    int(bbox.find("ymax").text),
                ),
            }
            objects.append(box)
        return objects
    except Exception as e:
        print(f"[!] Failed to parse {xml_path.name}: {e}")
        return []

def process_from_json(
    json_path: str,
    image_root: str,
    annotations_dir: str,
    output_dir: str,
    subset_size: int = 100
):
    image_root = Path(image_root)
    annotations_dir = Path(annotations_dir)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    with open(json_path, "r") as f:
        data = json.load(f)
    
    images = data["images"]
    if subset_size > 0:
        images = random.sample(images, subset_size)

    for img_info in images:
        try:
            file_name = img_info["file_name"]
            folder = img_info.get("folder", "")
            image_path = image_root / folder / file_name
            mask_path = image_path.with_name(image_path.stem + ".mask.jpg")
            xml_path = annotations_dir / (image_path.stem + ".xml")

            if not mask_path.exists():
                print(f"[!] Mask not found: {mask_path}")
                continue

            image = Image.open(image_path).convert("RGB")
            mask = Image.open(mask_path).convert("L")
            masked_cropped = apply_rectangular_mask_and_crop(image, mask)

            # Optional: load annotations
            _ = load_annotations(xml_path)

            # Save with preserved folder structure
            output_file = output_path / folder / file_name
            output_file.parent.mkdir(parents=True, exist_ok=True)
            masked_cropped.save(output_file)
            print(f"[✓] Saved: {output_file}")

        except Exception as e:
            print(f"[✗] Error processing {img_info}: {e}")


In [10]:
M=1

process_from_json(
    json_path=f"/home/gridsan/manderson/ovdsat/data/dior/train_coco_finetune_val-{M}.json",
    image_root="/home/gridsan/manderson/ovdsat/data/dior/JPEGImages",
    annotations_dir="/home/gridsan/manderson/ovdsat/data/dior/Annotations",
    output_dir=f"/home/gridsan/manderson/ovdsat/data/cropped_data/dior/val/dior-{M}",
    subset_size=10
)

[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/00499.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/01711.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/21022.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/17339.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/21084.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/19542.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/12238.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/02348.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/03136.mask.jpg
[!] Mask not found: /home/gridsan/manderson/ovdsat/data/dior/JPEGImages/17443.mask.jpg
