In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm.notebook import tqdm, trange
import torch.optim as optim
from torchvision import models
import numpy as np
from collections import deque
import random
import time
import torch.nn.functional as F
from torch.utils.data import ConcatDataset, DataLoader, Subset
import torchvision.models.segmentation as segmentation
from collections import defaultdict
import os
import bz2
import pickle
import math

def set_seed(seed):
    """
    Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU). When CUDA is available it
    also seeds all CUDA devices and switches cuDNN to deterministic mode with
    benchmarking disabled. Finally the seed is written to the PYTHONHASHSEED
    environment variable.

    Args:
        seed: Integer seed shared by all generators.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.manual_seed_all(seed)
        # Deterministic kernels, no auto-tuner: trades speed for repeatable results.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Export the hash seed as a string (environment values must be str).
    os.environ['PYTHONHASHSEED'] = str(seed)
set_seed(42)

### Explicit Heuristic Split Model

#### ZoeDepth - HuggingFace

In [None]:
# import os
# from pathlib import Path
# import torch
# import numpy as np
# from transformers import AutoImageProcessor, ZoeDepthForDepthEstimation
# from PIL import Image
# from tqdm import tqdm
# from torchvision import transforms

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# output_dir = Path('../data/depth')
# output_dir.mkdir(parents=True, exist_ok=True)

# image_processor = AutoImageProcessor.from_pretrained("Intel/zoedepth-nyu-kitti", use_fast=True)
# model = ZoeDepthForDepthEstimation.from_pretrained("Intel/zoedepth-nyu-kitti").to(device).eval()

# # Prepare image paths list
# image_paths = df['image_path'].tolist()

# batch_size = 10  # or whatever batch size you want
# for batch_idx in tqdm(range(0, len(image_paths), batch_size)):
#     batch_paths = image_paths[batch_idx:batch_idx + batch_size]
    
#     # Load images as PIL Images (no manual transform)
#     batch_images = [Image.open(img_path).convert("RGB") for img_path in batch_paths]
    
#     # Preprocess with ZoeDepth image processor
#     inputs = image_processor(images=batch_images, return_tensors="pt").to(device)
    
#     with torch.no_grad():
#         outputs = model(**inputs)
    
#     # Post-process depth maps to original sizes
#     source_sizes = [(img.height, img.width) for img in batch_images]
#     post_processed = image_processor.post_process_depth_estimation(
#         outputs,
#         source_sizes=source_sizes
#     )
    
#     for i, depth_dict in enumerate(post_processed):
#         # Get raw depth map
#         depth_array = depth_dict["predicted_depth"].cpu().numpy()
#         img_stem = Path(batch_paths[i]).stem
#         np.save(output_dir / f"{img_stem}.npy", depth_array)
#         # Save visualization PNG
#         depth_norm = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min())
#         depth_img = Image.fromarray((depth_norm * 255).astype(np.uint8))
#         depth_img.save(output_dir / f"{img_stem}_depth.png")



#### Segmentation Pipeline GroundingDINO+SAM through Autodistill GroundingSAM

In [None]:
import os
import h5py
import numpy as np
from autodistill_grounded_sam import GroundedSAM
from autodistill.detection import CaptionOntology
from tqdm.notebook import tqdm
import supervision as sv
import random

In [None]:
# Define categories for images
def get_category(image_name: str) -> str:
    """
    Derive the category of an image from its file name.

    The first known category name that occurs as a substring of `image_name`
    wins (checked in the order listed below). Names matching none of them are
    filed under "Home".
    """
    known_categories = (
        "Hallway",
        "BigOffice-2",
        "BigOffice-3",
        "MeetingRoom",
        "SmallOffice",
        "Home",
    )
    return next((name for name in known_categories if name in image_name), "Home")

# Helper to filter Detections object dict with only needed attributes
def filter_detections(detections):
    """
    Reduce a detections object to the four attributes that get persisted.

    Args:
        detections: Object exposing `xyxy`, `mask`, `confidence` and `class_id`.

    Returns:
        dict with exactly those four keys; every other attribute is discarded.
    """
    kept_fields = ("xyxy", "mask", "confidence", "class_id")
    return {field: getattr(detections, field) for field in kept_fields}

def load_detection_from_h5(img_group) -> sv.Detections:
    """
    Rebuild an `sv.Detections` object from one per-image HDF5 group.

    Args:
        img_group: Group holding the datasets "xyxy", "mask", "confidence"
            and "class_id". Each is read fully into memory with `[:]`.
            Expected shapes per the original notes: (N, 4), (N, H, W), (N,), (N,).

    Returns:
        sv.Detections built from those four arrays.
    """
    field_names = ("xyxy", "mask", "confidence", "class_id")
    arrays = {name: img_group[name][:] for name in field_names}
    return sv.Detections(**arrays)


In [None]:
# Build the GroundedSAM base model used for segmentation in this notebook.
# The ontology is a dictionary of the form {caption: class}:
#   - caption: the text prompt sent to the base model
#   - class:   the label saved for that caption in the generated annotations
# NOTE(review): the overlay code further down labels class ids 0/1/2 as
# human/robot/animal, which presumably follows the order of the entries below — confirm.
base_model = GroundedSAM(
    ontology=CaptionOntology(
        {
            "human . child . person": "human",
            "robot": "robot",
            "dog": "dog"
        }
    )
)

In [None]:
def segment_images(
    image_dir="../data/images/",
    image_extension=".png",
    hdf5_path="autodistill_segmentation_dataset.hdf5",
    batch_size=20,
    ):
    """
    Run `base_model` on every matching image in a folder and store the
    detections in a newly created HDF5 file.

    File layout: ``<category>/<image_name>/{xyxy, mask, confidence, class_id}``,
    where the category comes from `get_category(image_name)` and each dataset is
    gzip-compressed.

    Args:
        image_dir: Folder that is listed for images.
        image_extension: Only file names ending with this suffix are processed.
        hdf5_path: Output file; must not exist yet.
        batch_size: Number of images between two flushes of the file to disk.

    Raises:
        FileExistsError: If `hdf5_path` already exists (nothing is overwritten).
        RuntimeError: If an image name shows up twice within one category.
    """
    # Sorted so the processing order (and progress bar) is reproducible;
    # os.listdir returns entries in arbitrary order.
    all_images = sorted(f for f in os.listdir(image_dir) if f.endswith(image_extension))

    if os.path.exists(hdf5_path):
        raise FileExistsError(f"File '{hdf5_path}' already exists.")

    # The context manager guarantees the file is closed even when prediction or
    # the duplicate check raises part-way through.
    with h5py.File(hdf5_path, "a") as h5file:
        for i in tqdm(range(0, len(all_images), batch_size), desc="Batch"):
            batch = all_images[i : i + batch_size]
            for image_name in tqdm(batch, desc="Image", leave=False):
                img_path = os.path.join(image_dir, image_name)

                detections = base_model.predict(img_path)
                data = filter_detections(detections)

                # One group per category, created on first use.
                category = get_category(image_name)
                category_group = h5file.require_group(category)

                # One subgroup per image; a repeated name would silently mix results.
                if image_name in category_group:
                    raise RuntimeError(f"Duplicate entry detected for image '{image_name}' in category '{category}'.")
                img_group = category_group.create_group(image_name)

                # Persist the four detection arrays.
                for field in ("xyxy", "mask", "confidence", "class_id"):
                    img_group.create_dataset(field, data=data[field], compression="gzip")

            # Flush to disk after each batch so finished batches survive a crash.
            h5file.flush()

In [None]:
# segment_images(image_dir="../data/images/", image_extension=".png", hdf5_path="autodistill_segmentation_dataset.hdf5", batch_size=20)

In [None]:
def inspect_detections_dataset(hdf5_path = "autodistill_segmentation_dataset.hdf5"):
    """
    Print a summary of the detections file.

    Reports the category names, the number of images per category, the total,
    and, for up to 20 randomly drawn images, the shape and dtype of every
    dataset stored for that image. The file is opened read-only.
    """
    with h5py.File(hdf5_path, "r") as h5file:
        categories = list(h5file.keys())
        print(f"Categories ({len(categories)}): {categories}\n")

        # Image names grouped by category, in the file's category order.
        images_by_category = {cat: list(h5file[cat].keys()) for cat in categories}
        category_counts = {cat: len(names) for cat, names in images_by_category.items()}
        total_images = sum(category_counts.values())
        # Flat list of (category, image_name) pairs used for sampling.
        image_paths = [
            (cat, name)
            for cat, names in images_by_category.items()
            for name in names
        ]

        print("Number of images per category:")
        for cat, cnt in category_counts.items():
            print(f"  {cat}: {cnt}")
        print(f"\nTotal images: {total_images}\n")

        # Draw at most 20 images (fewer when the file is smaller).
        sample_size = min(20, len(image_paths))
        sample_paths = random.sample(image_paths, sample_size)
        print("Sample 20 images (category, image_name) and dataset shapes:")

        for category, image_name in sample_paths:
            img_group = h5file[category][image_name]
            print(f"\n{category}/{image_name}:")
            for dataset_name in img_group.keys():
                stored = img_group[dataset_name]
                print(f"  - {dataset_name}: shape={stored.shape}, dtype={stored.dtype}")

In [None]:
# Print category counts and a random sample of stored dataset shapes.
# The file is opened read-only, so it must already exist (segment_images creates it).
inspect_detections_dataset(hdf5_path="autodistill_segmentation_dataset.hdf5")

In [None]:
import os
import h5py
import numpy as np
import cv2
from PIL import Image, ImageDraw
from tqdm.notebook import tqdm

def apply_bbox_mask_to_dataset(
        image_dir="../data/images/",
        hdf5_path="autodistill_segmentation_dataset.hdf5",
        output_dir="../data/test_segmentations_colored/",
        confidence_threshold=0.3,
        resize_to=(512, 288),
        deduplicate_bbox=False
        ):
    """
    Render the stored detections of every image as a colored overlay PNG.

    For each image in the HDF5 file the picture is read from `image_dir`,
    resized to `resize_to`, and every detection with confidence
    >= `confidence_threshold` is drawn as a semi-transparent mask plus a
    bounding box labelled "<class_id>: <confidence>". Images that cannot be
    read, or that have no detection above the threshold, are reported and
    skipped (no file is written for them).

    Args:
        image_dir: Folder containing the source images.
        hdf5_path: Detections file laid out as <category>/<image_name>/...
        output_dir: Folder receiving the rendered PNGs (created if missing).
        confidence_threshold: Minimum confidence for a detection to be drawn.
        resize_to: Output size as (width, height).
        deduplicate_bbox: If True, run class-agnostic NMS on the detections first.
    """
    # RGBA overlay color per class id.
    class_colors = {
        0: (255, 0, 0, 100),    # Red (human)
        1: (0, 255, 0, 100),    # Green (robot)
        2: (0, 0, 255, 100),    # Blue (animal)
        3: (122, 122, 0, 100),
        # Add more classes/colors as needed
    }
    fallback_color = (255, 255, 255, 100)  # white, for class ids without an entry

    os.makedirs(output_dir, exist_ok=True)
    target_width, target_height = resize_to
    target_size = (target_width, target_height)

    with h5py.File(hdf5_path, "r") as h5file:
        for category in tqdm(h5file.keys(), desc="Categories"):
            category_group = h5file[category]
            for image_name in tqdm(category_group.keys(), desc=f"Images in {category}", leave=False):
                detections = load_detection_from_h5(category_group[image_name])
                if deduplicate_bbox:
                    detections = detections.with_nms(class_agnostic=True)

                image = cv2.imread(os.path.join(image_dir, image_name))
                if image is None:
                    print(f"Warning: Image {image_name} not found, skipping.")
                    continue

                # Factors mapping original pixel coordinates onto the resized canvas.
                orig_h, orig_w = image.shape[:2]
                scale_x = target_width / orig_w
                scale_y = target_height / orig_h

                # Reverse the channel axis (BGR -> RGB) before handing the array to PIL.
                canvas = (
                    Image.fromarray(image[:, :, ::-1])
                    .convert("RGBA")
                    .resize(target_size, resample=Image.LANCZOS)
                )

                # Keep only detections at or above the confidence threshold.
                keep = detections.confidence >= confidence_threshold
                if not np.any(keep):
                    print(f"No detections above threshold for {image_name}")
                    continue

                kept_boxes = detections.xyxy[keep]
                kept_masks = detections.mask[keep]
                kept_confs = detections.confidence[keep]
                kept_classes = detections.class_id[keep]

                # Boxes in canvas coordinates, truncated to whole pixels.
                scaled_boxes = [
                    (
                        int(x1 * scale_x),
                        int(y1 * scale_y),
                        int(x2 * scale_x),
                        int(y2 * scale_y),
                    )
                    for x1, y1, x2, y2 in kept_boxes
                ]

                # Masks resized with nearest neighbour, then re-binarised.
                scaled_masks = [
                    np.array(
                        Image.fromarray((mask * 255).astype(np.uint8))
                        .resize(target_size, resample=Image.NEAREST)
                    ) > 128
                    for mask in kept_masks
                ]

                # Blend one translucent color layer per detection onto the canvas.
                for mask, class_id in zip(scaled_masks, kept_classes):
                    rgba = class_colors.get(class_id, fallback_color)
                    stencil = Image.fromarray((mask.astype(bool) * 255).astype(np.uint8), mode="L")
                    solid_layer = Image.new("RGBA", canvas.size, rgba)
                    clear_layer = Image.new("RGBA", canvas.size)
                    canvas = Image.alpha_composite(
                        canvas, Image.composite(solid_layer, clear_layer, stencil)
                    )

                # Boxes and "<class_id>: <confidence>" labels on top of the masks.
                draw = ImageDraw.Draw(canvas)
                for (x1, y1, x2, y2), conf, class_id in zip(scaled_boxes, kept_confs, kept_classes):
                    outline_rgb = class_colors.get(class_id, fallback_color)[:3]
                    draw.rectangle([x1, y1, x2, y2], outline=outline_rgb, width=2)
                    # The label offset depends on the class id so labels of different classes stack.
                    label_y = max(y1 - 10 * (class_id + 1), 0)
                    draw.text((x1, label_y), f"{class_id}: {conf:.2f}", fill=outline_rgb)

                # Always written as PNG, whatever the source extension was.
                save_path = os.path.join(output_dir, os.path.splitext(image_name)[0] + ".png")
                canvas.convert("RGB").save(save_path)

    print("All images processed and saved with resized overlays.")


In [None]:
def get_dataset(hdf5_path="autodistill_segmentation_dataset.hdf5", image_dir="../data/images/", category_name="Hallway", deduplicate_bbox=False):
    """
    Load all stored detections of one category for the semantic split.

    Args:
        hdf5_path: Detections file laid out as <category>/<image_name>/...
        image_dir: Folder prepended to each image name to build its path.
        category_name: Top-level group to read.
        deduplicate_bbox: If True, apply class-agnostic NMS to each image's detections.

    Returns:
        List of (image_path, detections) tuples, one per image in the category.
    """
    def _read(group):
        # Detections of one image, optionally de-duplicated.
        loaded = load_detection_from_h5(group)
        return loaded.with_nms(class_agnostic=True) if deduplicate_bbox else loaded

    with h5py.File(hdf5_path, "r") as h5file:
        category_group = h5file[category_name]
        detections_dataset = [
            (os.path.join(image_dir, image_name), _read(category_group[image_name]))
            for image_name in category_group.keys()
        ]

    print(f"Loaded {len(detections_dataset)} images and detections from category '{category_name}'.")
    return detections_dataset


#### depth mean std

In [None]:
import numpy as np
import os
from tqdm.notebook import tqdm

def calculate_mean_std_for_npy(folder_path):
    """
    Compute the global mean and (population) standard deviation over all
    values in every ``.npy`` file of a folder.

    The statistics are accumulated as running sums in float64, so only one file
    is held in memory at a time.

    Args:
        folder_path: Folder whose ``*.npy`` files are read (non-recursive).

    Returns:
        (mean, std). Both are None when the folder has no ``.npy`` values.
    """
    total_sum = 0.0
    total_sum_sq = 0.0
    total_count = 0

    # Sorted so the floating-point accumulation order is the same on every run.
    files = sorted(f for f in os.listdir(folder_path) if f.endswith('.npy'))

    for filename in tqdm(files):
        values = np.load(os.path.join(folder_path, filename)).astype(np.float64)
        total_sum += values.sum()
        total_sum_sq += np.square(values).sum()
        total_count += values.size

    if total_count == 0:
        return None, None

    mean = total_sum / total_count
    # E[x^2] - E[x]^2 can come out a hair below zero through rounding (e.g. for
    # near-constant data); clamp so the square root never yields NaN.
    variance = max(total_sum_sq / total_count - mean ** 2, 0.0)
    return mean, np.sqrt(variance)


In [None]:
# calculate_mean_std_for_npy('../data/depth')

#### Heuristic Split

In [None]:
import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
import os
from pathlib import Path

def process_segmentation_data(detections_dataset, output_dir, imagenet_mean=[0.485, 0.456, 0.406], confidence_threshold=0.3):
    """
    Split every image into a "social" and an "environment" version.

    The masks of all detections with confidence >= `confidence_threshold` are
    merged into one mask. The social image keeps only the masked pixels, the
    environment image keeps everything else; removed pixels are filled with
    `imagenet_mean`. Results are written to ``<output_dir>/social`` and
    ``<output_dir>/environment`` under the original file name.

    Args:
        detections_dataset: Iterable of tuples whose first element is the image
            path and whose LAST element is the detections object (exposing
            `mask` and `confidence`).
        output_dir: Base directory for the two output folders.
        imagenet_mean: RGB fill values in [0, 1]. The default list is never
            mutated.
        confidence_threshold: Minimum confidence for a detection to be used.

    Images that cannot be read are reported and skipped.
    """
    social_dir = Path(output_dir) / "social"
    env_dir = Path(output_dir) / "environment"
    social_dir.mkdir(parents=True, exist_ok=True)
    env_dir.mkdir(parents=True, exist_ok=True)

    for item in tqdm(detections_dataset):
        image_path = item[0]
        detections = item[-1]

        # cv2.imread signals a missing/unreadable file by returning None.
        bgr_image = cv2.imread(str(image_path))
        if bgr_image is None:
            print(f"Warning: Image {image_path} not found, skipping.")
            continue
        original_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        height, width = original_image.shape[:2]

        filename = Path(image_path).name

        if detections.mask is None:
            # No masks stored for this image: both outputs are built from an empty mask.
            filtered_masks = None
        else:
            keep = np.asarray(detections.confidence) >= confidence_threshold
            filtered_masks = detections.mask[keep]

        # Union of all kept masks (all zeros when there are none).
        combined_mask = combine_masks(filtered_masks, height, width)

        # Social image: only the detected actors stay visible.
        social_image = apply_mask_with_mean(
            original_image, combined_mask, imagenet_mean, keep_masked=True
        )

        # Environment image: the detected actors are filled, the room stays visible.
        env_image = apply_mask_with_mean(
            original_image, combined_mask, imagenet_mean, keep_masked=False
        )

        save_image(social_image, social_dir / filename)
        save_image(env_image, env_dir / filename)


def combine_masks(masks, height, width, confidence_threshold=0.3):
    """
    Merge several masks into one binary mask (union).

    Args:
        masks: Sequence/array of 2-D masks, or None. After conversion to uint8
            any non-zero value counts as "object", so boolean, 0/1 and 0/255
            masks are all accepted.
        height, width: Required shape of every mask and of the result.
        confidence_threshold: Unused; kept so existing call signatures stay valid.

    Returns:
        uint8 array of shape (height, width) containing only 0 (background)
        and 1 (object). All zeros when `masks` is None or empty.

    Raises:
        ValueError: If a mask does not have shape (height, width).
    """
    union = np.zeros((height, width), dtype=bool)

    if masks is None or len(masks) == 0:
        return union.astype(np.uint8)

    for mask in masks:
        mask = np.asarray(mask)
        if mask.shape != (height, width):
            raise ValueError(f"Mask shape incorrect: {mask.shape}, should be {(height, width)}")

        # Binarise before merging so the result is strictly 0/1 even when an
        # input uses another "on" value such as 255.
        union |= mask.astype(np.uint8) != 0

    return union.astype(np.uint8)

def apply_mask_with_mean(image, mask, imagenet_mean, keep_masked=True):
    """
    Keep one side of a mask and fill the other side with the ImageNet mean.

    Args:
        image: RGB image (H, W, 3), uint8.
        mask: (H, W) mask; any non-zero value marks an object pixel.
        imagenet_mean: RGB mean values [R, G, B] in range [0, 1].
        keep_masked: If True, keep object pixels and fill the background
            (social image). If False, keep the background and fill the object
            pixels (environment image).

    Returns:
        New uint8 image of the same shape; `image` itself is not modified.
        Pixels that are kept are copied unchanged.
    """
    object_pixels = np.asarray(mask) != 0
    # Exactly one of the two complementary regions is filled.
    fill_mask = ~object_pixels if keep_masked else object_pixels

    # Fill color in 0..255: float32 product truncated to uint8, which gives
    # (123, 116, 103) for the standard ImageNet mean.
    mean_rgb = np.asarray(imagenet_mean, dtype=np.float32)[:3]
    fill_value = (mean_rgb * np.float32(255.0)).astype(np.uint8)

    # Work directly in uint8 so kept pixels never pass through a lossy
    # float /255 ... *255 round trip.
    processed_image = np.array(image, dtype=np.uint8, copy=True)
    processed_image[fill_mask] = fill_value

    return processed_image

def save_image(image, save_path):
    """
    Write an image array to disk through PIL.

    Args:
        image: RGB image array (H, W, 3)
        save_path: Destination path, passed to PIL's `save`.
    """
    Image.fromarray(image).save(save_path)


In [None]:
# dataset_home = get_dataset(
#     hdf5_path="autodistill_segmentation_dataset.hdf5", 
#     image_dir="../data/images/", 
#     category_name="Home", 
#     deduplicate_bbox=False)

# process_segmentation_data(
#     detections_dataset=dataset_home,
#     output_dir='../data/masked',
#     imagenet_mean=[0.485, 0.456, 0.406]  # ImageNet RGB means
# )


#### obfuscate env

In [None]:
def get_ellipse_pool(detections_dataset, confidence_threshold=0.3, image_dir="../data/images"):
    """
    Build a pool of random blob masks, one per confident detection.

    For every detection with confidence >= `confidence_threshold` a blob is
    drawn with `draw_random_pear_blob_cv`, centered on the bounding box and
    using half the box width/height as average radii, on an empty mask the size
    of the source image.

    Args:
        detections_dataset: Iterable of tuples whose first element is an image
            path (only its file name is used) and whose LAST element is the
            detections object (exposing `xyxy` and `confidence`).
        confidence_threshold: Minimum confidence for a detection to be used.
        image_dir: Folder the images are read from to obtain their size.

    Returns:
        uint8 array of shape (num_blobs, H, W). All images must share one size,
        otherwise stacking fails.

    Raises:
        ValueError: If no detection passes the threshold (nothing to stack).
    """
    ellipses_pool = []

    for item in tqdm(detections_dataset):
        image_path = Path(image_dir) / Path(item[0]).name
        detections = item[-1]

        keep_indices = [i for i, conf in enumerate(detections.confidence) if conf >= confidence_threshold]
        if not keep_indices:
            continue  # nothing to draw, so the image does not need to be read

        # Only the image size is needed; cv2.imread returns None for unreadable files.
        image = cv2.imread(str(image_path))
        if image is None:
            print(f"Warning: Image {image_path} not found, skipping.")
            continue
        height, width = image.shape[:2]

        for i in keep_indices:
            x1, y1, x2, y2 = detections.xyxy[i]

            # Blob centered on the box, with half the box extent as average radii.
            center = ((x1 + x2) / 2, (y1 + y2) / 2)
            blob = draw_random_pear_blob_cv(
                np.zeros((height, width), dtype=np.uint8),
                center, (x2 - x1) / 2, (y2 - y1) / 2,
                irregularity=0.6, spikiness=0.4,
                vertical_stretch=1.2, bottom_bias=0.4)
            ellipses_pool.append(blob)

    if not ellipses_pool:
        raise ValueError("No detections above the confidence threshold; the pool is empty.")

    return np.stack(ellipses_pool, axis=0)

def get_silhouettes_pool(detections_dataset, confidence_threshold=0.3):
    """
    Collect the segmentation masks of all confident detections into one array.

    Args:
        detections_dataset: Iterable of tuples whose LAST element is the
            detections object (exposing `mask` and `confidence`).
        confidence_threshold: Minimum confidence for a mask to be included.

    Returns:
        Array of shape (num_masks, H, W): the kept masks of all items
        concatenated along the first axis. All masks must share one size.

    Raises:
        ValueError: If the dataset yields no masks at all.
    """
    # The source images are not needed here: the masks already carry their size.
    social_actors_silhouettes = []
    for item in tqdm(detections_dataset):
        detections = item[-1]
        if detections.mask is None:
            continue  # nothing stored for this image

        keep = np.asarray(detections.confidence) >= confidence_threshold
        social_actors_silhouettes.append(detections.mask[keep])

    if not social_actors_silhouettes:
        raise ValueError("No masks available; the silhouettes pool is empty.")

    return np.concatenate(social_actors_silhouettes, axis=0)
        
def add_masks_to_image(detections_dataset, output_dir, masks_pool, imagenet_mean=[0.485, 0.456, 0.406],
                       num_masks=15, source_dir="../data/masked/environment_elipses"):
    """
    Obfuscate environment images further by filling extra, randomly chosen masks.

    For every item the image with the same file name is read from `source_dir`,
    `num_masks` masks are drawn at random (without replacement) from
    `masks_pool`, merged, and the covered pixels are filled with
    `imagenet_mean`. Results go to ``<output_dir>/environment_plus``.

    Args:
        detections_dataset: Iterable of tuples whose first element is an image
            path; only its file name is used.
        output_dir: Base directory for the output folder.
        masks_pool: Array of shape (num_pool_masks, H, W); H and W must match
            the images, otherwise `combine_masks` raises ValueError.
        imagenet_mean: RGB fill values in [0, 1]. The default list is never mutated.
        num_masks: Number of pool masks applied per image; capped at the pool size.
        source_dir: Folder the input images are read from.

    Images that cannot be read are reported and skipped.
    """
    env_dir = Path(output_dir) / "environment_plus"
    env_dir.mkdir(parents=True, exist_ok=True)

    pool_size = len(masks_pool)
    # random.sample raises if asked for more items than exist, so cap the request.
    sample_size = min(num_masks, pool_size)

    for item in tqdm(detections_dataset):
        image_path = Path(source_dir) / Path(item[0]).name

        # cv2.imread signals a missing/unreadable file by returning None.
        bgr_image = cv2.imread(str(image_path))
        if bgr_image is None:
            print(f"Warning: Image {image_path} not found, skipping.")
            continue
        original_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        height, width = original_image.shape[:2]

        filename = Path(image_path).name

        idxs = random.sample(range(pool_size), sample_size)
        filtered_masks = masks_pool[idxs]   # shape (sample_size, H, W)

        # Union of the sampled masks.
        combined_mask = combine_masks(filtered_masks, height, width)

        # Fill the covered pixels, keep the rest of the image.
        env_image = apply_mask_with_mean(
            original_image, combined_mask, imagenet_mean, keep_masked=False
        )

        save_image(env_image, env_dir / filename)


def draw_random_pear_blob_cv(mask, center, avg_radius_x, avg_radius_y,
                             irregularity=0.3, spikiness=0.2, num_points=14,
                             vertical_stretch=1.2, bottom_bias=0.3, color=1):
    """
    Draws a random pear-shaped blob on a NumPy mask.
    - vertical_stretch > 1.0 makes shape taller
    - bottom_bias > 0 widens the lower half
    """
    cx, cy = center
    angle = 0
    points = []
    for i in range(num_points):
        angle_step = 2 * math.pi / num_points
        angle += angle_step + random.uniform(-angle_step * irregularity,
                                             angle_step * irregularity)

        # base radii
        radius_x = avg_radius_x + random.uniform(-avg_radius_x * spikiness,
                                                 avg_radius_x * spikiness)
        radius_y = avg_radius_y + random.uniform(-avg_radius_y * (spikiness * 0.4),
                                                 avg_radius_y * (spikiness * 0.4))

        # stretch vertically
        radius_y *= vertical_stretch

        # widen bottom (when pointing down)
        if math.sin(angle) > 0:
            radius_x *= (1 + bottom_bias * math.sin(angle))

        x = cx + math.cos(angle) * radius_x
        y = cy + math.sin(angle) * radius_y
        points.append((int(x), int(y)))

    cv2.fillPoly(mask, [np.array(points, dtype=np.int32)], color)
    return mask

def process_segmentation_data_eplipses(detections_dataset, output_dir, imagenet_mean=[0.485, 0.456, 0.406], confidence_threshold=0.3,
                                       source_dir="../data/masked/environment"):
    """
    Cover every confident detection with a random blob instead of its exact mask.

    For every item the image with the same file name is read from `source_dir`.
    Each detection with confidence >= `confidence_threshold` is turned into a
    random blob (`draw_random_pear_blob_cv`) centered on its bounding box, the
    blobs are merged, and the covered pixels are filled with `imagenet_mean`.
    Results go to ``<output_dir>/environment_elipses``.

    Args:
        detections_dataset: Iterable of tuples whose first element is an image
            path (only its file name is used) and whose LAST element is the
            detections object (exposing `xyxy` and `confidence`).
        output_dir: Base directory for the output folder.
        imagenet_mean: RGB fill values in [0, 1]. The default list is never mutated.
        confidence_threshold: Minimum confidence for a detection to be covered.
        source_dir: Folder the input images are read from.

    Images that cannot be read are reported and skipped.
    """
    env_dir = Path(output_dir) / "environment_elipses"
    env_dir.mkdir(parents=True, exist_ok=True)

    for item in tqdm(detections_dataset):
        image_path = Path(source_dir) / Path(item[0]).name
        detections = item[-1]

        # cv2.imread signals a missing/unreadable file by returning None.
        bgr_image = cv2.imread(str(image_path))
        if bgr_image is None:
            print(f"Warning: Image {image_path} not found, skipping.")
            continue
        original_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        height, width = original_image.shape[:2]

        filename = Path(image_path).name

        keep_indices = [i for i, conf in enumerate(detections.confidence) if conf >= confidence_threshold]

        filtered_masks = []
        for i in keep_indices:
            x1, y1, x2, y2 = detections.xyxy[i]

            # Blob centered on the box, with half the box extent as average radii.
            center = ((x1 + x2) / 2, (y1 + y2) / 2)
            blob = draw_random_pear_blob_cv(
                np.zeros((height, width), dtype=np.uint8),
                center, (x2 - x1) / 2, (y2 - y1) / 2,
                irregularity=0.6, spikiness=0.4,
                vertical_stretch=1.2, bottom_bias=0.4)
            filtered_masks.append(blob)

        # Union of all blobs (all zeros when nothing passed the threshold).
        combined_mask = combine_masks(filtered_masks, height, width)

        # Fill the covered pixels, keep the rest of the image.
        env_image = apply_mask_with_mean(
            original_image, combined_mask, imagenet_mean, keep_masked=False
        )

        save_image(env_image, env_dir / filename)

In [None]:
# Load a previously saved detections dataset from a bz2-compressed pickle.
# NOTE(review): pickle.load can execute arbitrary code while unpickling — only
# open this file if it comes from a trusted source.
# NOTE(review): the cells below treat item[0] of every entry as the image path
# and a later element as the detections object — confirm against the code that
# wrote this file.
with bz2.BZ2File('../data/autodistill_dataset_home.pbz2', 'rb') as f:
    dataset_home = pickle.load(f)

In [None]:
# Cover each confident detection with a random blob and write the result to
# ../data/masked/environment_elipses.
# Prerequisite: the input images are read from ../data/masked/environment, which
# is the folder process_segmentation_data writes when given
# output_dir='../data/masked' — that step must have been run beforehand.
process_segmentation_data_eplipses(
    detections_dataset=dataset_home,
    output_dir='../data/masked',
    imagenet_mean=[0.485, 0.456, 0.406]  # ImageNet RGB means
)

In [None]:
# Build the pool of random blob masks from 100 randomly chosen dataset entries.
# sorted() fixes the order before sampling, so the draw depends only on the RNG state.
# NOTE(review): random.sample raises ValueError if the dataset has fewer than
# 100 entries, and sorted() needs the entries to be orderable — presumably the
# image paths in item[0] are unique so the comparison never reaches the other elements; confirm.
# Alternative pool built from the exact segmentation silhouettes:
# masks_pool=get_silhouettes_pool(random.sample(sorted(dataset_home), 100))
elipses_pool=get_ellipse_pool(random.sample(sorted(dataset_home), 100))

In [None]:
# Fill additional randomly chosen pool blobs into every image and write the
# result to ../data/masked/environment_plus.
# Prerequisite: the input images are read from ../data/masked/environment_elipses,
# i.e. the output of the process_segmentation_data_eplipses cell above.
add_masks_to_image(
    detections_dataset=dataset_home,
    output_dir='../data/masked',
    masks_pool=elipses_pool,
    imagenet_mean=[0.485, 0.456, 0.406],
    )