### Dataset analysis for the zoom-in method
Find the robot in each image and zoom in on it: image 1 – robot close-up, image 2 – full image.

#### Get robot bboxes

In [None]:
import os
import bz2
import pickle
import math
import numpy as np
from PIL import Image
import cv2
from pathlib import Path
from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
import json

In [None]:
# Load the bz2-compressed pickle with the "home" detections into `dataset`
# and record which domain is currently active in `suffix`.
with bz2.open('../data/autodistill_dataset_home.pbz2', 'rb') as f:
    dataset = pickle.load(f)
suffix = "home"

In [None]:
# Peek at the third field of the first dataset record.
dataset[0][2]

In [None]:
# Load the bz2-compressed pickle with the "office" detections into `dataset`
# (replacing whatever was loaded before) and record the active domain in `suffix`.
with bz2.open('../data/autodistill_dataset_office.pbz2', 'rb') as f:
    dataset = pickle.load(f)
suffix = "office"

In [None]:
def count_robots(detections_dataset, confidence_threshold=0.3, robot_class_id=1):
    """Count confident robot detections for every record of a dataset.

    Args:
        detections_dataset: Iterable of records whose last element exposes
            ``confidence`` and ``class_id`` sequences with one entry per detection.
        confidence_threshold: Minimum confidence a detection needs to be counted.
        robot_class_id: Class id that is treated as "robot" (defaults to 1).

    Returns:
        list[int]: Number of robot detections per record, in input order.
    """
    robot_counts_per_image = []
    for item in tqdm(detections_dataset):
        detections = item[-1]
        confidence = np.asarray(detections.confidence)
        class_id = np.asarray(detections.class_id)

        # A single boolean mask replaces the Python-level index list.
        is_confident_robot = (confidence >= confidence_threshold) & (class_id == robot_class_id)
        robot_counts_per_image.append(int(np.count_nonzero(is_confident_robot)))
    return robot_counts_per_image

In [None]:
# Histogram of robots-per-image for whatever is currently bound to `dataset`:
# prints one "<robots in image>: <number of images>" line per distinct count.
robot_counts_per_image = count_robots(dataset)
unique_counts, counts = np.unique(robot_counts_per_image, return_counts=True)
for count, num_images in zip(unique_counts, counts):
    print(f"{count}: {num_images}")

# Output recorded for the home dataset:
# 0: 9
# 1: 388
# 2: 386
# 3: 159
# 4: 49
# 5: 8
# 6: 1

In [None]:
# Same robots-per-image histogram as the previous cell, re-run on the current `dataset`:
# prints one "<robots in image>: <number of images>" line per distinct count.
robot_counts_per_image = count_robots(dataset)
unique_counts, counts = np.unique(robot_counts_per_image, return_counts=True)
for count, num_images in zip(unique_counts, counts):
    print(f"{count}: {num_images}")

# Output recorded for the office dataset:
# 0: 24
# 1: 805
# 2: 163
# 3: 8

In [None]:
# One domain label per image: labels 0-4, each repeated for 200 consecutive entries.
# NOTE(review): assumes the dataset holds 5 domains x 200 images stored in domain order — confirm.
domain_labels = np.repeat(np.arange(5), 200)
from collections import defaultdict, Counter

# Initialize a dictionary to hold counts per category
category_robot_counts = defaultdict(list)

# NOTE(review): `dataset_office` is not assigned in any visible cell (the office data is loaded
# into `dataset`), and detections are read from item[1] here but from item[-1] in count_robots —
# confirm which variable/field is intended before re-running.
# zip() stops at the shorter input, so records beyond the 1000 labels would be ignored silently.
for item, label in zip(dataset_office, domain_labels):
    detections = item[1]
    # Keep detections with confidence >= 0.3 and count those with class_id == 1 (robot).
    keep_indices = [i for i, conf in enumerate(detections.confidence) if conf >= 0.3]
    detected_classes = detections.class_id[keep_indices]
    robot_count = np.count_nonzero(detected_classes == 1)
    
    # Append count to the relevant category list
    category_robot_counts[label].append(robot_count)

# Now compute value counts per category
# (note: this loop rebinds the global `counts` used by the histogram cells)
for category, counts in category_robot_counts.items():
    counter = Counter(counts)
    print(f"\nCategory {category}:")
    for count, num_images in sorted(counter.items()):
        print(f"Robots: {count} | Images: {num_images}")

# Recorded output, per domain
# Category 0:
# Robots: 0 | Images: 4
# Robots: 1 | Images: 172
# Robots: 2 | Images: 23
# Robots: 3 | Images: 1

# Category 1:
# Robots: 0 | Images: 2
# Robots: 1 | Images: 177
# Robots: 2 | Images: 19
# Robots: 3 | Images: 2

# Category 2:
# Robots: 0 | Images: 5
# Robots: 1 | Images: 174
# Robots: 2 | Images: 20
# Robots: 3 | Images: 1

# Category 3:
# Robots: 0 | Images: 13
# Robots: 1 | Images: 167
# Robots: 2 | Images: 20

# Category 4:
# Robots: 1 | Images: 115
# Robots: 2 | Images: 81
# Robots: 3 | Images: 4

In [None]:
def robot_locations(detections_dataset, confidence_threshold=0.3, robot_class_id=1,
                    images_dir="../data/images"):
    """Collect the robot bbox of every image that contains exactly one confident robot.

    Images with zero or with several robot detections above the threshold are skipped.

    Args:
        detections_dataset: Iterable of records; the first element is the original image
            path (only its file name is used) and the last element exposes ``confidence``,
            ``class_id`` and ``xyxy`` with one entry per detection.
        confidence_threshold: Minimum confidence a detection needs to be considered.
        robot_class_id: Class id that is treated as "robot" (defaults to 1).
        images_dir: Directory the returned image paths are rooted at.

    Returns:
        list[dict]: One ``{'image_path': str, 'bbox': [x1, y1, x2, y2]}`` entry per kept
        image; ``bbox`` is the row taken from ``detections.xyxy``.
    """
    robot_bboxes = []

    for item in tqdm(detections_dataset):
        detections = item[-1]

        robot_indices = [
            idx
            for idx, (conf, class_id) in enumerate(zip(detections.confidence, detections.class_id))
            if conf >= confidence_threshold and class_id == robot_class_id
        ]
        if len(robot_indices) != 1:
            continue

        # Build the path only for images that are actually kept.
        image_path = Path(images_dir) / Path(item[0]).name
        robot_bboxes.append({
            'image_path': image_path.as_posix(),
            'bbox': detections.xyxy[robot_indices[0]],  # bbox: [x1, y1, x2, y2]
        })
    return robot_bboxes

In [None]:
# Bboxes of all images in the loaded dataset that contain exactly one confident robot detection.
robot_bboxes = robot_locations(dataset)

In [None]:
# Persist the single-robot bboxes of the currently loaded domain as JSON.
# np.asarray(...).tolist() accepts numpy arrays as well as plain lists, so re-running the
# cell on already-converted entries no longer fails.
robot_bboxes = [
    {'image_path': entry['image_path'], 'bbox': np.asarray(entry['bbox']).tolist()}
    for entry in robot_bboxes
]
# Name the file after the active domain (`suffix` is set when the dataset is loaded) so the
# home and office exports cannot overwrite each other.
with open(f"robot_bboxes_{suffix}.json", "w") as f:
    json.dump(robot_bboxes, f)

In [None]:
# Reload the bboxes stored in robot_bboxes_office.json and print their number and the first entry.
with open("robot_bboxes_office.json", "r") as f:
    robot_bboxes = json.load(f)
print(len(robot_bboxes), "\n", robot_bboxes[0])

In [None]:
# Reload the bboxes stored in robot_bboxes_home.json (rebinding `robot_bboxes`) and print
# their number and the first entry.
with open("robot_bboxes_home.json", "r") as f:
    robot_bboxes = json.load(f)
print(len(robot_bboxes), "\n", robot_bboxes[0])

#### Prepare cropped robot images

In [None]:
from pathlib import Path
import numpy as np
import json
from PIL import Image, ImageDraw
from tqdm import tqdm
import os

In [None]:
# Merge the saved bboxes of both domains into one list: home entries first, then office.
robot_bboxes = []
for bbox_file in ("robot_bboxes_home.json", "robot_bboxes_office.json"):
    with open(bbox_file, "r") as f:
        robot_bboxes += json.load(f)

In [None]:
# Sanity check: total number of merged entries and the first entry.
print(len(robot_bboxes), robot_bboxes[0])

In [None]:
# Exclude images whose robot detection was found to be wrong during manual inspection.
import re

exclude = [
    "Hallway_205", 
    "Hallway_273", 
    "Hallway_38",
    "38_1_0_4_3_0.7022904_0_1_1_2.686175_0.7022905_0.7022906_0.7022906_220_340_100_0_0_1_0_1_1_1_0_50_0_0_5_Pepper_TV",
    "4_1_0_6_4_0.5346667_0_1_1_2.360057_0.5346668_0.5346668_0.5346668_300_30_120_0_0_1_0_1_1_1_0_50_0_0_7_Pepper_TV",
    "576_1_0_3_5_0.6744272_0_1_1_1.406374_0.674427_0.674427_0.6744272_150_222_6_0_0_0_1_1_1_0_0_50_0_1_7_Pepper_TV",
    "SmallOffice_236",
    "62_1_0_4_3_0.59108_0_1_1_1.457013_0.59108_0.59108_0.59108_354_114_234_0_1_0_0_1_1_1_0_50_1_0_6_Pepper_TV",
    "440_1_0_5_3_0.7460655_0_1_1_2.220734_0.7460653_0.7460654_0.7460655_158_38.00001_278_0_0_1_0_1_0_0_1_1.334754_0_0_6_Pepper_TV",
    "Hallway_44",
    "Hallway_76",
    "MeetingRoom_47",
    "MeetingRoom_69",
    ]
# A plain substring test would also drop e.g. "Hallway_380" for the key "Hallway_38".
# Require that a key is neither preceded nor followed by another digit, so only the
# image with exactly that number is removed.
exclude_pattern = re.compile(
    "|".join(rf"(?<!\d){re.escape(key)}(?!\d)" for key in exclude)
)
robot_bboxes = [
    e for e in robot_bboxes
    if not exclude_pattern.search(e["image_path"])
]

In [None]:
#Inspect faulty bounding boxes manually

# output_dir = Path("./bbox_inspection")
# output_dir.mkdir(exist_ok=True)

# target_size = (720, 405)  # Resize for inspection

# for entry in tqdm(robot_bboxes):
#     img_path = entry['image_path']
#     bbox = entry['bbox']  # bbox is a list [x1, y1, x2, y2]
#     x1, y1, x2, y2 = map(int, bbox)

#     # Open image
#     img = Image.open(img_path).convert("RGB")

#     # Draw green bounding box
#     draw = ImageDraw.Draw(img)
#     draw.rectangle([x1, y1, x2, y2], outline="green", width=3)

#     # Resize image
#     img_resized = img.resize(target_size, Image.BILINEAR)

#     # Save image
#     save_path = output_dir / Path(img_path).name
#     img_resized.save(save_path)


In [None]:
def get_max_crop_window(robot_bboxes):
    """Return the largest bbox width and height found in ``robot_bboxes``.

    The maximum width and the maximum height are tracked independently, so they
    may come from different entries. The returned image path belongs to the last
    entry that raised either maximum.

    Args:
        robot_bboxes: Iterable of dicts with an ``'image_path'`` and a
            ``'bbox'`` given as ``[x1, y1, x2, y2]``.

    Returns:
        ``((max_width, max_height), image_path)`` with both extents rounded to
        the nearest integer; ``((0, 0), '')`` for empty input.
    """
    widest = 0
    tallest = 0
    record_image = ''

    for record in robot_bboxes:
        candidate_image = record['image_path']
        box = record['bbox']
        width, height = box[2] - box[0], box[3] - box[1]

        # Remember the image whenever it sets a new record in either dimension.
        if width > widest or height > tallest:
            record_image = candidate_image
        widest = max(widest, width)
        tallest = max(tallest, height)

    extents = (round(widest), round(tallest))
    return extents, record_image


# Find 16:9 crop window enclosing the biggest bbox without cropping it (expand bbox to 16:9)
def expand_bbox_to_aspect(bbox_w, bbox_h, target_aspect=16/9):
    """Grow a bbox size to the smallest enclosing size with the target aspect ratio.

    Only one dimension is enlarged; the bbox is never shrunk.

    Args:
        bbox_w: Width of the bbox.
        bbox_h: Height of the bbox. A height of 0 is treated as "too wide"
            instead of raising ZeroDivisionError.
        target_aspect: Desired width / height ratio (defaults to 16:9).

    Returns:
        ``(width, height)`` as ints, each rounded up.
    """
    # Guard the division: a zero-height box (e.g. the (0, 0) extent obtained from an
    # empty bbox list) is wider than any aspect ratio.
    if bbox_h == 0 or bbox_w / bbox_h > target_aspect:
        # Too wide: increase height
        new_w = bbox_w
        new_h = bbox_w / target_aspect
    else:
        # Too tall: increase width
        new_w = bbox_h * target_aspect
        new_h = bbox_h
    return int(np.ceil(new_w)), int(np.ceil(new_h))

# Largest bbox extents over all kept entries. get_max_crop_window tracks the maximum width and
# the maximum height independently, so the printed size may combine two different images.
max_size, img_path = get_max_crop_window(robot_bboxes)
print(f"Biggest bbox: {max_size} {img_path}")
# Grow that extent to the smallest enclosing window with the default 16:9 aspect ratio.
max_crop_width, max_crop_height = expand_bbox_to_aspect(*max_size)
print(f"Absolute crop window size (closest 16:9 enclosing biggest bbox): {max_crop_width}x{max_crop_height}")


In [None]:
def _clamp_span(start, length, limit):
    """Shift the span [start, start + length] into [0, limit]; cut it to [0, limit] if it is longer."""
    start = max(0, min(start, limit - length))
    return start, min(start + length, limit)


def get_crop_window(image_width, image_height, bbox, absolute_crop_size=None):
    """
    Returns a crop window (left, upper, right, lower) in integer pixel coordinates.

    - If absolute_crop_size is given as (width, height), a window of that size is
      centered on the bbox center.
    - Otherwise a square window is used whose side is twice the longer bbox side,
      so the longer bbox side covers 50% of the window.

    The window is shifted to stay inside the image. If it is larger than the image
    along an axis, it is cut down to the full image extent on that axis.

    The crop size is rounded to whole pixels before the window is placed, so all
    four returned values are ints even when the bbox coordinates are floats.

    bbox = [x1, y1, x2, y2]
    """
    x1, y1, x2, y2 = bbox
    cx, cy = (x1 + x2) / 2, (y1 + y2) / 2

    if absolute_crop_size is not None:
        crop_w, crop_h = absolute_crop_size
    else:
        crop_w = crop_h = max(x2 - x1, y2 - y1) * 2

    # Work in whole pixels so float bbox coordinates do not leak into the result.
    crop_w, crop_h = int(round(crop_w)), int(round(crop_h))

    left, right = _clamp_span(int(round(cx - crop_w / 2)), crop_w, image_width)
    upper, lower = _clamp_span(int(round(cy - crop_h / 2)), crop_h, image_height)

    return left, upper, right, lower


In [None]:
# Size every crop is resized to before saving.
target_size = (128,128) #(224,224)

# Side of the square window for the (currently disabled) absolute crop mode:
# the larger of the maximum bbox width and the maximum bbox height.
absolute_crop_size_side = max(*get_max_crop_window(robot_bboxes)[0])
absolute_crop_size = (absolute_crop_size_side, absolute_crop_size_side)

output_dir = Path("../data/resized_images")
# parents=True so the cell also works when ../data does not exist yet.
output_dir.mkdir(parents=True, exist_ok=True)

for entry in tqdm(robot_bboxes):
    img_path = entry['image_path']
    bbox = entry['bbox']

    # Close the source file deterministically; convert() returns an independent image.
    with Image.open(img_path) as source_img:
        img = source_img.convert("RGB")
    iw, ih = img.size

    # Choose crop window mode: absolute or relative
    # crop_box = get_crop_window(iw, ih, bbox, absolute_crop_size=absolute_crop_size)  # absolute
    crop_box = get_crop_window(iw, ih, bbox, absolute_crop_size=None)  # relative with bbox half crop

    cropped_img = img.crop(crop_box)
    resized_img = cropped_img.resize(target_size, Image.BILINEAR)
    # resized_img = img.resize((256,144), Image.BILINEAR)

    # Saved under the original file name inside output_dir.
    output_path = output_dir / Path(img_path).name
    resized_img.save(output_path)
