In [None]:
import csv
from pathlib import Path

# Paths are anchored at the directory the kernel was started from.
ROOT = Path.cwd()
# Directory against which relative bounding-box CSV paths are resolved.
BB_DIR = ROOT.joinpath("bounding_boxes")

def count_csv(csv_path: Path):
    """Count distinct images and total boxes in a bounding-box CSV.

    Every data row is counted as one box. An image is identified by the
    row's ``relative_path`` value; rows whose ``relative_path`` is missing
    or empty still count as a box but do not contribute an image.

    Args:
        csv_path: Path to a CSV file with a header row.

    Returns:
        A ``(num_images, num_boxes)`` tuple.
    """
    imgs = set()
    boxes = 0
    # "utf-8-sig" strips an optional leading BOM. With plain "utf-8" the first
    # header would be read as "\ufeffrelative_path" and every lookup below
    # would miss, silently reporting zero images. Files without a BOM are
    # decoded exactly as before.
    with csv_path.open("r", encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            boxes += 1
            rp = row.get("relative_path", "")
            if rp:
                imgs.add(rp)
    return len(imgs), boxes

def summarize_split(split_name, mapping):
    """Print image/box counts for each CSV in a split, followed by a total.

    Args:
        split_name: Heading printed above the per-dataset lines.
        mapping: Dict of display name -> ``Path`` to a bounding-box CSV.
            Relative paths are resolved against ``BB_DIR``; absolute paths
            are used as given.

    Files that do not exist are reported as ``MISSING`` and excluded from
    the total. The total image count is the sum of the per-file counts, so
    it is not de-duplicated across files.
    """
    print(f"\n{split_name}")
    img_sum = 0
    box_sum = 0
    for label, rel_path in mapping.items():
        resolved = (BB_DIR / rel_path) if not rel_path.is_absolute() else rel_path
        if resolved.exists():
            n_imgs, n_boxes = count_csv(resolved)
            img_sum += n_imgs
            box_sum += n_boxes
            print(f"  {label}: images={n_imgs:,} boxes={n_boxes:,}")
        else:
            print(f"  {label}: MISSING ({resolved})")
    print(f"  TOTAL: images={img_sum:,} boxes={box_sum:,}\n")


In [4]:
# Hand detection datasets: display name -> bounding-box CSV for each split.
hand_train = {
    label: Path(filename)
    for label, filename in (
        ("COCO-Hand train", "coco_hand_big_train_bounding_boxes.csv"),
        ("HaGRIDv2 train", "hagridv2_subset_train_bounding_boxes.csv"),
        ("MU ASL", "asl_1_dataset_raw_bounding_boxes.csv"),
        ("HG ASL", "asl_2_dataset_raw_bounding_boxes.csv"),
        ("RPS", "rps_dataset_raw_bounding_boxes.csv"),
    )
}

hand_val = {
    label: Path(filename)
    for label, filename in (
        ("COCO-Hand val", "coco_hand_big_val_bounding_boxes.csv"),
        ("HaGRIDv2 val", "hagridv2_subset_val_bounding_boxes.csv"),
    )
}

hand_test = {
    label: Path(filename)
    for label, filename in (
        ("COCO-Hand test", "coco_hand_big_test_bounding_boxes.csv"),
        ("HaGRIDv2 test", "hagridv2_subset_test_bounding_boxes.csv"),
        ("Open-Closed-1k all", "open_closed_1k_all_bounding_boxes.csv"),
    )
}

# Report the splits in train / val / test order.
for split_name, mapping in (
    ("Hand train", hand_train),
    ("Hand val", hand_val),
    ("Hand test", hand_test),
):
    summarize_split(split_name, mapping)


Hand train
  COCO-Hand train: images=10,000 boxes=19,075
  HaGRIDv2 train: images=15,000 boxes=18,139
  MU ASL: images=1,819 boxes=1,819
  HG ASL: images=4,618 boxes=4,618
  RPS: images=2,629 boxes=2,629
  TOTAL: images=34,066 boxes=46,280


Hand val
  COCO-Hand val: images=1,000 boxes=1,811
  HaGRIDv2 val: images=1,500 boxes=1,836
  TOTAL: images=2,500 boxes=3,647


Hand test
  COCO-Hand test: images=4,000 boxes=7,716
  HaGRIDv2 test: images=6,000 boxes=7,169
  Open-Closed-1k all: images=1,077 boxes=1,077
  TOTAL: images=11,077 boxes=15,962



In [None]:
# Open/Closed datasets used for classification/detection:
# display name -> bounding-box CSV for each split.
oc_train = {
    label: Path(filename)
    for label, filename in (
        ("HaGRIDv2 train", "hagridv2_subset_train_bounding_boxes.csv"),
        ("ASL1", "asl_1_dataset_labeled_bounding_boxes.csv"),
        ("ASL2", "asl_2_dataset_labeled_bounding_boxes.csv"),
        ("RPS", "rps_dataset_labeled_bounding_boxes.csv"),
    )
}

oc_val = {
    label: Path(filename)
    for label, filename in (
        ("HaGRIDv2 val", "hagridv2_subset_val_bounding_boxes.csv"),
    )
}

oc_test = {
    label: Path(filename)
    for label, filename in (
        ("HaGRIDv2 test", "hagridv2_subset_test_bounding_boxes.csv"),
        ("Open-Closed-1k all", "open_closed_1k_all_bounding_boxes.csv"),
    )
}

# Report the splits in train / val / test order.
for split_name, mapping in (
    ("Open/Closed train", oc_train),
    ("Open/Closed val", oc_val),
    ("Open/Closed test", oc_test),
):
    summarize_split(split_name, mapping)


Open/Closed train
  HaGRIDv2 train: images=15,000 boxes=18,139
  ASL1: images=326 boxes=326
  ASL2: images=579 boxes=579
  RPS: images=1,774 boxes=1,774
  TOTAL: images=17,679 boxes=20,818


Open/Closed val
  HaGRIDv2 val: images=1,500 boxes=1,836
  TOTAL: images=1,500 boxes=1,836


Open/Closed test
  HaGRIDv2 test: images=6,000 boxes=7,169
  Open-Closed-1k all: images=1,077 boxes=1,077
  TOTAL: images=7,077 boxes=8,246



In [None]:
from collections import Counter

# Histogram of boxes-per-image for the HaGRIDv2 test CSV:
# for each box count n, how many images have exactly n rows.
hagrid_test_csv = BB_DIR.joinpath("hagridv2_subset_test_bounding_boxes.csv")
counts = Counter()
if not hagrid_test_csv.exists():
    print(f"Missing {hagrid_test_csv}")
else:
    # relative_path -> number of rows (boxes) seen for that image.
    per_image = {}
    with hagrid_test_csv.open("r", encoding="utf-8", newline="") as f:
        for row in csv.DictReader(f):
            rp = row.get("relative_path", "")
            if not rp:
                # Rows without a path cannot be attributed to an image.
                continue
            per_image[rp] = per_image.get(rp, 0) + 1
    # Tally how many images share each box count.
    counts.update(per_image.values())
    for n, n_images in sorted(counts.items()):
        print(f"  {n} boxes: {n_images} images")