In [1]:
# !pip install -q albumentations==1.4.10 opencv-python-headless pycocotools

import os, json, cv2, numpy as np
from pathlib import Path
from tqdm.auto import tqdm
import albumentations as A

# Mount Google Drive when running on Colab. This stays best-effort (the cell
# never raises), but the two failure modes are reported separately instead of
# being swallowed silently, so a broken mount is visible before the path
# checks in the next cell fail.
try:
    from google.colab import drive
    drive.mount('/content/drive')
except ImportError:
    # Not running on Colab: nothing to mount.
    print("google.colab not available; skipping Drive mount")
except Exception as exc:
    print(f"WARNING: Google Drive mount failed: {exc!r}")

Mounted at /content/drive


In [2]:

# ---- Configuration: everything a reader might tune lives in this cell ----

# COCO-format training annotations and the directory its relative image
# file names are resolved against.
TRAIN_JSON = Path("/content/drive/MyDrive/ErdosFall25/data/detection2/yolo_split2/train/annotations.json")
TRAIN_IMAGES_ROOT = Path("/content/drive/MyDrive/ErdosFall25/data/detection2/yolo_split2/train/images")

# Output root of the static (pre-augmented) dataset; images go to
# OUT_ROOT / "images".
OUT_ROOT = Path("/content/drive/MyDrive/ErdosFall25/data/detection2_static/fixed_aug")
OUT_EXT  = ".jpg"   # suffix appended to every written image name; ".png" works too

# Zoom factors passed to the zoom transform (1.0 = unchanged).
ZOOM_IN_SCALE  = 1.10            # +10%
ZOOM_OUT_SCALES = [0.90, 0.70, 0.50]  # -10%, -30%, -50%

# Shear magnitudes in degrees (applied to original only)
SHEAR_X_DEGS = [+10, -10]        # shear in X axis (±)
SHEAR_Y_DEGS = [+10, -10]        # shear in Y axis (±)

# Border handling: how pixels exposed by rotate/affine warps are filled.
BORDER_MODE = cv2.BORDER_CONSTANT
BORDER_VALUE = 0  # black

# Fail fast if the inputs are missing (e.g. Drive is not mounted).
assert TRAIN_JSON.exists(), f"TRAIN_JSON not found: {TRAIN_JSON}"
assert TRAIN_IMAGES_ROOT.exists(), f"TRAIN_IMAGES_ROOT not found: {TRAIN_IMAGES_ROOT}"

# Create the output image directory up front.
(OUT_ROOT / "images").mkdir(parents=True, exist_ok=True)
print("Paths OK")

Paths OK


In [9]:
import json

def load_coco_state(ann_json: Path, images_root: Path | None):
    with open(ann_json, "r") as f:
        coco = json.load(f)
    images = coco["images"]; anns = coco["annotations"]; cats = coco["categories"]
    anns_by_img = {}
    for a in anns:
        anns_by_img.setdefault(a["image_id"], []).append(a)
    # keep categories as-is (ids/names)
    cat_id_to_idx = {c["id"]: i + 1 for i, c in enumerate(cats)}
    idx_to_cat_id = {i + 1: c["id"] for i, c in enumerate(cats)}
    return {
        "images": images,
        "anns_by_img": anns_by_img,
        "categories": cats,
        "cat_id_to_idx": cat_id_to_idx,
        "idx_to_cat_id": idx_to_cat_id,
        "images_root": images_root,
    }

In [11]:
def load_raw_sample(state, i):
    """Load image ``i`` as RGB together with its boxes in Pascal VOC pixels.

    Args:
        state: Dict produced by ``load_coco_state`` (reads "images",
            "anns_by_img", "cat_id_to_idx" and the optional "images_root").
        i: Position of the image record inside ``state["images"]``.

    Returns:
        (img, boxes, labels, imrec) where
            img: HxWx3 uint8 RGB array as decoded by OpenCV,
            boxes: float32 array of shape (N, 4) holding [x1, y1, x2, y2],
            labels: int64 array of shape (N,) with 1-based category indices,
            imrec: the original COCO image record.

    Raises:
        FileNotFoundError: if OpenCV cannot read the image file.
    """
    imrec = state["images"][i]
    p = Path(imrec["file_name"])
    root = state.get("images_root")
    if root and not p.is_absolute():
        p = root / p
    img = cv2.imread(str(p))
    if img is None:
        raise FileNotFoundError(str(p))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    H, W = img.shape[:2]

    boxes, labels = [], []
    for a in state["anns_by_img"].get(imrec["id"], []):
        x, y, w, h = a["bbox"]
        # Heuristic: if every component is <= 1.5 the box is treated as
        # normalized [0, 1] coordinates and scaled to pixels. The origin must
        # be scaled by the IMAGE size (W, H), not by the box size (w, h).
        if max(x, y, w, h) <= 1.5:
            x, y, w, h = x * W, y * H, w * W, h * H
        boxes.append([x, y, x + w, y + h])  # Pascal VOC
        labels.append(state["cat_id_to_idx"][a["category_id"]])
    boxes = np.asarray(boxes, dtype=np.float32) if boxes else np.zeros((0, 4), np.float32)
    labels = np.asarray(labels, dtype=np.int64) if labels else np.zeros((0,), np.int64)

    if len(boxes):
        # Clamp to the image and drop boxes that collapse to zero size.
        eps = 1e-6
        boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]], 0, W - eps)
        boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]], 0, H - eps)
        keep = (boxes[:, 2] - boxes[:, 0] > eps) & (boxes[:, 3] - boxes[:, 1] > eps)
        boxes, labels = boxes[keep], labels[keep]

    return img, boxes, labels, imrec

In [12]:
import inspect
import albumentations as A
import cv2

# Global config you already set:
# BORDER_MODE = cv2.BORDER_CONSTANT
# BORDER_VALUE = 0

def _accepts(cls, argname: str) -> bool:
    try:
        return argname in inspect.signature(cls.__init__).parameters
    except Exception:
        return False

def _rotate_kwargs():
    kw = {}
    if _accepts(A.Rotate, "border_mode"):
        kw["border_mode"] = BORDER_MODE
    # Albumentations versions differ: some use border_value, some use value
    if _accepts(A.Rotate, "border_value"):
        kw["border_value"] = BORDER_VALUE
    elif _accepts(A.Rotate, "value"):
        kw["value"] = BORDER_VALUE
    return kw

def _affine_kwargs():
    kw = {}
    # Some versions use 'mode', others 'border_mode'
    if _accepts(A.Affine, "mode"):
        kw["mode"] = BORDER_MODE
    elif _accepts(A.Affine, "border_mode"):
        kw["border_mode"] = BORDER_MODE
    # Some versions use 'cval', others 'value'
    if _accepts(A.Affine, "cval"):
        kw["cval"] = BORDER_VALUE
    elif _accepts(A.Affine, "value"):
        kw["value"] = BORDER_VALUE
    return kw

def make_identity():
    """Build a pass-through pipeline that still runs the Pascal VOC bbox processing."""
    # A lone NoOp keeps the pipeline non-empty, which prevents the bbox
    # processor warning.
    steps = [A.NoOp(p=1.0)]
    box_cfg = A.BboxParams(format="pascal_voc", label_fields=["labels"], min_visibility=0.0)
    return A.Compose(steps, bbox_params=box_cfg, strict=False)

def make_rotate(deg):
    """Build a pipeline that always rotates by exactly ``deg`` degrees.

    The rotation limit is the degenerate range (deg, deg), so the angle is
    fixed. Border handling comes from ``_rotate_kwargs()``.

    NOTE(review): presumably A.Rotate keeps the input canvas size, in which
    case 90/270 degree turns of non-square images lose content at the edges.
    TODO confirm against the installed Albumentations version.
    """
    extra = _rotate_kwargs()
    rotation = A.Rotate(limit=(deg, deg), p=1.0, **extra)
    box_cfg = A.BboxParams(format="pascal_voc", label_fields=["labels"], min_visibility=0.0)
    return A.Compose([rotation], bbox_params=box_cfg, strict=False)

def make_flip_horizontal():
    """Build a pipeline that always applies a horizontal flip (p=1.0)."""
    flip = A.HorizontalFlip(p=1.0)
    box_cfg = A.BboxParams(format="pascal_voc", label_fields=["labels"], min_visibility=0.0)
    return A.Compose([flip], bbox_params=box_cfg, strict=False)

def make_flip_vertical():
    """Build a pipeline that always applies a vertical flip (p=1.0)."""
    flip = A.VerticalFlip(p=1.0)
    box_cfg = A.BboxParams(format="pascal_voc", label_fields=["labels"], min_visibility=0.0)
    return A.Compose([flip], bbox_params=box_cfg, strict=False)

def make_zoom(scale):
    """Build a pipeline that applies a pure scale of ``scale`` via ``A.Affine``.

    Translation, rotation and shear are all pinned to zero; border handling
    comes from ``_affine_kwargs()``.
    """
    extra = _affine_kwargs()
    zoom = A.Affine(
        scale=scale,
        translate_percent=(0, 0),
        rotate=0,
        shear=(0, 0),
        p=1.0,
        **extra,
    )
    box_cfg = A.BboxParams(format="pascal_voc", label_fields=["labels"], min_visibility=0.0)
    return A.Compose([zoom], bbox_params=box_cfg, strict=False)

def make_shear(x_deg=0.0, y_deg=0.0):
    """Build a pipeline that applies a fixed shear of ``x_deg`` / ``y_deg`` degrees.

    Args:
        x_deg: Shear along the X axis, in degrees.
        y_deg: Shear along the Y axis, in degrees.

    Returns:
        An ``A.Compose`` pipeline with Pascal VOC bbox handling.
    """
    kw = _affine_kwargs()
    # A.Affine documents a bare tuple (a, b) as a sampling RANGE applied to
    # both axes, so shear=(x_deg, y_deg) would produce a random shear on both
    # axes. The per-axis dict with degenerate (v, v) ranges pins each axis to
    # exactly the requested angle.
    fixed_shear = {"x": (x_deg, x_deg), "y": (y_deg, y_deg)}
    return A.Compose(
        [A.Affine(shear=fixed_shear, rotate=0, p=1.0, **kw)],
        bbox_params=A.BboxParams(format="pascal_voc", label_fields=["labels"], min_visibility=0.0),
        strict=False
    )

In [13]:
# Base transforms from original
# Base transforms applied to the original image. "orig" is the identity and
# exists so zoom variants can be layered on the untouched image as well.
base_transforms = {
    "orig": make_identity(),
    "rot90": make_rotate(90),
    "rot180": make_rotate(180),
    "rot270": make_rotate(270),
    "flip_h": make_flip_horizontal(),
    "flip_v": make_flip_vertical(),
}

# Derived zoom variants (apply on top of each base) will be handled by composing two calls.
# Shear variants (on original only). The angles come from the config cell
# (SHEAR_X_DEGS / SHEAR_Y_DEGS) rather than being repeated here, so changing
# the config actually changes the generated dataset. The ":+d" format spec
# requires integer degree values.
shear_variants = {
    f"shear_x_{sx:+d}": make_shear(x_deg=sx, y_deg=0) for sx in SHEAR_X_DEGS
}
shear_variants.update({
    f"shear_y_{sy:+d}": make_shear(x_deg=0, y_deg=sy) for sy in SHEAR_Y_DEGS
})

# Summary
print("Base variants:", list(base_transforms.keys()))
print("Shear variants:", list(shear_variants.keys()))

Base variants: ['orig', 'rot90', 'rot180', 'rot270', 'flip_h', 'flip_v']
Shear variants: ['shear_x_+10', 'shear_x_-10', 'shear_y_+10', 'shear_y_-10']


In [None]:

# ---- Materialize all variants to disk ----
def apply_tfm(tfm, img, boxes, labels):
    """Run one pipeline on an image and its boxes, returning typed arrays.

    Args:
        tfm: Callable invoked as ``tfm(image=..., bboxes=..., labels=...)``
            that returns a dict with "image" and, optionally, "bboxes" and
            "labels".
        img: Image array passed through as ``image``.
        boxes: Array of boxes; converted with ``.tolist()`` before the call.
        labels: Array of labels; converted with ``.tolist()`` before the call.

    Returns:
        (image, boxes, labels): a uint8 image, a float32 (N, 4) box array and
        an int64 (N,) label array. Empty results come back as zero-length
        arrays of the right shape.
    """
    out = tfm(image=img, bboxes=boxes.tolist(), labels=labels.tolist())
    bxs, lbs = out.get("bboxes"), out.get("labels")

    # Use len() rather than truthiness: the pipeline may hand back numpy
    # arrays, and `if array:` raises for arrays with more than one element.
    if bxs is None or len(bxs) == 0:
        bxs = np.zeros((0, 4), np.float32)
    else:
        # Keep only the four coordinates in case extra per-box fields are appended.
        bxs = np.asarray(bxs, dtype=np.float32)[:, :4]

    if lbs is None or len(lbs) == 0:
        lbs = np.zeros((0,), np.int64)
    else:
        lbs = np.asarray(lbs, dtype=np.int64)

    im = out["image"]
    if im.dtype != np.uint8:
        im = np.clip(im, 0, 255).astype(np.uint8)
    return im, bxs, lbs

def materialize_fixed_aug(train_json: Path, images_root: Path, out_root: Path):
    """Render every fixed augmentation variant of every training image to disk
    and write a matching COCO annotation file.

    Variant groups (33 per source image with the default config):
      1) base only: rot90 | rot180 | rot270 | flip_h | flip_v (5);
         "orig" is not saved on its own, only as the base of zoom variants
      2) zoom-in (ZOOM_IN_SCALE) on orig + every base variant (6)
      3) zoom-out (each of ZOOM_OUT_SCALES) on orig + every base variant (6 each)
      4) shears from ``shear_variants`` on the original (4)

    Args:
        train_json: COCO annotation file of the source split.
        images_root: Directory that relative image file names resolve against.
        out_root: Output directory; images are written to ``out_root/"images"``
            and annotations to ``out_root/"annotations.json"``.

    Returns:
        The COCO dict that was written ("images", "annotations", "categories").

    Raises:
        ValueError: on duplicate variant names or too many variants for the
            image-id scheme.
        OSError: if an image cannot be written.
    """
    state = load_coco_state(train_json, images_root)
    images_new, anns_new = [], []
    ann_id = 1
    cats_new = state["categories"]

    images_dir = out_root / "images"
    images_dir.mkdir(parents=True, exist_ok=True)

    def _pct(delta):
        # round(), not int(): (1 - 0.90) * 100 is 9.999999999999998 in floating
        # point, which int() truncates to 9 and mislabels the -10% variant.
        return int(round(abs(delta) * 100))

    base_keys = ["orig", "rot90", "rot180", "rot270", "flip_h", "flip_v"]
    all_variants = {}

    def _register(name, spec):
        # Two scales rounding to the same percentage would silently overwrite
        # each other's variant (and file); fail loudly instead.
        if name in all_variants:
            raise ValueError(f"Duplicate variant name: {name}")
        all_variants[name] = spec

    # Base only (excluding "orig"; it is used for zoom & shear).
    for k in base_keys:
        if k != "orig":
            _register(k, ("base", k))

    # Zoom transforms are built once here instead of once per image/variant.
    zoom_in_tfm = make_zoom(ZOOM_IN_SCALE)
    for base_key in base_keys:
        _register(f"{base_key}_zoom_in_{_pct(ZOOM_IN_SCALE - 1)}", ("zoom", base_key, zoom_in_tfm))

    for s in ZOOM_OUT_SCALES:
        zoom_out_tfm = make_zoom(s)
        for base_key in base_keys:
            _register(f"{base_key}_zoom_out_{_pct(1 - s)}", ("zoom", base_key, zoom_out_tfm))

    # Shears on original
    for name, tfm in shear_variants.items():
        _register(name, ("shear", tfm))

    # New image id = source_id * ID_STRIDE + 1-based variant index. This is
    # deterministic and collision-free, unlike hash(str), which is salted per
    # process and was additionally folded into only 997 buckets.
    ID_STRIDE = 1000
    if len(all_variants) >= ID_STRIDE:
        raise ValueError(f"Too many variants ({len(all_variants)}) for id stride {ID_STRIDE}")

    for i in tqdm(range(len(state["images"])), desc="Aug->Disk"):
        img, boxes, labels, imrec = load_raw_sample(state, i)

        # Each base result is computed once per image and reused by all zooms.
        base_out = {k: apply_tfm(base_transforms[k], img, boxes, labels) for k in base_keys}

        for var_idx, (var_name, spec) in enumerate(all_variants.items(), start=1):
            kind = spec[0]

            if kind == "base":
                img_b, b_b, l_b = base_out[spec[1]]
            elif kind == "zoom":
                # Base first, then zoom.
                img_b, b_b, l_b = apply_tfm(spec[2], *base_out[spec[1]])
            elif kind == "shear":
                img_b, b_b, l_b = apply_tfm(spec[1], img, boxes, labels)
            else:
                raise ValueError("Unknown variant type")

            # Save
            new_name = f"{imrec['id']}_{var_name}{OUT_EXT}"
            save_path = images_dir / new_name
            if not cv2.imwrite(str(save_path), cv2.cvtColor(img_b, cv2.COLOR_RGB2BGR)):
                # imwrite signals failure via its return value only.
                raise OSError(f"Failed to write image: {save_path}")

            Ht, Wt = img_b.shape[:2]
            new_img_id = int(imrec["id"]) * ID_STRIDE + var_idx

            images_new.append({
                "id": new_img_id,
                "file_name": f"images/{new_name}",
                "width": int(Wt),
                "height": int(Ht),
            })

            for (x1, y1, x2, y2), lb in zip(b_b, l_b):
                cat_id = int(state["idx_to_cat_id"][int(lb)])
                w, h = float(x2 - x1), float(y2 - y1)
                if w <= 0 or h <= 0:
                    continue
                anns_new.append({
                    "id": ann_id,
                    "image_id": new_img_id,
                    "category_id": cat_id,
                    "bbox": [float(x1), float(y1), w, h],  # COCO: x, y, width, height
                    "area": float(w * h),
                    "iscrowd": 0,
                })
                ann_id += 1

    # COCO dump
    coco_out = {"images": images_new, "annotations": anns_new, "categories": cats_new}
    with open(out_root / "annotations.json", "w") as f:
        json.dump(coco_out, f)
    print(f"Wrote {out_root/'annotations.json'}")
    print("Images:", len(images_new), "Annotations:", len(anns_new))
    return coco_out

# Run the full materialization: writes the augmented images and
# annotations.json under OUT_ROOT and keeps the resulting COCO dict in memory.
coco_out = materialize_fixed_aug(TRAIN_JSON, TRAIN_IMAGES_ROOT, OUT_ROOT)


Aug->Disk:   0%|          | 0/4169 [00:00<?, ?it/s]

Wrote /content/drive/MyDrive/ErdosFall25/data/detection2_static/fixed_aug/annotations.json
Images: 137577 Annotations: 73076
