In [None]:
#!/usr/bin/env python3
"""
Un-crop DLC-style predictions using per-frame bounding boxes.
Recursively scans directories for prediction CSVs and bbox CSVs.
"""

import os
import sys
from pathlib import Path
from typing import List
import pandas as pd


# --------------------------
# Helpers
# --------------------------

def _read_dlc_csv(path: str) -> pd.DataFrame:
    """
    Load a DLC-style prediction CSV.

    The first three rows of the file are used as a three-level column
    header; every remaining row is data.
    """
    header_rows = [0, 1, 2]
    frame_table = pd.read_csv(path, header=header_rows)
    return frame_table

def _read_bbox_csv(path: str) -> pd.DataFrame:
    """
    Read a bbox CSV with 5 columns: frame, x, y, h, w.

    The file is parsed with ``header=None``, so every row — including a
    label row whose first cell is blank — is initially read as data. All
    five columns are coerced to numeric, and rows whose ``frame`` value is
    not a finite number after coercion (such as a label row) are dropped
    before the integer cast. Without that step the cast raises
    ``IntCastingNaNError``.

    Returns a DataFrame sorted by ``frame`` with a fresh 0..n-1 index.
    ``frame`` is an integer column; if the smallest frame is exactly 1 the
    frames are treated as 1-based and shifted down by one.
    """
    columns = ["frame", "x", "y", "h", "w"]

    # Force header=None so the first row is treated as data
    df = pd.read_csv(path, header=None, names=columns)

    # Coerce numeric values; anything non-numeric (e.g. header labels) becomes NaN
    for c in columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

    # Drop rows without a usable frame number (NaN or +/-inf); these cannot be
    # cast to int and cannot be matched to a prediction row anyway.
    frame = df["frame"]
    usable = frame.notna() & frame.abs().ne(float("inf"))
    df = df.loc[usable].copy()

    # If frames look 1-based, shift to 0-based
    if df["frame"].min() == 1:
        df["frame"] = df["frame"] - 1

    # Final cast to int for frame
    df["frame"] = df["frame"].round().astype(int)

    return df.sort_values("frame").reset_index(drop=True)

def _transform_predictions(
    df_pred: pd.DataFrame,
    df_bbox: pd.DataFrame,
    crop_size: float = 320.0,
) -> pd.DataFrame:
    """
    Apply the uncropping transform to a table of predictions.

    Row ``i`` of ``df_pred`` is paired with the ``df_bbox`` row whose
    ``frame`` equals ``i``. Every column whose third header level is ``"x"``
    is mapped to ``x / crop_size * w + bbox_x`` and every ``"y"`` column to
    ``y / crop_size * h + bbox_y``. All other columns are left untouched.

    Args:
        df_pred: Predictions with three-level column headers.
        df_bbox: Bounding boxes with columns ``frame``, ``x``, ``y``, ``h``, ``w``.
        crop_size: Side length that the predicted coordinates are divided by
            before being rescaled to the bbox size. Defaults to 320.

    Returns:
        A transformed copy of ``df_pred``; the input is not modified.

    Raises:
        ValueError: if ``crop_size`` is not positive, if any frame in
            ``0..len(df_pred)-1`` has no complete bbox row, or if a frame has
            more than one bbox row.
    """
    if not crop_size > 0:
        raise ValueError(f"crop_size must be positive, got {crop_size!r}")

    n_frames = len(df_pred)
    bbox_cols = ["x", "y", "h", "w"]
    merged = pd.DataFrame({"frame": range(n_frames)}).merge(df_bbox, on="frame", how="left")

    incomplete = merged[bbox_cols].isna().any(axis=1)
    if incomplete.any():
        missing = merged.loc[incomplete, "frame"].tolist()
        raise ValueError(f"Missing bbox entries for frames (first 10 shown): {missing[:10]}")

    # A left merge emits one row per matching bbox row, so duplicated frames
    # make `merged` longer than the predictions and the arrays below would no
    # longer line up with the prediction rows.
    if len(merged) != n_frames:
        dupes = sorted(set(merged.loc[merged["frame"].duplicated(), "frame"].tolist()))
        raise ValueError(f"Duplicate bbox entries for frames (first 10 shown): {dupes[:10]}")

    x_off, y_off, h, w = [merged[c].to_numpy() for c in bbox_cols]
    out = df_pred.copy()

    # coordinate label -> (bbox extent to scale by, bbox origin to add)
    axis_params = {"x": (w, x_off), "y": (h, y_off)}

    for col in out.columns:
        if not (isinstance(col, tuple) and len(col) == 3):
            continue
        params = axis_params.get(col[2])
        if params is None:
            continue
        extent, offset = params
        values = pd.to_numeric(out[col], errors="coerce").to_numpy()
        out[col] = (values / crop_size) * extent + offset
    return out

def _derive_output_path(pred_path: Path, output_dir: Path | None) -> Path:
    """Output path with _uncropped.csv suffix."""
    root = pred_path.stem
    out_name = f"{root}_uncropped.csv"
    if output_dir is not None:
        output_dir.mkdir(parents=True, exist_ok=True)
        return output_dir / out_name
    return pred_path.parent / out_name


# --------------------------
# Discovery
# --------------------------

def _rglob_csvs(root: Path) -> list[Path]:
    """Return every regular file matching ``*.csv`` anywhere below ``root``."""
    candidates = root.rglob("*.csv")
    return list(filter(Path.is_file, candidates))

def _filter_by_cam(paths: list[Path], cam: str) -> list[Path]:
    """
    Keep the paths whose file name contains ``cam``, ignoring case.

    Only the final path component is inspected; input order is preserved.
    """
    needle = cam.lower()

    def _name_mentions_cam(candidate: Path) -> bool:
        return needle in candidate.name.lower()

    return list(filter(_name_mentions_cam, paths))

# --------------------------
# Main processing
# --------------------------

def process_all(camera_names: List[str], preds_root: str, bbox_root: str, output_dir: str | None = None) -> None:
    """
    Un-crop every prediction CSV found under ``preds_root``, camera by camera.

    Both roots are scanned recursively for ``*.csv`` files. For each camera
    name, the prediction files and the bbox file are selected by a
    case-insensitive substring match on the file name. Each prediction file
    is transformed with that camera's bboxes and written as
    ``<stem>_uncropped.csv``.

    Args:
        camera_names: Camera identifiers to look for in file names.
        preds_root: Directory holding prediction CSVs (may be relative).
        bbox_root: Directory holding bbox CSVs (may be relative).
        output_dir: Directory for the results; when omitted each result is
            written next to its prediction file.

    Raises:
        FileNotFoundError: if ``preds_root`` or ``bbox_root`` does not exist.
        ValueError: if a camera with prediction files does not have exactly
            one matching bbox CSV (errors raised by the transform also
            propagate).
    """
    # Resolve to absolute paths relative to the kernel's CWD
    preds_root_p = Path(preds_root).expanduser().resolve()
    bbox_root_p = Path(bbox_root).expanduser().resolve()
    output_dir_p = Path(output_dir).expanduser().resolve() if output_dir else None

    print(f"[INFO] CWD: {Path.cwd().resolve()}")
    print(f"[INFO] preds_root: {preds_root_p}")
    print(f"[INFO] bbox_root : {bbox_root_p}")
    if output_dir_p:
        print(f"[INFO] output_dir: {output_dir_p}")

    if not preds_root_p.exists():
        raise FileNotFoundError(f"preds_root does not exist: {preds_root_p}")
    if not bbox_root_p.exists():
        raise FileNotFoundError(f"bbox_root does not exist: {bbox_root_p}")

    # Discover all CSVs once (recursive)
    all_pred_csvs = _rglob_csvs(preds_root_p)
    all_bbox_csvs = _rglob_csvs(bbox_root_p)

    if not all_pred_csvs:
        print(f"[WARN] No prediction CSVs found under {preds_root_p}", file=sys.stderr)
    if not all_bbox_csvs:
        print(f"[WARN] No bbox CSVs found under {bbox_root_p}", file=sys.stderr)

    for cam in camera_names:
        # Results are named "<stem>_uncropped.csv" and may land under
        # preds_root (always the case when output_dir is None). Skip them so a
        # rerun does not transform its own output a second time.
        preds_paths = [
            p for p in _filter_by_cam(all_pred_csvs, cam)
            if not p.name.endswith("_uncropped.csv")
        ]
        if not preds_paths:
            print(f"[WARN] No prediction CSVs matched camera '{cam}'", file=sys.stderr)
            continue

        bbox_matches = _filter_by_cam(all_bbox_csvs, cam)
        if len(bbox_matches) != 1:
            raise ValueError(
                f"Need exactly one bbox CSV for camera '{cam}', found {len(bbox_matches)}: "
                f"{[str(p) for p in bbox_matches]}"
            )
        bbox_path = bbox_matches[0]
        df_bbox = _read_bbox_csv(bbox_path)

        print(f"[INFO] Camera '{cam}': {len(preds_paths)} preds, bbox={bbox_path}")

        for pred_path in preds_paths:
            # Best effort: a CSV that cannot be parsed as a DLC file is
            # reported and skipped rather than aborting the whole run.
            try:
                df_pred = _read_dlc_csv(pred_path)
            except Exception as e:
                print(f"[WARN] Skipping (bad DLC header?): {pred_path} ({e})", file=sys.stderr)
                continue
            df_out = _transform_predictions(df_pred, df_bbox)
            out_path = _derive_output_path(pred_path, output_dir_p)
            df_out.to_csv(out_path, index=False)
            print(f"    → Saved: {out_path}")

# --------------------------
# Hard-coded config
# --------------------------

if __name__ == "__main__":
    # Cameras to process; each name is matched against CSV file names.
    camera_names = [
        "lBack",
        "lFront",
        "lTop",
        "rBack",
        "rFront",
        "rTop",
    ]

    # Input/output locations. Relative paths are fine: process_all resolves
    # them and prints the resolved values.
    preds_root = "./data/chickadee"
    bbox_root = "./data/bounding_boxes"  # one bbox CSV per camera (name contains camera)
    output_dir = "./data/chickadee_uncropped"

    process_all(
        camera_names=camera_names,
        preds_root=preds_root,
        bbox_root=bbox_root,
        output_dir=output_dir,
    )


[INFO] CWD: E:\eks
[INFO] preds_root: E:\eks\data\chickadee
[INFO] bbox_root : E:\eks\data\bounding_boxes
[INFO] output_dir: E:\eks\data\chickadee_uncropped


IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer