# Viewpoint-specific full dataset

Reads an existing CSV file of viewpoint predictions and copies each listed frame into a viewpoint-specific folder (`front`, `back`, `left_side`, `right_side`), grouped by bird ID.

In [None]:
import csv
from pathlib import Path
import shutil
from collections import Counter

# Input table of predictions, the folder holding the source frames, and the
# root under which the per-viewpoint copies are written.
csv_path = Path("1_preds_viewpoint.csv")
source_root = Path("det_mask_frames_2021")
dest_root = Path("Vp_specfic_full_dataset")

# Bird IDs whose frames are eligible for copying.
bird_list = [
    "BRG-YOM", "EYB-RPM", "BNU-RPM", "GBM-ORY", "OGY-BRM",
    "GBY-ORM", "RYO-BGM", "OYR-BGM", "OEB-RPM", "ORB-UYM",
    "YM-OBR", "BNY-RPM", "OUB-RPM", "YRU-POM", "RGY-BOM",
    "BRK-NOM", "YGO-M", "BEU-RPM", "ORG-BYM", "PUE-ONM",
]

# Viewpoint labels that get their own output folder.
valid_labels = {"front", "back", "left_side", "right_side"}

# Create one output folder per viewpoint up front (no error if it exists).
for viewpoint in valid_labels:
    dest_root.joinpath(viewpoint).mkdir(parents=True, exist_ok=True)

def to_rel_path(fp: str, source_root: Path) -> Path:
    """Return the part of ``fp`` that lies below ``source_root``.

    Surrounding whitespace in ``fp`` is stripped before it is parsed.

    Args:
        fp: Path string as read from the CSV. It may already be relative to
            ``source_root`` (e.g. ``"BEU-RPM/xyz.png"``), may start with
            ``source_root``, or may contain ``source_root`` further inside
            (e.g. an absolute path).
        source_root: Folder the returned path should be relative to.

    Returns:
        The components following the first occurrence of ``source_root``'s
        components inside ``fp``. If ``source_root`` does not occur as whole
        components, the file name alone is returned when ``fp`` merely starts
        with ``source_root`` as a string (e.g. a sibling folder sharing the
        prefix); otherwise the stripped ``fp`` is returned unchanged.
    """
    p = Path(fp.strip())
    src_parts = source_root.parts
    pp = p.parts
    width = len(src_parts)

    # Component-wise search. Offset 0 is the plain "starts with source_root"
    # case; later offsets cover absolute or otherwise prefixed paths, which
    # previously fell through and were returned unchanged.
    for i in range(len(pp) - width + 1):
        if pp[i:i + width] == src_parts:
            return Path(*pp[i + width:])

    # String prefix matches but the components do not: keep the historical
    # behaviour of treating the value as a bare file name.
    if str(p).startswith(str(source_root)):
        return Path(p.name)

    return p  # already relative (e.g., "BEU-RPM/xyz.png")

# Number of frames actually copied, keyed by viewpoint label.
copied_counts = Counter()

# Build the lookup set once instead of scanning the list for every CSV row.
allowed_birds = set(bird_list)
# Destination folders already created, so mkdir runs once per
# (label, bird) folder rather than once per copied frame.
ready_dirs = set()

with csv_path.open(newline="") as f:
    reader = csv.DictReader(f, delimiter=";")
    for row in reader:
        # A missing column still raises KeyError. A row with too few fields
        # gets None for the missing ones from DictReader; skip those rows
        # instead of crashing on None.strip().
        raw_path = row["filepath"]
        raw_label = row["pred_label"]
        if raw_path is None or raw_label is None:
            continue

        rel_path = to_rel_path(raw_path, source_root)
        label = raw_label.strip()

        # Ignore unknown labels and rows whose path is empty.
        if label not in valid_labels or not rel_path.parts:
            continue

        # The first path component below source_root is taken as the bird ID.
        bird_id = rel_path.parts[0]
        if bird_id not in allowed_birds:
            continue

        src = source_root / rel_path
        if not src.is_file():
            continue

        dst_dir = dest_root / label / bird_id
        if dst_dir not in ready_dirs:
            dst_dir.mkdir(parents=True, exist_ok=True)
            ready_dirs.add(dst_dir)

        # NOTE(review): only the file name is kept, so two frames of the same
        # bird and label with identical names in different subfolders would
        # overwrite each other while both being counted.
        shutil.copy2(src, dst_dir / src.name)
        copied_counts[label] += 1

print("Copied images per label:")
for lbl in sorted(valid_labels):
    print(f"  {lbl}: {copied_counts[lbl]}")

Copied images per label:
  back: 31068
  front: 14702
  left_side: 23133
  right_side: 20804
