### Sort labelled images into their viewpoint classes (front, back, left_side, right_side)

In [4]:
import pandas as pd
from pathlib import Path
from shutil import copy2

CSV      = Path("/lisc/data/scratch/becogbio/juarez/thesis/3_Viewpoint_classifier/viewpoint_labelling.csv")
SRC_DIR  = Path("/lisc/data/scratch/becogbio/juarez/thesis/3_Viewpoint_classifier/frames_for_labelling/")
DEST_DIR = Path("/lisc/data/scratch/becogbio/juarez/thesis/3_Viewpoint_classifier/labelled_sorted_frames/")

# clean CSV: keep the three columns used below and reduce every image
# reference to its bare file name (whatever follows the last "?d=").
# NOTE: the cleaned table is written back over the same CSV file.
df = pd.read_csv(CSV)[["annotation_id", "image", "label"]]
df["image"] = df["image"].apply(lambda p: Path(p.split("?d=")[-1]).name)  # keep only file name, e.g. B02_0001.png
df.to_csv(CSV, index=False)

# copy images into class folders
unlabelled = 0
for fname, label in zip(df["image"], df["label"]):
    # A missing label would otherwise be stringified to "nan" and create a
    # bogus "nan" class folder; a blank label would resolve to DEST_DIR itself.
    orient = "" if pd.isna(label) else str(label).strip()   # front / back / left_side / right_side
    if not orient:
        unlabelled += 1
        continue

    src = SRC_DIR / fname
    dst = DEST_DIR / orient / fname
    dst.parent.mkdir(parents=True, exist_ok=True)

    try:
        copy2(src, dst)
    except FileNotFoundError:
        # best effort: report the frame that is listed in the CSV but absent on disk
        print("missing:", src)

if unlabelled:
    print("rows skipped (no label):", unlabelled)

# counts per viewpoint folder
print("\nImages per viewpoint:")
for folder in sorted(p for p in DEST_DIR.iterdir() if p.is_dir()):
    print(f"{folder.name}: {len(list(folder.glob('*')))}")



Images per viewpoint:
back: 921
front: 569
left_side: 750
right_side: 706


### Split cropped labelled images into training, validation and test sets

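Per-class 70 / 20 / 10 split into mutually exclusive train / validation / test sets. Images from left_side and right_side are additionally copied into a joint side_view class within the same split.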

In [9]:
import random, shutil
from pathlib import Path
from collections import defaultdict

SRC_ROOT = Path("/lisc/data/scratch/becogbio/juarez/thesis/3_Viewpoint_classifier/labelled_sorted_frames/")
DST_ROOT = Path("/lisc/data/scratch/becogbio/juarez/thesis/3_Viewpoint_classifier/train_val_test_split")
SPLITS   = {"train": 0.70, "validation": 0.20, "test": 0.10}
CLASSES  = ["front", "back", "left_side", "right_side"]   # single source of truth for the class names

random.seed(42)

# make the split folders and their class sub-folders (including side_view)
for split in SPLITS:
    for cls in [*CLASSES, "side_view"]:                   # side_view is the joint class
        (DST_ROOT / split / cls).mkdir(parents=True, exist_ok=True)

counts = defaultdict(lambda: defaultdict(int))  # split → class → n

# iterdir() yields entries in arbitrary order; because every class draws from
# the same seeded RNG, the folders must be visited in a fixed order for the
# split to be reproducible.
for class_dir in sorted(SRC_ROOT.iterdir()):
    if not class_dir.is_dir():
        continue
    if class_dir.name not in CLASSES:
        # no destination folder exists for it, so copying would fail
        print("skipping unexpected folder:", class_dir)
        continue

    imgs = sorted(p for p in class_dir.glob("*") if p.is_file())
    random.shuffle(imgs)

    n       = len(imgs)
    n_train = int(n * SPLITS["train"])
    n_val   = int(n * SPLITS["validation"])
    # whatever is left after train + validation becomes the test set

    split_map = [
        ("train",       imgs[:n_train]),
        ("validation",  imgs[n_train:n_train + n_val]),
        ("test",        imgs[n_train + n_val:])
    ]

    for split, subset in split_map:
        base_cls_dir = DST_ROOT / split / class_dir.name
        side_dir     = DST_ROOT / split / "side_view"

        for p in subset:
            shutil.copy2(p, base_cls_dir / p.name)   # copy to its own class
            counts[split][class_dir.name] += 1

            if class_dir.name in {"right_side", "left_side"}:
                shutil.copy2(p, side_dir / p.name)   # duplicate into side_view
                counts[split]["side_view"] += 1

# quick summary
for split in SPLITS:
    print(f"\n{split.upper()}:")
    for cls in [*CLASSES, "side_view"]:
        n = counts[split].get(cls, 0)
        print(f"  {cls:<11} {n:5} images")

# Sanity check on what is actually on disk: files left over from an earlier run
# (different seed or different source data) would put the same image in two
# splits and silently break the train / validation / test separation.
for cls in [*CLASSES, "side_view"]:
    first_seen = {}
    clashes = 0
    for split in SPLITS:
        for p in (DST_ROOT / split / cls).iterdir():
            if first_seen.setdefault(p.name, split) != split:
                clashes += 1
    if clashes:
        print(f"\nWARNING: {clashes} {cls} file(s) present in more than one split; "
              f"clear {DST_ROOT} and re-run")


TRAIN:
  front         398 images
  back          644 images
  left_side     525 images
  right_side    494 images
  side_view    1019 images

VALIDATION:
  front         113 images
  back          184 images
  left_side     150 images
  right_side    141 images
  side_view     291 images

TEST:
  front          58 images
  back           93 images
  left_side      75 images
  right_side     71 images
  side_view     146 images
