In [None]:
import os
import random
import shutil
from tqdm import tqdm

# Split a folder-per-class dataset into train/val/test folders.
#
# For every class folder under `src_dir` that holds at least
# IMAGES_PER_CLASS files, the first IMAGES_PER_CLASS files (by sorted name)
# are shuffled with a fixed seed and copied to
# `dst_dir/<split>/<class>/`. Classes with fewer files are skipped.
#
# NOTE: files already present under `dst_dir` are never removed. If the
# source data changed since a previous run, clear `dst_dir` first so stale
# copies cannot end up in more than one split.

# Source dataset (folder-per-class)
src_dir = r"D:\Final_Semester_Project\AI_Attendance_System\AI_And_ML_Model\data\normalized"

# Destination dataset (train/val/test)
dst_dir = r"D:\Final_Semester_Project\AI_Attendance_System\AI_And_ML_Model\datasets"

# Per-class quota and how it is divided: 56 train + 7 val + 7 test = 70.
IMAGES_PER_CLASS = 70
TRAIN_COUNT = 56
VAL_COUNT = 7  # the remaining IMAGES_PER_CLASS - TRAIN_COUNT - VAL_COUNT go to test

# Create output dirs
for split in ("train", "val", "test"):
    os.makedirs(os.path.join(dst_dir, split), exist_ok=True)

random.seed(42)  # reproducibility

# os.listdir returns entries in arbitrary order. The global RNG is consumed
# class by class, so the class order must be fixed as well; otherwise the
# same seed can yield a different split on another run or machine.
classes = sorted(
    c for c in os.listdir(src_dir) if os.path.isdir(os.path.join(src_dir, c))
)

for cls in tqdm(classes, desc="Splitting dataset", unit="class"):
    cls_path = os.path.join(src_dir, cls)

    # Regular files only (sub-directories are ignored), in a stable order.
    imgs = sorted(
        f for f in os.listdir(cls_path) if os.path.isfile(os.path.join(cls_path, f))
    )
    if len(imgs) < IMAGES_PER_CLASS:
        tqdm.write(f"Skipping {cls} (only {len(imgs)} images)")
        continue

    # Keep only the first IMAGES_PER_CLASS files, then shuffle that subset.
    imgs = imgs[:IMAGES_PER_CLASS]
    random.shuffle(imgs)

    val_end = TRAIN_COUNT + VAL_COUNT
    train_imgs = imgs[:TRAIN_COUNT]
    val_imgs = imgs[TRAIN_COUNT:val_end]
    test_imgs = imgs[val_end:IMAGES_PER_CLASS]

    splits = {"train": train_imgs, "val": val_imgs, "test": test_imgs}

    # Copy files into <dst_dir>/<split>/<class>/
    for split, files in splits.items():
        out_cls = os.path.join(dst_dir, split, cls)
        os.makedirs(out_cls, exist_ok=True)
        for f in files:
            shutil.copy(os.path.join(cls_path, f), os.path.join(out_cls, f))

print("✅ Dataset split completed.")
