In [1]:
import os
import random
import shutil
from pathlib import Path

# Configuration
DATA_DIR = Path("../datasets/wow-fishing-classic")
IMAGE_DIR = DATA_DIR / "images"
LABEL_DIR = DATA_DIR / "labels"
# Fraction of images per split, in order. Every split but the last receives
# int(ratio * total) images; the last split receives whatever remains, so no
# image is lost to truncation.
SPLITS = {"train": 0.8, "val": 0.1, "test": 0.1}
IMG_EXTENSIONS = [".jpg", ".jpeg", ".png"]
# Fixed seed so that re-running on the same set of files gives the same split.
RANDOM_SEED = 42


def partition_images(image_files, ratios, seed=RANDOM_SEED):
    """Shuffle *image_files* and cut them into one list per split.

    Args:
        image_files: Iterable of image paths to distribute.
        ratios: Mapping of split name to fraction of the images. Order
            matters: the last split takes the remainder instead of its own
            ratio.
        seed: Seed for the shuffle. Pass ``None`` for a different shuffle on
            every call.

    Returns:
        A dict mapping each split name to its list of image paths.

    Raises:
        ValueError: If *ratios* is empty.
    """
    if not ratios:
        raise ValueError("ratios must name at least one split")

    # Directory listing order is filesystem dependent; sorting first makes the
    # seeded shuffle reproducible across machines.
    ordered = sorted(image_files)
    # A private Random instance leaves the global random state untouched.
    random.Random(seed).shuffle(ordered)

    total = len(ordered)
    *leading, last = ratios
    partitions = {}
    start = 0
    for name in leading:
        stop = start + int(ratios[name] * total)
        partitions[name] = ordered[start:stop]
        start = stop
    partitions[last] = ordered[start:]
    return partitions


def move_split(files, split_name, image_dir=IMAGE_DIR, label_dir=LABEL_DIR):
    """Move images and their same-named ``.txt`` labels into a split folder.

    Args:
        files: Image paths to move.
        split_name: Name of the sub-folder created under both directories.
        image_dir: Directory that receives ``<split_name>/<image>``.
        label_dir: Directory searched for ``<image stem>.txt`` and that
            receives ``<split_name>/<label>``.

    Returns:
        Names of the images that had no label file. Those images are still
        moved, just without a label.
    """
    image_dest = image_dir / split_name
    label_dest = label_dir / split_name
    image_dest.mkdir(parents=True, exist_ok=True)
    label_dest.mkdir(parents=True, exist_ok=True)

    unlabeled = []
    for img_path in files:
        label_path = label_dir / img_path.with_suffix(".txt").name
        shutil.move(str(img_path), str(image_dest / img_path.name))
        if label_path.exists():
            shutil.move(str(label_path), str(label_dest / label_path.name))
        else:
            unlabeled.append(img_path.name)
    return unlabeled


def split_dataset(image_dir=IMAGE_DIR, label_dir=LABEL_DIR, ratios=None,
                  seed=RANDOM_SEED):
    """Split the images directly inside *image_dir* into per-split folders.

    Only regular files whose suffix (case-insensitive) is listed in
    ``IMG_EXTENSIONS`` are considered; anything else is left in place.

    Args:
        image_dir: Directory holding the unsplit images.
        label_dir: Directory holding the matching ``.txt`` label files.
        ratios: Mapping of split name to fraction; defaults to ``SPLITS``.
        seed: Shuffle seed, see ``partition_images``.

    Returns:
        A dict mapping each split name to the list of image paths assigned to
        it. The paths are the locations the images had *before* being moved.
    """
    if ratios is None:
        ratios = SPLITS

    image_files = [
        f for f in image_dir.glob("*")
        if f.is_file() and f.suffix.lower() in IMG_EXTENSIONS
    ]
    partitions = partition_images(image_files, ratios, seed)

    unlabeled = []
    for split_name, files in partitions.items():
        unlabeled.extend(move_split(files, split_name, image_dir, label_dir))

    if unlabeled:
        print(
            f"Warning: {len(unlabeled)} image(s) had no label file "
            "and were moved without one"
        )
    return partitions


# True both when run as a script and inside a notebook cell, but not when the
# code is imported, so importing never touches the dataset on disk.
if __name__ == "__main__":
    splits = split_dataset()
    if any(splits.values()):
        print("✅ Dataset successfully split into train/val/test")
    else:
        print(f"No images found in {IMAGE_DIR}; nothing to split")


✅ Dataset successfully split into train/val/test
