In [3]:
import os
import shutil
import random

# Split an image dataset laid out as <original_dataset_dir>/<class>/<image>
# into <output_base_dir>/{train,val,test}/<class>/<image> by copying files.

# Set paths
original_dataset_dir = "dataset"  # Your original dataset path
output_base_dir = "splitted_dataset"  # Where to save the split dataset

# Split names, in the same order as the slices built for each class below
split_names = ["train", "val", "test"]

# File extensions (compared case-insensitively) that count as images
image_extensions = ('.jpg', '.png', '.jpeg')

# Create base folders
for split in split_names:
    os.makedirs(os.path.join(output_base_dir, split), exist_ok=True)

# Split ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# The test split receives whatever remains after train and val are sliced
# off, so test_ratio is only honoured when the three ratios sum to 1.
if abs(train_ratio + val_ratio + test_ratio - 1.0) > 1e-9:
    raise ValueError(
        "train_ratio + val_ratio + test_ratio must sum to 1.0, got "
        f"{train_ratio + val_ratio + test_ratio}"
    )

# Fix seed for reproducibility
random.seed(42)

# NOTE: existing files under output_base_dir are never removed, so re-running
# with a different seed, ratios or dataset can leave stale copies behind.

# Actual (train, val, test) sizes per class, used for the final report
counts_per_class = {}

# Go through each class folder.
# os.listdir returns entries in arbitrary order; sorting makes the sequence
# of shuffles (and therefore the split) identical on every machine.
for class_name in sorted(os.listdir(original_dataset_dir)):
    class_path = os.path.join(original_dataset_dir, class_name)
    if not os.path.isdir(class_path):
        continue

    # List all image files in a deterministic order before shuffling, so the
    # fixed seed always produces the same permutation.
    image_files = sorted(
        f for f in os.listdir(class_path) if f.lower().endswith(image_extensions)
    )
    random.shuffle(image_files)

    # int() truncates, so any images left over by rounding go to the test split.
    total = len(image_files)
    train_end = int(train_ratio * total)
    val_end = train_end + int(val_ratio * total)

    train_files = image_files[:train_end]
    val_files = image_files[train_end:val_end]
    test_files = image_files[val_end:]
    counts_per_class[class_name] = (len(train_files), len(val_files), len(test_files))

    # Copy files to respective folders
    for split_name, files in zip(split_names, [train_files, val_files, test_files]):
        split_class_dir = os.path.join(output_base_dir, split_name, class_name)
        os.makedirs(split_class_dir, exist_ok=True)

        for file_name in files:
            src = os.path.join(class_path, file_name)
            dst = os.path.join(split_class_dir, file_name)
            shutil.copy2(src, dst)  # copy2 also preserves file metadata

# Report what was actually copied instead of assuming a fixed class size.
distinct_counts = set(counts_per_class.values())
if not counts_per_class:
    print(f"⚠️ No class folders found in '{original_dataset_dir}'; nothing was split.")
elif len(distinct_counts) == 1:
    n_train, n_val, n_test = next(iter(distinct_counts))
    print(f"✅ Dataset has been split into train/val/test with {n_train}/{n_val}/{n_test} images per class.")
else:
    print("✅ Dataset has been split into train/val/test (train/val/test images per class):")
    for counted_class, (n_train, n_val, n_test) in counts_per_class.items():
        print(f"   {counted_class}: {n_train}/{n_val}/{n_test}")


✅ Dataset has been split into train/val/test with 42/9/9 images per class.
