In [None]:
import math
import os
import random
import shutil

def _copy_files(file_names, src_dir, dest_dir):
    """Copy each named file from *src_dir* into *dest_dir*.

    The destination directory is created only when there is something to
    copy, so a split that receives no files gets no empty class folder.
    """
    if not file_names:
        return
    os.makedirs(dest_dir, exist_ok=True)
    for file_name in file_names:
        shutil.copy(os.path.join(src_dir, file_name), dest_dir)


def split_dataset(dataset_path, output_path, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2, seed=None):
    """Split a class-per-folder dataset into train/val/test folders.

    Every sub-directory of *dataset_path* is treated as one class. Its
    regular files are shuffled and copied into
    ``<output_path>/train/<class>``, ``<output_path>/val/<class>`` and
    ``<output_path>/test/<class>``. The source files are left untouched.

    Args:
        dataset_path: Directory containing one sub-directory per class.
        output_path: Directory in which ``train``, ``val`` and ``test`` are
            created (existing directories are reused).
        train_ratio: Fraction of each class copied to ``train``.
        val_ratio: Fraction of each class copied to ``val``.
        test_ratio: Fraction of each class copied to ``test``. The test
            split receives whatever remains after the other two slices,
            so it also absorbs the truncation remainder.
        seed: Optional seed for a reproducible shuffle. When ``None``
            (the default) the module-level ``random`` generator is used.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1
            (within floating-point tolerance).
    """
    ratios = (train_ratio, val_ratio, test_ratio)
    if any(ratio < 0 for ratio in ratios):
        raise ValueError("Ratios must be non-negative")
    # Exact float equality rejects valid inputs such as 0.7 + 0.2 + 0.1,
    # and an ``assert`` would be stripped under ``python -O``.
    if not math.isclose(sum(ratios), 1.0, abs_tol=1e-9):
        raise ValueError("Ratios must sum to 1")

    # A private generator keeps a seeded run from disturbing global state.
    rng = random if seed is None else random.Random(seed)

    train_dir = os.path.join(output_path, "train")
    val_dir = os.path.join(output_path, "val")
    test_dir = os.path.join(output_path, "test")
    for split_dir in (train_dir, val_dir, test_dir):
        os.makedirs(split_dir, exist_ok=True)

    # Sorted listings make the result depend only on the seed, not on the
    # order in which the file system happens to return entries.
    for class_name in sorted(os.listdir(dataset_path)):
        class_path = os.path.join(dataset_path, class_name)
        if not os.path.isdir(class_path):
            continue

        # Only regular files can be copied with shutil.copy; nested
        # directories would make it raise, so they are skipped.
        files = [
            entry
            for entry in sorted(os.listdir(class_path))
            if os.path.isfile(os.path.join(class_path, entry))
        ]
        rng.shuffle(files)

        total_files = len(files)
        train_end = int(total_files * train_ratio)
        val_end = train_end + int(total_files * val_ratio)

        _copy_files(files[:train_end], class_path, os.path.join(train_dir, class_name))
        _copy_files(files[train_end:val_end], class_path, os.path.join(val_dir, class_name))
        _copy_files(files[val_end:], class_path, os.path.join(test_dir, class_name))

    print(f"Dataset split completed. Training: {train_dir}, Validation: {val_dir}, Testing: {test_dir}")

# Raw strings keep the single backslash in "Documents\GitHub" literal.
# Without the r-prefix, "\G" is an invalid escape sequence, which newer
# Python versions report as a SyntaxWarning. The resulting path values are
# unchanged.
dataset_path = r"C:/Users/HPA02532Y/Documents\GitHub/Wasteclassifier/RealWaste"
output_path = r"C:/Users/HPA02532Y/Documents\GitHub/Wasteclassifier/Data"
split_dataset(dataset_path, output_path)


Dataset split completed. Training: C:/Users/HPA02532Y/Documents\GitHub/Wasteclassifier/Data\train, Validation: C:/Users/HPA02532Y/Documents\GitHub/Wasteclassifier/Data\val, Testing: C:/Users/HPA02532Y/Documents\GitHub/Wasteclassifier/Data\test
