In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split


# Where the per-class image folders live, and where the split copy is written.
source_dir = r'Prepared Data/'
output_dir = r'dataset_split'

# Fractions of each class assigned to the train / validation / test subsets.
train_ratio, validation_ratio, test_ratio = 0.8, 0.1, 0.1

# Start from a clean slate: drop any output tree left over from a previous
# run so stale files cannot mix with the new split.
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)

# Helper that copies a batch of files into one destination folder.
def copy_files(files, dest_folder):
    """Copy every path in ``files`` into the directory ``dest_folder``.

    The destination directory (including missing parents) is created when it
    does not exist yet; it is created even when ``files`` is empty.

    Args:
        files: Iterable of source file paths.
        dest_folder: Directory that receives the copies.

    Raises:
        FileExistsError: If ``dest_folder`` exists but is not a directory.
    """
    # exist_ok=True replaces the check-then-create pair: it is race-free and
    # raises if the path is an existing regular file, instead of letting
    # shutil.copy silently overwrite that file with each source in turn.
    os.makedirs(dest_folder, exist_ok=True)
    for f in files:
        shutil.copy(f, dest_folder)

print("Memulai proses pembagian dataset...")

# Extensions are matched against the lower-cased file name so that files such
# as "IMG_01.JPG" are not silently left out of the dataset.
image_extensions = ('.jpg', '.jpeg', '.png')

# Every sub-directory of source_dir is treated as one class. os.listdir
# returns entries in arbitrary order, so both listings are sorted: with a
# fixed random_state the split is then reproducible across machines.
for class_name in sorted(os.listdir(source_dir)):
    class_path = os.path.join(source_dir, class_name)

    # Skip stray files sitting next to the class folders.
    if not os.path.isdir(class_path):
        continue

    images = [
        os.path.join(class_path, f)
        for f in sorted(os.listdir(class_path))
        if f.lower().endswith(image_extensions)
    ]

    # Nothing to split for a class without images.
    if not images:
        continue

    # First split: train_ratio of the class goes to train, the remainder is
    # held back for validation + test.
    # NOTE(review): train_test_split is expected to raise ValueError when a
    # class is too small to leave both sides non-empty - confirm whether tiny
    # classes can occur in this dataset.
    train_files, temp_files = train_test_split(
        images, train_size=train_ratio, random_state=42
    )

    # Second split: divide the remainder between validation and test in
    # proportion to their ratios (0.1 / (0.1 + 0.1) = 0.5 with the defaults).
    validation_files, test_files = train_test_split(
        temp_files,
        train_size=validation_ratio / (validation_ratio + test_ratio),
        random_state=42,
    )

    # Output layout: <output_dir>/<subset>/<class_name>/
    train_dest = os.path.join(output_dir, 'train', class_name)
    validation_dest = os.path.join(output_dir, 'validation', class_name)
    test_dest = os.path.join(output_dir, 'test', class_name)

    # Copy (not move) the files so the source dataset stays intact.
    copy_files(train_files, train_dest)
    copy_files(validation_files, validation_dest)
    copy_files(test_files, test_dest)

    # Per-class summary of how many files landed in each subset.
    print(f"Kelas '{class_name}':")
    print(f"  - {len(train_files)} file train")
    print(f"  - {len(validation_files)} file validation")
    print(f"  - {len(test_files)} file test")

print("\nProses selesai! ✅")
print(f"Dataset yang sudah dibagi tersimpan di folder: {output_dir}")

Memulai proses pembagian dataset...
Kelas 'Close Eyes':
  - 33556 file train
  - 4195 file validation
  - 4195 file test
Kelas 'Open Eyes':
  - 34361 file train
  - 4295 file validation
  - 4296 file test

Proses selesai! ✅
Dataset yang sudah dibagi tersimpan di folder: dataset_split
