# In [17]:
import math
import os
import random
import shutil

def split_data(image_folder, label_folder, output_folder, train_ratio=0.7, test_ratio=0.15, val_ratio=0.15, *, seed=None):
    """Copy image/label pairs into random train/test/val subsets.

    Every ``*.jpg`` file directly inside ``image_folder`` is paired with the
    label file of the same name but a ``.txt`` extension in ``label_folder``.
    The pairs are shuffled and copied to
    ``<output_folder>/{train,test,val}/{images,labels}``.

    Files already present in the output folders are not removed, so
    re-running with a different shuffle into the same ``output_folder`` can
    leave the same image in more than one subset.

    Args:
        image_folder: Directory containing the ``.jpg`` images.
        label_folder: Directory containing the matching ``.txt`` labels.
        output_folder: Root directory under which the subset folders are
            created (existing folders are reused).
        train_ratio: Fraction of the files copied to ``train``.
        test_ratio: Fraction of the files copied to ``test``.
        val_ratio: Fraction for ``val``; in practice ``val`` receives every
            file left over after the train and test counts are truncated to
            integers.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used.

    Raises:
        ValueError: If a ratio is negative or the ratios do not sum to 1.0
            (within a small floating-point tolerance).
        FileNotFoundError: If any image has no matching label file. This is
            checked before anything is copied.
    """
    image_ext = '.jpg'
    label_ext = '.txt'

    ratios = (train_ratio, test_ratio, val_ratio)
    if any(ratio < 0 for ratio in ratios):
        raise ValueError("Ratios must be non-negative")
    # Compare with a tolerance: sums such as 0.6 + 0.3 + 0.1 are not exactly
    # 1.0 in binary floating point, so an equality test rejects valid input.
    if not math.isclose(sum(ratios), 1.0, rel_tol=0.0, abs_tol=1e-9):
        raise ValueError("Ratios should add up to 1.0")

    # Sort first: os.listdir order is arbitrary, and a stable starting order
    # is what makes a seeded shuffle reproducible.
    image_files = sorted(f for f in os.listdir(image_folder) if f.endswith(image_ext))

    # Swap only the trailing extension; str.replace would also rewrite any
    # '.jpg' that appears earlier in the file name.
    pairs = [(name, name[:-len(image_ext)] + label_ext) for name in image_files]

    # Fail before copying anything so a missing label cannot leave a
    # half-written dataset behind.
    missing = [label for _, label in pairs
               if not os.path.isfile(os.path.join(label_folder, label))]
    if missing:
        preview = ", ".join(missing[:5])
        if len(missing) > 5:
            preview += ", ... ({} more)".format(len(missing) - 5)
        raise FileNotFoundError(
            "Missing label files in {!r}: {}".format(label_folder, preview)
        )

    if seed is None:
        random.shuffle(pairs)
    else:
        random.Random(seed).shuffle(pairs)

    # Train and test counts are truncated; validation takes the remainder so
    # that every file lands in exactly one subset.
    num_files = len(pairs)
    num_train = int(train_ratio * num_files)
    num_test = int(test_ratio * num_files)
    test_end = num_train + num_test

    splits = (
        ('train', pairs[:num_train]),
        ('test', pairs[num_train:test_end]),
        ('val', pairs[test_end:]),
    )

    for split_name, subset in splits:
        images_dir = os.path.join(output_folder, split_name, 'images')
        labels_dir = os.path.join(output_folder, split_name, 'labels')
        # Created even for an empty subset so the layout is always complete.
        os.makedirs(images_dir, exist_ok=True)
        os.makedirs(labels_dir, exist_ok=True)

        for image_file, label_file in subset:
            shutil.copy(os.path.join(image_folder, image_file), os.path.join(images_dir, image_file))
            shutil.copy(os.path.join(label_folder, label_file), os.path.join(labels_dir, label_file))

# Source directories: images and the label files that accompany them.
image_folder = 'patches_data_pipeline/images/val'
label_folder = 'patches_data_pipeline/labels/val'
# Destination root for the split; point this wherever the dataset should go.
output_folder = './dataset/'

# Perform the split using the function's default ratios.
split_data(
    image_folder=image_folder,
    label_folder=label_folder,
    output_folder=output_folder,
)
