# In [2]:
import os
import shutil
from sklearn.model_selection import train_test_split

def split_dataset(image_dir, label_dir, output_dir, val_size=0.2, random_state=42):
    """Split a paired image/label dataset into train and validation folders.

    Every regular file in ``image_dir`` that has a label file with the same
    stem and a ``.txt`` extension in ``label_dir`` is treated as one sample.
    Samples are split with ``train_test_split`` and each image is copied,
    together with its label, into::

        output_dir/train/images    output_dir/train/labels
        output_dir/val/images      output_dir/val/labels

    The source directories are left untouched; files are copied, not moved.
    The number of samples in each subset is printed at the end.

    Args:
        image_dir: Directory containing the image files.
        label_dir: Directory containing the ``<image stem>.txt`` label files.
        output_dir: Root directory under which the ``train`` and ``val``
            trees are created (existing directories are reused).
        val_size: Forwarded to ``train_test_split`` as ``test_size``.
        random_state: Forwarded to ``train_test_split`` as ``random_state``.

    Raises:
        ValueError: If no image in ``image_dir`` has a matching label file.
    """
    def label_name(image_name):
        # Label files share the image's stem and use a '.txt' extension.
        return os.path.splitext(image_name)[0] + '.txt'

    # os.listdir() returns entries in arbitrary order. Sorting makes the
    # input to train_test_split deterministic, so a fixed random_state
    # really does reproduce the same split on every machine/filesystem.
    images = sorted(
        name for name in os.listdir(image_dir)
        if os.path.isfile(os.path.join(image_dir, name))
        and os.path.isfile(os.path.join(label_dir, label_name(name)))
    )

    # Fail early with an actionable message, before any output directory is
    # created, instead of surfacing an opaque error from the splitter.
    if not images:
        raise ValueError(
            f"No image/label pairs found: no file in {image_dir!r} has a "
            f"matching '.txt' label in {label_dir!r}"
        )

    train_images, val_images = train_test_split(
        images, test_size=val_size, random_state=random_state
    )

    for subset, names in (('train', train_images), ('val', val_images)):
        dst_image_dir = os.path.join(output_dir, subset, 'images')
        dst_label_dir = os.path.join(output_dir, subset, 'labels')
        os.makedirs(dst_image_dir, exist_ok=True)
        os.makedirs(dst_label_dir, exist_ok=True)

        for name in names:
            # Copy the image and its label side by side so the pair stays
            # in the same subset.
            shutil.copy(os.path.join(image_dir, name),
                        os.path.join(dst_image_dir, name))
            label = label_name(name)
            shutil.copy(os.path.join(label_dir, label),
                        os.path.join(dst_label_dir, label))

    print(f"Training images: {len(train_images)}")
    print(f"Validation images: {len(val_images)}")


# Source locations of the dataset and the destination root for the split.
# These are machine-specific absolute paths; adjust them before running
# on another machine.
image_dir = 'C:/Users/midiy/OneDrive/Desktop/datasets/datasets/images'
label_dir = 'C:/Users/midiy/OneDrive/Desktop/datasets/datasets/YOLO_labels'
output_dir = 'C:/Users/midiy/OneDrive/Desktop/result/Out_Data'

# Run the split using the function's default val_size and random_state.
split_dataset(
    image_dir=image_dir,
    label_dir=label_dir,
    output_dir=output_dir,
)


# Output:
# Training images: 332
# Validation images: 84
