In [2]:
import os
import random
import shutil

def split_train_val(images_dir, labels_dir, output_base_dir, train_ratio=0.8, seed=None):
    """Copy an image/label dataset into shuffled train and val folders.

    Files in ``images_dir`` whose names end in ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitive) are shuffled and split. Each image, together with the
    ``.txt`` file in ``labels_dir`` that shares its base name, is copied to::

        <output_base_dir>/images/train   <output_base_dir>/labels/train
        <output_base_dir>/images/val     <output_base_dir>/labels/val

    Source files are only read, never moved or modified.

    Note:
        Files already present in the output folders are not removed. Re-running
        with a different shuffle into the same ``output_base_dir`` can therefore
        leave the same image in both splits. Pass a fixed ``seed`` or start from
        an empty output directory to avoid that.

    Args:
        images_dir: Directory containing the source images.
        labels_dir: Directory containing one ``<image base name>.txt`` per image.
        output_base_dir: Root directory under which the split folders are created.
        train_ratio: Fraction of images assigned to train, between 0 and 1
            inclusive. The train count is ``int(n_images * train_ratio)``; the
            remaining images go to val.
        seed: Optional seed for the shuffle. With a seed the same inputs always
            produce the same split. With ``None`` (the default) the module-level
            ``random`` generator is used, as before.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
        FileNotFoundError: If any image has no matching label file. This is
            checked before any directory is created or any file is copied.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be between 0 and 1, got {train_ratio!r}")

    image_exts = ('.jpg', '.jpeg', '.png')
    # os.listdir returns entries in arbitrary order; sorting first makes a
    # seeded shuffle give the same split on every machine and every run.
    all_images = sorted(f for f in os.listdir(images_dir) if f.lower().endswith(image_exts))

    # Validate up front so a missing label cannot abort the copy halfway and
    # leave a partially populated dataset behind.
    missing = [
        img_file for img_file in all_images
        if not os.path.isfile(os.path.join(labels_dir, os.path.splitext(img_file)[0] + '.txt'))
    ]
    if missing:
        preview = ', '.join(missing[:5]) + (', ...' if len(missing) > 5 else '')
        raise FileNotFoundError(
            f"{len(missing)} image(s) have no label file in {labels_dir!r}: {preview}"
        )

    if seed is None:
        # Keep using the global generator so callers who seed `random`
        # themselves still control the shuffle.
        random.shuffle(all_images)
    else:
        random.Random(seed).shuffle(all_images)

    split_idx = int(len(all_images) * train_ratio)
    train_images = all_images[:split_idx]
    val_images = all_images[split_idx:]

    for split_name, split_images in (('train', train_images), ('val', val_images)):
        split_images_dir = os.path.join(output_base_dir, 'images', split_name)
        split_labels_dir = os.path.join(output_base_dir, 'labels', split_name)
        os.makedirs(split_images_dir, exist_ok=True)
        os.makedirs(split_labels_dir, exist_ok=True)

        for img_file in split_images:
            label_file = os.path.splitext(img_file)[0] + '.txt'
            shutil.copy(os.path.join(images_dir, img_file), os.path.join(split_images_dir, img_file))
            shutil.copy(os.path.join(labels_dir, label_file), os.path.join(split_labels_dir, label_file))

    print(f"✅ Split complete: {len(train_images)} train images, {len(val_images)} val images.")

In [3]:
# Adjust these three paths to match your project layout.
# IMAGES_DIR / LABELS_DIR: where the generated images and labels currently live.
# OUTPUT_DIR: root folder that will receive the train/val split folders.
IMAGES_DIR = "data/do_not_open/images"
LABELS_DIR = "data/do_not_open/labels"
OUTPUT_DIR = "data/catflap"

# Run the split, sending 80% of the images to train.
split_train_val(
    images_dir=IMAGES_DIR,
    labels_dir=LABELS_DIR,
    output_base_dir=OUTPUT_DIR,
    train_ratio=0.8,
)

✅ Split complete: 626 train images, 157 val images.
