# In [1]:
import os
import shutil
import random

def split_and_move_dataset(image_dir, label_dir, base_output, train_ratio=0.8, val_ratio=0.1, seed=None):
    """Randomly split paired image/label files into train/val/test folders.

    Every ``<name>.png`` in ``image_dir`` that has a matching ``<name>.txt``
    in ``label_dir`` is moved (not copied) to
    ``<base_output>/images/<split>/`` and ``<base_output>/labels/<split>/``,
    where ``<split>`` is ``train``, ``val`` or ``test``. Images without a
    label file are left where they are.

    Args:
        image_dir: Directory containing the ``.png`` images.
        label_dir: Directory containing the ``.txt`` label files.
        base_output: Root directory under which ``images/`` and ``labels/``
            split folders are created.
        train_ratio: Fraction of the paired samples assigned to ``train``.
        val_ratio: Fraction of the paired samples assigned to ``val``; the
            remainder goes to ``test``.
        seed: Optional seed for a private random generator, making the split
            reproducible. When ``None`` the module-level ``random`` state is
            used, so a prior ``random.seed(...)`` call is still honoured.

    Raises:
        ValueError: If a ratio is outside ``[0, 1]`` or the two ratios sum to
            more than 1. Raised before any file is touched.
    """
    if not 0 <= train_ratio <= 1 or not 0 <= val_ratio <= 1:
        raise ValueError(
            f"train_ratio and val_ratio must be within [0, 1], "
            f"got {train_ratio} and {val_ratio}"
        )
    # Small tolerance so float sums such as 0.7 + 0.3 are not rejected.
    if train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            f"train_ratio + val_ratio must not exceed 1, "
            f"got {train_ratio + val_ratio}"
        )

    # os.listdir returns entries in arbitrary order; sorting first makes a
    # seeded shuffle produce the same split on every machine and run.
    base_names = sorted(
        os.path.splitext(f)[0] for f in os.listdir(image_dir) if f.endswith('.png')
    )

    # Ensure only those with a corresponding label file are considered
    valid_base_names = [
        b for b in base_names if os.path.exists(os.path.join(label_dir, b + '.txt'))
    ]

    if seed is None:
        random.shuffle(valid_base_names)
    else:
        random.Random(seed).shuffle(valid_base_names)

    total = len(valid_base_names)
    train_end = int(total * train_ratio)
    val_end = train_end + int(total * val_ratio)

    splits = {
        'train': valid_base_names[:train_end],
        'val': valid_base_names[train_end:val_end],
        'test': valid_base_names[val_end:],
    }

    print(f"Total: {total}, Train: {len(splits['train'])}, Val: {len(splits['val'])}, Test: {len(splits['test'])}")

    for split_type, names in splits.items():
        img_target = os.path.join(base_output, 'images', split_type)
        lbl_target = os.path.join(base_output, 'labels', split_type)
        # Target folders are created even for an empty split so downstream
        # tooling always finds the full train/val/test layout.
        os.makedirs(img_target, exist_ok=True)
        os.makedirs(lbl_target, exist_ok=True)

        for base in names:
            shutil.move(os.path.join(image_dir, base + '.png'),
                        os.path.join(img_target, base + '.png'))
            shutil.move(os.path.join(label_dir, base + '.txt'),
                        os.path.join(lbl_target, base + '.txt'))

# ===== Dataset locations: edit these to match your environment =====
# Root of the YOLO dataset; the split folders are created beneath it.
base_output = "/student/vcheruku/Enhance-Data-Diversity-and-Robustness/yolo-for-single-class"
# Source folders for the images and their label files, both under the root.
image_dir, label_dir = (
    os.path.join(base_output, sub) for sub in ("raw_images", "labels")
)

# Moves the files in place using the function's default split ratios.
split_and_move_dataset(
    image_dir=image_dir,
    label_dir=label_dir,
    base_output=base_output,
)


# Output of the run above:
# Total: 8080, Train: 6464, Val: 808, Test: 808
