# Test/Train/Valid Split from Directory
Assumes each screenshot and its label file (same base name, `.txt` extension) are stored together in the same directory.

In [4]:
import os
import random
import shutil

# Source and destination locations ("~" is expanded to the user's home directory).
data_dir = os.path.expanduser("~/Desktop/lower_left_crops")  # Flat folder holding every image and label
output_dir = os.path.expanduser("~/Downloads/Diablo4.v2a.yolov11_split")  # Root folder for the subsets
os.makedirs(output_dir, exist_ok=True)

# Fraction of the images assigned to each subset.
train_ratio, valid_ratio, test_ratio = 0.7, 0.2, 0.1

# One sub-folder per subset, created up front (exist_ok makes re-runs harmless).
train_dir, valid_dir, test_dir = (
    os.path.join(output_dir, _subset_name) for _subset_name in ("train", "valid", "test")
)
for _subset_path in (train_dir, valid_dir, test_dir):
    os.makedirs(_subset_path, exist_ok=True)

# Helper function to get the matching label file for an image
def get_label_file(image_file):
    """Return the label filename paired with *image_file*.

    The label shares the image's base name and carries a ``.txt``
    extension. Only the final extension of *image_file* is replaced; a
    name without an extension simply gets ``.txt`` appended.
    """
    stem = os.path.splitext(image_file)[0]
    return stem + ".txt"

# Get all image files.
# Extensions are matched case-insensitively so files such as "shot.PNG" or
# "shot.JPG" are not silently left out of the split. The listing is sorted
# because os.listdir() returns entries in arbitrary order; starting from a
# fixed order makes the shuffle below reproducible whenever `random` is seeded.
image_extensions = ('.png', '.jpg', '.jpeg')
all_image_files = sorted(
    f for f in os.listdir(data_dir) if f.lower().endswith(image_extensions)
)

# Shuffle files in place so subset membership is random
random.shuffle(all_image_files)

# Split files.
# int() truncates, so the train and valid counts are rounded down and every
# leftover image falls into the test subset (test_ratio itself is not consulted).
train_cutoff = int(len(all_image_files) * train_ratio)
valid_cutoff = train_cutoff + int(len(all_image_files) * valid_ratio)

train_files = all_image_files[:train_cutoff]
valid_files = all_image_files[train_cutoff:valid_cutoff]
test_files = all_image_files[valid_cutoff:]

# Function to copy images and their labels
def copy_files(subset_files, subset_dir):
    """Copy every image in *subset_files*, plus its label if present, into *subset_dir*.

    Source files are read from the module-level ``data_dir``. The label
    name for each image comes from ``get_label_file``. An image without a
    label file is still copied; the missing label is skipped silently.
    """
    for image_name in subset_files:
        label_name = get_label_file(image_name)
        label_source = os.path.join(data_dir, label_name)

        # The image itself is always copied.
        shutil.copy(
            os.path.join(data_dir, image_name),
            os.path.join(subset_dir, image_name),
        )

        # A missing label is not an error: move on to the next image.
        if not os.path.exists(label_source):
            continue
        shutil.copy(label_source, os.path.join(subset_dir, label_name))

# Populate each subset directory in turn: train, then valid, then test.
for _files, _target_dir in (
    (train_files, train_dir),
    (valid_files, valid_dir),
    (test_files, test_dir),
):
    copy_files(_files, _target_dir)

print("Dataset splitting completed!")


Dataset splitting completed!
