In [None]:
import os
import shutil
import json
from sklearn.model_selection import train_test_split

# Paths to raw data
images_dir = 'data/raw/images/'
labels_dir = 'data/raw/labels/'

# Destination directories for processed data
train_images_dir = 'data/processed/train/images/'
val_images_dir = 'data/processed/val/images/'
test_images_dir = 'data/processed/test/images/'

train_labels_dir = 'data/processed/train/labels/'
val_labels_dir = 'data/processed/val/labels/'
test_labels_dir = 'data/processed/test/labels/'

# Create processed directories if they don't exist
for dir_path in [train_images_dir, val_images_dir, test_images_dir,
                 train_labels_dir, val_labels_dir, test_labels_dir]:
    os.makedirs(dir_path, exist_ok=True)

# Get list of image filenames (assume PNG). os.listdir() returns entries in
# arbitrary order, so sort them: otherwise the seeded split below could assign
# files to different sets from one machine to the next.
image_files = sorted(f for f in os.listdir(images_dir) if f.endswith('.png'))

# Each JSON label is assumed to share its image's base name. Swap only the
# trailing extension; str.replace() would also rewrite any '.png' that occurs
# earlier in the filename.
label_files = [os.path.splitext(f)[0] + '.json' for f in image_files]

# Fail before anything is moved if a label is missing, so the raw dataset is
# never left half-relocated (images moved, labels not).
missing_labels = [f for f in label_files
                  if not os.path.exists(os.path.join(labels_dir, f))]
if missing_labels:
    raise FileNotFoundError(
        '{} label file(s) missing from {}, e.g. {}'.format(
            len(missing_labels), labels_dir, missing_labels[:5]))

# Split into train, validation, and test sets (80% train, 10% validation, 10% test).
# Images and labels are passed together so each pair lands in the same set.
train_images, temp_images, train_labels, temp_labels = train_test_split(
    image_files, label_files, test_size=0.2, random_state=42)
# The held-out 20% is halved into validation and test.
val_images, test_images, val_labels, test_labels = train_test_split(
    temp_images, temp_labels, test_size=0.5, random_state=42)

# Helper function to move files
def move_files(file_list, src_dir, dest_dir):
    """Move each named file from ``src_dir`` into ``dest_dir``.

    Args:
        file_list: Iterable of bare filenames (no directory component).
        src_dir: Directory the files currently live in.
        dest_dir: Directory the files are moved to, keeping the same names.

    Returns:
        None. Files are processed in the order given; any error raised by
        ``shutil.move`` propagates and leaves earlier files already moved.
    """
    for name in file_list:
        shutil.move(os.path.join(src_dir, name), os.path.join(dest_dir, name))

# Relocate every split: the three image sets first, then the three label sets.
# Each entry is (filenames, source directory, destination directory).
split_moves = [
    (train_images, images_dir, train_images_dir),
    (val_images, images_dir, val_images_dir),
    (test_images, images_dir, test_images_dir),
    (train_labels, labels_dir, train_labels_dir),
    (val_labels, labels_dir, val_labels_dir),
    (test_labels, labels_dir, test_labels_dir),
]
for file_names, source_dir, target_dir in split_moves:
    move_files(file_names, source_dir, target_dir)

print("Data split and moved successfully!")
