In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split
#--------- Code Written by Khai Tha ----------------

# Source directories holding the cropped images and their label files
images_dir = 'seconddata/cropped/images'
labels_dir = 'seconddata/cropped/labels'

# Destination root directories for the train, test and val splits
train_dir = 'seconddata/train'
test_dir = 'seconddata/test'
val_dir = 'seconddata/val'

# Create an images/ and a labels/ subdirectory under every split root.
# exist_ok=True keeps this safe to re-run when the directories already exist.
for split_root in (train_dir, test_dir, val_dir):
    for subdir in ('images', 'labels'):
        os.makedirs(os.path.join(split_root, subdir), exist_ok=True)

# List the regular files in each source directory, sorted by name so that
# position i in both lists is meant to refer to the same sample. Directory
# entries (e.g. a .ipynb_checkpoints folder) are skipped: they are not
# samples and cannot be copied as files later on.
image_files = sorted(
    name for name in os.listdir(images_dir)
    if os.path.isfile(os.path.join(images_dir, name))
)
label_files = sorted(
    name for name in os.listdir(labels_dir)
    if os.path.isfile(os.path.join(labels_dir, name))
)

# Verify that images and labels really pair up one-to-one. Equal counts alone
# do not guarantee that, so the base names (without extension) are compared
# position by position. Raising instead of asserting keeps the check active
# when Python runs with -O.
if len(image_files) != len(label_files):
    raise ValueError(
        f"Found {len(image_files)} images in {images_dir!r} but "
        f"{len(label_files)} labels in {labels_dir!r}"
    )
for image_name, label_name in zip(image_files, label_files):
    if os.path.splitext(image_name)[0] != os.path.splitext(label_name)[0]:
        raise ValueError(
            f"Image {image_name!r} is paired with label {label_name!r}; "
            "image and label file names do not line up"
        )

# First hold out 20% of all samples for validation. Passing both lists to the
# same call keeps each image together with its label.
train_images, val_images, train_labels, val_labels = train_test_split(
    image_files, label_files, test_size=0.2, random_state=42)

# Then hold out 25% of the remaining samples (about 20% of the full set) for
# testing, which leaves roughly 60% of the data for training.
train_images, test_images, train_labels, test_labels = train_test_split(
    train_images, train_labels, test_size=0.25, random_state=42)

def copy_files(files, src_dir, dest_dir):
    """Copy each named file from ``src_dir`` into ``dest_dir``.

    Args:
        files: Iterable of file names (not paths) located in ``src_dir``.
        src_dir: Directory the files are read from.
        dest_dir: Directory the files are copied into, keeping their names.
            It is created (including parents) if it does not exist yet.

    Raises:
        FileNotFoundError: If a listed file is missing from ``src_dir``.
    """
    # Make the helper usable on its own instead of relying on the caller
    # having prepared the destination directory beforehand.
    os.makedirs(dest_dir, exist_ok=True)
    for file_name in files:
        shutil.copy(
            os.path.join(src_dir, file_name),
            os.path.join(dest_dir, file_name),
        )

# Populate each split directory: images first, then the matching labels,
# in the order train -> val -> test.
for split_root, split_images, split_labels in (
    (train_dir, train_images, train_labels),
    (val_dir, val_images, val_labels),
    (test_dir, test_images, test_labels),
):
    images_target = os.path.join(split_root, 'images')
    labels_target = os.path.join(split_root, 'labels')
    copy_files(split_images, images_dir, images_target)
    copy_files(split_labels, labels_dir, labels_target)

print("Data split completed.")


Data split completed.
