In [None]:
import os
import numpy as np
from PIL import Image
import shutil
from sklearn.model_selection import train_test_split
import uuid

# Build a flat train/validation copy of the dataset.
#
# Input layout (read-only):
#   dataset/train/<subfolder>/<id>_leftImg8bit.jpg
#   dataset/labels/<subfolder>/<id>_gtFine_labellevel3Ids.png
# Output layout (created if missing):
#   training_data/{train,val}_images/{train,val}_<idx>.jpg
#   training_data/{train,val}_annotations/{train,val}_<idx>.png
#
# NOTE: files left over from a previous run are overwritten when the index
# matches but are never deleted, so a rerun on a smaller dataset can leave
# stale files behind in the output directories.
dataset_dir = 'dataset'
train_dir = os.path.join(dataset_dir, 'train')
labels_dir = os.path.join(dataset_dir, 'labels')

output_dir = 'training_data'
train_images_dir = os.path.join(output_dir, 'train_images')
train_annotations_dir = os.path.join(output_dir, 'train_annotations')
val_images_dir = os.path.join(output_dir, 'val_images')
val_annotations_dir = os.path.join(output_dir, 'val_annotations')

for out_dir in (train_images_dir, train_annotations_dir,
                val_images_dir, val_annotations_dir):
    os.makedirs(out_dir, exist_ok=True)

# Parallel lists: image_paths[i] is the image for annotation_paths[i].
image_paths = []
annotation_paths = []

# os.listdir() yields entries in arbitrary, filesystem-dependent order.
# Sorting makes the collected lists -- and therefore the seeded split below --
# identical across machines and runs.
for subfolder in sorted(os.listdir(train_dir)):
    train_subfolder_path = os.path.join(train_dir, subfolder)
    label_subfolder_path = os.path.join(labels_dir, subfolder)

    # Only use subfolders that exist on both the image and the label side.
    if not (os.path.isdir(train_subfolder_path)
            and os.path.isdir(label_subfolder_path)):
        continue

    for filename in sorted(os.listdir(train_subfolder_path)):
        if not filename.endswith('_leftImg8bit.jpg'):
            continue

        # The text before the first underscore identifies the sample and is
        # shared by the image and its annotation file.
        file_number = filename.split('_')[0]
        image_path = os.path.join(train_subfolder_path, filename)
        label_filename = f"{file_number}_gtFine_labellevel3Ids.png"
        label_path = os.path.join(label_subfolder_path, label_filename)

        # Keep only complete pairs; images without an annotation are skipped.
        if os.path.exists(image_path) and os.path.exists(label_path):
            image_paths.append(image_path)
            annotation_paths.append(label_path)

# Fail early with an actionable message instead of an opaque error raised
# from inside train_test_split when nothing was collected.
if not image_paths:
    raise ValueError(
        f"No image/annotation pairs found under '{train_dir}' and '{labels_dir}'."
    )

# 80/20 split; passing both lists keeps each image aligned with its annotation.
train_image_paths, val_image_paths, train_annotation_paths, val_annotation_paths = train_test_split(
    image_paths, annotation_paths, test_size=0.2, random_state=42)

# Copy each split into its output directories, renaming the files to
# "<split>_<idx>.jpg" / "<split>_<idx>.png" so a pair shares the same index.
for split_name, split_images, split_annotations, images_out, annotations_out in (
    ('train', train_image_paths, train_annotation_paths,
     train_images_dir, train_annotations_dir),
    ('val', val_image_paths, val_annotation_paths,
     val_images_dir, val_annotations_dir),
):
    for idx, (image_src, annotation_src) in enumerate(zip(split_images, split_annotations)):
        new_filename = f"{split_name}_{idx}.jpg"
        new_annotation_filename = f"{split_name}_{idx}.png"
        shutil.copy(image_src, os.path.join(images_out, new_filename))
        shutil.copy(annotation_src, os.path.join(annotations_out, new_annotation_filename))

print(f"Training set: {len(train_image_paths)} images and annotations.")
print(f"Validation set: {len(val_image_paths)} images and annotations.")
