In [1]:
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split

# Names of the source subfolders handled by this script; each one is read from
# "<dataset root>/<name>/<name>/images" and ".../labels" further down.
objects = ["cameraDistance", "coolLighting", "furniture", "plants"]

# Destination layout for the combined split: data/{train,val}/{images,labels}.
data_root = Path("Synthetic_to_Real_Object_Detection_Full_2") / "data"
train_images = data_root / "train" / "images"
train_labels = data_root / "train" / "labels"
val_images = data_root / "val" / "images"
val_labels = data_root / "val" / "labels"

# Make sure every destination exists; directories that are already there are left alone.
for folder in (train_images, train_labels, val_images, val_labels):
    folder.mkdir(parents=True, exist_ok=True)

def _append_parent_suffix(root, extension):
    """Rename files under *root* to ``<stem>_<grandparent folder><extension>``.

    Every file below *root* (searched recursively) whose name matches
    ``*<extension>`` is renamed in place so that its stem carries the name of
    the folder two levels above it.

    Files whose stem already ends with that suffix are left untouched, so
    running the script again does not stack suffixes (``x_plants_plants``).
    """
    # Materialize the listing before renaming so the directory walk never sees
    # the entries created by the renames below.
    for file_path in list(root.rglob(f"*{extension}")):
        grandparent_name = file_path.parent.parent.name
        if not grandparent_name:
            # pathlib returns an empty name (it does not raise) when the path
            # is too shallow to have a grandparent folder.
            print(f"Skipped (no grandparent): {file_path}")
            continue

        suffix = f"_{grandparent_name}"
        if file_path.stem.endswith(suffix):
            continue  # already tagged by an earlier run

        file_path.rename(file_path.with_name(f"{file_path.stem}{suffix}{extension}"))


def move_pairs(pairs, img_dest, label_dest):
    """Copy each (image, label) pair into *img_dest* and *label_dest*.

    Despite the name, the files are copied with ``shutil.copy``; the source
    files stay where they are.
    """
    for img_path, lbl_path in pairs:
        shutil.copy(img_path, img_dest / img_path.name)
        shutil.copy(lbl_path, label_dest / lbl_path.name)


for obj in objects:

    # Source directories for this object
    images_dir = Path(f"Synthetic_to_Real_Object_Detection_Full_2/{obj}/{obj}/images")
    labels_dir = Path(f"Synthetic_to_Real_Object_Detection_Full_2/{obj}/{obj}/labels")

    # Tag every image and label with its grandparent folder name so that files
    # coming from different source folders get distinct names.
    _append_parent_suffix(images_dir, ".png")
    _append_parent_suffix(labels_dir, ".txt")

    # Sort the listing: glob() order depends on the filesystem, and the seeded
    # split below is only reproducible when its input order is stable.
    # NOTE: the rename above is recursive, but only files directly inside
    # images_dir / labels_dir are matched and split here.
    image_files = sorted(images_dir.glob("*.png"))

    # Pair each image with the label file that shares its stem; images without
    # a label are dropped.
    matched_pairs = []
    for img in image_files:
        label_path = labels_dir / (img.stem + ".txt")
        if label_path.exists():
            matched_pairs.append((img, label_path))

    # train_test_split raises ValueError when either side of the split would be
    # empty, which happens with fewer than two samples at test_size=0.20.
    if len(matched_pairs) < 2:
        print(f"Skipped {obj}: only {len(matched_pairs)} matched image/label pair(s) found.")
        continue

    # 80/20 split with a fixed seed
    train_pairs, val_pairs = train_test_split(matched_pairs, test_size=0.20, random_state=42)

    move_pairs(train_pairs, train_images, train_labels)
    move_pairs(val_pairs, val_images, val_labels)

    print(f"Moved {len(train_pairs)} training pairs and {len(val_pairs)} validation pairs.")

Moved 208 training pairs and 52 validation pairs.
Moved 194 training pairs and 49 validation pairs.
Moved 200 training pairs and 50 validation pairs.
Moved 196 training pairs and 50 validation pairs.
