In [3]:
import os
import shutil
from pathlib import Path

# Root of the reorganised dataset; the split sub-folders are created on demand.
base_dir = "../basketball_dataset"
for _subdir in ("train/images", "train/labels", "val/images", "val/labels"):
    os.makedirs(f"{base_dir}/{_subdir}", exist_ok=True)

# Per-video source folders, listed in processing order. Every folder lives
# under ../train and carries the "_yolo_1.1" suffix.
train_folders = [
    f"../train/{video}_yolo_1.1"
    for video in (
        "video1",
        "video2",
        "ball_16_ft_from_cam",
        "basketball_video_1",
        "basketball_video_2",
        "basketball_video_3",
        "basketball_video_4",
        "video3",
        "video4",
        "video5",
        "test_30fps",
    )
]

# Process each video folder: copy every frame that has a label file into the
# train/val layout under base_dir, prefixing file names with the video name so
# frames from different videos cannot overwrite each other.
for folder in train_folders:
    # Drop the "_yolo_1.1" suffix so the prefix is just the video name.
    folder_name = os.path.basename(folder).replace("_yolo_1.1", "")

    frames_dir = os.path.join(folder, "frames")
    txt_dir = os.path.join(folder, "obj_train_data")

    # Both the frames and the label directory are required.
    if not os.path.exists(frames_dir) or not os.path.exists(txt_dir):
        print(f"Warning: Missing directories in {folder}. Skipping.")
        continue

    # os.listdir() returns entries in arbitrary order, so sort them; otherwise
    # the 80/20 split below could differ between runs and machines.
    try:
        image_files = sorted(
            f for f in os.listdir(frames_dir) if f.endswith(".png")
        )
    except FileNotFoundError:
        print(f"Warning: Cannot access {frames_dir}. Skipping.")
        continue

    # Split into train/val (80/20 split): first 80% of the sorted frames go to
    # train, the remainder to val.
    split_idx = int(len(image_files) * 0.8)
    train_images = image_files[:split_idx]
    val_images = image_files[split_idx:]

    # Both subsets are copied the same way; only the destination folder and
    # the exact wording of the missing-label warning differ.
    for subset, subset_images, warning_suffix in (
        ("train", train_images, " - skipping this image"),
        ("val", val_images, ""),
    ):
        for img_name in subset_images:
            stem = os.path.splitext(img_name)[0]
            original_label_name = stem + ".txt"
            label_src = os.path.join(txt_dir, original_label_name)

            # Only copy if label exists
            if not os.path.exists(label_src):
                print(
                    f"Warning: Missing label file {original_label_name} "
                    f"for {img_name}{warning_suffix}"
                )
                continue

            # Create unique filenames with video prefix
            unique_img_name = f"{folder_name}_{img_name}"
            unique_label_name = f"{folder_name}_{stem}.txt"

            shutil.copy(
                os.path.join(frames_dir, img_name),
                os.path.join(base_dir, f"{subset}/images", unique_img_name),
            )
            shutil.copy(
                label_src,
                os.path.join(base_dir, f"{subset}/labels", unique_label_name),
            )

# Write data.yaml into the dataset root: absolute dataset path, the relative
# train/val image folders, an empty test entry, and the single class name.
yaml_lines = [
    f"path: {os.path.abspath(base_dir)}\n",
    "train: train/images\n",
    "val: val/images\n",
    "test: \n\n",
    "nc: 1\n",
    "names: ['basketball']\n",
]
with open(f"{base_dir}/data.yaml", "w") as yaml_file:
    yaml_file.writelines(yaml_lines)

print(f"Dataset reorganized in {base_dir}")

Dataset reorganized in ../basketball_dataset
