# :basketball: 4 - Combine base with all augmentations

## Imports

In [16]:
import shutil

from pathlib import Path

## Predefined variables

In [17]:
# All paths are relative, so they resolve against the notebook's working directory.

# Base dataset: read from `<split>/images` and `<split>/labels`; also the source of `data.yaml`.
BASE_DATA_PATH = Path("../data/basketball.yolov8")
# Augmented datasets: one sub-folder per augmentation type, each with the same split layout.
AUGMENTED_DATA_PATH = Path("../data/basketball.yolov8-augmented")
# Destination that receives the combined base + augmented files.
OUTPUT_DATA_PATH = Path("../data/basketball.yolov8-combined")

# Create the output root up front; `exist_ok=True` keeps the cell safe to re-run.
OUTPUT_DATA_PATH.mkdir(parents=True, exist_ok=True)

## Combine data

In [18]:
# Dataset splits that are combined one by one further down
splits = ["train", "test", "valid"]

# Every split gets an images folder and a labels folder in the output dataset
for split in splits:
    for subfolder in ("images", "labels"):
        target_dir = OUTPUT_DATA_PATH / split / subfolder
        target_dir.mkdir(parents=True, exist_ok=True)

In [19]:
def copy_unique(src_img_dir, src_lbl_dir, dest_img_dir, dest_lbl_dir, seen,
                image_patterns=("*.jpg",)):
    """Copy images and labels whose file names have not been copied before.

    A file is considered "seen" purely by its name (including extension), so
    the first source that provides a given name wins and later sources with
    the same name are skipped. Images and labels are tracked independently.

    Args:
        src_img_dir: Directory to read images from.
        src_lbl_dir: Directory to read ``*.txt`` label files from.
        dest_img_dir: Directory that receives the images (created if missing).
        dest_lbl_dir: Directory that receives the labels (created if missing).
        seen: Mutable set of file names already copied; updated in place so
            it can be shared across several calls.
        image_patterns: Glob patterns that select image files. Defaults to
            ``("*.jpg",)``, the only pattern used previously.

    Returns:
        A ``(images_copied, labels_copied)`` tuple with the number of files
        copied by this call.
    """

    def _copy_new(src_dir, dest_dir, patterns):
        """Copy not-yet-seen files matching ``patterns``; return how many were copied."""
        src_dir, dest_dir = Path(src_dir), Path(dest_dir)
        # A missing source directory simply contributes no files.
        if not src_dir.is_dir():
            return 0
        # Do not rely on another cell having created the destination.
        dest_dir.mkdir(parents=True, exist_ok=True)

        copied = 0
        for pattern in patterns:
            # Sorted so the copy order is the same on every run and platform.
            for src_file in sorted(src_dir.glob(pattern)):
                if src_file.name in seen:
                    continue
                shutil.copy(src_file, dest_dir / src_file.name)
                seen.add(src_file.name)
                copied += 1
        return copied

    images_copied = _copy_new(src_img_dir, dest_img_dir, image_patterns)
    labels_copied = _copy_new(src_lbl_dir, dest_lbl_dir, ("*.txt",))
    return images_copied, labels_copied

In [20]:
for split in splits:
    # Names copied so far for this split; shared by the base and augmented passes
    seen_files = set()

    # Every source of this split is written to the same pair of output folders
    dest_images = OUTPUT_DATA_PATH / split / "images"
    dest_labels = OUTPUT_DATA_PATH / split / "labels"

    # Base dataset goes first, so its files take precedence on name clashes
    base_root = BASE_DATA_PATH / split
    copy_unique(base_root / "images", base_root / "labels", dest_images, dest_labels, seen_files)
    print(f"Copied base dataset for {split} split")

    # Then each augmentation type, in a fixed order
    for aug_type in ("geometric", "color", "kernel"):
        aug_root = AUGMENTED_DATA_PATH / aug_type / split
        copy_unique(aug_root / "images", aug_root / "labels", dest_images, dest_labels, seen_files)
        print(f"Copied {aug_type} augmented dataset for {split} split")

    # Blank line between the per-split log groups
    print()

print("All datasets combined successfully!")

Copied base dataset for train split
Copied geometric augmented dataset for train split
Copied color augmented dataset for train split
Copied kernel augmented dataset for train split

Copied base dataset for test split
Copied geometric augmented dataset for test split
Copied color augmented dataset for test split
Copied kernel augmented dataset for test split

Copied base dataset for valid split
Copied geometric augmented dataset for valid split
Copied color augmented dataset for valid split
Copied kernel augmented dataset for valid split

All datasets combined successfully!


In [21]:
# Reuse the base dataset's YAML file, unchanged, for the combined dataset
yaml_name = "data.yaml"
shutil.copy(BASE_DATA_PATH / yaml_name, OUTPUT_DATA_PATH / yaml_name)

WindowsPath('../data/basketball.yolov8-combined/data.yaml')