In [None]:
import os
import shutil
import glob

In [None]:
# Training split: source images, source annotations, filtered-annotation output
train_images_dir = "datasets/all/train/images_2"
train_annotations_dir = "datasets/all/train/annotations_2"
train_target_dir = "datasets/all/train/annotations_1"

# Validation split: same layout as the training split
val_images_dir = "datasets/all/val/images_2"
val_annotations_dir = "datasets/all/val/annotations_2"
val_target_dir = "datasets/all/val/annotations_1"

# Create both output folders up front; existing folders are left untouched
os.makedirs(train_target_dir, exist_ok=True)
os.makedirs(val_target_dir, exist_ok=True)

def copy_matching_annotations(image_dir, annotation_dir, target_dir):
    """Copy XML annotations that have a same-named JPG image.

    An annotation ``<name>.xml`` found directly in ``annotation_dir`` is
    copied into ``target_dir`` only when ``<name>.jpg`` exists directly in
    ``image_dir``. Sub-directories are not searched.

    Args:
        image_dir: Directory globbed for ``*.jpg`` files.
        annotation_dir: Directory globbed for ``*.xml`` files.
        target_dir: Destination directory; created if it does not exist.

    Returns:
        The number of annotation files copied.
    """
    # Create the destination here so the function works on its own:
    # shutil.copy fails with FileNotFoundError if the parent folder is missing.
    os.makedirs(target_dir, exist_ok=True)

    # Base names (no directory, no extension) of every JPG image.
    image_stems = {
        os.path.splitext(os.path.basename(path))[0]
        for path in glob.glob(os.path.join(image_dir, "*.jpg"))
    }

    copied = 0
    for annotation_file in glob.glob(os.path.join(annotation_dir, "*.xml")):
        file_name = os.path.basename(annotation_file)
        # Only annotations with a matching image stem are copied.
        if os.path.splitext(file_name)[0] in image_stems:
            shutil.copy(annotation_file, os.path.join(target_dir, file_name))
            copied += 1
    return copied

# Filter annotations for the training split, then for the validation split
copy_matching_annotations(
    image_dir=train_images_dir,
    annotation_dir=train_annotations_dir,
    target_dir=train_target_dir,
)
copy_matching_annotations(
    image_dir=val_images_dir,
    annotation_dir=val_annotations_dir,
    target_dir=val_target_dir,
)

def count_files(directory, extension):
    """Return how many paths in ``directory`` match ``*.<extension>``."""
    pattern = os.path.join(directory, f"*.{extension}")
    # Tally matches lazily instead of building the full list first.
    return sum(1 for _ in glob.iglob(pattern))

# Tally the source images and the annotations now present in each target folder
train_images_count = count_files(train_images_dir, "jpg")
val_images_count = count_files(val_images_dir, "jpg")
train_annotations_count = count_files(train_target_dir, "xml")
val_annotations_count = count_files(val_target_dir, "xml")

# Emit the summary, one line per entry, followed by the completion notice
print(
    f"Train annotations copied: {train_annotations_count}",
    f"Val annotations copied: {val_annotations_count}",
    f"Train images available: {train_images_count}",
    f"Val images available: {val_images_count}",
    "Annotation copying completed.",
    sep="\n",
)
