# In [None]:
# augmenting data to strengthen model training
# 20250811
# jedenfalls

import os
import cv2
import xml.etree.ElementTree as ET
import albumentations as A
#from albumentations.augmentations.bbox_utils import convert_bbox_to_albumentations, convert_bbox_from_albumentations
from PIL import Image
import numpy as np

# Albumentations pipeline applied to every augmented copy: flip, rotations and
# an affine warp, plus a colour jitter. No resizing transform is included.
# Bounding boxes are supplied in 'pascal_voc' format and their labels travel
# alongside them in the 'category_ids' field.
# NOTE(review): RandomRotate90 presumably swaps width/height on non-square
# images -- confirm that code writing the image size accounts for this.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Rotate(limit=15, p=0.5),
    A.Affine(scale=(0.9, 1.1), translate_percent=(0.1, 0.1), shear=(-10, 10), p=0.7),
    A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, p=0.5),
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['category_ids']))

def parse_annotation(annotation_path):
    """Load a Pascal VOC style XML annotation.

    Every ``<object>`` element directly under the document root contributes
    one box and one label. Coordinates are parsed as floats and then
    truncated to integers.

    Args:
        annotation_path: Path of the XML file to read.

    Returns:
        A ``(bboxes, labels, root)`` tuple: ``bboxes`` is a list of
        ``[xmin, ymin, xmax, ymax]`` integer lists, ``labels`` holds the
        matching ``<name>`` texts in the same order, and ``root`` is the
        parsed root element.
    """
    root = ET.parse(annotation_path).getroot()
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')

    bboxes, labels = [], []
    for obj in root.findall('object'):
        name = obj.find('name').text
        box_node = obj.find('bndbox')
        bboxes.append([int(float(box_node.find(tag).text)) for tag in corner_tags])
        labels.append(name)

    return bboxes, labels, root

def save_annotation(output_path, image_filename, width, height, bboxes, labels):
    """Write a Pascal VOC style XML annotation file.

    Args:
        output_path: Destination path of the XML file; it is also stored in
            the ``<path>`` element, and its directory in ``<folder>``.
        image_filename: File name stored in the ``<filename>`` element.
        width: Image width written to ``<size>``.
        height: Image height written to ``<size>``.
        bboxes: Iterable of ``(xmin, ymin, xmax, ymax)`` boxes; each
            coordinate is converted with ``int`` before being written.
        labels: Iterable of class names, paired with ``bboxes`` by position.
    """
    def add(parent, tag, text=None):
        # Create a child element and optionally set its text in one step.
        node = ET.SubElement(parent, tag)
        if text is not None:
            node.text = text
        return node

    root = ET.Element("annotation")
    add(root, "folder", os.path.dirname(output_path))
    add(root, "filename", image_filename)
    add(root, "path", output_path)
    add(add(root, "source"), "database", "Unknown")

    size_node = add(root, "size")
    # Depth is always written as 3.
    for tag, value in (("width", str(width)), ("height", str(height)), ("depth", "3")):
        add(size_node, tag, value)

    add(root, "segmented", "0")

    for label, box in zip(labels, bboxes):
        xmin, ymin, xmax, ymax = (int(value) for value in box)

        obj_node = add(root, "object")
        add(obj_node, "name", label)
        add(obj_node, "pose", "Unspecified")
        add(obj_node, "truncated", "0")
        add(obj_node, "difficult", "0")

        box_node = add(obj_node, "bndbox")
        add(box_node, "xmin", str(xmin))
        add(box_node, "ymin", str(ymin))
        add(box_node, "xmax", str(xmax))
        add(box_node, "ymax", str(ymax))

    ET.ElementTree(root).write(output_path)

def augment_and_save(image_path, annotation_path, output_image_dir, output_annotation_dir, num_augmentations=10):
    """Create augmented copies of one image together with its annotation.

    The image and its boxes are passed through the module-level ``transform``
    pipeline ``num_augmentations`` times. Each result that still has at least
    one box is written as ``<stem>_aug_<i>.jpg`` plus a matching
    ``<stem>_aug_<i>.xml``.

    Args:
        image_path: Path of the source image.
        annotation_path: Path of the source XML annotation.
        output_image_dir: Directory receiving the augmented images.
        output_annotation_dir: Directory receiving the augmented annotations.
        num_augmentations: Number of augmentation attempts for this image.

    Raises:
        ValueError: If the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than on a later attribute access.
        raise ValueError(f"Could not read image: {image_path}")

    bboxes, labels, _ = parse_annotation(annotation_path)
    # Loop-invariant, so computed once.
    base_filename = os.path.splitext(os.path.basename(image_path))[0]

    for i in range(num_augmentations):
        transformed = transform(image=image, bboxes=bboxes, category_ids=labels)

        transformed_image = transformed['image']
        transformed_bboxes = transformed['bboxes']
        transformed_labels = transformed['category_ids']

        # Skip saving if no boxes remain. len() is used instead of truthiness
        # so the check also works should the library hand back an array.
        if len(transformed_bboxes) == 0:
            continue

        # Take the size from the transformed image: a 90-degree rotation of a
        # non-square image swaps width and height, so the source image's
        # dimensions would be wrong in the saved annotation.
        height, width = transformed_image.shape[:2]

        new_image_filename = f"{base_filename}_aug_{i}.jpg"
        new_annotation_filename = f"{base_filename}_aug_{i}.xml"

        new_image_path = os.path.join(output_image_dir, new_image_filename)
        new_annotation_path = os.path.join(output_annotation_dir, new_annotation_filename)

        # cv2.imwrite returns False on failure; do not leave behind an
        # annotation that points at an image that was never written.
        if not cv2.imwrite(new_image_path, transformed_image):
            print(f"Warning: could not write {new_image_path}; skipping its annotation.")
            continue

        save_annotation(new_annotation_path, new_image_filename, width, height, transformed_bboxes, transformed_labels)





# Input locations: JPEG images and their XML annotations, matched by file stem.
source_image_dir = '/home/guppy/Desktop/MIT/20250728/images'
source_annotation_dir = '/home/guppy/Desktop/MIT/20250728/mokolwane'

# Output locations for the augmented images and annotations.
output_images_dir = "/home/guppy/Desktop/MIT/20250728/aug/images"
output_annotations_dir = "/home/guppy/Desktop/MIT/20250728/aug/mokolwane"

os.makedirs(output_images_dir, exist_ok=True)
os.makedirs(output_annotations_dir, exist_ok=True)

# Sorted so every run processes the files in the same order; os.listdir
# returns entries in arbitrary order.
image_files = sorted(f for f in os.listdir(source_image_dir) if f.endswith('.jpg'))

for image_file in image_files:
    image_path = os.path.join(source_image_dir, image_file)
    # Swap only the extension: str.replace would also rewrite a '.jpg' that
    # appears earlier in the file name.
    annotation_file = os.path.splitext(image_file)[0] + '.xml'
    annotation_path = os.path.join(source_annotation_dir, annotation_file)

    if not os.path.exists(annotation_path):
        # Report images that are left out instead of dropping them silently.
        print(f"Skipping {image_file}: no annotation found at {annotation_path}")
        continue

    print(f"Augmenting {image_file}...")
    augment_and_save(image_path, annotation_path, output_images_dir, output_annotations_dir)

print("Data augmentation complete.")
