In [None]:
import os
import xml.etree.ElementTree as ET

In [None]:
def create_pascal_voc(xml_path, image_filename, width, height, objects,
                      class_name="class_0", depth=3):
    """Write a Pascal VOC style annotation XML file for a single image.

    Args:
        xml_path: Destination path of the XML file (overwritten if present).
        image_filename: Text stored in the ``<filename>`` element.
        width: Image width, stored as text in ``<size>/<width>``.
        height: Image height, stored as text in ``<size>/<height>``.
        objects: Iterable of boxes. Each box is indexed as
            ``(xmin, ymin, xmax, ymax)``; the values are written with
            ``str()`` and are not validated here.
        class_name: Label written to ``<name>`` of every object. Defaults to
            ``"class_0"`` because the dataset is treated as single-class.
        depth: Value stored in ``<size>/<depth>``. Defaults to ``3``.

    Returns:
        None. The annotation is written to ``xml_path``.
    """
    annotation = ET.Element("annotation")
    ET.SubElement(annotation, "folder").text = "images"
    ET.SubElement(annotation, "filename").text = image_filename

    size = ET.SubElement(annotation, "size")
    for tag, value in (("width", width), ("height", height), ("depth", depth)):
        ET.SubElement(size, tag).text = str(value)

    # These per-object fields are constant for every box; only the
    # coordinates differ between objects.
    fixed_fields = (
        ("name", class_name),
        ("pose", "Unspecified"),
        ("truncated", "0"),
        ("difficult", "0"),
    )
    corner_tags = ("xmin", "ymin", "xmax", "ymax")

    for obj in objects:
        obj_element = ET.SubElement(annotation, "object")
        for tag, value in fixed_fields:
            ET.SubElement(obj_element, tag).text = value
        bndbox = ET.SubElement(obj_element, "bndbox")
        # Index explicitly so that only the first four entries of a box are
        # used, exactly as before.
        for index, tag in enumerate(corner_tags):
            ET.SubElement(bndbox, tag).text = str(obj[index])

    ET.ElementTree(annotation).write(xml_path)

def _yolo_line_to_box(line, image_width, image_height):
    """Convert one YOLO label line to a pixel box clamped to the image.

    Returns ``(xmin, ymin, xmax, ymax)``, or ``None`` when the line does not
    have exactly five whitespace-separated fields.
    """
    parts = line.split()
    if len(parts) != 5:  # Expected: class x_center y_center width height
        return None
    _, x_center, y_center, width, height = map(float, parts)
    # A box that extends past the image edge would otherwise yield negative or
    # larger-than-image coordinates, so clamp each corner to the image bounds.
    xmin = max(0, int((x_center - width / 2) * image_width))
    ymin = max(0, int((y_center - height / 2) * image_height))
    xmax = min(image_width, int((x_center + width / 2) * image_width))
    ymax = min(image_height, int((y_center + height / 2) * image_height))
    return xmin, ymin, xmax, ymax


def process_labels(label_dirs, annotation_dirs, image_width=4000,
                   image_height=3000, min_box_size=10):
    """Convert YOLO ``.txt`` label files into Pascal VOC XML annotations.

    Directories are paired positionally with ``zip``, so surplus entries in
    the longer sequence are ignored. Every ``.txt`` file in a label directory
    is parsed; boxes whose pixel width or height is not greater than
    ``min_box_size`` are dropped, and an XML file is written only when at
    least one box remains. Progress is reported with ``print``.

    Args:
        label_dirs: Directories containing YOLO label files.
        annotation_dirs: Output directories, created if missing.
        image_width: Pixel width assumed for every image (default 4000).
        image_height: Pixel height assumed for every image (default 3000).
        min_box_size: A box is kept only if both its width and height in
            pixels exceed this value (default 10).

    Raises:
        ValueError: If a five-field label line contains a non-numeric field.
        OSError: If a label directory or file cannot be read.
    """
    label_suffix = ".txt"
    file_count = 0

    for label_dir, annotation_dir in zip(label_dirs, annotation_dirs):
        os.makedirs(annotation_dir, exist_ok=True)

        # os.listdir order is arbitrary; sort for reproducible processing.
        for filename in sorted(os.listdir(label_dir)):
            if not filename.endswith(label_suffix):
                continue

            # Strip only the trailing extension. str.replace would also
            # rewrite any ".txt" occurring earlier in the file name.
            stem = filename[:-len(label_suffix)]
            label_path = os.path.join(label_dir, filename)
            xml_path = os.path.join(annotation_dir, stem + ".xml")

            # Read everything first so the label file is closed before the
            # XML file is written.
            with open(label_path, "r", encoding="utf-8") as file:
                boxes = [
                    _yolo_line_to_box(line, image_width, image_height)
                    for line in file
                ]

            # Skip malformed lines and small annotations.
            objects = [
                box
                for box in boxes
                if box is not None
                and (box[2] - box[0]) > min_box_size
                and (box[3] - box[1]) > min_box_size
            ]
            if not objects:
                continue

            create_pascal_voc(xml_path, stem + ".jpg", image_width, image_height, objects)
            file_count += 1
            print(f"Created {xml_path}")

    print(f"Processing complete. Total XML files created: {file_count}")

# Each entry pairs a YOLO label directory with the directory that receives
# its Pascal VOC annotations (train splits first, then val splits).
_DIR_PAIRS = [
    ("datasets/mokolwane/train/labels", "datasets/mokolwane/train/annotations"),
    ("datasets/mopororo/train/labels", "datasets/mopororo/train/annotations"),
    ("datasets/motswere/train/labels", "datasets/motswere/train/annotations"),
    ("datasets/mokolwane/val/labels", "datasets/mokolwane/val/annotations"),
    ("datasets/mopororo/val/labels", "datasets/mopororo/val/annotations"),
    ("datasets/motswere/val/labels", "datasets/motswere/val/annotations"),
]

# Split the pairs back into the two parallel lists that process_labels expects.
label_dirs = [labels for labels, _ in _DIR_PAIRS]
annotation_dirs = [annotations for _, annotations in _DIR_PAIRS]

# Run the YOLO -> Pascal VOC conversion for every directory pair.
process_labels(label_dirs, annotation_dirs)
