# In [10]:
import os
import xml.etree.ElementTree as ET
from xml.dom import minidom

import pandas as pd

# Step 1: Read the annotations CSV file into a DataFrame
annotations_df = pd.read_csv('data_images/annotations.csv')

# Step 2: Format the annotations (assuming columns: 'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax')
def create_pascal_voc_xml(filename, image_width, image_height, grouped_annotations):
    """Build a Pascal VOC-style ``<annotation>`` element tree for one image.

    Args:
        filename: Value written to the ``<filename>`` element.
        image_width: Value written to ``<size>/<width>``.
        image_height: Value written to ``<size>/<height>``.
        grouped_annotations: DataFrame with one row per bounding box. It must
            provide the columns 'class', 'xmin', 'ymin', 'xmax' and 'ymax'.

    Returns:
        The root ``xml.etree.ElementTree.Element`` (``<annotation>``), holding
        one ``<object>`` child per row of ``grouped_annotations``.
    """
    annotation = ET.Element("annotation")

    ET.SubElement(annotation, "folder").text = "images"
    # Element text must be a str, otherwise serialization raises TypeError.
    ET.SubElement(annotation, "filename").text = str(filename)

    size = ET.SubElement(annotation, "size")
    ET.SubElement(size, "width").text = str(image_width)
    ET.SubElement(size, "height").text = str(image_height)
    ET.SubElement(size, "depth").text = "3"  # Assuming RGB images

    for _, row in grouped_annotations.iterrows():
        obj = ET.SubElement(annotation, "object")
        # str() keeps numeric class labels serializable, like the coordinates.
        ET.SubElement(obj, "name").text = str(row['class'])
        bndbox = ET.SubElement(obj, "bndbox")
        for tag in ("xmin", "ymin", "xmax", "ymax"):
            ET.SubElement(bndbox, tag).text = str(row[tag])

    return annotation

# Step 3: Generate one XML file per distinct 'filename' value
# Create the output directory first so open() below does not fail when it is missing.
os.makedirs('annotations', exist_ok=True)

grouped_annotations = annotations_df.groupby('filename')
for filename, group in grouped_annotations:
    # Image dimensions are read from the first row of each group.
    # NOTE(review): presumably width/height are identical for all rows of an image - confirm.
    xml_tree = create_pascal_voc_xml(filename, group.iloc[0]['width'], group.iloc[0]['height'], group)
    # encoding='unicode' yields a str directly, avoiding an encode/decode round trip.
    xml_string = ET.tostring(xml_tree, encoding='unicode')
    xml_pretty = minidom.parseString(xml_string).toprettyxml(indent="  ")
    # The XML declaration emitted by toprettyxml() names no encoding, so readers
    # assume UTF-8; write UTF-8 explicitly instead of the platform default.
    # NOTE(review): if 'filename' includes an image extension the output is
    # named e.g. 'img.jpg.xml' - confirm this is intended.
    with open(f'annotations/{filename}.xml', 'w', encoding='utf-8') as xml_file:
        xml_file.write(xml_pretty)

