In [2]:
import os
import glob
import shutil
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split

# Paths: source images, Pascal-VOC style XML annotations, and the root folder
# that receives the generated YOLO label files.
IMG_DIR = "dataset/images"
ANN_DIR = "dataset/annotations"
YOLO_LABEL_DIR = "dataset/labels"

# Create the per-split output folders for labels and for image copies.
for split in ["train", "val"]:
    os.makedirs(f"{YOLO_LABEL_DIR}/{split}", exist_ok=True)
    os.makedirs(f"{IMG_DIR}/{split}", exist_ok=True)

# Get all XML files. glob returns matches in arbitrary (filesystem-dependent)
# order, so sort them: otherwise the seeded split below would not be
# reproducible from one machine or run to the next.
xml_files = sorted(glob.glob(f"{ANN_DIR}/*.xml"))
if not xml_files:
    # Fail early with a message that points at the real problem instead of
    # letting train_test_split complain about an empty sample set.
    raise FileNotFoundError(f"No .xml annotation files found in {ANN_DIR!r}")

# Train/val split: 80% train, 20% validation, fixed seed.
train_xmls, val_xmls = train_test_split(xml_files, test_size=0.2, random_state=42)

def _read_number(node, path):
    """Return the text of the element at *path* under *node* as a float.

    Parsing through ``float`` accepts both "120" and "120.0"; some annotation
    tools write pixel values with a decimal point, which ``int`` rejects.
    """
    return float(node.find(path).text)


def _voc_box_to_yolo(xmin, ymin, xmax, ymax, width, height):
    """Convert a pixel-space corner box to normalised YOLO centre/size form.

    The box is clipped to the image first so every returned value lies in
    [0, 1]. Returns ``(x_center, y_center, w, h)``, or ``None`` when the
    clipped box has no area (fully outside the image, or min >= max).
    """
    x0, x1 = max(0.0, xmin), min(width, xmax)
    y0, y1 = max(0.0, ymin), min(height, ymax)
    if x1 <= x0 or y1 <= y0:
        return None
    return (
        (x0 + x1) / 2 / width,
        (y0 + y1) / 2 / height,
        (x1 - x0) / width,
        (y1 - y0) / height,
    )


def convert_and_save(xml_list, split, img_dir=None, label_dir=None):
    """Write YOLO label files for *xml_list* and copy their images into *split*.

    For each XML annotation file, every ``object/bndbox`` is converted to a
    line ``"<class> <x_center> <y_center> <w> <h>"`` with coordinates
    normalised by the image size read from ``size/width`` and ``size/height``.
    The label file is written to ``<label_dir>/<split>/`` and the image named
    by the XML's ``filename`` element is copied from ``<img_dir>/`` to
    ``<img_dir>/<split>/``.

    Args:
        xml_list: Iterable of paths to XML annotation files.
        split: Name of the sub-folder to write into (e.g. "train" or "val").
        img_dir: Folder holding the source images. Defaults to the
            module-level ``IMG_DIR``.
        label_dir: Root folder for generated label files. Defaults to the
            module-level ``YOLO_LABEL_DIR``.

    Raises:
        ValueError: If an annotation contains objects but declares a
            non-positive image width or height, or a value is not numeric.
        FileNotFoundError: If the image named in an annotation is missing.
    """
    if img_dir is None:
        img_dir = IMG_DIR
    if label_dir is None:
        label_dir = YOLO_LABEL_DIR

    label_out = os.path.join(label_dir, split)
    image_out = os.path.join(img_dir, split)
    # Make the function usable on its own, without relying on the folders
    # having been created elsewhere.
    os.makedirs(label_out, exist_ok=True)
    os.makedirs(image_out, exist_ok=True)

    for xml_file in xml_list:
        root = ET.parse(xml_file).getroot()

        img_name = root.find("filename").text.strip()
        width = _read_number(root, "size/width")
        height = _read_number(root, "size/height")

        objects = root.findall("object")
        if objects and (width <= 0 or height <= 0):
            # Normalising by a zero/negative size is meaningless; say which
            # file is at fault instead of failing with a bare division error.
            raise ValueError(
                f"{xml_file}: image size {width:g}x{height:g} must be positive"
            )

        label_lines = []
        for obj in objects:
            cls_id = 0  # licence plate class
            bbox = obj.find("bndbox")
            yolo_box = _voc_box_to_yolo(
                _read_number(bbox, "xmin"),
                _read_number(bbox, "ymin"),
                _read_number(bbox, "xmax"),
                _read_number(bbox, "ymax"),
                width,
                height,
            )
            if yolo_box is None:
                # Box has no area inside the image; it carries no usable label.
                continue
            x_center, y_center, w, h = yolo_box
            label_lines.append(
                f"{cls_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
            )

        # Copy the image first: if it is missing we fail before leaving an
        # orphan label file behind.
        shutil.copy(os.path.join(img_dir, img_name), os.path.join(image_out, img_name))

        # Name the label after the image (not the XML file) so the image and
        # its label always share the same stem, which is how YOLO pairs them.
        stem = os.path.splitext(os.path.basename(img_name))[0]
        with open(os.path.join(label_out, f"{stem}.txt"), "w", encoding="utf-8") as f:
            f.write("\n".join(label_lines))

# Convert both halves of the split: write label files and copy images into
# the "train" and "val" sub-folders respectively.
convert_and_save(xml_list=train_xmls, split="train")
convert_and_save(xml_list=val_xmls, split="val")
