In [1]:
# Kernel smoke test: emits a fixed greeting to confirm that cells execute.
print("hello")

hello


In [2]:
import os
import xml.etree.ElementTree as ET
import shutil
import random

In [3]:
# Dataset locations, relative to the notebook's working directory.
# Each value ends with '/' because later cells build paths by plain string
# concatenation (e.g. OUTPUT_DIR + 'images/train').
ANNOTATIONS_DIR = "annotations_xml/"  # input: XML annotation files
IMAGES_DIR = "images_original/"       # input: source images (.jpg / .png)
OUTPUT_DIR = "dataset/"               # output: root of the YOLO-style dataset

In [4]:
# Create the YOLO folder layout: images/ and labels/, each with train/ and val/.
# exist_ok=True makes this cell safe to re-run when the folders already exist.
for yolo_subdir in ('images/train', 'images/val', 'labels/train', 'labels/val'):
    os.makedirs(OUTPUT_DIR + yolo_subdir, exist_ok=True)

In [5]:
# Splitting: reproducible train/validation split of the annotation files.

SPLIT_SEED = 42       # fixed seed so Restart & Run All reproduces the same split
TRAIN_FRACTION = 0.8  # share of annotation files assigned to the training set

# os.listdir returns entries in arbitrary order, so sort first: the seeded
# shuffle then starts from the same sequence on every run and every machine.
xml_files = sorted(f for f in os.listdir(ANNOTATIONS_DIR) if f.endswith('.xml'))

# A private Random instance keeps the split deterministic without touching the
# global random state. A stable split also means a re-run copies each file into
# the same subset folder as before instead of leaving stale copies in the other.
random.Random(SPLIT_SEED).shuffle(xml_files)

split_idx = int(TRAIN_FRACTION * len(xml_files))
train_files = xml_files[:split_idx]
val_files = xml_files[split_idx:]

In [6]:
def convert_annotation(xml_file, output_txt, class_id=0):
    """Convert one VOC-style XML annotation into a YOLO label file.

    Every ``<object>`` element becomes one output line of the form
    ``<class_id> <x_center> <y_center> <width> <height>``, where the four box
    values are divided by the image size read from the ``<size>`` element and
    printed with six decimals.

    Boxes are read as floats (sub-pixel precision is kept), their corners are
    put in min/max order, and they are clamped to the image rectangle so the
    normalised values always lie in [0, 1]. Boxes with no area left after
    clamping are skipped.

    Args:
        xml_file: Path of the XML annotation to parse.
        output_txt: Path of the label file to write; overwritten if it exists.
            An annotation without usable objects yields an empty file.
        class_id: Class index written for every object. Defaults to 0 because
            the dataset has a single class (number_plate).

    Raises:
        ValueError: If the annotation declares a non-positive image size, which
            would make normalisation impossible.
    """
    root = ET.parse(xml_file).getroot()

    size = root.find('size')
    # int(float(...)) also accepts sizes written as "640.0".
    img_w = int(float(size.find('width').text))
    img_h = int(float(size.find('height').text))
    if img_w <= 0 or img_h <= 0:
        raise ValueError(
            f"{xml_file}: invalid image size {img_w}x{img_h}; cannot normalise boxes"
        )

    yolo_lines = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        x1 = float(bbox.find('xmin').text)
        y1 = float(bbox.find('ymin').text)
        x2 = float(bbox.find('xmax').text)
        y2 = float(bbox.find('ymax').text)

        # Clamp each corner into the image, then order the pair so that swapped
        # min/max values cannot yield a negative width or height.
        xmin, xmax = sorted((min(max(x1, 0.0), img_w), min(max(x2, 0.0), img_w)))
        ymin, ymax = sorted((min(max(y1, 0.0), img_h), min(max(y2, 0.0), img_h)))

        # Written as "not (a > b)" so a NaN coordinate is rejected as well.
        if not (xmax > xmin and ymax > ymin):
            continue

        x_center = (xmin + xmax) / 2.0 / img_w
        y_center = (ymin + ymax) / 2.0 / img_h
        width = (xmax - xmin) / img_w
        height = (ymax - ymin) / img_h

        yolo_lines.append(
            f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
        )

    with open(output_txt, 'w') as f:
        f.writelines(line + '\n' for line in yolo_lines)


In [7]:
# Convert and copy files: for every annotation, copy its image into the
# train/val image folder and write the matching YOLO label file.

# Set lookup is O(1) per file; scanning the train_files list on every
# iteration made the loop quadratic in the number of annotations.
train_set = set(train_files)

for xml_file in xml_files:
    img_base = os.path.splitext(xml_file)[0]

    # Pick the first existing image for this annotation (.jpg preferred, then .png).
    img_file = next(
        (img_base + ext
         for ext in ('.jpg', '.png')
         if os.path.exists(os.path.join(IMAGES_DIR, img_base + ext))),
        None,
    )
    if img_file is None:
        print(f"Image file for {xml_file} not found (jpg or png)")
        continue

    # Anything not in the training split belongs to validation.
    subset = 'train' if xml_file in train_set else 'val'
    img_out = os.path.join(OUTPUT_DIR, 'images', subset, img_file)
    label_out = os.path.join(OUTPUT_DIR, 'labels', subset, img_base + '.txt')

    shutil.copy(os.path.join(IMAGES_DIR, img_file), img_out)
    convert_annotation(os.path.join(ANNOTATIONS_DIR, xml_file), label_out)

print('✅ Dataset prepared successfully and labels converted!')

✅ Dataset prepared successfully and labels converted!
