In [3]:
import os
import xml.etree.ElementTree as ET
from PIL import Image
from tqdm import tqdm

xml_path = "../datasets/ai_for_mankind_hpwren/day_time_wildfire_v2/annotations/xmls"
img_path = "../datasets/ai_for_mankind_hpwren/day_time_wildfire_v2/images"
yolo_labels_path = "../datasets/ai_for_mankind_hpwren/day_time_wildfire_v2/labels"

os.makedirs(yolo_labels_path, exist_ok=True)

classes = {'smoke': 0}  # Class mapping for YOLO (class_id starts from 0)


def _voc_box_to_yolo(xmin, ymin, xmax, ymax, width, height):
    """Convert a Pascal-VOC pixel box to a normalized YOLO box.

    Args:
        xmin, ymin, xmax, ymax: Box corners in pixels.
        width, height: Image size in pixels.

    Returns:
        ``(x_center, y_center, box_width, box_height)``, each divided by the
        image dimension, or ``None`` when the box has no area left after
        being clipped to the image.
    """
    # Tolerate annotations whose corners were written in the wrong order.
    xmin, xmax = sorted((xmin, xmax))
    ymin, ymax = sorted((ymin, ymax))

    # Clip to the image so every normalized value stays inside [0, 1].
    xmin, xmax = max(0, xmin), min(width, xmax)
    ymin, ymax = max(0, ymin), min(height, ymax)

    if xmax <= xmin or ymax <= ymin:
        return None

    return (
        (xmin + xmax) / 2.0 / width,
        (ymin + ymax) / 2.0 / height,
        (xmax - xmin) / width,
        (ymax - ymin) / height,
    )


# Bookkeeping so that skipped data is reported instead of vanishing silently.
missing_images = []      # image names referenced by an XML but absent on disk
unknown_classes = set()  # class names that have no entry in `classes`
dropped_boxes = 0        # boxes with no area after clipping to the image

for xml_file in tqdm(os.listdir(xml_path)):
    if not xml_file.endswith(".xml"):
        continue

    tree = ET.parse(os.path.join(xml_path, xml_file))
    root = tree.getroot()

    img_file = root.find('filename').text
    img_full_path = os.path.join(img_path, img_file)

    if not os.path.exists(img_full_path):
        missing_images.append(img_file)
        continue

    # The context manager closes the file handle; without it every image
    # opened in this loop stays open until garbage collection.
    with Image.open(img_full_path) as img:
        width, height = img.size

    # splitext swaps only the real extension, so images that are not named
    # "*.jpeg" still get a "<stem>.txt" label instead of a label file that
    # carries the image's own name.
    label_name = os.path.splitext(img_file)[0] + '.txt'
    yolo_file = os.path.join(yolo_labels_path, label_name)
    with open(yolo_file, 'w') as f:
        for obj in root.findall('object'):
            cls_name = obj.find('name').text
            if cls_name not in classes:
                # Skip rather than raise: a KeyError here would abort the
                # run and leave this label file half written.
                unknown_classes.add(cls_name)
                continue
            cls_id = classes[cls_name]

            xmlbox = obj.find('bndbox')
            xmin = int(float(xmlbox.find('xmin').text))
            xmax = int(float(xmlbox.find('xmax').text))
            ymin = int(float(xmlbox.find('ymin').text))
            ymax = int(float(xmlbox.find('ymax').text))

            yolo_box = _voc_box_to_yolo(xmin, ymin, xmax, ymax, width, height)
            if yolo_box is None:
                dropped_boxes += 1
                continue
            x_center, y_center, box_width, box_height = yolo_box

            f.write(f"{cls_id} {x_center} {y_center} {box_width} {box_height}\n")

# Only speak up when something was actually skipped.
if missing_images:
    print(f"Skipped {len(missing_images)} annotation(s) whose image was not found in {img_path}")
if unknown_classes:
    print(f"Skipped objects with unmapped class name(s): {sorted(unknown_classes, key=str)}")
if dropped_boxes:
    print(f"Dropped {dropped_boxes} box(es) with no area inside the image")


100%|██████████| 2191/2191 [00:00<00:00, 3704.28it/s]


In [9]:
import shutil
import random
import os

image_dir = "../datasets/ai_for_mankind_hpwren/day_time_wildfire_v2/images"
label_dir = "../datasets/ai_for_mankind_hpwren/day_time_wildfire_v2/labels"
yolo_root = "../datasets/ai_hpwren_yolo"

TRAIN_FRACTION = 0.8  # share of samples assigned to the training split
SPLIT_SEED = 42       # fixed seed so re-running the cell yields the same split
IMAGE_EXTENSIONS = ('.jpeg', '.jpg', '.png')  # tried in this order per label

splits = ('train', 'val')
for split in splits:
    os.makedirs(f"{yolo_root}/images/{split}", exist_ok=True)
    os.makedirs(f"{yolo_root}/labels/{split}", exist_ok=True)


def _find_image(stem):
    """Return the file name in `image_dir` matching a label stem, or None."""
    for ext in IMAGE_EXTENSIONS:
        if os.path.exists(os.path.join(image_dir, stem + ext)):
            return stem + ext
    return None


# Pair every label with its image before splitting, so a label without an
# image is reported up front instead of aborting the copy loop halfway.
image_for = {}  # label file name -> image file name
orphan_labels = []
# os.listdir order is arbitrary; sorting makes the seeded shuffle repeatable.
for label_name in sorted(os.listdir(label_dir)):
    if not label_name.endswith('.txt'):
        continue
    image_name = _find_image(os.path.splitext(label_name)[0])
    if image_name is None:
        orphan_labels.append(label_name)
    else:
        image_for[label_name] = image_name

if orphan_labels:
    print(f"Skipped {len(orphan_labels)} label(s) with no matching image in {image_dir}")

files = list(image_for)
# A dedicated Random instance gives a reproducible split without touching
# the global random state.
random.Random(SPLIT_SEED).shuffle(files)

split_idx = int(len(files) * TRAIN_FRACTION)
train_files, val_files = files[:split_idx], files[split_idx:]

# Copy files
for file_set, split in zip([train_files, val_files], splits):
    other_split = 'val' if split == 'train' else 'train'
    for file in file_set:
        image_name = image_for[file]

        # A previous run of this cell may have put this sample in the other
        # split; remove that copy so no sample sits in both train and val.
        for stale in (
            f"{yolo_root}/images/{other_split}/{image_name}",
            f"{yolo_root}/labels/{other_split}/{file}",
        ):
            if os.path.exists(stale):
                os.remove(stale)

        shutil.copy(os.path.join(image_dir, image_name), f"{yolo_root}/images/{split}/")
        shutil.copy(os.path.join(label_dir, file), f"{yolo_root}/labels/{split}/")
