In [7]:
import os
import glob
import xml.etree.ElementTree as ET

from xml.etree.ElementTree import ParseError  # 추가

# ---------- 1. Settings ----------
# Folder that is searched (recursively) for XML annotation files.
XML_ROOT = r"label"
# Destination folders for the two generated label sets.
OUT_OPEN_CLOSE = r"./labels_openclose"
OUT_FULL_EMPTY = r"./labels_fullempty"

# Class ids for the open/close label set: "open_*" -> 0, "close_*" -> 1.
OPEN_CLOSE_MAP = {
    label: int(label.startswith("close_"))
    for label in ("open_empty", "open_full", "close_full", "close_empty")
}

# Class ids for the full/empty label set: "*_empty" -> 0, "*_full" -> 1.
FULL_EMPTY_MAP = {
    label: int(label.endswith("_full"))
    for label in ("open_empty", "close_empty", "open_full", "close_full")
}

def voc_to_yolo_bbox(size, box):
    """Convert a corner-style box into a normalized center/size box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, ymin, xmax, ymax)`` corner coordinates.

    Returns:
        ``(x_center, y_center, box_width, box_height)``, where the x values
        are divided by the image width and the y values by the image height.
    """
    img_w, img_h = size
    left, top, right, bottom = box

    return (
        (left + right) / 2.0 / img_w,
        (top + bottom) / 2.0 / img_h,
        (right - left) / img_w,
        (bottom - top) / img_h,
    )


def _child_text(parent, tag):
    """Return the stripped text of ``parent``'s first ``tag`` child, or None if missing/empty."""
    if parent is None:
        return None
    child = parent.find(tag)
    if child is None or child.text is None:
        return None
    return child.text.strip() or None


def convert_single_xml_2x2(xml_path, out_dir_openclose, out_dir_fullempty):
    """Convert one XML annotation file into two YOLO-style label files.

    Every ``<object>`` whose name is present in both ``OPEN_CLOSE_MAP`` and
    ``FULL_EMPTY_MAP`` yields one line ``"<class> <xc> <yc> <w> <h>"`` in each
    output file. Both files are named after the XML file's base name with a
    ``.txt`` extension and share the same box values; only the class id differs.

    Files that cannot be used (XML parse error, missing or non-positive
    ``<size>``) are reported and skipped without writing anything. Individual
    objects with an unknown class, a missing ``<name>`` or an unreadable
    ``<bndbox>`` are reported and skipped, so one bad annotation does not
    abort a whole batch.

    Args:
        xml_path: Path of the XML annotation file.
        out_dir_openclose: Directory for the open/close label file
            (created if it does not exist).
        out_dir_fullempty: Directory for the full/empty label file
            (created if it does not exist).

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # Broken XML is reported and the file is skipped.
    try:
        tree = ET.parse(xml_path)
    except ParseError as e:
        print(f"[ERROR] XML ParseError in '{xml_path}': {e}")
        return

    root = tree.getroot()

    # The image size is needed to normalize every box; without a usable,
    # positive size the file cannot be converted (and would divide by zero).
    size = root.find("size")
    try:
        w_img = int(_child_text(size, "width"))
        h_img = int(_child_text(size, "height"))
    except (TypeError, ValueError):
        # TypeError: element missing (int(None)); ValueError: non-numeric text.
        w_img = h_img = 0
    if w_img <= 0 or h_img <= 0:
        print(f"[ERROR] Missing or invalid <size> in '{xml_path}', skip this file.")
        return

    base = os.path.splitext(os.path.basename(xml_path))[0]

    out_txt_openclose = os.path.join(out_dir_openclose, base + ".txt")
    out_txt_fullempty = os.path.join(out_dir_fullempty, base + ".txt")

    lines_openclose = []
    lines_fullempty = []

    for obj in root.findall("object"):
        name = _child_text(obj, "name")
        if name is None:
            print(f"[WARN] Object without <name> in {xml_path}, skip this object.")
            continue

        if name not in OPEN_CLOSE_MAP or name not in FULL_EMPTY_MAP:
            print(f"[WARN] Unknown class '{name}' in {xml_path}, skip this object.")
            continue

        cls_openclose = OPEN_CLOSE_MAP[name]
        cls_fullempty = FULL_EMPTY_MAP[name]

        bnd = obj.find("bndbox")
        try:
            xmin, ymin, xmax, ymax = [
                float(_child_text(bnd, tag))
                for tag in ("xmin", "ymin", "xmax", "ymax")
            ]
        except (TypeError, ValueError):
            # TypeError: coordinate element missing; ValueError: non-numeric text.
            print(
                f"[WARN] Missing or invalid <bndbox> for '{name}' in {xml_path}, "
                "skip this object."
            )
            continue

        x_c, y_c, bw, bh = voc_to_yolo_bbox((w_img, h_img), (xmin, ymin, xmax, ymax))

        lines_openclose.append(f"{cls_openclose} {x_c:.6f} {y_c:.6f} {bw:.6f} {bh:.6f}")
        lines_fullempty.append(f"{cls_fullempty} {x_c:.6f} {y_c:.6f} {bw:.6f} {bh:.6f}")

    os.makedirs(out_dir_openclose, exist_ok=True)
    os.makedirs(out_dir_fullempty, exist_ok=True)

    # A file is written even when no object was kept (it is then empty).
    with open(out_txt_openclose, "w", encoding="utf-8") as f:
        f.write("\n".join(lines_openclose))

    with open(out_txt_fullempty, "w", encoding="utf-8") as f:
        f.write("\n".join(lines_fullempty))

    print(f"[INFO] {xml_path} ->")
    print(f"       {out_txt_openclose} (open/close)")
    print(f"       {out_txt_fullempty} (full/empty)")


def convert_all_xml_in_root(root_dir, out_dir_openclose, out_dir_fullempty):
    """Convert every ``*.xml`` file found recursively under ``root_dir``.

    Prints how many files were found, then passes each one, together with
    the two output directories, to ``convert_single_xml_2x2``.

    Args:
        root_dir: Directory searched recursively for ``*.xml`` files.
        out_dir_openclose: Output directory for the open/close label files.
        out_dir_fullempty: Output directory for the full/empty label files.
    """
    found = glob.glob(os.path.join(root_dir, "**", "*.xml"), recursive=True)
    total = len(found)

    print(f"[INFO] Found {total} xml files under {root_dir}")

    for annotation_path in found:
        convert_single_xml_2x2(annotation_path, out_dir_openclose, out_dir_fullempty)


# Entry point: convert every XML file found under XML_ROOT, writing the
# open/close labels to OUT_OPEN_CLOSE and the full/empty labels to OUT_FULL_EMPTY.
if __name__ == "__main__":
    convert_all_xml_in_root(XML_ROOT, OUT_OPEN_CLOSE, OUT_FULL_EMPTY)



[INFO] Found 511 xml files under label
[INFO] label\photo_normal_002.xml ->
       ./labels_openclose\photo_normal_002.txt (open/close)
       ./labels_fullempty\photo_normal_002.txt (full/empty)
[INFO] label\photo_normal_004.xml ->
       ./labels_openclose\photo_normal_004.txt (open/close)
       ./labels_fullempty\photo_normal_004.txt (full/empty)
[INFO] label\photo_normal_005.xml ->
       ./labels_openclose\photo_normal_005.txt (open/close)
       ./labels_fullempty\photo_normal_005.txt (full/empty)
[INFO] label\photo_normal_006.xml ->
       ./labels_openclose\photo_normal_006.txt (open/close)
       ./labels_fullempty\photo_normal_006.txt (full/empty)
[INFO] label\photo_normal_007.xml ->
       ./labels_openclose\photo_normal_007.txt (open/close)
       ./labels_fullempty\photo_normal_007.txt (full/empty)
[INFO] label\photo_normal_008.xml ->
       ./labels_openclose\photo_normal_008.txt (open/close)
       ./labels_fullempty\photo_normal_008.txt (full/empty)
[INFO] label\photo_