# Pascal VOC Visualization (Single XML)

Load one Pascal VOC XML file and visualize `bndbox` and `polygon` overlays on the image.

Copyright (c) T.Yoshimura  
https://github.com/tk-yoshimura

In [None]:
from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
import xml.etree.ElementTree as ET

import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np
from PIL import Image

In [None]:
@dataclass
class VocObject:
    """One annotated ``<object>`` entry read from a Pascal VOC XML file."""
    # Text of the object's <name> element; parse_pascal_voc stores "" when
    # the element is missing or empty.
    name: str
    # (xmin, ymin, xmax, ymax) taken from <bndbox>, or None when the object
    # has no usable bounding box.
    bbox: tuple[int, int, int, int] | None
    # Polygon vertices as an (N, 2) float array of (x, y) rows, or None when
    # the object has no polygon.
    polygon: np.ndarray | None


def _int_text(parent: ET.Element, tag: str) -> int:
    """Return the numeric text of child ``tag`` of ``parent`` as an int.

    The text is parsed as a float first, so values such as ``"12.0"`` are
    accepted; any fractional part is truncated toward zero.

    Args:
        parent: Element whose direct child is looked up.
        tag: Tag name of the child holding the number.

    Returns:
        The child's value converted to ``int``.

    Raises:
        ValueError: If the child is missing, has no text, or its text is
            not a finite number.
    """
    el = parent.find(tag)
    if el is None or el.text is None:
        raise ValueError(f"Missing tag: {tag}")

    text = el.text.strip()
    try:
        return int(float(text))
    except (ValueError, OverflowError) as exc:
        # int(float("inf")) raises OverflowError rather than ValueError;
        # normalize it so a caller that handles ValueError for malformed
        # values also covers non-finite ones.
        raise ValueError(f"Invalid integer value for tag {tag}: {text!r}") from exc


def _parse_polygon(obj_el: ET.Element) -> np.ndarray | None:
    """Collect the ``x1``/``y1``, ``x2``/``y2``, ... vertices of ``<polygon>``.

    Vertices are read in index order starting at 1. Reading stops at the
    first index whose ``x`` or ``y`` child is missing or has no text.

    Args:
        obj_el: The ``<object>`` element that may contain a ``<polygon>``.

    Returns:
        An ``(N, 2)`` float array of ``(x, y)`` rows, or ``None`` when there
        is no ``<polygon>`` child or no complete vertex was found.

    Raises:
        ValueError: If a coordinate's text cannot be converted by ``float``.
    """
    polygon_node = obj_el.find("polygon")
    if polygon_node is None:
        return None

    def coordinate_text(axis: str, index: int) -> str | None:
        # Text of e.g. <x3>, or None when the element is absent or empty.
        node = polygon_node.find(f"{axis}{index}")
        return None if node is None else node.text

    vertices: list[tuple[float, float]] = []
    index = 1
    while True:
        x_text = coordinate_text("x", index)
        y_text = coordinate_text("y", index)
        if x_text is None or y_text is None:
            break
        vertices.append((float(x_text), float(y_text)))
        index += 1

    return np.asarray(vertices, dtype=float) if vertices else None


def parse_pascal_voc(xml_path: str | Path) -> tuple[Path | None, list[VocObject]]:
    """Parse a Pascal VOC XML file into an image path and its objects.

    The image is searched for in this order, and the first candidate that is
    an existing regular file wins:

    1. the ``<path>`` element, used as written;
    2. the ``<filename>`` element, resolved against the XML file's directory;
    3. the XML path itself with a ``.png``, ``.jpg`` or ``.jpeg`` suffix.

    Args:
        xml_path: Location of the annotation file.

    Returns:
        ``(image_path, objects)``. ``image_path`` is ``None`` when no
        candidate file exists. ``objects`` holds one ``VocObject`` per
        ``<object>`` element; an object whose ``<bndbox>`` is incomplete or
        malformed gets ``bbox=None``.

    Raises:
        OSError, xml.etree.ElementTree.ParseError: Propagated from
            ``ET.parse`` when the file cannot be read or is not valid XML.
        ValueError: Propagated from polygon parsing when a polygon
            coordinate is not a number.
    """
    xml_path = Path(xml_path)
    root = ET.parse(xml_path).getroot()

    # Whitespace-only text is treated as absent: Path("") is the current
    # directory, which exists and would otherwise be taken for the image.
    candidates: list[Path] = []
    path_text = (root.findtext("path") or "").strip()
    if path_text:
        candidates.append(Path(path_text))
    filename_text = (root.findtext("filename") or "").strip()
    if filename_text:
        candidates.append(xml_path.parent / filename_text)
    candidates.extend(xml_path.with_suffix(ext) for ext in (".png", ".jpg", ".jpeg"))

    # is_file() rather than exists(): a directory must not shadow the later
    # fallbacks, since it can never be opened as an image.
    image_path: Path | None = next(
        (candidate for candidate in candidates if candidate.is_file()), None
    )

    objects: list[VocObject] = []
    for obj_el in root.findall("object"):
        name = (obj_el.findtext("name") or "").strip()

        bbox = None
        bndbox_el = obj_el.find("bndbox")
        if bndbox_el is not None:
            try:
                xmin = _int_text(bndbox_el, "xmin")
                ymin = _int_text(bndbox_el, "ymin")
                xmax = _int_text(bndbox_el, "xmax")
                ymax = _int_text(bndbox_el, "ymax")
                bbox = (xmin, ymin, xmax, ymax)
            except (ValueError, OverflowError):
                # Best effort: keep the object but drop an unusable box.
                # OverflowError covers non-finite values such as "inf".
                bbox = None

        polygon = _parse_polygon(obj_el)
        objects.append(VocObject(name=name, bbox=bbox, polygon=polygon))

    return image_path, objects


def visualize_pascal_voc(
    xml_path: str | Path,
    *,
    show_bbox: bool = True,
    show_polygon: bool = True,
    figsize: tuple[float, float] = (10, 10),
) -> None:
    """Show the image referenced by a Pascal VOC XML file with its overlays.

    Each object gets one color from the ``tab20`` colormap (cycling every 20
    objects). Bounding boxes are drawn as rectangles with the object name as
    a label above the top-left corner; polygons are drawn as closed outlines.

    Args:
        xml_path: Annotation file to visualize.
        show_bbox: Draw bounding boxes and their name labels.
        show_polygon: Draw polygon outlines.
        figsize: Matplotlib figure size in inches.

    Raises:
        FileNotFoundError: If no image file could be located for the XML.
    """
    xml_path = Path(xml_path)
    image_path, objects = parse_pascal_voc(xml_path)

    if image_path is None:
        raise FileNotFoundError(f"Image file was not found for XML: {xml_path}")

    # Close the underlying file as soon as the pixels are copied out instead
    # of leaving the handle to the garbage collector.
    with Image.open(image_path) as source_image:
        image = np.asarray(source_image.convert("RGB"))

    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(image)
    ax.set_title(f"{xml_path.name} ({len(objects)} objects)")
    ax.axis("off")

    cmap = plt.get_cmap("tab20")
    for i, obj in enumerate(objects):
        color = cmap(i % 20)

        if show_bbox and obj.bbox is not None:
            xmin, ymin, xmax, ymax = obj.bbox
            rect = patches.Rectangle(
                (xmin, ymin),
                # Keep degenerate boxes visible with at least 1 px extent.
                max(1, xmax - xmin),
                max(1, ymax - ymin),
                linewidth=2,
                edgecolor=color,
                facecolor="none",
            )
            ax.add_patch(rect)
            ax.text(
                xmin,
                # Place the label just above the box, clamped to the image top.
                max(0, ymin - 4),
                obj.name,
                fontsize=10,
                color="white",
                bbox={"facecolor": color, "alpha": 0.8, "pad": 1.5},
            )

        # A polygon needs at least two vertices to produce a visible outline.
        if show_polygon and obj.polygon is not None and len(obj.polygon) >= 2:
            poly = patches.Polygon(
                obj.polygon,
                closed=True,
                fill=False,
                linewidth=2,
                edgecolor=color,
                linestyle="-",
                alpha=0.9,
            )
            ax.add_patch(poly)

    fig.tight_layout()
    plt.show()

In [None]:
# Specify one XML file to visualize.
# "output.xml" is a relative path, so it is resolved against the current
# working directory; edit it to point at your own annotation file.
VOC_XML_PATH = Path("output.xml")
# Draw both bounding boxes and polygons on top of the referenced image.
visualize_pascal_voc(VOC_XML_PATH, show_bbox=True, show_polygon=True)