In [2]:
import os
import xml.etree.ElementTree as ET

import numpy as np
import BboxToolkit as bt

# Convert one XML annotation file whose objects carry rotated boxes under
# <object>/<robndbox> (cx, cy, w, h, angle) into a YOLO-style label file:
# one line per box, "class x_center y_center width height", with all
# coordinates normalized by the image size.

# Load XML file
xml_file_path = r"C:\Users\Sanjay\Pictures\XML\20190804111453_tile_8679_band_2.xml"
tree = ET.parse(xml_file_path)
root = tree.getroot()

# Initialize lists to store bounding box information
cx_list, cy_list, w_list, h_list, angle_list = [], [], [], [], []

# Extract bounding box information from XML
for obj in root.findall(".//object"):
    robndbox = obj.find("robndbox")
    if robndbox is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
            f"<object> without a <robndbox> element in {xml_file_path}"
        )

    # NOTE: cx, cy, w, h, angle stay module-level names on purpose; after the
    # loop they hold the values of the last object, which later cells inspect.
    cx = float(robndbox.findtext("cx"))
    cy = float(robndbox.findtext("cy"))
    w = float(robndbox.findtext("w"))
    h = float(robndbox.findtext("h"))
    angle = float(robndbox.findtext("angle"))

    cx_list.append(cx)
    cy_list.append(cy)
    w_list.append(w)
    h_list.append(h)
    angle_list.append(angle)

# Despite its name, hbb1 holds the *oriented* boxes, one row per object:
# (cx, cy, w, h, angle).
# NOTE(review): the angle unit is whatever the XML stores — confirm that it
# matches what BboxToolkit expects for 5-column boxes.
hbb1 = np.column_stack((cx_list, cy_list, w_list, h_list, angle_list))

# Convert to hbb format using BboxToolkit; the result is used below as
# (xmin, ymin, xmax, ymax) columns.
hbb2 = bt.bbox2type(hbb1, 'hbb')

# Image size (pixels). Hardcoded: assumes every annotated image is 900x900.
image_width = 900
image_height = 900

# Get xmin, ymin, xmax, ymax. The enclosing box of a rotated box can stick out
# past the image border, so clip it to the image to keep every normalized
# value inside [0, 1].
xmin = np.clip(hbb2[:, 0], 0, image_width)
ymin = np.clip(hbb2[:, 1], 0, image_height)
xmax = np.clip(hbb2[:, 2], 0, image_width)
ymax = np.clip(hbb2[:, 3], 0, image_height)

# Calculate normalized coordinates
normalized_x_center = (xmin + xmax) / (2 * image_width)
normalized_y_center = (ymin + ymax) / (2 * image_height)
normalized_width = (xmax - xmin) / image_width
normalized_height = (ymax - ymin) / image_height

# Add class_id (0) to the format: every box gets class 0
class_id = np.zeros_like(normalized_x_center, dtype=int)

# One row per box: class, x_center, y_center, width, height
output_text = np.column_stack((class_id, normalized_x_center, normalized_y_center, normalized_width, normalized_height))

# Save the result to a text file with the same name. Only the final extension
# is swapped: str.replace(".xml", ".txt") would also rewrite ".xml" inside
# directory names and, for a non-matching extension such as ".XML", would leave
# the path unchanged and overwrite the annotation itself.
output_file_path = os.path.splitext(xml_file_path)[0] + ".txt"
np.savetxt(output_file_path, output_text, fmt="%d %.6f %.6f %.6f %.6f")

print(f"Conversion successful. Result saved to: {output_file_path}")


Conversion successful. Result saved to: C:\Users\Sanjay\Pictures\XML\20190804111453_tile_8679_band_2.txt


In [4]:
# Inspect the loop variables from the XML-parsing loop; once the loop has
# finished they hold the centre and size of the last <object> that was read.
cx, cy, w, h

(443.3026, 798.5512, 27.8782, 191.0998)

In [7]:
# class_id has one entry per converted bounding box, so its shape shows how
# many label rows were produced.
class_id.shape

(2,)