In [None]:
import os
import cv2
import yaml
import xml.etree.ElementTree as ET
from xml.dom.minidom import parseString

In [None]:
# Base path to dataset
# Raw string so the Windows backslashes are taken literally; in a normal string
# `\o` and `\i` are invalid escape sequences and trigger a SyntaxWarning on newer Pythons.
base_path = r'U:\object-detection-yolo\indoor_object_detection_dataset'
# Dataset sub-folders processed by the conversion loop.
splits = ['train', 'valid', 'test']
# Image file names that were expected from a label file but missing on disk.
not_found_image = []

In [None]:
# Load class names from data.yaml
# Read explicitly as UTF-8 so non-ASCII class names are not mis-decoded by the
# platform default encoding (e.g. cp1252 on Windows).
with open(os.path.join(base_path, 'data.yaml'), 'r', encoding='utf-8') as f:
    data = yaml.safe_load(f)
# Looked up by integer class id when the XML annotations are written.
class_names = data['names']

def create_voc_xml(image_path, boxes, img_shape, class_names, output_path):
    """Write a Pascal VOC-style XML annotation file for a single image.

    Args:
        image_path: Path of the annotated image. Only its file name and the name
            of its parent folder are recorded in the XML.
        boxes: Iterable of ``(class_id, x_min, y_min, x_max, y_max)`` tuples.
        img_shape: ``(height, width, depth)`` or, for a single-channel image,
            ``(height, width)``; the latter is recorded with depth 1.
        class_names: Object indexable by ``class_id`` that yields the class label.
        output_path: Destination of the XML file; an existing file is overwritten.
    """
    height, width = img_shape[:2]
    # A 2-D shape has no channel axis; record it as a single-channel image.
    depth = img_shape[2] if len(img_shape) > 2 else 1
    image_filename = os.path.basename(image_path)

    annotation = ET.Element('annotation')
    ET.SubElement(annotation, 'folder').text = os.path.basename(os.path.dirname(image_path))
    ET.SubElement(annotation, 'filename').text = image_filename

    size = ET.SubElement(annotation, 'size')
    ET.SubElement(size, 'width').text = str(width)
    ET.SubElement(size, 'height').text = str(height)
    ET.SubElement(size, 'depth').text = str(depth)
    ET.SubElement(annotation, 'segmented').text = '0'

    for class_id, x_min, y_min, x_max, y_max in boxes:
        obj = ET.SubElement(annotation, 'object')
        ET.SubElement(obj, 'name').text = class_names[class_id]
        ET.SubElement(obj, 'pose').text = 'Unspecified'
        ET.SubElement(obj, 'truncated').text = '0'
        ET.SubElement(obj, 'difficult').text = '0'
        bbox = ET.SubElement(obj, 'bndbox')
        ET.SubElement(bbox, 'xmin').text = str(x_min)
        ET.SubElement(bbox, 'ymin').text = str(y_min)
        ET.SubElement(bbox, 'xmax').text = str(x_max)
        ET.SubElement(bbox, 'ymax').text = str(y_max)

    # Round-trip through minidom purely to get indented output.
    xml_str = ET.tostring(annotation)
    dom = parseString(xml_str)
    # Write as UTF-8 explicitly: the pretty-printed text may contain non-ASCII
    # class names that the platform default encoding cannot represent.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(dom.toprettyxml(indent="  "))

def _yolo_line_to_voc_box(line, width, height):
    """Convert one YOLO label line into a pixel-space VOC box.

    A YOLO line holds ``class_id x_center y_center w h`` with the four geometry
    values normalised by the image size. Returns
    ``(class_id, x_min, y_min, x_max, y_max)`` in integer pixels, clamped to the
    image bounds, or ``None`` for a blank line. A line with a different number
    of fields or non-numeric fields raises ``ValueError``.
    """
    fields = line.split()
    if not fields:
        # Blank lines (e.g. a trailing newline) carry no box.
        return None
    class_id, x_center, y_center, w, h = map(float, fields)
    x_min = int((x_center - w / 2) * width)
    x_max = int((x_center + w / 2) * width)
    y_min = int((y_center - h / 2) * height)
    y_max = int((y_center + h / 2) * height)
    # Boxes touching the border can land slightly outside the image;
    # keep the coordinates inside [0, width] x [0, height].
    x_min = max(0, min(width, x_min))
    x_max = max(0, min(width, x_max))
    y_min = max(0, min(height, y_min))
    y_max = max(0, min(height, y_max))
    return (int(class_id), x_min, y_min, x_max, y_max)


# Convert every YOLO label file of every split into a VOC XML file stored in
# a sibling 'annotations' folder.
for split in splits:
    image_dir = os.path.join(base_path, split, 'images')
    label_dir = os.path.join(base_path, split, 'labels')
    output_dir = os.path.join(base_path, split, 'annotations')  # new folder for .xml
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(label_dir):
        if not filename.endswith('.txt'):
            continue

        stem = os.path.splitext(filename)[0]
        image_filename = stem + '.jpg'  # adjust if using .png
        image_path = os.path.join(image_dir, image_filename)
        label_path = os.path.join(label_dir, filename)
        xml_output_path = os.path.join(output_dir, stem + '.xml')

        if not os.path.exists(image_path):
            print(f"Warning: image not found for {image_filename} in {split}")
            not_found_image.append(image_filename)
            continue

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # instead of raising; skip it rather than crash on `.shape`.
            print(f"Warning: could not read image {image_filename} in {split}")
            continue
        height, width = image.shape[:2]

        boxes = []
        with open(label_path, 'r') as f:
            for line in f:
                box = _yolo_line_to_voc_box(line, width, height)
                if box is not None:
                    boxes.append(box)

        create_voc_xml(image_path, boxes, image.shape, class_names, xml_output_path)

print("YOLO to VOC XML conversion complete.")

In [None]:
# Compare the number of train images with the number of generated XML annotations.
# Paths are derived from base_path instead of repeating the absolute dataset path.
train_img = os.listdir(os.path.join(base_path, 'train', 'images'))
train_xml = os.listdir(os.path.join(base_path, 'train', 'annotations'))
len(train_img), len(train_xml)

In [None]:
# Compare the number of test images with the number of generated XML annotations.
# Paths are derived from base_path instead of repeating the absolute dataset path.
test_img = os.listdir(os.path.join(base_path, 'test', 'images'))
test_xml = os.listdir(os.path.join(base_path, 'test', 'annotations'))
len(test_img), len(test_xml)

In [None]:
# Compare the number of validation images with the number of generated XML annotations.
# Paths are derived from base_path instead of repeating the absolute dataset path.
valid_img = os.listdir(os.path.join(base_path, 'valid', 'images'))
valid_xml = os.listdir(os.path.join(base_path, 'valid', 'annotations'))
len(valid_img), len(valid_xml)

In [1]:
from dataset_indoor import create_train_loader
from tqdm.auto import tqdm

# Absolute path of the training image folder; handed to create_train_loader in the next cell.
train_dataset = r"U:\object-detection-yolo\indoor_object_detection_dataset\train\images"

  data = fetch_version_info()


inside CustomDataset


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build the training loader from the image folder.
# NOTE(review): the second argument (4) is presumably the batch size — confirm
# against dataset_indoor.create_train_loader and consider naming it.
train_data_loader = create_train_loader(train_dataset, 4)

inside create_train_loader


In [6]:
# prog_bar = tqdm(train_data_loader, total=len(train_data_loader))
    
# for i, data in enumerate(prog_bar):
#     print(data)
#     break