In [2]:
import os
from pathlib import Path
import shutil
import torch

# images_dir = Path("atlas_dione_objectdetection/JPEGImages") # ATLAS dataset directory containing JPG images
# annotations_dir = Path("atlas_dione_objectdetection/Annotations") # ATLAS dataset directory containing XML files
# test_dir = Path("./dataset/test") # Test directory (only JPG images)

# Source locations of the ATLAS Dione dataset and destination of the held-out test images.
images_dir = Path("../../atlas_dione_objectdetection/ATLAS_Dione_ObjectDetection/ATLAS_Dione_ObjectDetection/ATLAS_Dione_ObjectDetection/JPEGImages") # ATLAS dataset directory containing JPG images
annotations_dir = Path("../../atlas_dione_objectdetection/ATLAS_Dione_ObjectDetection/ATLAS_Dione_ObjectDetection/ATLAS_Dione_ObjectDetection/Annotations") # ATLAS dataset directory containing XML files
test_dir = Path("./dataset/test") # Test directory (only JPG images)

# Creating directories
train_img_dir = Path("./dataset/train/images")
train_labels_dir = Path("./dataset/train/labels")
val_img_dir = Path("./dataset/val/images")
val_labels_dir = Path("./dataset/val/labels")
for output_dir in (train_img_dir, val_img_dir, train_labels_dir, val_labels_dir, test_dir):
    os.makedirs(output_dir, exist_ok=True)

# Obtaining list of all images.
# The listing is sorted because os.listdir returns entries in arbitrary order;
# a stable order is required for the seeded split below to be repeatable.
all_images = sorted(
    f for f in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, f))
)

train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1

# Fixed seed: re-running this cell must select the same files for each split.
# Without it every run draws a new partition, and because files are only copied
# (never removed) an image could end up in more than one of train/val/test.
# NOTE: files left behind by earlier runs that used a different split are not
# cleaned up here; delete ./dataset manually if that applies.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)

# Split dataset
train_images, val_images, test_images = torch.utils.data.random_split(
    all_images,
    [train_ratio, val_ratio, test_ratio],
    generator=split_generator,
)

# Copying files into the train, val and test directories (originals stay in place)
for split_images, destination_dir in (
    (train_images, train_img_dir),
    (val_images, val_img_dir),
    (test_images, test_dir),
):
    for img in split_images:
        shutil.copy(os.path.join(images_dir, img), os.path.join(destination_dir, img))

print(f"Training images: {len(train_images)}")
print(f"Validation images: {len(val_images)}")
print(f"Test images: {len(test_images)}")

Training images: 17974
Validation images: 2247
Test images: 2246


In [3]:
from pathlib import Path
import xml.etree.ElementTree as ET

# Object names accepted from the XML annotations. convert_annotation only tests
# membership in this dict; the mapped value is not used as the YOLO class id
# (that id is derived from each object's <pose>).
classes = {"tool": 0}

def convert_bbox(size, box):
    """Convert a pixel-space box into a normalized YOLO box.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels (both x values come first).

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the matching
        image dimension, or ``None`` when nothing of the box is left after it
        has been clamped to the image. An image whose width or height is not
        positive therefore also yields ``None`` instead of a division error.
    """
    img_w, img_h = size[0], size[1]
    xmin, xmax, ymin, ymax = box

    # Clamp coordinates to ensure they are within the image bounds
    xmin = max(0, xmin)
    ymin = max(0, ymin)
    xmax = min(img_w, xmax)
    ymax = min(img_h, ymax)

    # Ensure width and height are non-negative after clamping
    width = max(0, xmax - xmin)
    height = max(0, ymax - ymin)

    # A box with no area is invalid
    if width == 0 or height == 0:
        return None

    # The normalization factors are computed only after validation: a box that
    # survived the check has 0 <= xmin < xmax <= img_w (and likewise for y), so
    # both image dimensions are guaranteed to be positive here.
    dw = 1.0 / img_w  # width normalization factor
    dh = 1.0 / img_h  # height normalization factor

    # Calculate center coordinates
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0

    # Normalize the center coordinates and dimensions
    return (x_center * dw, y_center * dh, width * dw, height * dh)


def convert_annotation(xml_file):
    """Convert a single XML annotation file into YOLO-format label lines.

    Args:
        xml_file: Path of the XML annotation file to parse.

    Returns:
        A list of strings, one per kept object, formatted as
        ``"<class_id> <x_center> <y_center> <width> <height>\\n"``.
        Objects are dropped when their ``<name>`` is not a key of ``classes``,
        when they have no ``<bndbox>``, or when ``convert_bbox`` rejects the box.

    The class id comes from the object's ``<pose>``: ``"Right"`` maps to 1 and
    anything else (including a missing or empty ``<pose>``) maps to 0.
    """
    root = ET.parse(xml_file).getroot()

    size = root.find("size")
    # Height and width are switched in the dataset
    height = int(size.find("width").text)
    width = int(size.find("height").text)

    output_lines = []

    for obj in root.findall("object"):
        # findtext returns None for a missing element instead of raising
        if obj.findtext("name") not in classes:
            continue

        bndbox = obj.find("bndbox")
        if bndbox is None:
            # No geometry to convert for this object
            continue

        # Right or left tool
        class_id_int = 1 if obj.findtext("pose") == "Right" else 0

        # Going through float() also accepts coordinates written with a decimal
        # part (e.g. "12.0"), which int() alone rejects; the value is truncated.
        xmin, ymin, xmax, ymax = (
            int(float(bndbox.findtext(tag))) for tag in ("xmin", "ymin", "xmax", "ymax")
        )

        bbox = convert_bbox((width, height), (xmin, xmax, ymin, ymax))
        if bbox is not None:
            output_lines.append(f"{class_id_int} {' '.join(map(str, bbox))}\n")

    return output_lines

def convertToYOLO(fileToConvert, annotations_dir, labels_dir):
    """Write the YOLO label file that belongs to one image.

    Args:
        fileToConvert: Image file name; names not ending in ".jpg" are ignored.
        annotations_dir: Directory holding the XML annotation files.
        labels_dir: Directory in which the ".txt" label file is written.

    The XML file is looked up under the image's base name. When it is missing,
    a message is printed and no label file is produced.
    """
    if fileToConvert.endswith(".jpg"):
        base_name = os.path.splitext(fileToConvert)[0]

        # Annotation that corresponds to this image
        input_path = os.path.join(annotations_dir, base_name + ".xml")

        if os.path.exists(input_path):
            # Destination of the YOLO labels
            labels_path = os.path.join(labels_dir, base_name + ".txt")

            # Convert first, so a parsing failure does not leave a file behind
            yolo_lines = convert_annotation(input_path)

            with open(labels_path, "w") as label_file:
                label_file.write("".join(yolo_lines))
        else:
            print(f"File XML non trovato per {fileToConvert}, ignorato.")

# Converting all XML files, one split at a time: training first, then validation.
# Each entry is (image directory, label directory, message printed when done).
conversion_jobs = (
    (train_img_dir, train_labels_dir, "Convertion complete for training!"),
    (val_img_dir, val_labels_dir, "Convertion complete!"),
)

for split_img_dir, split_labels_dir, done_message in conversion_jobs:
    for image_name in os.listdir(split_img_dir):
        convertToYOLO(image_name, annotations_dir, split_labels_dir)

    print(done_message)

Convertion complete for training!
Convertion complete!
