Convert the exported synthetic data (images and bounding boxes) into the [Ultralytics YOLO format](https://docs.ultralytics.com/datasets/detect/).

In [9]:
import os

# Input dirs
base_input_dir = '../synthetic_data_creation/output'
input_images_dir = os.path.join(base_input_dir, 'images')
input_bboxes_dir = os.path.join(base_input_dir, 'bounding_boxes')

# Input files
# `os.listdir` returns entries in arbitrary order, so sort both listings:
# this keeps the i-th image aligned with the i-th bounding-box file (they are
# paired by position further down) and makes the train/val split reproducible.
image_files = sorted(os.listdir(input_images_dir))
bbox_files = sorted(os.listdir(input_bboxes_dir))

In [10]:
# Split the files positionally: the first TRAIN_VAL_SPLIT fraction is used for
# training, the remainder for validation.
TRAIN_VAL_SPLIT = 0.8
num_images = len(image_files)
num_train_images = int(num_images * TRAIN_VAL_SPLIT)
num_val_images = num_images - num_train_images

train_image_files, val_image_files = (
    image_files[:num_train_images],
    image_files[num_train_images:],
)
train_bbox_files, val_bbox_files = (
    bbox_files[:num_train_images],
    bbox_files[num_train_images:],
)


def _stems(file_names):
    """Return the set of names with everything from the first '.' onwards removed."""
    return {file_name.partition('.')[0] for file_name in file_names}


# Sanity check: each subset must contain the same ids on the image and bbox side
assert _stems(train_image_files) == _stems(train_bbox_files)
assert _stems(val_image_files) == _stems(val_bbox_files)

In [11]:
# Root of the converted dataset
base_output_dir = 'data/ultralytics_yolo_format'

# One sub-tree for images and one for labels
output_images_dir = os.path.join(base_output_dir, "images")
output_labels_dir = os.path.join(base_output_dir, "labels")

# Each sub-tree gets a train/ and a val/ folder
train_images_dir, val_images_dir = (
    os.path.join(output_images_dir, split_name) for split_name in ('train', 'val')
)
train_labels_dir, val_labels_dir = (
    os.path.join(output_labels_dir, split_name) for split_name in ('train', 'val')
)

# Create the four leaf dirs (no error if they already exist)
for output_dir in (train_images_dir, val_images_dir, train_labels_dir, val_labels_dir):
    os.makedirs(output_dir, exist_ok=True)

In [12]:
def XminYminXmaxYmax_to_XcrYcrWrHr(
    x_min: int,
    y_min: int,
    x_max: int,
    y_max: int,
    image_width: int,
    image_height: int,
) -> tuple[float, float, float, float]:
    """
    Re-express a corner-style box as a relative centre/size box.

    Args:
        x_min, y_min: Corner of the box with the smaller x / y coordinates.
        x_max, y_max: Corner of the box with the larger x / y coordinates.
        image_width, image_height: Size of the image the box belongs to; the
            x values are divided by the width and the y values by the height.

    Returns:
        `(x_center, y_center, width, height)`, each as a fraction of the
        corresponding image dimension.

    Raises:
        AssertionError: If any of the four resulting values is outside [0, 1].
    """
    relative_box = (
        (x_min + x_max) / 2 / image_width,   # x centre
        (y_min + y_max) / 2 / image_height,  # y centre
        (x_max - x_min) / image_width,       # width
        (y_max - y_min) / image_height,      # height
    )

    # Every component must be a valid fraction of the image
    for component in relative_box:
        assert 0 <= component <= 1

    return relative_box

In [14]:
from PIL import Image
from tqdm.notebook import tqdm

# Convert every training sample: write its YOLO label file and save its image
# into the training output dirs.
for image_file, bbox_file in tqdm(list(zip(train_image_files, train_bbox_files))):
    image_id = image_file.split('.')[0]
    # Files are paired by position, so make sure they really belong together
    assert image_id == bbox_file.split('.')[0]

    # Get the existing bounding box (`x_min,y_min,x_max,y_max` on the first line).
    # The directory variable is `input_bboxes_dir`, as defined in the first cell.
    with open(os.path.join(input_bboxes_dir, bbox_file), 'r') as f:
        x_min, y_min, x_max, y_max = map(float, f.readline().strip().split(','))

    # Open the image in a `with` block so its file handle is released at the end
    # of each iteration instead of piling up over the whole loop.
    with Image.open(os.path.join(input_images_dir, image_file)) as image:
        # Get the image width and height
        image_width, image_height = image.size

        # Convert the bounding box to the format required by YOLOv8:
        # `(x_center_rel, y_center_rel, width_rel, height_rel)`
        # (all values are between 0 and 1, relative to the image width and height)
        xcr, ycr, wr, hr = XminYminXmaxYmax_to_XcrYcrWrHr(
            x_min, y_min, x_max, y_max, image_width, image_height
        )

        # Write the bounding box to the output file (class index 0, then the box)
        output_bbox_file = os.path.join(train_labels_dir, f'{image_id}.txt')
        with open(output_bbox_file, 'w') as f:
            f.write(f'0 {xcr:.6} {ycr:.6} {wr:.6} {hr:.6}')

        # Save the image to the output dir. This must happen while the source
        # file is still open, because PIL loads the pixel data lazily.
        image.save(os.path.join(train_images_dir, image_file))

  0%|          | 0/2000 [00:00<?, ?it/s]

In [15]:
from PIL import Image
from tqdm.notebook import tqdm

# Convert every validation sample: write its YOLO label file and save its image
# into the validation output dirs.
for image_file, bbox_file in tqdm(list(zip(val_image_files, val_bbox_files))):
    image_id = image_file.split(".")[0]
    # Files are paired by position, so make sure they really belong together
    assert image_id == bbox_file.split(".")[0]

    # Get the existing bounding box (`x_min,y_min,x_max,y_max` on the first line).
    # The directory variable is `input_bboxes_dir`, as defined in the first cell.
    with open(os.path.join(input_bboxes_dir, bbox_file), "r") as f:
        x_min, y_min, x_max, y_max = map(float, f.readline().strip().split(","))

    # Open the image in a `with` block so its file handle is released at the end
    # of each iteration instead of piling up over the whole loop.
    with Image.open(os.path.join(input_images_dir, image_file)) as image:
        # Get the image width and height
        image_width, image_height = image.size

        # Convert the bounding box to the format required by YOLOv8:
        # `(x_center_rel, y_center_rel, width_rel, height_rel)`
        # (all values are between 0 and 1, relative to the image width and height)
        xcr, ycr, wr, hr = XminYminXmaxYmax_to_XcrYcrWrHr(
            x_min, y_min, x_max, y_max, image_width, image_height
        )

        # Write the bounding box to the output file (class index 0, then the box)
        output_bbox_file = os.path.join(val_labels_dir, f"{image_id}.txt")
        with open(output_bbox_file, "w") as f:
            f.write(f"0 {xcr:.6} {ycr:.6} {wr:.6} {hr:.6}")

        # Save the image to the output dir. This must happen while the source
        # file is still open, because PIL loads the pixel data lazily.
        image.save(os.path.join(val_images_dir, image_file))

  0%|          | 0/500 [00:00<?, ?it/s]