# Import packages

In [2]:
import cv2
import os
import shutil

# Data Prep

In [18]:
def create_file_lists(base_dir, output_image_txt, output_label_txt):
    """
    Create .txt files listing matching image and label paths for the dataset.

    Walks ``base_dir`` recursively. Every ``.png`` file that has a ``.txt``
    file with the same stem in the same folder is recorded as a pair, so that
    line ``i`` of the image list always belongs to line ``i`` of the label
    list. Images without a label file are left out of both lists and reported.

    Args:
        base_dir (str): Directory that is searched recursively for .png images.
        output_image_txt (str): Path to save the image file list.
        output_label_txt (str): Path to save the label file list.
    """
    image_ext = '.png'
    label_ext = '.txt'

    image_paths = []
    label_paths = []
    unlabeled_images = []

    for root, dirs, files in os.walk(base_dir):
        # os.walk yields entries in arbitrary filesystem order; sorting the
        # directory list in place (and the files below) makes the output
        # identical from run to run.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(image_ext):
                continue

            image_path = os.path.join(root, file)
            # Swap only the trailing extension; str.replace would also rewrite
            # any '.png' that appears earlier in the file name.
            label_path = os.path.join(root, file[:-len(image_ext)] + label_ext)

            if os.path.exists(label_path):
                # Append to both lists together so they stay index-aligned.
                image_paths.append(image_path)
                label_paths.append(label_path)
            else:
                unlabeled_images.append(image_path)

    # Write image paths to output_image_txt
    with open(output_image_txt, 'w') as f:
        f.writelines(f"{path}\n" for path in image_paths)

    # Write label paths to output_label_txt
    with open(output_label_txt, 'w') as f:
        f.writelines(f"{path}\n" for path in label_paths)

    print(f"Created {output_image_txt} with {len(image_paths)} image paths.")
    print(f"Created {output_label_txt} with {len(label_paths)} label paths.")
    if unlabeled_images:
        print(f"Skipped {len(unlabeled_images)} image(s) without a label file.")


In [19]:
# Write the image/label path lists for the training split.
base_dir = '/UFPR-ALPR_dataset/training'  # folder that is searched recursively for training images
output_image_txt = '/UFPR-ALPR_dataset/training/train_images.txt'
output_label_txt = '/UFPR-ALPR_dataset/training/train_labels.txt'

create_file_lists(
    base_dir=base_dir,
    output_image_txt=output_image_txt,
    output_label_txt=output_label_txt,
)

Created /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/training/train_images.txt with 1800 image paths.
Created /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/training/train_labels.txt with 1800 label paths.


In [20]:
# Write the image/label path lists for the validation split.
base_dir = '/UFPR-ALPR_dataset/validation'  # folder that is searched recursively for validation images
output_image_txt = '/UFPR-ALPR_dataset/validation/validation_images.txt'
output_label_txt = '/UFPR-ALPR_dataset/validation/validation_labels.txt'

create_file_lists(
    base_dir=base_dir,
    output_image_txt=output_image_txt,
    output_label_txt=output_label_txt,
)

Created /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/validation/validation_images.txt with 900 image paths.
Created /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/validation/validation_labels.txt with 900 label paths.


In [21]:
# Write the image/label path lists for the testing split.
base_dir = '/UFPR-ALPR_dataset/testing'  # folder that is searched recursively for testing images
output_image_txt = '/UFPR-ALPR_dataset/testing/testing_images.txt'
output_label_txt = '/UFPR-ALPR_dataset/testing/testing_labels.txt'

create_file_lists(
    base_dir=base_dir,
    output_image_txt=output_image_txt,
    output_label_txt=output_label_txt,
)

Created /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/testing/testing_images.txt with 1800 image paths.
Created /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/testing/testing_labels.txt with 1800 label paths.


## Convert the data structure to YOLO format

In [22]:
def convert_to_yolo_structure_from_txt(image_txt, label_txt, image_output_dir, label_output_dir):
    """
    Copy the images and labels named in two list files into YOLO-style folders.

    The two list files are read line by line and paired by position: the
    i-th image path goes with the i-th label path. Files are copied as they
    are (label contents are not rewritten) and keep their base name.

    Args:
        image_txt (str): Path to the .txt file listing image file paths.
        label_txt (str): Path to the .txt file listing label file paths.
        image_output_dir (str): Directory to store YOLO images.
        label_output_dir (str): Directory to store YOLO labels.

    Raises:
        AssertionError: If the two lists hold a different number of paths.
    """
    os.makedirs(image_output_dir, exist_ok=True)
    os.makedirs(label_output_dir, exist_ok=True)

    # Blank lines (for example a trailing empty line left by an editor) are
    # not paths; dropping them avoids a false count mismatch or a copy of ''.
    with open(image_txt, 'r') as img_f, open(label_txt, 'r') as lbl_f:
        image_paths = [line.strip() for line in img_f if line.strip()]
        label_paths = [line.strip() for line in lbl_f if line.strip()]

    # Raised explicitly rather than via `assert` so that the check is still
    # performed when Python runs with -O; the exception type is unchanged.
    if len(image_paths) != len(label_paths):
        raise AssertionError("Mismatch between image and label counts!")

    # Copy images and labels to the YOLO structure
    for img_path, lbl_path in zip(image_paths, label_paths):
        dest_image = os.path.join(image_output_dir, os.path.basename(img_path))
        shutil.copy(img_path, dest_image)

        dest_label = os.path.join(label_output_dir, os.path.basename(lbl_path))
        shutil.copy(lbl_path, dest_label)

    print(f"Converted {len(image_paths)} images and labels into YOLO structure.")


In [23]:
def convert_all_to_yolo_structure(base_txt_dir, output_dir):
    """
    Run the YOLO folder conversion once for each of the train, val and test splits.

    For every split this reads ``<split>_images.txt`` and ``<split>_labels.txt``
    from ``base_txt_dir`` and copies the listed files into
    ``output_dir/images/<split>`` and ``output_dir/labels/<split>``.

    NOTE(review): the list files written by the cells of this notebook are
    named train_*/validation_*/testing_* and live in three separate folders,
    which does not match the naming expected here - confirm before calling.

    Args:
        base_txt_dir (str): Directory containing the train, val, and test image/label .txt files.
        output_dir (str): Directory to store the YOLO-structured dataset.
    """
    for split in ('train', 'val', 'test'):
        convert_to_yolo_structure_from_txt(
            os.path.join(base_txt_dir, f"{split}_images.txt"),
            os.path.join(base_txt_dir, f"{split}_labels.txt"),
            os.path.join(output_dir, f"images/{split}"),
            os.path.join(output_dir, f"labels/{split}"),
        )

    print(f"Dataset converted to YOLO structure at {output_dir}.")

## Train, Validation, Test sets

In [25]:
# Path lists for each split
train_images_txt = '/UFPR-ALPR_dataset/training/train_images.txt'
train_labels_txt = '/UFPR-ALPR_dataset/training/train_labels.txt'

val_images_txt = '/UFPR-ALPR_dataset/validation/validation_images.txt'
val_labels_txt = '/UFPR-ALPR_dataset/validation/validation_labels.txt'

test_images_txt = '/UFPR-ALPR_dataset/testing/testing_images.txt'
test_labels_txt = '/UFPR-ALPR_dataset/testing/testing_labels.txt'

# YOLO layout: <root>/images/<split> and <root>/labels/<split>
yolo_output_dir = '/UFPR-ALPR_dataset/yolo_dataset'
train_image_output_dir = os.path.join(yolo_output_dir, 'images/train')
train_label_output_dir = os.path.join(yolo_output_dir, 'labels/train')
val_image_output_dir = os.path.join(yolo_output_dir, 'images/val')
val_label_output_dir = os.path.join(yolo_output_dir, 'labels/val')
test_image_output_dir = os.path.join(yolo_output_dir, 'images/test')
test_label_output_dir = os.path.join(yolo_output_dir, 'labels/test')

# One job per split, processed in the order train, validation, test:
# (image list, label list, image output dir, label output dir)
split_jobs = [
    (train_images_txt, train_labels_txt, train_image_output_dir, train_label_output_dir),
    (val_images_txt, val_labels_txt, val_image_output_dir, val_label_output_dir),
    (test_images_txt, test_labels_txt, test_image_output_dir, test_label_output_dir),
]

for images_txt, labels_txt, images_dir, labels_dir in split_jobs:
    convert_to_yolo_structure_from_txt(
        image_txt=images_txt,
        label_txt=labels_txt,
        image_output_dir=images_dir,
        label_output_dir=labels_dir,
    )

print("Dataset successfully converted to YOLO structure!")

Converted 1800 images and labels into YOLO structure.
Converted 900 images and labels into YOLO structure.
Converted 1800 images and labels into YOLO structure.
Dataset successfully converted to YOLO structure!


## Extract image labels

In [26]:
def parse_corners_to_yolo(label_file, image_width, image_height):
    """
    Parse corners from a label file and convert to YOLO format.

    Every line that starts with ``corners:`` is expected to hold
    whitespace-separated ``x,y`` integer pixel coordinates. The axis-aligned
    box enclosing those points is clipped to the image and written as
    ``0 x_center y_center width height`` with all four values divided by the
    image size. A ``corners:`` line that lists no points is skipped.

    Args:
        label_file (str): Path to the label file containing corners information.
        image_width (int): Width of the corresponding image.
        image_height (int): Height of the corresponding image.
    Returns:
        list: YOLO-formatted annotations as a list of strings.
    """
    yolo_annotations = []

    with open(label_file, 'r') as f:
        for line in f:
            if not line.startswith("corners:"):
                continue

            corners_str = line.split(":", 1)[1].strip()
            corners = [tuple(map(int, point.split(','))) for point in corners_str.split()]
            if not corners:
                # Nothing to enclose; min()/max() below would raise on an empty list.
                continue

            xs = [corner[0] for corner in corners]
            ys = [corner[1] for corner in corners]

            # Clip the enclosing box to the image so the normalised values
            # cannot fall outside the 0..1 range.
            x_min = min(max(min(xs), 0), image_width)
            x_max = min(max(max(xs), 0), image_width)
            y_min = min(max(min(ys), 0), image_height)
            y_max = min(max(max(ys), 0), image_height)

            # Convert to YOLO format
            x_center = (x_min + x_max) / 2 / image_width
            y_center = (y_min + y_max) / 2 / image_height
            width = (x_max - x_min) / image_width
            height = (y_max - y_min) / image_height

            # Class id is always 0 (single class: license plate)
            yolo_annotations.append(f"0 {x_center} {y_center} {width} {height}")

    return yolo_annotations

In [27]:
def convert_corners_labels_to_yolo(image_txt, label_txt, image_output_dir, label_output_dir):
    """
    Build a YOLO dataset split from corner-annotated labels.

    Image and label paths are read from the two list files and paired by
    position. For each pair the image is opened to obtain its size, the
    label is converted with ``parse_corners_to_yolo``, the converted label is
    written under ``label_output_dir`` and the image is copied to
    ``image_output_dir``. Images that cannot be read are reported and skipped.

    Args:
        image_txt (str): Path to the .txt file containing image paths.
        label_txt (str): Path to the .txt file containing label paths.
        image_output_dir (str): Directory to store YOLO images.
        label_output_dir (str): Directory to store YOLO labels.
    """
    for out_dir in (image_output_dir, label_output_dir):
        os.makedirs(out_dir, exist_ok=True)

    with open(image_txt, 'r') as img_f, open(label_txt, 'r') as lbl_f:
        image_paths = [entry.strip() for entry in img_f]
        label_paths = [entry.strip() for entry in lbl_f]

    # The lists are paired by position, so they must be the same length.
    assert len(image_paths) == len(label_paths), "Mismatch between image and label counts!"

    for source_image, source_label in zip(image_paths, label_paths):
        # The image is only loaded to learn its pixel dimensions.
        frame = cv2.imread(source_image)
        if frame is None:
            print(f"Error reading image: {source_image}")
            continue
        rows, cols = frame.shape[:2]

        annotation_lines = parse_corners_to_yolo(source_label, cols, rows)

        # The converted label keeps the base name of the source label file.
        target_label = os.path.join(label_output_dir, os.path.basename(source_label))
        with open(target_label, 'w') as out_f:
            out_f.write("\n".join(annotation_lines))

        target_image = os.path.join(image_output_dir, os.path.basename(source_image))
        shutil.copy(source_image, target_image)

    print(f"Converted labels and images to YOLO format in {image_output_dir} and {label_output_dir}.")


In [10]:
def parse_corners_and_convert(label_file, image_width, image_height):
    """
    Parse the 'corners' field from the label file and convert it to YOLO format.

    This is an alias of ``parse_corners_to_yolo``, defined earlier in this
    notebook. The two functions used to be separate copies of the same code;
    delegating keeps a single implementation so they cannot drift apart.

    Args:
        label_file (str): Path to the label file.
        image_width (int): Width of the image.
        image_height (int): Height of the image.
    Returns:
        list: A list of YOLO-format annotations.
    """
    return parse_corners_to_yolo(label_file, image_width, image_height)

In [29]:
# Path lists for each split
train_images_txt = '/UFPR-ALPR_dataset/training/train_images.txt'
train_labels_txt = '/UFPR-ALPR_dataset/training/train_labels.txt'

val_images_txt = '/UFPR-ALPR_dataset/validation/validation_images.txt'
val_labels_txt = '/UFPR-ALPR_dataset/validation/validation_labels.txt'

test_images_txt = '/UFPR-ALPR_dataset/testing/testing_images.txt'
test_labels_txt = '/UFPR-ALPR_dataset/testing/testing_labels.txt'

# Root of the dataset whose labels are rewritten in YOLO format
yolo_output_dir = '/UFPR-ALPR_dataset/yolo_dataset_structure'

# One entry per split, processed in the order train, validation, test:
# (YOLO split folder name, image list, label list)
split_lists = [
    ('train', train_images_txt, train_labels_txt),
    ('val', val_images_txt, val_labels_txt),
    ('test', test_images_txt, test_labels_txt),
]

for split_name, images_txt, labels_txt in split_lists:
    convert_corners_labels_to_yolo(
        images_txt, labels_txt,
        os.path.join(yolo_output_dir, f'images/{split_name}'),
        os.path.join(yolo_output_dir, f'labels/{split_name}')
    )


Converted labels and images to YOLO format in /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/yolo_dataset_structure/images/train and /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/yolo_dataset_structure/labels/train.
Converted labels and images to YOLO format in /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/yolo_dataset_structure/images/val and /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/yolo_dataset_structure/labels/val.
Converted labels and images to YOLO format in /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/yolo_dataset_structure/images/test and /Users/nhngoc02/Downloads/UFPR-ALPR_dataset/yolo_dataset_structure/labels/test.
