In [24]:
import os
from PIL import Image

# Lookup tables for decoding the licence-plate index field of a CCPD filename
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学", "O"]
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'O']
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O']

def parse_ccpd_annotation(filename, image_width, image_height):
    """Decode the bounding box and plate text embedded in a CCPD filename.

    The filename must consist of 7 fields separated by '-'. Field 3 holds the
    box corners as ``x1&y1_x2&y2`` (pixels); field 5 holds 8 '_'-separated
    indices into ``provinces``, ``alphabets`` and ``ads``.

    Args:
        filename: Image filename carrying the annotation.
        image_width: Image width in pixels, used to normalise x values.
        image_height: Image height in pixels, used to normalise y values.

    Returns:
        ``([x_center, y_center, width, height], license_plate)`` with the box
        normalised to the image size, or ``(None, None)`` when the filename
        cannot be decoded. A reason is printed for every rejected filename.
    """
    parts = filename.split('-')
    if len(parts) != 7:
        print(f"Skipping {filename}: Invalid format (expected 7 fields, got {len(parts)})")
        return None, None

    # Normalising against a zero or negative size would either raise
    # ZeroDivisionError or yield negative coordinates.
    if image_width <= 0 or image_height <= 0:
        print(f"Skipping {filename}: Invalid image size ({image_width}x{image_height})")
        return None, None

    # Bounding box from field 3 (parts[2])
    try:
        left_up, right_bottom = parts[2].split('_')
        x_min, y_min = map(int, left_up.split('&'))
        x_max, y_max = map(int, right_bottom.split('&'))
    except (ValueError, IndexError) as e:
        print(f"Skipping {filename}: Malformed bbox ({e})")
        return None, None

    if x_min >= x_max or y_min >= y_max:
        print(f"Skipping {filename}: Invalid bbox ({x_min}, {y_min}, {x_max}, {y_max})")
        return None, None

    # Clip the box to the image so that every normalised value stays within
    # [0, 1]; a box that ends up empty lies entirely outside the image.
    x_min, y_min = max(x_min, 0), max(y_min, 0)
    x_max, y_max = min(x_max, image_width), min(y_max, image_height)
    if x_min >= x_max or y_min >= y_max:
        print(f"Skipping {filename}: Bbox outside image ({x_min}, {y_min}, {x_max}, {y_max})")
        return None, None

    # Convert corner coordinates to normalised centre/size (YOLO layout)
    x_center = (x_min + x_max) / 2.0 / image_width
    y_center = (y_min + y_max) / 2.0 / image_height
    width = (x_max - x_min) / image_width
    height = (y_max - y_min) / image_height
    yolo_bbox = [x_center, y_center, width, height]

    # Plate indices from field 5 (parts[4])
    try:
        indices = parts[4].split('_')
        if len(indices) != 8:
            print(f"Skipping {filename}: Expected 8 indices, got {len(indices)}")
            return None, None

        province_idx = int(indices[0])
        letter_idx = int(indices[1])
        ad_indices = [int(raw) for raw in indices[2:8]]  # 6 characters

        province = provinces[province_idx]
        letter = alphabets[letter_idx]
        chars = [ads[idx] for idx in ad_indices]
        license_plate = province + letter + ''.join(chars)
    except (ValueError, IndexError) as e:
        print(f"Skipping {filename}: Invalid license plate indices ({e})")
        return None, None

    return yolo_bbox, license_plate

def convert_ccpd_to_yolo(dataset_path, output_path, filenames_txt='file_names.txt'):
    """Write one YOLO label file per CCPD image listed in ``filenames_txt``.

    Image names are read from ``dataset_path/filenames_txt`` and the images
    themselves from ``dataset_path/images``. For every ``.jpg`` entry whose
    annotation decodes, a single-line label with class id 0 is written to
    ``output_path/labels/<name>.txt``. Problems are collected in
    ``output_path/conversion_errors.log`` and the decoded plate strings in
    ``output_path/license_plates.txt``.

    Args:
        dataset_path: Directory containing the list file and ``images``.
        output_path: Directory that receives ``labels`` and the two reports.
        filenames_txt: Name of the list file inside ``dataset_path``.

    Returns:
        None. If the list file is missing, a message is printed and nothing
        is written.
    """
    try:
        # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, which
        # would otherwise become part of the first filename.
        with open(os.path.join(dataset_path, filenames_txt), 'r', encoding='utf-8-sig') as f:
            filenames = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"Fehler: {filenames_txt} nicht gefunden in {dataset_path}")
        return

    labels_dir = os.path.join(output_path, 'labels')
    os.makedirs(labels_dir, exist_ok=True)
    images_dir = os.path.join(dataset_path, 'images')

    error_log = []
    license_plates = []  # decoded plate strings, one entry per converted image

    for filename in filenames:
        if not filename.endswith('.jpg'):
            continue

        image_path = os.path.join(images_dir, filename)
        if not os.path.exists(image_path):
            error_log.append(f"Bild fehlt: {filename}")
            continue

        # Per-file boundary: any failure is recorded in the error log and the
        # run continues with the next image.
        try:
            with Image.open(image_path) as img:
                img_w, img_h = img.size

            yolo_bbox, license_plate = parse_ccpd_annotation(filename, img_w, img_h)
            if yolo_bbox is None or license_plate is None:
                error_log.append(f"Fehlerhafte Annotation: {filename}")
                continue

            # Replace only the extension; str.replace would also rewrite any
            # other '.jpg' occurrence inside the name.
            label_name = os.path.splitext(filename)[0] + '.txt'
            label_path = os.path.join(labels_dir, label_name)
            with open(label_path, 'w') as f:
                f.write(f"0 {yolo_bbox[0]:.6f} {yolo_bbox[1]:.6f} {yolo_bbox[2]:.6f} {yolo_bbox[3]:.6f}\n")

            # Keep the decoded plate for the optional report
            license_plates.append(f"{filename}: {license_plate}")

        except Exception as e:
            error_log.append(f"Fehler bei {filename}: {e}")
            continue

    # Error report; written as UTF-8 so that entries containing non-ASCII
    # characters cannot fail on a narrower platform default encoding.
    error_log_path = os.path.join(output_path, 'conversion_errors.log')
    with open(error_log_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(error_log))

    # Plate report (optional output)
    license_plate_path = os.path.join(output_path, 'license_plates.txt')
    with open(license_plate_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(license_plates))

    print(f"Konvertierung abgeschlossen. Fehlerprotokoll: {error_log_path}")


# Every split is converted in place: labels and reports are written into the
# same directory the images are read from, so each pair of path variables
# shares a single string.
train_dataset_path = train_output_path = r"C:\Users\judie\Desktop\GitHub\Programming-for-Modern-Machine-Learning\dataset\CCPD2020\ccpd_green\train"
val_dataset_path = val_output_path = r"C:\Users\judie\Desktop\GitHub\Programming-for-Modern-Machine-Learning\dataset\CCPD2020\ccpd_green\val"
test_dataset_path = test_output_path = r"C:\Users\judie\Desktop\GitHub\Programming-for-Modern-Machine-Learning\dataset\CCPD2020\ccpd_green\test"

# Convert the splits in the order train, val, test.
for split_source, split_target in (
    (train_dataset_path, train_output_path),
    (val_dataset_path, val_output_path),
    (test_dataset_path, test_output_path),
):
    convert_ccpd_to_yolo(split_source, split_target)
print("Conversion complete.")

Konvertierung abgeschlossen. Fehlerprotokoll: C:\Users\judie\Desktop\GitHub\Programming-for-Modern-Machine-Learning\dataset\CCPD2020\ccpd_green\train\conversion_errors.log
Konvertierung abgeschlossen. Fehlerprotokoll: C:\Users\judie\Desktop\GitHub\Programming-for-Modern-Machine-Learning\dataset\CCPD2020\ccpd_green\val\conversion_errors.log
Konvertierung abgeschlossen. Fehlerprotokoll: C:\Users\judie\Desktop\GitHub\Programming-for-Modern-Machine-Learning\dataset\CCPD2020\ccpd_green\test\conversion_errors.log
Conversion complete.


### CCPD to YOLO Conversion

This script converts the CCPD dataset into the YOLO format. The annotations are embedded in the filenames of the images and are extracted to save the bounding boxes in YOLO format.

#### Steps:
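1. **parse_ccpd_annotation**: This function decodes the annotation embedded in an image filename. It returns the bounding box converted to normalized YOLO coordinates (`x_center`, `y_center`, `width`, `height`) together with the decoded license plate text. It does not write any files.
2. **convert_ccpd_to_yolo**: This function iterates through the image names listed in `file_names.txt`, calls `parse_ccpd_annotation` for each one, and writes one YOLO label file per image into a `labels` folder inside `output_path`.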

#### Paths:
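- `dataset_path`: The path to one split of the CCPD dataset. It must contain `file_names.txt` (one image filename per line) and an `images` folder with the actual images.
- `output_path`: The path where the results are saved: the `labels` folder with the YOLO label files, `conversion_errors.log`, and `license_plates.txt`.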