### torch installation
```
pip install torch torchvision --index-url=download.pytoch.org/whl/cu126
```

# Some imports & Settings

In [28]:
# some imports
import torch
from ultralytics import YOLO, settings
from PIL import Image
from pathlib import Path
import numpy as np
import shutil
import cv2

device= 'cuda' if torch.cuda.is_available() else 'cpu'

# ultralytics settings
settings.update({'datasets_dir': './datasets/CCPD'})
settings.update({'runs_dir': './runs'})
settings.update({'weights_dir': './weights'})

# CCPD settings
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学", "O"]
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W','X', 'Y', 'Z', 'O']
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O']
subsets=['ccpd_base','ccpd_blur','ccpd_challenge','ccpd_db','ccpd_fn','ccpd_rotate','ccpd_tilt','ccpd_weather']

# make val datasets

In [48]:
# make CCPD labels

root = Path('./datasets/CCPD')

def xyxy2xywhnormalize(xyxy):
    """
    Convert xyxy to xywh and normalize the coordinates to [0, 1]
    """
    x1, y1, x2, y2 = xyxy
    x = (x1 + x2) / (2 * 720)
    y = (y1 + y2) / (2 * 1160)
    w = (x2 - x1) / 720
    h = (y2 - y1) / 1160
    return x, y, w, h


def getbbox(image_path):
    """
    Get the bounding box coordinates from the image name.
    """
    parts = image_path.stem.split("-")
    coords = list(map(int, parts[2].replace("&", ",").replace("_", ",").split(",")))
    return coords


for subset in subsets:
    counter=0
    print(f'Processing subset: {subset}')
    subset_images_path = root / subset
    if not subset_images_path.exists():
        continue
    labels_path = subset_images_path/'labels'
    if not labels_path.exists():
        labels_path.mkdir(parents=True, exist_ok=True)
    for image_path in subset_images_path.glob('*.jpg'):
        label_path = labels_path / image_path.name.replace('.jpg', '.txt')
        # print(f'0 {" ".join(map(str, xyxy2xywhnormalize(getbbox(image_path))))}\n{label_path}')
        with open(label_path, 'w') as f:
            f.write(f'0 {" ".join(map(str, xyxy2xywhnormalize(getbbox(image_path))))}\n')  # write label
            counter += 1
    print(f'Finished processing subset: {subset}, {counter} labels created.')

Processing subset: ccpd_base


KeyboardInterrupt: 

In [None]:

import random
from datetime import datetime
import json

# create test dataset as YOLO dataset format, randomly pick pics from subsets of CCPD into dataset, the root directory name is like dataset-202505212358, the structure of the dataset is like:
# dataset-202505212358
# ├── images
# │   ├── train
# │   ├── val
# │   └── test
# └── labels
# │   ├── train
# │   ├── val
# │   └── test
# └── dataset.yaml
# becasue of only val this time, so only one folder
def create_test_dataset(size: float=0.1, subsets: list=subsets):
    datalist={}
    dataset_name = 'dataset-' + datetime.now().strftime('%Y%m%d%H%M%S')
    dataset_path = Path('datasets') / dataset_name
    if dataset_path.exists():
        shutil.rmtree(dataset_path)
    dataset_path.mkdir(parents=True, exist_ok=True)

    # create images and labels directories
    images_path = dataset_path / 'images'
    labels_path = dataset_path / 'labels'
    images_path.mkdir(parents=True, exist_ok=True)
    labels_path.mkdir(parents=True, exist_ok=True)

    for subset in subsets:
        subset_images_path = Path(f'./datasets/CCPD/{subset}')
        subset_labels_path = Path(f'./datasets/CCPD/{subset}/labels')
        total = len(list(subset_images_path.glob('*.jpg')))
        counter = int(total * size)
        datalist[subset] = counter

        # copy 10% of the images and labels to the test dataset
        if counter != 0:
            sample_images = random.sample(list(subset_images_path.glob('*.jpg')), counter)
            sample_labels = [subset_labels_path / image.name.replace('.jpg', '.txt') for image in sample_images]
            for image, label in zip(sample_images, sample_labels):
                shutil.copy(image, images_path/image.name)
                if label.exists():
                    shutil.copy(label, labels_path/label.name)
        print(f'copied {counter} out of {total} images ({size*100:.0f}%) from {subset} subset')

        # for image_file in subset_images_path.glob('*.jpg'):
        #     if random() < 0.1:
        #         shutil.copy(image_file, images_path/image_file.name)
        #         label_file = subset_labels_path / image_file.name.replace('.jpg', '.txt')
        #         if label_file.exists():
        #             shutil.copy(label_file, labels_path/label_file.name)
        #         counter+=1
        # print(f'copied {counter} images from {subset} subset')
        # datalist[subset] = counter
        

    # create dataset.yaml file
    with open(dataset_path / 'dataset.yaml', 'w') as f:
        f.write('train: ./images\n')
        f.write('val: ./images\n')
        f.write('test: ./images\n')
        f.write('nc: 1\n')
        f.write('names: [\'license plate\']\n')
    
    with open(dataset_path/'data.json', 'w') as f:
        f.write(json.dumps(datalist))
        
    print(f'Finished creating test dataset: {dataset_name}, {sum(datalist.values())} images copied.')

create_test_dataset(size=0.1, subsets=subsets)

copied 19999.600000000002 out of 199996 images (10%) from ccpd_base subset
copied 2061.1 out of 20611 images (10%) from ccpd_blur subset
copied 5000.3 out of 50003 images (10%) from ccpd_challenge subset
copied 1013.2 out of 10132 images (10%) from ccpd_db subset
copied 2096.7000000000003 out of 20967 images (10%) from ccpd_fn subset
copied 1005.3000000000001 out of 10053 images (10%) from ccpd_rotate subset
copied 3021.6000000000004 out of 30216 images (10%) from ccpd_tilt subset
copied 999.9000000000001 out of 9999 images (10%) from ccpd_weather subset
Finished creating test dataset: dataset-20250522181353, 35197.700000000004 images copied.


In [None]:
from pathlib import Path
import numpy as np

test_file = './datasets/CCPD/splits/test.txt'
datasets_root_dir = Path('./datasets/CCPD')

with open(test_file, 'r') as f:
    lines = f.readlines()

file = Path(datasets_root_dir / lines[0].strip())
print(f'{file = }')

parts=Path(datasets_root_dir/lines[0].strip()).stem.split('-')
print(f'{parts = }')

cords=list(map(int, parts[3].replace("&", ",").replace("_",",").split(",")))
print(f'{cords = }')

points=np.array(cords).reshape(-1, 2)
print(f'{points = }')

plate_number_list = parts[-3].split('_')
print(f'{plate_number_list = }')

province_letter = provinces[int(plate_number_list[0])]
alphabet_letter = alphabets[int(plate_number_list[1])]
number_letter_list = plate_number_list[2:]
number_letter = "".join([ads[int(char)] for char in number_letter_list])
plate_number = province_letter + alphabet_letter + " " + number_letter
print(f'{plate_number = }')


file = WindowsPath('datasets/CCPD/ccpd_blur/0359-5_21-151&285_417&398-417&398_179&377_151&285_389&306-0_0_4_33_32_25_12-59-4.jpg')
parts = ['0359', '5_21', '151&285_417&398', '417&398_179&377_151&285_389&306', '0_0_4_33_32_25_12', '59', '4']
cords = [417, 398, 179, 377, 151, 285, 389, 306]
points = array([[417, 398],
       [179, 377],
       [151, 285],
       [389, 306]])
plate_number_list = ['0', '0', '4', '33', '32', '25', '12']
plate_number = '皖A E981N'


In [None]:

# !yolo val model=./weights/yolov8s.pt data=./datasets/CCPD/splits_test.yaml split="test"

from ultralytics import YOLO
import torch
device= 'cuda' if torch.cuda.is_available() else 'cpu'
model = YOLO('./weights/yolov8s.pt')
model.val(data='./datasets/dataset-20250522011545/dataset.yaml', split='test')
# model.val()

# load yolov5 model


Ultralytics 8.3.139  Python-3.12.6 torch-2.7.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Ti, 8188MiB)
Model summary (fused): 72 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.30.1 ms, read: 150.358.6 MB/s, size: 70.1 KB)


[34m[1mval: [0mScanning D:\projects\ccpd-new\datasets\dataset-20250522011545\labels... 34818 images, 0 backgrounds, 0 corrupt: 100%|██████████| 34818/34818 [00:15<00:00, 2263.59it/s]


[34m[1mval: [0mNew cache created: D:\projects\ccpd-new\datasets\dataset-20250522011545\labels.cache


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2177/2177 [02:32<00:00, 14.32it/s]


                   all      34818      34818      0.998      0.998      0.994      0.774
                single      34818      34818      0.998      0.998      0.994      0.774
Speed: 0.1ms preprocess, 1.2ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1mruns\detect\val2[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x000001EA710CE5D0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.0480

Ultralytics 8.3.139  Python-3.12.6 torch-2.7.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Ti, 8188MiB)

Model summary (fused): 72 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs

Fast image access  (ping: 0.30.1 ms, read: 150.358.6 MB/s, size: 70.1 KB)

Scanning D:\projects\ccpd-new\datasets\dataset-20250522011545\labels... 34818 images, 0 backgrounds, 0 corrupt: 100%|██████████| 34818/34818 [00:15<00:00, 2263.59it/s]

New cache created: D:\projects\ccpd-new\datasets\dataset-20250522011545\labels.cache

                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2177/2177 [02:32<00:00, 14.32it/s]
                   all      34818      34818      0.998      0.998      0.994      0.774
                single      34818      34818      0.998      0.998      0.994      0.774
Speed: 0.1ms preprocess, 1.2ms inference, 0.0ms loss, 0.8ms postprocess per image

In [None]:
# modelv5=torch.hub.load('ultralytics/yolov5', 'custom', path='./weights/yolov5/best_yolov5s_final.pt')
# modelv5.val(data="./datasets/dataset-20250522181353/dataset.yaml", split="test")
# modelv5('datasets/dataset-20250522181353/images/01-0_0-277&502_421&560-420&560_277&559_278&502_421&503-0_0_28_7_24_26_24-143-28.jpg').pandas().xyxy[0]

!python val.py --weights "./weights/yolov5/best_yolov5s_final.pt" --data "datasets/dataset-20250522181353/dataset-yolov5.yaml" --task "test" --device 0

YOLOv5  a4ddb2e Python-3.12.6 torch-2.7.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Ti, 8188MiB)

Fusing layers... 

Model summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs

                 Class     Images  Instances          P          R      mAP50   mAP50-95: 100%|██████████| 1085/1085 03:46
                   all      34720      34720      0.995      0.998      0.993      0.782
          single_plate      34720      34720      0.995      0.998      0.993      0.782
          
Speed: 0.1ms pre-process, 2.3ms inference, 0.8ms NMS per image at shape (32, 3, 640, 640)