### torch installation
```
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu126
```

# Some imports & Settings

In [21]:
# some imports
import torch
from ultralytics import YOLO, settings
from PIL import Image
from pathlib import Path
import numpy as np
import shutil
import cv2

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ultralytics settings: all three directories are set in a single update call.
# The paths are relative, so they depend on the notebook's working directory.
settings.update({
    'datasets_dir': './datasets/CCPD',
    'runs_dir': './runs',
    'weights_dir': './weights',
})

# CCPD settings
# Lookup tables used to decode the plate-number field of a CCPD file name:
# the first index selects from `provinces`, the second from `alphabets`,
# and the remaining indices select from `ads`.
# Every table ends with the letter 'O' (not the digit '0').
# NOTE(review): the trailing 'O' is presumably the CCPD "no character" marker — confirm against the CCPD README.
provinces = list("皖沪津渝冀晋蒙辽吉黑苏浙京闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新警学") + ["O"]
alphabets = list("ABCDEFGHJKLMNPQRSTUVWXYZ") + ['O']
ads = list("ABCDEFGHJKLMNPQRSTUVWXYZ") + list("0123456789") + ['O']
# Names of the CCPD subset folders processed by the cells below.
subsets = [
    'ccpd_base',
    'ccpd_blur',
    'ccpd_challenge',
    'ccpd_db',
    'ccpd_fn',
    'ccpd_rotate',
    'ccpd_tilt',
    'ccpd_weather',
]

# make val datasets

In [None]:
# Remove every .txt label file that sits directly inside the ccpd_base folder.
stale_labels = Path('datasets/CCPD/ccpd_base').glob('*.txt')
for stale_label in stale_labels:
    stale_label.unlink()

In [19]:
# make CCPD labels

# Directory that contains the CCPD subset folders; the label loop below joins it with each subset name.
root = Path('./datasets/CCPD')

def xyxy2xywhnormalize(xyxy, img_w=720, img_h=1160):
    """
    Convert a corner-format box to a normalized center-format box.

    Args:
        xyxy: Iterable of four numbers (x1, y1, x2, y2) in pixels.
        img_w: Image width in pixels used to normalize x and w. Defaults to 720,
            the value that was previously hard-coded.
        img_h: Image height in pixels used to normalize y and h. Defaults to 1160,
            the value that was previously hard-coded.

    Returns:
        Tuple (x_center, y_center, width, height), each divided by the matching
        image dimension, so values are in [0, 1] for boxes inside the image.
    """
    x1, y1, x2, y2 = xyxy
    # Same arithmetic form as before so generated label text stays bit-identical.
    x = (x1 + x2) / (2 * img_w)
    y = (y1 + y2) / (2 * img_h)
    w = (x2 - x1) / img_w
    h = (y2 - y1) / img_h
    return x, y, w, h


def getbbox(image_path):
    """
    Read the bounding box that is encoded in an image file name.

    The stem of `image_path` is split on "-" and the third field
    (for example "151&285_417&398") is parsed into integers.

    Returns:
        List of ints; the caller in this notebook unpacks it as x1, y1, x2, y2.
    """
    bbox_field = image_path.stem.split("-")[2]
    # Both "&" and "_" act as separators inside the field.
    normalized = bbox_field.replace("&", ",").replace("_", ",")
    return [int(token) for token in normalized.split(",")]


# Write one YOLO label file per image into <subset>/labels, using class id 0
# and the box decoded from the image file name.
for subset in subsets:
    print(f'Processing subset: {subset}')
    counter = 0
    subset_path = root / subset
    if not subset_path.exists():
        # Subset folder is absent: skip it (no "Finished" line is printed for it).
        continue

    labels_path = subset_path / 'labels'
    if not labels_path.exists():
        labels_path.mkdir(parents=True, exist_ok=True)

    for image_path in subset_path.glob('*.jpg'):
        label_path = labels_path / image_path.name.replace('.jpg', '.txt')
        with label_path.open('w') as label_out:
            box = xyxy2xywhnormalize(getbbox(image_path))
            box_text = " ".join(str(value) for value in box)
            label_out.write(f'0 {box_text}\n')
            counter += 1

    print(f'Finished processing subset: {subset}, {counter} labels created.')

Processing subset: ccpd_rotate
Finished processing subset: ccpd_rotate, 10053 labels created.
Processing subset: ccpd_tilt
Finished processing subset: ccpd_tilt, 30216 labels created.
Processing subset: ccpd_weather
Finished processing subset: ccpd_weather, 9999 labels created.


In [None]:

from random import random
from datetime import datetime
import json

# Create a test dataset in YOLO dataset format by randomly picking pictures from the CCPD subsets.
# The root directory is named after the creation time, e.g. dataset-20250521235800.
# A full YOLO dataset is laid out like this:
# dataset-20250521235800
# ├── images
# │   ├── train
# │   ├── val
# │   └── test
# ├── labels
# │   ├── train
# │   ├── val
# │   └── test
# └── dataset.yaml
# Because only validation is needed this time, images/ and labels/ are each a single flat folder.
def create_test_dataset(ratio: float = 0.1, seed=None):
    """
    Build a flat YOLO-style dataset by sampling images from every CCPD subset.

    A new directory `datasets/dataset-<YYYYmmddHHMMSS>` is created with:
      - images/       sampled .jpg files from all subsets
      - labels/       the matching .txt files from `<subset>/labels`, when present
      - dataset.yaml  train/val/test all pointing at ./images, one class
      - data.json     number of images copied per subset

    Args:
        ratio: Probability in [0, 1] that any single image is copied.
            Defaults to 0.1, the previously hard-coded value.
        seed: Optional seed. When given, a private generator is used and the
            images are visited in sorted order, so the same seed selects the
            same files. When None, the module-level `random()` is used as before.

    Raises:
        ValueError: If `ratio` is outside [0, 1].
    """
    # Local import: the notebook imports only the `random` function, not the class.
    from random import Random

    if not 0.0 <= ratio <= 1.0:
        raise ValueError(f'ratio must be within [0, 1], got {ratio}')
    draw = random if seed is None else Random(seed).random

    datalist={}
    dataset_name = 'dataset-' + datetime.now().strftime('%Y%m%d%H%M%S')
    dataset_path = Path('datasets')/dataset_name
    # A directory with the same timestamp is replaced, not merged into.
    if dataset_path.exists():
        shutil.rmtree(dataset_path)
    dataset_path.mkdir(parents=True, exist_ok=True)

    # create images and labels directories
    images_path = dataset_path / 'images'
    labels_path = dataset_path / 'labels'
    images_path.mkdir(parents=True, exist_ok=True)
    labels_path.mkdir(parents=True, exist_ok=True)

    for subset in subsets:
        counter=0
        subset_path = Path(f'./datasets/CCPD/{subset}')
        subset_labels_path = Path(f'./datasets/CCPD/{subset}/labels')

        # glob() order depends on the filesystem; sorting makes a seeded run repeatable.
        for image_file in sorted(subset_path.glob('*.jpg')):
            if draw() < ratio:
                shutil.copy(image_file, images_path/image_file.name)
                label_file = subset_labels_path / image_file.name.replace('.jpg', '.txt')
                # An image without a label file is still copied and counted.
                if label_file.exists():
                    shutil.copy(label_file, labels_path/label_file.name)
                counter+=1
        print(f'copied {counter} images from {subset} subset')
        datalist[subset] = counter

    # create dataset.yaml file
    with open(dataset_path / 'dataset.yaml', 'w') as f:
        f.write('train: ./images\n')
        f.write('val: ./images\n')
        f.write('test: ./images\n')
        f.write('nc: 1\n')
        f.write('names: [\'license plate\']\n')

    # per-subset copy counts, kept next to the dataset for later reference
    with open(dataset_path/'data.json', 'w') as f:
        f.write(json.dumps(datalist))

    print(f'Finished creating test dataset: {dataset_name}, {sum(datalist.values())} images copied.')

create_test_dataset()

copied 20066 images from ccpd_base subset
copied 2055 images from ccpd_blur subset
copied 5036 images from ccpd_challenge subset
copied 1006 images from ccpd_db subset
copied 2093 images from ccpd_fn subset
copied 989 images from ccpd_rotate subset
copied 3013 images from ccpd_tilt subset
copied 964 images from ccpd_weather subset


In [5]:
from pathlib import Path
import numpy as np

# Walk through the decoding of a single CCPD file name, printing every intermediate value.
test_file = './datasets/CCPD/splits/test.txt'
datasets_root_dir = Path('./datasets/CCPD')

with open(test_file, 'r') as f:
    lines = f.readlines()

# Only the first entry of the split file is inspected.
file = datasets_root_dir / lines[0].strip()
print(f'{file = }')

# The file name stem is a "-"-separated record.
parts = file.stem.split('-')
print(f'{parts = }')

# Field 3 holds coordinate pairs written as x&y and joined with "_".
vertex_field = parts[3].replace("&", ",").replace("_", ",")
cords = [int(token) for token in vertex_field.split(",")]
print(f'{cords = }')

points = np.array(cords).reshape(-1, 2)
print(f'{points = }')

# Third field from the end: "_"-separated indices into the lookup tables.
plate_number_list = parts[-3].split('_')
print(f'{plate_number_list = }')

province_letter = provinces[int(plate_number_list[0])]
alphabet_letter = alphabets[int(plate_number_list[1])]
number_letter_list = plate_number_list[2:]
number_letter = "".join(ads[int(char)] for char in number_letter_list)
plate_number = f'{province_letter}{alphabet_letter} {number_letter}'
print(f'{plate_number = }')


file = WindowsPath('datasets/CCPD/ccpd_blur/0359-5_21-151&285_417&398-417&398_179&377_151&285_389&306-0_0_4_33_32_25_12-59-4.jpg')
parts = ['0359', '5_21', '151&285_417&398', '417&398_179&377_151&285_389&306', '0_0_4_33_32_25_12', '59', '4']
cords = [417, 398, 179, 377, 151, 285, 389, 306]
points = array([[417, 398],
       [179, 377],
       [151, 285],
       [389, 306]])
plate_number_list = ['0', '0', '4', '33', '32', '25', '12']
plate_number = '皖A E981N'


In [None]:
from pathlib import Path, WindowsPath
import cv2
import numpy as np
# CCPD dataloader
class CCPDLoader:
    """
    Index-based loader over the images listed in a CCPD split file.

    Each non-blank line of the split file is a path relative to the
    module-level `datasets_root_dir`. The annotation of every image is decoded
    from its file name once, at construction time.

    Args:
        file: Path of the split file (one relative image path per line).
        img_size: Target size in pixels of the longer image side after resizing.
    """

    def __init__(self, file: str, img_size: int=640):
        with open(file, 'r') as f:
            lines = f.readlines()
        # Blank lines (e.g. a trailing newline) are skipped: they would otherwise
        # resolve to the dataset root and fail in parse_filename.
        self.img_files = [datasets_root_dir / line.strip() for line in lines if line.strip()]
        self.img_size = img_size
        # The CCPD annotation is the file name itself
        self.annotations = [self.parse_filename(f) for f in self.img_files]

    def parse_filename(self, filename: Path):
        """
        Parse CCPD filename format:
        [Area]-[Tilt Angle]-[Bounding box coordinates]-[Four vertices locations]-[License plate number]-[Brightness]-[Blurriness].jpg
        Example: 025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg

        Args:
            filename: Path of the image; only its stem is read.

        Returns:
            Dict with:
              - "points": float32 array of shape (N, 2) built from the vertices
                field (4 rows for the example above), in original image pixels.
              - "plate_number": decoded plate text, with a space after the
                second character.
        """
        parts = filename.stem.split('-')
        coords = list(map(int, parts[3].replace("&", ",").replace("_", ",").split(",")))
        points = np.array(coords, dtype=np.float32).reshape(-1, 2)
        # Third field from the end holds "_"-separated indices into the lookup tables.
        plate_number_list = parts[-3].split('_')
        province_letter = provinces[int(plate_number_list[0])]
        alphabet_letter = alphabets[int(plate_number_list[1])]
        number_letter_list = plate_number_list[2:]
        number_letter = "".join([ads[int(char)] for char in number_letter_list])
        plate_number = province_letter + alphabet_letter + " " + number_letter
        return {"points": points, "plate_number": plate_number}

    def __len__(self):
        """Return the number of images listed in the split file."""
        return len(self.img_files)

    def __getitem__(self, index):
        """
        Load one image as RGB, resized so its longer side is about `img_size`.

        Returns:
            Tuple (img, annotation): the resized RGB array and the dict produced
            by parse_filename for that image.

        Raises:
            IndexError: If `index` is out of range.
            FileNotFoundError: If the image cannot be read.
        """
        img_file = self.img_files[index]
        img = cv2.imread(str(img_file))
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_file}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        h, w = img.shape[:2]

        # resize with same aspect ratio
        r = min(self.img_size / h, self.img_size / w)
        # Clamp to 1 so an extreme aspect ratio cannot truncate a side to 0.
        new_h, new_w = max(1, int(h * r)), max(1, int(w * r))
        img = cv2.resize(img, (new_w, new_h))
        img = np.ascontiguousarray(img)
        # NOTE(review): "points" stay in original-image pixels while the image is
        # resized by r; consumers must rescale them — confirm this is intended.
        return img, self.annotations[index]

In [7]:
import cv2

# Count the entries of the test split. The former per-line strip loop was
# removed because its result was discarded.
with open('datasets/CCPD/splits/test.txt', 'r') as f:
    lines = f.readlines()
print(f'{len(lines) = }')
        

len(lines) = 141982


In [None]:
# filename=list(Path('./datasets/CCPD/ccpd_base').glob('*.jpg'))[0]
# parts=filename.stem.split('-')
# parts[-3]
# filename="025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg"
# filename.split('-')
# with open(val_file_list,'r') as f:
#     filename=f.readlines()
# print(filename[1])
# !yolo val model=./weights/yolov8s.pt data=./datasets/CCPD/splits_test.yaml split="test"
from ultralytics import YOLO
import torch
# Select the GPU when torch reports one; `device` is not passed to the model in this cell.
device= 'cuda' if torch.cuda.is_available() else 'cpu'
# Load the checkpoint stored at ./weights/yolov8s.pt.
model = YOLO('./weights/yolov8s.pt')
# Validation calls kept for reference (currently disabled):
# model.val(data='./datasets/CCPD/splits_test.yaml', split='test')
# model.val()


Ultralytics 8.3.139  Python-3.12.6 torch-2.7.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Ti, 8188MiB)
Model summary (fused): 72 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


FileNotFoundError: '/mnt/mydisk/xiaolei/code/plate/plate_detect/ultralytics-main/ultralytics/cfg/datasets/plate.yaml' does not exist

In [23]:
from pathlib import Path
# Rewrite the train split so that every entry is an absolute path.
path = Path().resolve() / 'datasets/CCPD'
read_file = 'datasets/CCPD/splits/train.txt'
write_file = 'datasets/CCPD/test/train.txt'

# open(..., 'w') does not create missing directories, so create the target folder first.
Path(write_file).parent.mkdir(parents=True, exist_ok=True)

with open(read_file, 'r') as fr, open(write_file, 'w') as fw:
    for line in fr:
        line = line.strip()
        # A blank line would otherwise be written as the bare dataset root.
        if not line:
            continue
        fw.write(f'{path / line}\n')



# Path().resolve()/'datasets/CCPD'
