# CCPD
https://github.com/detectRecog/CCPD

## Dataset Annotations

Annotations are embedded in file name.

A sample image name is "025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg". Each name can be split into seven fields, separated by "-". Those fields are explained as follows.

- **Area**: Area ratio of license plate area to the entire picture area.

- **Tilt degree**: Horizontal tilt degree and vertical tilt degree.

- **Bounding box coordinates**: The coordinates of the left-up and the right-bottom vertices.

- **Four vertices locations**: The exact (x, y) coordinates of the four vertices of the LP in the whole image. These coordinates start from the right-bottom vertex.

- **License plate number**: Each image in CCPD has only one LP. A valid Chinese license plate number consists of seven characters: a province (1 Chinese character), a letter (1 character), and five letters or digits (5 characters). "0_0_22_27_27_33_16" is the index of each character in its array: the first index refers to `provinces`, the second to `alphabets`, and the remaining five to `ads`. These three arrays are defined as follows. The last element of each array is the letter O rather than the digit 0. We use O as a sign of "no character" because there is no O in Chinese license plate characters.
```
provinces = ["皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学", "O"]
alphabets = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
             'X', 'Y', 'Z', 'O']
ads = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
       'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O']
```

- **Brightness**: The brightness of the license plate region.

- **Blurriness**: The blurriness of the license plate region.


In [2]:
import sys
import os
import json
import glob
import cv2
import tqdm
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ExifTags
import numpy as np
from PIL import ImageOps
import glob

In [15]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default figure size (width, height) used by every plt.show() below.
plt.rcParams["figure.figsize"] = (20,10)

In [8]:
def rotate_image_by_exif(image):
    """
    Re-orient a photo according to its EXIF orientation tag.

    Parameters
    ----------
    image
        Object exposing ``_getexif()`` and ``rotate()`` (a PIL image in
        this notebook). Objects without ``_getexif`` are returned as is.

    Returns
    -------
    The rotated-and-mirrored image for orientation values 3, 6 and 8,
    otherwise the input image unchanged.

    Notes
    -----
    NOTE(review): every handled orientation is rotated and then mirrored
    horizontally; confirm the extra mirror is intended.
    """
    orientation_tag = 274  # key of orientation ExifTags
    # Counter-clockwise angle passed to ``rotate`` per orientation value.
    angle_for_orientation = {3: 180, 6: 270, 8: 90}
    try:
        if image._getexif() is None:
            return image
        exif_tags = dict(image._getexif().items())
        if orientation_tag not in exif_tags:
            return image
        angle = angle_for_orientation.get(exif_tags[orientation_tag])
        if angle is not None:
            image = image.rotate(angle, expand=True)
            image = ImageOps.mirror(image)
    except AttributeError:
        # The object carries no EXIF accessor: leave it untouched.
        pass
    return image

In [9]:
def parse_file_name_ccpd(name="025-95_113-154&383_386&473-386&473_177&454_154&383_363&402-0_0_22_27_27_33_16-37-15.jpg",
                        provinces = ("皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼",
                                     "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "警", "学", "O"),
                        alphabets = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
                                     'X', 'Y', 'Z', 'O'),
                        ads = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                               'Y', 'Z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'O')):
    """
    Decode the annotations embedded in a CCPD image file name.

    The name (directory and extension are ignored) consists of seven
    "-"-separated fields: area, tilt degrees, bounding box, four vertices,
    plate number indices, brightness and blurriness.

    Parameters
    ----------
    name
        File name or path of a CCPD image.
    provinces
        Lookup table for the first plate character (province).
    alphabets
        Lookup table for the second plate character (letter).
    ads
        Lookup table for all remaining plate characters (letters/digits).

    Returns
    -------
    dict
        ``area`` (int), ``tilt_degree`` (dict with ``horizontal`` and
        ``vertical`` ints), ``bbox`` (``[x_min, y_min, x_max, y_max]``),
        ``vertices`` (list of ``[x, y]`` pairs), ``plate_number`` (str),
        ``brightness`` (int) and ``blurriness`` (int).

    Raises
    ------
    ValueError
        If the name does not have the expected field layout or a numeric
        field is not an integer.
    IndexError
        If a plate character index is outside its lookup table.
    """
    stem = os.path.basename(name).split(".")[0]
    fields = stem.split("-")
    if len(fields) != 7:
        raise ValueError(
            f"expected 7 '-'-separated fields in CCPD file name, "
            f"got {len(fields)}: {name!r}"
        )
    area, tilt_degrees, bbox, vertices, plate_number, brightness, blurriness = fields

    horizontal_tilt_degree, vertical_tilt_degree = tilt_degrees.split("_")

    left_up, right_bottom = bbox.split("_")
    bbox = [int(item) for item in (*left_up.split("&"), *right_bottom.split("&"))]

    vertices = [[int(coord) for coord in p.split("&")] for p in vertices.split("_")]

    # Each plate position has its own lookup table: province first, then a
    # letter, then the letters+digits table for every remaining character.
    indices = [int(idx) for idx in plate_number.split("_")]
    tables = [provinces, alphabets] + [ads] * (len(indices) - 2)
    plate_number = "".join(table[idx] for table, idx in zip(tables, indices))

    return dict(
        area=int(area),
        tilt_degree=dict(
            horizontal=int(horizontal_tilt_degree),
            vertical=int(vertical_tilt_degree)
        ),
        bbox=bbox,
        vertices=vertices,
        plate_number=plate_number,
        brightness=int(brightness),
        blurriness=int(blurriness)
    )

In [21]:
def load_annotations(path_to_res_ann, 
                     path_to_res_images, 
                     path_to_images, 
                     classes = ['numberplate'], debug=True):
    """
    Convert CCPD images into YOLO-format label files and images.

    For every file matched by ``path_to_images`` the bounding box is read
    from the file name with ``parse_file_name_ccpd`` and normalised to the
    YOLO line ``class x_center y_center width height``.

    Parameters
    ----------
    path_to_res_ann
        Directory receiving one ``<image stem>.txt`` label file per image.
    path_to_res_images
        Directory receiving the (re-encoded) images.
    path_to_images
        Glob pattern selecting the source images.
    classes
        Unused; every box is written with class id 0.
    debug
        When true nothing is written: the label is printed and the image
        is shown with the box drawn on it. Images whose normalised box is
        out of range are always shown and never written.
    """
    label_id = 0
    if not debug:
        # Without the target directories open() raises and cv2.imwrite
        # fails silently.
        os.makedirs(path_to_res_ann, exist_ok=True)
        os.makedirs(path_to_res_images, exist_ok=True)

    for file_name in tqdm.tqdm(glob.glob(path_to_images)):
        info = parse_file_name_ccpd(file_name)
        image_id = os.path.basename(file_name)

        # Close the file handle as soon as the pixels are in memory.
        with Image.open(file_name) as source_image:
            pil_image = rotate_image_by_exif(source_image)
            # Force 3-channel RGB so grayscale/palette/RGBA inputs work too.
            image = np.array(pil_image.convert("RGB"))
        height, width = image.shape[:2]

        bbox = info["bbox"]
        w = bbox[2] - bbox[0]
        h = bbox[3] - bbox[1]
        mx = bbox[0] + w / 2
        my = bbox[1] + h / 2

        # class x_center y_center width height
        yolo_bbox = [label_id, mx / width, my / height, w / width, h / height]
        # A usable box has its centre inside the image and a positive size
        # smaller than the image.
        is_corrupted = not (
            0 <= yolo_bbox[1] < 1
            and 0 <= yolo_bbox[2] < 1
            and 0 < yolo_bbox[3] < 1
            and 0 < yolo_bbox[4] < 1
        )
        if is_corrupted:
            print("[corrupted]", file_name, width, height)
            print(bbox)
            print(yolo_bbox)
        to_txt_data = [" ".join([str(item) for item in yolo_bbox])]

        res_path = f'{path_to_res_ann}/{os.path.splitext(image_id)[0]}.txt'
        if debug or is_corrupted:
            cv2.rectangle(image, 
                (int(bbox[0]), int(bbox[1])), 
                (int(bbox[2]), int(bbox[3])), 
                (0,120,255), 
                3)
            print(res_path)
            print("\n".join(to_txt_data))
            print("______________________")
            plt.imshow(image)
            plt.show()
        else:
            with open(res_path, "w") as wFile:
                wFile.write("\n".join(to_txt_data))
            # PIL yields RGB while cv2.imwrite expects BGR; convert so the
            # saved image keeps its original colours.
            cv2.imwrite(os.path.join(path_to_res_images, image_id),
                        cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

<img src="https://user-images.githubusercontent.com/26833433/98809572-0bc4d580-241e-11eb-844e-eee756f878c2.png">

In [22]:
# Output directories for the generated label files and images.
path_to_res_ann = "/var/www/nomeroff-net/yolov5/npdata/labels/train"
path_to_res_images = "/var/www/nomeroff-net/yolov5/npdata/images/train"
# Glob pattern selecting the CCPD source images.
path_to_images = "/var/www/nomeroff-net/datasets/numberplate_china/not_recognizion/*"

# debug=False: write label files and images instead of only previewing them.
load_annotations(
    path_to_res_ann=path_to_res_ann,
    path_to_res_images=path_to_res_images,
    path_to_images=path_to_images,
    debug=False,
)

100%|██████████| 1871/1871 [00:16<00:00, 111.54it/s]
