# LPRNet: License Plate Recognition via Deep Neural Networks

Download the original code from GitHub 
https://github.com/sirius-ai/LPRNet_Pytorch

https://arxiv.org/abs/1806.10447v1

Sergey Zherzdev, Alexey Gruzdev

This paper proposes LPRNet - end-to-end method for Automatic License Plate Recognition without preliminary character segmentation. Our approach is inspired by recent breakthroughs in Deep Neural Networks, and works in real-time with recognition accuracy up to 95% for Chinese license plates: 3 ms/plate on nVIDIA GeForce GTX 1080 and 1.3 ms/plate on Intel Core i7-6700K CPU. LPRNet consists of the lightweight Convolutional Neural Network, so it can be trained in end-to-end way. To the best of our knowledge, LPRNet is the first real-time License Plate Recognition system that does not use RNNs. As a result, the LPRNet algorithm may be used to create embedded solutions for LPR that feature high level accuracy even on challenging Chinese license plates.

Subjects:	Computer Vision and Pattern Recognition (cs.CV)

Cite as:	arXiv:1806.10447 [cs.CV]
 	(or arXiv:1806.10447v1 [cs.CV] for this version)


In [1]:
# from torch.utils.data import *
from imutils import paths
# import numpy as np
import random
# import cv2
# import os
import sys
#sys.path.insert(0, os.getcwd())
##from data.load_data import CHARS, CHARS_DICT, LPRDataLoader
##from PIL import Image, ImageDraw, ImageFont
###from model.LPRNet import build_lprnet
#from LPRNet import build_lprnet
# import LPRNet
# import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import *
from torch import optim
import torch.nn as nn
import torch
import numpy as np
##import argparse

import time
import cv2
import os

## Neural Network
From `model/LPRNet/LPRNet.py`

In [2]:
class small_basic_block(nn.Module):
    def __init__(self, ch_in, ch_out):
        super(small_basic_block, self).__init__()
        self.block = nn.Sequential(
            nn.Conv2d(ch_in, ch_out // 4, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(3, 1), padding=(1, 0)),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(1, 3), padding=(0, 1)),
            nn.ReLU(),
            nn.Conv2d(ch_out // 4, ch_out, kernel_size=1),
        )
    def forward(self, x):
        return self.block(x)

class LPRNet(nn.Module):
    def __init__(self, lpr_max_len, phase, class_num, dropout_rate):
        super(LPRNet, self).__init__()
        self.phase = phase
        self.lpr_max_len = lpr_max_len
        self.class_num = class_num
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1), # 0
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),  # 2
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)),
            small_basic_block(ch_in=64, ch_out=128),    # *** 4 ***
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),  # 6
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)),
            small_basic_block(ch_in=64, ch_out=256),   # 8
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 10
            small_basic_block(ch_in=256, ch_out=256),   # *** 11 ***
            nn.BatchNorm2d(num_features=256),   # 12
            nn.ReLU(),
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)),  # 14
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1),  # 16
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 18
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=256, out_channels=class_num, kernel_size=(13, 1), stride=1), # 20
            nn.BatchNorm2d(num_features=class_num),
            nn.ReLU(),  # *** 22 ***
        )
        self.container = nn.Sequential(
            nn.Conv2d(in_channels=448+self.class_num, out_channels=self.class_num, kernel_size=(1, 1), stride=(1, 1)),
            # nn.BatchNorm2d(num_features=self.class_num),
            # nn.ReLU(),
            # nn.Conv2d(in_channels=self.class_num, out_channels=self.lpr_max_len+1, kernel_size=3, stride=2),
            # nn.ReLU(),
        )

    def forward(self, x):
        keep_features = list()
        for i, layer in enumerate(self.backbone.children()):
            x = layer(x)
            if i in [2, 6, 13, 22]: # [2, 4, 8, 11, 22]
                keep_features.append(x)

        global_context = list()
        for i, f in enumerate(keep_features):
            if i in [0, 1]:
                f = nn.AvgPool2d(kernel_size=5, stride=5)(f)
            if i in [2]:
                f = nn.AvgPool2d(kernel_size=(4, 10), stride=(4, 2))(f)
            f_pow = torch.pow(f, 2)
            f_mean = torch.mean(f_pow)
            f = torch.div(f, f_mean)
            global_context.append(f)

        x = torch.cat(global_context, 1)
        x = self.container(x)
        logits = torch.mean(x, dim=2)

        return logits

def build_lprnet(lpr_max_len=8, phase=False, class_num=66, dropout_rate=0.5):

    Net = LPRNet(lpr_max_len, phase, class_num, dropout_rate)

    if phase == "train":
        return Net.train()
    else:
        return Net.eval()


## Create a modified DataLoader
The following code has been adjusted from the `data/load_data.py` file in the original repo

In [3]:
# The pre-trained model uses this encoding.
# Even though we only use the Latin alphabet
# we need to keep the same encoding as the trained model
CHARS = ['京', '沪', '津', '渝', '冀', '晋', '蒙', '辽', '吉', '黑',
         '苏', '浙', '皖', '闽', '赣', '鲁', '豫', '鄂', '湘', '粤',
         '桂', '琼', '川', '贵', '云', '藏', '陕', '甘', '青', '宁',
         '新',
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
         'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
         'W', 'X', 'Y', 'Z', 'I', 'O', '-'
         ]

CHARS_DICT = {char:i for i, char in enumerate(CHARS)}

In [4]:
class LPRDataLoader(Dataset):
    def __init__(self, img_dir, imgSize, lpr_max_len, PreprocFun=None):
        self.img_dir = img_dir
        self.img_paths = []
        for i in range(len(img_dir)):
            self.img_paths += [el for el in paths.list_images(img_dir[i])]
        random.shuffle(self.img_paths)
        self.img_size = imgSize
        self.lpr_max_len = lpr_max_len
        if PreprocFun is not None:
            self.PreprocFun = PreprocFun
        else:
            self.PreprocFun = self.transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, index):
        filename = self.img_paths[index]
        Image = cv2.imread(filename)
        height, width, _ = Image.shape
        if height != self.img_size[1] or width != self.img_size[0]:
            Image = cv2.resize(Image, self.img_size)
        Image = self.PreprocFun(Image)

        basename = os.path.basename(filename)
        imgname, suffix = os.path.splitext(basename)
        imgname = imgname.split("-")[0].split("_")[0]
        label = list()
        for c in imgname:
            # one_hot_base = np.zeros(len(CHARS))
            # one_hot_base[CHARS_DICT[c]] = 1
            label.append(CHARS_DICT[c])

        if len(label) == 8:
            if self.check(label) == False:
                print(imgname)
                assert 0, "Error label ^~^!!!"

        return Image, label, len(label)

    def transform(self, img):
        img = img.astype('float32')
        img -= 127.5
        img *= 0.0078125
        img = np.transpose(img, (2, 0, 1))

        return img

    def check(self, label):
        if label[2] != CHARS_DICT['D'] and label[2] != CHARS_DICT['F'] \
                and label[-1] != CHARS_DICT['D'] and label[-1] != CHARS_DICT['F']:
            print("Error label, Please check!")
            return False
        else:
            return True


In [5]:
# Modification from the original LPRDataLoader. This version is customized for the Atlanta dataset
class ATLDataLoader(Dataset):
    def __init__(self, img_dir, imgSize, lpr_max_len, PreprocFun=None):
        self.img_dir = img_dir
        self.img_paths = []
        for i in range(len(img_dir)):
            self.img_paths += [el for el in paths.list_images(img_dir[i])]
        random.shuffle(self.img_paths)
        self.img_size = imgSize
        self.lpr_max_len = lpr_max_len
        if PreprocFun is not None:
            self.PreprocFun = PreprocFun
        else:
            self.PreprocFun = self.transform

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, index):
        filename = self.img_paths[index]
        Image = cv2.imread(filename)
        Image = Image[16:64,80:200,:]
        height, width, _ = Image.shape
        if height != self.img_size[1] or width != self.img_size[0]:
            Image = cv2.resize(Image, self.img_size)
        Image = self.PreprocFun(Image)

        basename = os.path.basename(filename)
        imgname, suffix = os.path.splitext(basename)
        imgname = imgname.split("-")[0].split("_")[0]
        label = list()
        for c in imgname:
            # one_hot_base = np.zeros(len(CHARS))
            # one_hot_base[CHARS_DICT[c]] = 1
            label.append(CHARS_DICT[c])

        if len(label) == 8:
            if self.check(label) == False:
                print(imgname)
                assert 0, "Error label ^~^!!!"

        return Image, label, len(label)

    def transform(self, img):
        img = img.astype('float32')
        img -= 127.5
        img *= 0.0078125
        img = np.transpose(img, (2, 0, 1))

        return img

    def check(self, label):
        if label[2] != CHARS_DICT['D'] and label[2] != CHARS_DICT['F'] \
                and label[-1] != CHARS_DICT['D'] and label[-1] != CHARS_DICT['F']:
            print("Error label, Please check!")
            return False
        else:
            return True

## Read LPR Images

In [6]:
def collate_fn(batch):
    imgs = []
    labels = []
    lengths = []
    for _, sample in enumerate(batch):
        img, label, length = sample
        imgs.append(torch.from_numpy(img))
        labels.extend(label)
        lengths.append(length)
    labels = np.asarray(labels).flatten().astype(np.float32)

    return (torch.stack(imgs, 0), torch.from_numpy(labels), lengths)

In [7]:
def show(img, label, target):
    #     img = np.transpose(img, (1, 2, 0))
    #     img *= 128.
    #     img += 127.5
    #     img = img.astype(np.uint8)

    lb = ""
    for i in label:
        lb += CHARS[i]
    tg = ""
    for j in target.tolist():
        tg += CHARS[int(j)]

    flag = "F"
    if lb == tg:
        flag = "T"
    # img = cv2.putText(img, lb, (0,16), cv2.FONT_HERSHEY_COMPLEX_SMALL, 0.6, (0, 0, 255), 1)
    #     img = cv2ImgAddText(img, lb, (0, 0))
    #     cv2.imshow("test", img)
    print("target: ", tg, " ### {} ### ".format(flag), "predict: ", lb)
    #     cv2.waitKey()
    #     cv2.destroyAllWindows()

In [8]:
# def get_parser():
#     parser = argparse.ArgumentParser(description='parameters to train net')
#     parser.add_argument('--img_size', default=[94, 24], help='the image size')
#     parser.add_argument('--test_img_dirs', default="./data/test", help='the test images path')
#     parser.add_argument('--dropout_rate', default=0, help='dropout rate.')
#     parser.add_argument('--lpr_max_len', default=8, help='license plate number max length.')
#     parser.add_argument('--test_batch_size', default=100, help='testing batch size.')
#     parser.add_argument('--phase_train', default=False, type=bool, help='train or test phase flag.')
#     parser.add_argument('--num_workers', default=8, type=int, help='Number of workers used in dataloading')
#     parser.add_argument('--cuda', default=False, type=bool, help='Use cuda to train model')
#     parser.add_argument('--show', default=False, type=bool, help='show test image and its predict result or not.')
#     parser.add_argument('--pretrained_model', default='./weights/Final_LPRNet_model.pth', help='pretrained base model')

#     args = parser.parse_args()


In [11]:
img_size = (94, 24) ## [94, 24] # [280, 80]  # the image size (80, 280)
##test_img_dirs = "./data_redatcted/ir_patch"    # the test images path
test_img_dirs = "~/GITHUB/LPRNet_Pytorch/data/test" # downloaded from GitHub 
dropout_rate = 0 
lpr_max_len = 8    # , help='license plate number max length.')
test_batch_size = 100 ## 100  # , help='testing batch size.')
phase_train = False  #, type=bool, help='train or test phase flag.')
num_workers = 0 ### 8    #, type=int, help='Number of workers used in dataloading')
### cuda = False     # , type=bool, help='Use cuda to train model')
### show =      #, type=bool, help='show test image and its predict result or not.')
pretrained_model = './LPRNet/weights/Final_LPRNet_model.pth'   ## , help='pretrained base model')



lprnet = build_lprnet(lpr_max_len = lpr_max_len, phase = False,
                      class_num = len(CHARS), dropout_rate = dropout_rate)
device = "cpu"  ###torch.device("cuda:0" if args.cuda else "cpu")
lprnet.to(device)
print("Successful to build network!")

# # load pretrained model
# if args.pretrained_model:
lprnet.load_state_dict(torch.load(pretrained_model, map_location=torch.device('cpu')))
print("load pretrained model successful!")

# else:
#     print("[Error] Can't found pretrained mode, please check!")
#     return False

#test_img_dirs = os.path.expanduser(args.test_img_dirs)
datasets = ATLDataLoader(test_img_dirs.split(','), img_size, lpr_max_len)
datasets = LPRDataLoader(test_img_dirs.split(','), img_size, lpr_max_len)


Successful to build network!
load pretrained model successful!


In [12]:


epoch_size = len(datasets) // test_batch_size
batch_iterator = iter(DataLoader(datasets, test_batch_size,
                                 shuffle=True, num_workers=num_workers, collate_fn=collate_fn))

Tp = 0
Tn_1 = 0
Tn_2 = 0
t1 = time.time()
for i in range(epoch_size):
    # load train data
    images, labels, lengths = next(batch_iterator)
    if i <5:
        print(f"{i}\t{len(images)}\t{len(labels)}\t{len(lengths)}")
    start = 0
    targets = []
    for length in lengths:
        label = labels[start:start+length]
        targets.append(label)
        start += length
    targets = np.array([el.numpy() for el in targets])
    imgs = images.numpy().copy()

    images = Variable(images)

    # forward
    prebs = lprnet(images)
    # greedy decode
    prebs = prebs.cpu().detach().numpy()
    preb_labels = list()
    for i in range(prebs.shape[0]):
        preb = prebs[i, :, :]
        preb_label = list()
        for j in range(preb.shape[1]):
            preb_label.append(np.argmax(preb[:, j], axis=0))
        no_repeat_blank_label = list()
        pre_c = preb_label[0]
        if pre_c != len(CHARS) - 1:
            no_repeat_blank_label.append(pre_c)
        for c in preb_label: # dropout repeate label and blank label
            if (pre_c == c) or (c == len(CHARS) - 1):
                if c == len(CHARS) - 1:
                    pre_c = c
                continue
            no_repeat_blank_label.append(c)
            pre_c = c
        preb_labels.append(no_repeat_blank_label)
    for i, label in enumerate(preb_labels):
        # show image and its predict label
        #         if args.show:
        show(imgs[i], label, targets[i])
        if len(label) != len(targets[i]):
            Tn_1 += 1
            continue
        if (np.asarray(targets[i]) == np.asarray(label)).all():
            Tp += 1
        else:
            Tn_2 += 1
Acc = Tp * 1.0 / (Tp + Tn_1 + Tn_2)
print("[Info] Test Accuracy: {} [{}:{}:{}:{}]".format(Acc, Tp, Tn_1, Tn_2, (Tp+Tn_1+Tn_2)))
t2 = time.time()
print("[Info] Test Speed: {}s 1/{}]".format((t2 - t1) / len(datasets), len(datasets)))


ValueError: num_samples should be a positive integer value, but got num_samples=0