# Vehicle License Plate Recognition 之 測試文檔

## 一、img2tfrecord

In [23]:
import os
import cv2
import numpy as np
import tensorflow as tf


def write_tfrecords(file_name, data_labels, data_datas):
    """
    創建tfrecords文件並寫入數據
    :param file_name: (string)文件名
    :param data_labels: (list)標籤
    :param data_datas: (list)數據
    :return: True
    """
    writer = tf.python_io.TFRecordWriter(file_name + ".tfrecords")
    num = len(data_labels)
    for i in range(num):
        if (i + 1) % 1000 == 0:
            print("以處理{0}數據集{1}張".format(file_name, i + 1))
        label = data_labels[i].encode()
        data = np.resize(data_datas[i], [1, 3456])[0].tostring()
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(bytes_list=tf.train.BytesList(value=[label])),
            'img_val': tf.train.Feature(bytes_list=tf.train.BytesList(value=[data]))
        }))  # example對象 對label及img_val 進行封裝
        writer.write(example.SerializeToString())
        i += 1
    print("{0}數據集處理完成".format(file_name))
    writer.close()
    return True


def get_data_list(path="车牌字符识别训练数据"):
    """
    遍歷目錄所有文件，將圖片格式化成24*48大小
    :param path: (string)遍歷路徑
    :return: (turp)圖片數據, 圖片標籤
    """
    fi = []
    rt = []
    for root, dirs, files in os.walk(path):
        for f in files:
            if f != '车牌字符识别训练数据' and f != '.DS_Store':
                # 路徑：os.path.join(root, f)
                im = cv2.resize(cv2.imread(os.path.join(root, f)), (24, 48), interpolation=cv2.INTER_CUBIC)
                fi.append(im)
                rt.append(root.split('/')[-1])
    return fi, rt


def make_data():
    """
    創建數據集，隨機打亂並存儲
    :return: None
    """
    from random import shuffle
    filedata, filelabel = get_data_list('./测试图像集/')
    x = [i for i in range(len(filelabel))]  # 共計18499文件
    shuffle(x)
    a=[];b=[];
    for i in x:
        a.append(filelabel[i])
        b.append(filedata[i])
    print('共計'+str(len(a))+'張')
    write_tfrecords('testdata',a,b)

In [24]:
make_data()

共計5428張
以處理testdata數據集1000張
以處理testdata數據集2000張
以處理testdata數據集3000張
以處理testdata數據集4000張
以處理testdata數據集5000張
testdata數據集處理完成


# 二、測試模型輸出結果

In [29]:
import cv2
import os
import keras
from PIL import Image
import tensorflow as tf
import numpy as np
from keras.models import model_from_json, load_model
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Conv2D, BatchNormalization, MaxPooling2D, Input
from keras.layers import AveragePooling2D, Flatten, GlobalMaxPooling2D
from keras import layers
from keras.models import Model

config = {'黑': 36, 'B': 19, '桂': 45, '2': 62, '陕': 29, '浙': 48, 'N': 3, '1': 58, 'K': 20, 'T': 6, '津': 49, '闽': 44,
          'X': 17, '粤': 47, 'Q': 16, 'V': 15, '琼': 41, '皖': 46, '沪': 32, '冀': 52, '鲁': 50, '贵': 35, '川': 31, '吉': 25,
          '豫': 34, '6': 57, 'L': 21, '5': 63, '晋': 28, '4': 60, 'E': 18, '云': 38, 'S': 7, 'J': 12, 'G': 4, '赣': 30,
          'A': 8, 'D': 14, '湘': 40, '鄂': 51, '0': 55, '蒙': 43, 'Y': 22, '辽': 37, 'U': 2, '3': 61, '9': 54, 'W': 24,
          'Z': 5, 'P': 23, 'F': 9, 'M': 11, '8': 59, '7': 56, 'R': 1, 'H': 10, '青': 27, 'C': 13, '苏': 33, '甘': 42,
          '宁': 26, '京': 53, '渝': 39}
# config_ = dict(zip(config.values(), config.keys()))

config_ = {1: 'R', 2: 'U', 3: 'N', 4: 'G', 5: 'Z', 6: 'T', 7: 'S', 8: 'A', 9: 'F', 10: 'H', 11: 'M', 12: 'J', 13: 'C',
           14: 'D', 15: 'V', 16: 'Q', 17: 'X', 18: 'E', 19: 'B', 20: 'K', 21: 'L', 22: 'Y', 23: 'P', 24: 'W', 25: '吉',
           26: '宁', 27: '青', 28: '晋', 29: '陕', 30: '赣', 31: '川', 32: '沪', 33: '苏', 34: '豫', 35: '贵', 36: '黑', 37: '辽',
           38: '云', 39: '渝', 40: '湘', 41: '琼', 42: '甘', 43: '蒙', 44: '闽', 45: '桂', 46: '皖', 47: '粤', 48: '浙', 49: '津',
           50: '鲁', 51: '鄂', 52: '冀', 53: '京', 54: '9', 55: '0', 56: '7', 57: '6', 58: '1', 59: '8', 60: '4', 61: '3',
           62: '2', 63: '5'}

# print(config_)


def conv_block(input_tensor, bn_axis, filters, phase, name, strides=(1, 1)):
    """
    Conv2D 塊，雙路雙卷積計算
    :param input_tensor:(tensor) 輸入張量
    :param filters:(tuple) 卷積核打包
    :param strides:(int) 卷積步長
    :param BN_axis:(int) 規範化卷積軸
    :return: model
    """
    filters1, filters2, filters3 = filters  # 解包卷積核數量
    Conv_base_name = 'Conv_' + name + '_' + str(phase) + '_phase_'
    BN_base_name = 'BN_' + name + '_' + str(phase) + '_phase_'
    x = Conv2D(
        filters=filters1, kernel_size=(1, 1), strides=strides, name=Conv_base_name + '2a'
    )(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=BN_base_name + '2a')(x)
    x = Activation(activation='relu')(x)

    x = Conv2D(
        filters=filters2, kernel_size=(1, 1), strides=strides, name=Conv_base_name + '2b'
    )(x)
    x = BatchNormalization(axis=bn_axis, name=BN_base_name + '2b')(x)
    x = Activation(activation='relu')(x)

    x = Conv2D(
        filters=filters3, kernel_size=(1, 1), strides=strides, name=Conv_base_name + '2c'
    )(x)
    x = BatchNormalization(axis=bn_axis, name=BN_base_name + '2c')(x)
    x = Activation(activation='relu')(x)

    y = Conv2D(filters3, (1, 1), strides=strides, name=Conv_base_name + '1a')(input_tensor)
    y = BatchNormalization(axis=bn_axis, name=BN_base_name + '1b')(y)

    x = layers.add([x, y])
    a = Activation('relu')(x)

    return a


def identity_block(input_tensor, bn_axis, filters, phase, name, strides=(1, 1)):
    """
    Conv2D 塊，雙路單卷積計算
    :param input_tensor:(tensor) 輸入張量
    :param filters:(tuple) 卷積核打包
    :param strides:(int) 卷積步長
    :param BN_axis:(int) 規範化卷積軸
    :return: model
    """
    filters1, filters2, filters3 = filters  # 解包卷積核數量
    Conv_base_name = 'Conv_' + name + '_' + str(phase) + '_phase_'
    BN_base_name = 'BN_' + name + '_' + str(phase) + '_phase_'
    x = Conv2D(
        filters=filters1, kernel_size=(1, 1), strides=strides, name=Conv_base_name + '2a'
    )(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=BN_base_name + '2a')(x)
    x = Activation(activation='relu')(x)

    x = Conv2D(
        filters=filters2, kernel_size=(1, 1), strides=strides, name=Conv_base_name + '2b'
    )(x)
    x = BatchNormalization(axis=bn_axis, name=BN_base_name + '2b')(x)
    x = Activation(activation='relu')(x)

    x = Conv2D(
        filters=filters3, kernel_size=(1, 1), strides=strides, name=Conv_base_name + '2c'
    )(x)
    x = BatchNormalization(axis=bn_axis, name=BN_base_name + '2c')(x)
    x = Activation(activation='relu')(x)

    x = layers.add([x, input_tensor])
    a = Activation('relu')(x)

    return a


def my_resnet():
    inputs = Input(shape=(1, 24, 48))

    x = Conv2D(
        filters=2, kernel_size=(2, 4), padding='same', name='Conv1', data_format='channels_first')(inputs)
    x = BatchNormalization(axis=1, name='BN_Conv1')(x)
    x = Activation('relu')(x)
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2), data_format='channels_first')(x)

    x = conv_block(input_tensor=x, bn_axis=1, filters=(2, 2, 8), phase=2, name='a')
    x = identity_block(input_tensor=x, bn_axis=1, filters=(2, 2, 8), phase=2, name='b')
    x = identity_block(input_tensor=x, bn_axis=1, filters=(2, 2, 8), phase=2, name='c')

    x = conv_block(input_tensor=x, bn_axis=1, filters=(4, 4, 64), phase=3, name='a')
    x = identity_block(input_tensor=x, bn_axis=1, filters=(4, 4, 64), phase=3, name='b')
    x = identity_block(input_tensor=x, bn_axis=1, filters=(4, 4, 64), phase=3, name='c')

    x = AveragePooling2D((2, 2), name='avg_pool')(x)
    x = Flatten()(x)
    x = Dense(64, activation='softmax', name='softmax')(x)
    #     x = GlobalMaxPooling2D()(x)

    model = Model(inputs, x, name='My_Resnet')
    return model


def create_model():
    """返回一個已創建好的 resnet model"""
    model = my_resnet()
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model


def get_data_list(path="车牌字符识别训练数据"):
    fi = []
    rt = []
    for root, dirs, files in os.walk(path):
        for f in files:
            if f != '车牌字符识别训练数据' and f != '.DS_Store':
                # 路徑：os.path.join(root, f)
                im = cv2.resize(cv2.imread(os.path.join(root, f)), (24, 48), interpolation=cv2.INTER_CUBIC)
                fi.append(im)
                rt.append(root.split('/')[-1])
                # print(cv2.imread(os.path.join(root, f)))
                # new = fname[0] + 'b' + fname[1]
                # os.rename(os.path.join(rt, f), os.path.join(rt, new))
    print(len(fi))
    return fi, rt


def get_tfrecord(filename='train.tfrecords', num=1000):
    # 將製作好的 tfrecord 數據集文件讀取出來,並轉換成圖片,以驗證數據是否準確無誤
    print('開始導入數據' + filename)
    filename_queue = tf.train.string_input_producer([filename])  # 讀入數據流
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)  # 返回文件名和文件
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'label': tf.FixedLenFeature([], tf.string),
                                           'img_val': tf.FixedLenFeature([], tf.string),
                                       })  # 取出包含image和label的feature对象
    image = tf.decode_raw(features['img_val'], tf.uint8)
    label = tf.cast(features['label'], tf.string)

    imgdata = []
    imglabel = []
    with tf.Session() as sess:  # 開始一個對話
        init_op = tf.global_variables_initializer()
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        tem = np.zeros((num, 64))
        for i in range(num):
            if i % 1000 == 0:
                print('已完成導入數據' + filename + str(i) + '個')
            example, l = sess.run([image, label])  # 在會話中取出image和label數據
            data = np.resize(example, [48, 24, 3])
            data = turn_two_color(data)
            tem[i][config[l.decode('utf-8')]] = 1
            imgdata.append([data])
            imglabel.append(tem[0])
        coord.request_stop()
        coord.join(threads)
    print('導入數據' + filename + '完成')
    return imgdata, tem


def turn_two_color(data):
    import cv2
    grayImage = cv2.cvtColor(data, cv2.COLOR_BGR2GRAY)

    a = 0
    for i in grayImage:
        b = 0
        for j in i:
            if j < grayImage.mean():  # 比对均值
                grayImage[a][b] = 0
            else:
                grayImage[a][b] = 255
            b += 1
        a += 1
        del b
    grayImage = np.resize(grayImage, [24, 48])
    return grayImage



def test_model(ts_images=None, ts_labels=None, num=1000.0, filename='./resnet.h5'):
    ts_images, ts_labels = get_tfrecord('testdata.tfrecords', num=int(num))
    model = load_model(filename)
    pre = model.predict(ts_images)
    tr = 0
    for a, b in zip(ts_labels, pre):
        a = np.argmax(a)
        b = np.argmax(b)
        if a == b:
            tr += 1
        else:
            print('真確 ' + config_[a] + ' 預測 ' + config_[b])
    ls = tr / num
    print("正確率為：{0}".format(ls))


In [30]:
test_model(num=5428)

開始導入數據testdata.tfrecords
已完成導入數據testdata.tfrecords0個
已完成導入數據testdata.tfrecords1000個
已完成導入數據testdata.tfrecords2000個
已完成導入數據testdata.tfrecords3000個
已完成導入數據testdata.tfrecords4000個
已完成導入數據testdata.tfrecords5000個
導入數據testdata.tfrecords完成
真確 琼 預測 粤
真確 渝 預測 沪
真確 A 預測 4
真確 U 預測 1
真確 渝 預測 苏
真確 U 預測 0
真確 U 預測 1
真確 V 預測 川
真確 皖 預測 粤
真確 闽 預測 川
真確 A 預測 1
真確 沪 預測 豫
真確 A 預測 1
真確 川 預測 5
真確 2 預測 1
真確 F 預測 E
真確 闽 預測 川
真確 G 預測 0
真確 U 預測 1
真確 C 預測 桂
真確 豫 預測 湘
真確 G 預測 0
真確 S 預測 5
真確 陕 預測 D
真確 T 預測 津
真確 陕 預測 1
真確 A 預測 1
真確 G 預測 0
真確 G 預測 8
真確 C 預測 2
真確 G 預測 0
真確 京 預測 吉
真確 G 預測 0
真確 R 預測 P
真確 L 預測 赣
真確 津 預測 4
真確 L 預測 1
真確 陕 預測 赣
真確 G 預測 0
真確 W 預測 Q
真確 S 預測 5
真確 Y 預測 1
真確 晋 預測 鲁
真確 鲁 預測 J
真確 E 預測 F
真確 宁 預測 5
真確 Z 預測 3
真確 A 預測 1
真確 V 預測 M
真確 7 預測 1
真確 M 預測 1
真確 G 預測 0
真確 辽 預測 川
真確 1 預測 川
真確 琼 預測 8
真確 C 預測 5
真確 X 預測 Y
真確 S 預測 5
真確 G 預測 5
真確 F 預測 沪
真確 G 預測 0
真確 U 預測 1
真確 渝 預測 沪
真確 A 預測 4
真確 粤 預測 1
真確 7 預測 Y
真確 N 預測 1
真確 黑 預測 湘
真確 A 預測 4
真確 A 預測 1
真確 A 預測 4
真確 N 預測 Y
真確 0 預測 1
真確 琼 預測 4
真確 黑 預測 湘
真確 V 預測 M
真確 U 預測

In [31]:
def get_data_picture(path="测试图像集"):
    fi = []
    rt = []
    for root, dirs, files in os.walk(path):
        for f in files:
            if f != '测试图像集' and f != '.DS_Store':
                rt.append(root.split('/')[-1])
    return fi, rt


def test_model_zh(ts_images=None, ts_labels=None, num=1000.0, filename='./resnet.h5'):
    ts_images, ts_labels = get_tfrecord('testdata.tfrecords', num=int(num))
    _, name_list = get_data_picture()
    zi = {}
    for i in name_list:
        if i in zi:
            zi[i] += 1
        else:
            zi[i] = 1
    error = {}
    model = load_model(filename)
    pre = model.predict(ts_images)
    tr=0
    for a, b in zip(ts_labels, pre):
        a = np.argmax(a)
        b = np.argmax(b)
        if a != b:
            a=config_[a]
            if a in error:
                error[a] += 1
            else:
                error[a] = 1
        else:
            tr+=1
    print(1-tr/18499)
    # print(error)
    for i in error.keys():
        zi[i] = (zi[i] - error[i]) / zi[i]
    for i in zi.keys():
        if zi[i] > 1:
            zi[i] = 1
    print(zi)
    
test_model_zh()

開始導入數據testdata.tfrecords
已完成導入數據testdata.tfrecords0個
導入數據testdata.tfrecords完成
0.9492404994864587
{'D': 1, '甘': 1, '6': 1, '赣': 1, '陕': 0.8333333333333334, '吉': 1, 'X': 0.9979716024340771, '闽': 0.9615384615384616, '豫': 0.9848484848484849, '蒙': 1, 'G': 0.855072463768116, 'M': 0.9878048780487805, 'P': 1, 'B': 1, '云': 1, '浙': 1, 'F': 0.975, '苏': 1, '宁': 0.9, 'Z': 0.9857142857142858, '渝': 0.95, 'S': 0.9727272727272728, 'R': 0.9855072463768116, '鄂': 1, 'Y': 0.995, '鲁': 0.975, 'N': 1, 'V': 0.9710144927536232, '2': 0.9932885906040269, '4': 1, 'E': 0.9966666666666667, 'C': 0.9625, '沪': 0.9655172413793104, '3': 1, '冀': 1, 'J': 1, '粤': 1, 'K': 1, 'W': 0.9875, '津': 0.9166666666666666, '川': 0.9791666666666666, '晋': 0.9473684210526315, '0': 1, '辽': 0.95, '湘': 1, '1': 0.9947089947089947, '琼': 0.8571428571428571, '青': 1, '桂': 1, '5': 1, 'A': 0.9831081081081081, '贵': 1, 'Q': 1, '黑': 1, 'T': 0.9897959183673469, '7': 0.9921875, 'H': 1, '9': 1, '京': 0.9705882352941176, '皖': 0.9852941176470589, 'U': 0.9428

加上一張召回率視圖.
<img src="./img/error2.png">

# 三、終結

1. 訓練數據集的大小和與測試集的關係有很大的相關性.
2. 圖像預處理的方式占很大一部分影響因素,比如本次的訓練模型中,圖像的預處理採用了均值二值化處理,很大程度上的避免了圖像無法分離.

#### 完成時間:2017年12月11日,完成人:曹國鴻