## 相关说明
- 数据集采用的是ctpn进行切分， 手动标记数据1000张， 然后采用ctpn进行切割编码。
- 所有主干网络，均采用keras-applications中的代码， 其中InceptionResNetV2代码有修改， 将其中的padding操作参数全部修改成了“same”
- 一共采用了4个主干网络： InceptionResNetV2， Xception， ResNet50， DenseNet169
- 数据集采用了`sklearn.model_selection.KFold`划分成5份， 由于时间关系， 每个主干网络并没有训练5份， 而是训练了0,1，4一共3份， 还有一个原因是另外2份一直拟合的不好。
- 识别部分采用的是 `cnn + ctc`， 训练集就是官方发布的39620张数据，然后采用ctpn进行切割编码，其中删除了几十张较差的图片
- 训练集图片裁剪后尺寸大小不一致， 全部以高度为80进行等比缩放， 统一尺寸处理到：512×80， 长度超过512的进行resize， 不足512的左右补黑
- 单模型的校验集全对率99.2%附近
- 预测部分： 对于每一个模型， 我会根据图片左右补黑的不同尺寸， 预测出5份结果。
- 每一个主干网络， 根据数据集的不同， 我训练了3个模型， 然后每个模型预测了5份结果， 我采用了4个不同的主干网络， 因此一共得到了4×3×5=60份不同的预测结果。
- 结果投票， 对同一个图片， 预测到的60个结果， 60个结果的每一个字符， 都进行投票， 得票最多的字符， 就是最终字符

In [None]:
import cv2
import random
import os
import pandas as pd
import numpy as np
from imgaug import augmenters as iaa
from tqdm import tqdm
import shutil
from sklearn.model_selection import KFold
from tensorflow.keras.applications.resnet50 import *
from tensorflow.keras.applications.densenet import *
from tensorflow.keras.applications.nasnet import *
from tensorflow.keras.applications.inception_resnet_v2 import *
from tensorflow.keras.applications.inception_v3 import *
from tensorflow.keras.applications.xception import *
from tensorflow.keras.applications.vgg16 import *
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.activations import *
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import *
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import *
import tensorflow as tf 
print(tf.__version__)
import matplotlib.pyplot as plt

# Limit this process to 90% of GPU memory and hand the configured session to Keras.
# NOTE(review): tf.ConfigProto / tf.Session are TensorFlow 1.x entry points —
# confirm the installed version (printed above) still exposes them.
config = tf.ConfigProto() 
config.gpu_options.per_process_gpu_memory_fraction = 0.9
K.set_session(tf.Session(config=config))


# CTC loss wrapper, meant to be called from a Lambda layer
def ctc_lambda_func(args):
    """Return the batch CTC cost for ``(y_pred, labels, input_length, label_length)``.

    ``args`` is unpacked in exactly that order. The first two time steps of
    ``y_pred`` are dropped before the cost is computed.
    """
    predictions, labels, input_length, label_length = args
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(labels, trimmed, input_length, label_length)

In [None]:
# Label alphabet: a character's class index is its position in this string.
# n_class adds one extra class on top of the alphabet (presumably the CTC blank).
# NOTE(review): the alphabet contains no 'V' — confirm that letter never occurs
# in the labels, otherwise `characters.find` returns -1 for it.
characters = '0123456789ABCDEFGHIJKLMNOPQRSTUWXYZ'
width, height, n_len, n_class = 512, 80, 10, len(characters)+1
print(width, height, n_len, n_class)

# Training and validation data
train_val_images_path = './datasets/train/'
train_val_labels_path = './datasets/train_id_label.csv'
csv_data = pd.read_csv(train_val_labels_path) 
csv_data = np.array(csv_data)

n_splits = 5
kf = KFold(n_splits=n_splits)
kf_id = 4

# Validate the fold id up front: an out-of-range value would otherwise leave
# train_csv / val_csv undefined (or silently stale from an earlier notebook run).
if not 0 <= kf_id < n_splits:
    raise ValueError("kf_id must be in [0, %d), got %r" % (n_splits, kf_id))

# Pick the kf_id-th (train, validation) index pair produced by the splitter.
train_index, test_index = list(kf.split(csv_data))[kf_id]
train_csv = csv_data[train_index]
val_csv = csv_data[test_index]

print("train numbers: ", len(train_csv))
print("val numbers: ", len(val_csv))

test_images_path = './datasets/test/'
test_images = os.listdir(test_images_path)
print("test numbers: ", len(test_images))

In [None]:
# Hyper-parameters shared by every backbone.
l2_rate = 1e-5
rnn_size = 256
BASE_MODEL_NAME="DenseNet169"

def crnn_model(base_model_name="InceptionResNetV2", mode="train"):
    """Build the CNN + two-layer GRU recogniser and its CTC training wrapper.

    Args:
        base_model_name: One of "InceptionResNetV2", "Xception", "ResNet50"
            or "DenseNet169".
        mode: "train" uses a dropout rate of 0.5; any other value uses 0.

    Returns:
        A tuple ``(base_model, model, rnn_length)``: ``base_model`` maps an
        image to per-time-step softmax scores, ``model`` additionally takes
        labels and lengths and outputs the CTC loss, and ``rnn_length`` is the
        number of time steps left after the first two are dropped.

    Raises:
        ValueError: If ``base_model_name`` is not a supported backbone.
    """
    # Resolve the backbone before creating any Keras object, so an unsupported
    # name fails immediately with a clear message instead of an unbound `x`.
    # DenseNet121 ('pool4_conv') and InceptionV3 ('activation_74') were tried
    # earlier and are currently disabled.
    # NOTE(review): the 'activation_N' names are auto-numbered by Keras —
    # presumably this is why callers run K.clear_session() first; confirm.
    if base_model_name == "InceptionResNetV2":
        backbone_cls, feature_layer = InceptionResNetV2, 'activation_161'
    elif base_model_name == "Xception":
        backbone_cls, feature_layer = Xception, 'block13_sepconv2_bn'
    elif base_model_name == "ResNet50":
        backbone_cls, feature_layer = ResNet50, 'activation_39'
    elif base_model_name == "DenseNet169":
        backbone_cls, feature_layer = DenseNet169, 'pool4_conv'
    else:
        raise ValueError(
            "unsupported base_model_name %r; expected one of "
            "'InceptionResNetV2', 'Xception', 'ResNet50', 'DenseNet169'"
            % (base_model_name,)
        )

    drop = 0.5 if mode == "train" else 0

    # Images are fed as (width, height, channels), so axis 1 of the feature
    # map runs along the image width and becomes the sequence axis.
    input_tensor = Input(shape=(width, height, 3))
    backbone = backbone_cls(include_top=False, input_tensor=input_tensor)
    x = backbone.get_layer(feature_layer).output

    conv_shape = x.get_shape().as_list()
    # The CTC loss ignores the first two time steps (see ctc_lambda_func).
    rnn_length = conv_shape[1] - 2
    x = Reshape(target_shape=(conv_shape[1], conv_shape[2]*conv_shape[3]))(x)
    x = Dropout(drop)(x)
    x = Dense(rnn_size, kernel_regularizer=l2(l2_rate), bias_regularizer=l2(l2_rate))(x)
    x = BatchNormalization(gamma_regularizer=l2(l2_rate), beta_regularizer=l2(l2_rate))(x)
    x = Activation('relu')(x)

    # NOTE(review): per the Keras docs go_backwards=True returns the sequence
    # in reversed order; the merges below combine it without re-reversing.
    # Left untouched because changing the wiring would alter what existing
    # checkpoints compute.
    gru_1a = GRU(rnn_size, return_sequences=True, name='gru1a')(x)
    gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, name='gru1_b')(x)
    gru1_merged = add([gru_1a, gru_1b])

    gru_2a = GRU(rnn_size, return_sequences=True, name='gru2a')(gru1_merged)
    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, name='gru2_b')(gru1_merged)
    x = concatenate([gru_2a, gru_2b])
    x = Dropout(drop)(x)

    x = Dense(n_class, activation='softmax', kernel_regularizer=l2(l2_rate), bias_regularizer=l2(l2_rate))(x)
    base_model = Model(inputs=input_tensor, outputs=x)

    # Extra inputs needed only to compute the CTC loss inside the graph.
    labels = Input(name='the_labels', shape=[n_len], dtype='float32')
    input_length = Input(name='input_length', shape=(1,), dtype='int64')
    label_length = Input(name='label_length', shape=(1,), dtype='int64')
    loss_out = Lambda(ctc_lambda_func, name='ctc')([base_model.output, labels, input_length, label_length])

    model = Model(inputs=(input_tensor, labels, input_length, label_length), outputs=loss_out)
    return base_model, model, rnn_length

# Reset the Keras session, then build the training-mode model for the
# backbone selected by BASE_MODEL_NAME and print its layer summary.
K.clear_session()
base_model, model, rnn_length = crnn_model(base_model_name=BASE_MODEL_NAME, mode="train")
model.summary()

In [None]:
# Training-time augmentation pipeline; augmenters run in the listed order
# (random_order=False). Applied by gen_data() when mode == 'train'.
aug = iaa.Sequential([     
    # Pad two of the four sides by 0-10 px, filling with a constant or the edge value.
    # NOTE(review): px order is presumably (top, right, bottom, left) per imgaug — confirm.
    iaa.CropAndPad(
        px=((0, 10), (0, 0), (0, 10), (0, 0)),
        pad_mode=["constant", "edge"],
        pad_cval=(0, 128),
    ),

    # Affine transform: slight scaling and rotation
    iaa.Affine(
        scale={"x": (0.9, 1.1), "y": (0.9, 1.1)},
        rotate=(-5, 5),
        cval=(0, 255),
    ),
    
    # Additive brightness shift
    iaa.Add((-10, 10), per_channel=0.5),
    
    # Strengthen or weaken the image contrast
    iaa.ContrastNormalization((0.8, 1.2)),
    
    # Make the image brighter or darker
    iaa.Multiply((0.8, 1.2), per_channel=0.5),
    # Shift channel 0 of the HSV representation by 0-30
    iaa.WithColorspace(
         to_colorspace="HSV",
         from_colorspace="RGB",
         children=iaa.WithChannels(0, iaa.Add((0, 30)))
     ),
    
     # Randomly drop coarse patches of pixels
     iaa.CoarseDropout(0.02, size_percent=0.5),
    
    ], random_order=False)

In [None]:
def gen_data(csvs, train_val_images_path, mode='train', batch_size=64):
    """Yield CTC training batches forever, cycling over ``csvs``.

    Args:
        csvs: Rows whose item 0 is an image file name and item 1 is the label
            string; the label's first character is skipped before encoding.
        train_val_images_path: Directory that contains the image files.
        mode: ``'train'`` applies the ``aug`` pipeline; other values do not.
        batch_size: Number of samples per yielded batch.

    Yields:
        ``([images, labels, input_length, label_length], targets)`` where
        ``images`` has been preprocessed and transposed to
        (batch, width, height, channels) and ``targets`` is an all-ones array
        (the real loss is computed inside the model).

    Rows are skipped when the image cannot be read, when its width/height
    ratio is outside [3.5, 7], or when the label cannot be encoded as exactly
    ``n_len`` known characters. A partial batch left at the end of a pass over
    ``csvs`` is discarded.
    """
    while True:
        images = []
        labels = []
        #np.random.shuffle(val_csv)
        for row in csvs:
            img = cv2.imread(os.path.join(train_val_images_path, row[0]))
            if img is None:
                # cv2.imread returns None for a missing or unreadable file.
                continue

            scale = img.shape[1]/img.shape[0]
            if scale<3.5 or scale>7:
                continue

            lbl = [characters.find(ch) for ch in row[1][1:]]
            # `find` returns -1 for characters outside the alphabet, and every
            # sample in a batch is declared to have n_len label entries, so a
            # malformed label would feed -1 or a ragged array into the CTC loss.
            if len(lbl) != n_len or any(idx < 0 for idx in lbl):
                continue

            # Scale to the target height, capping the width at the canvas width.
            new_width = min(int(height*scale), width)
            img = cv2.resize(img, (new_width, height))
            bg = np.zeros((height, width, 3))
            #rand = random.randint(0, width-new_width)
            # Paste at one of four horizontal offsets inside the black canvas.
            rand = (width-new_width)//4 * random.randint(1, 4)
            bg[:, rand:rand+new_width, :] = img
            img = cv2.cvtColor(bg.astype('uint8'), cv2.COLOR_BGR2RGB)
            images.append(img)
            labels.append(lbl)

            if len(images) == batch_size:
                if mode=='train':
                    images = aug(images=images)

                # Image preprocessing.
                # NOTE(review): `preprocess_input` comes from star-imports of
                # several keras.applications modules, so it is presumably the
                # last one imported rather than the backbone's own — confirm.
                images = preprocess_input(np.array(images))

                # (batch, height, width, ch) -> (batch, width, height, ch)
                images = images.transpose(0, 2, 1, 3)

                labels = np.array(labels)
                yield [images, labels, np.array([rnn_length]*batch_size), np.array([n_len]*batch_size)], np.ones(batch_size)
                images = []
                labels = []

# Smoke test: pull one small training batch and print its shapes and length arrays.
g = gen_data(train_csv, train_val_images_path, mode='train', batch_size=3)
[imgs, lbls, p, label_length_test], t = next(g)
print(imgs.shape, lbls.shape, p, label_length_test, t)

In [None]:
def evaluate(batch_size=100, steps=20):
    """Return the exact-match accuracy of ``base_model`` on validation batches.

    Draws ``steps`` batches of ``batch_size`` samples from the validation
    generator, CTC-decodes the predictions (ignoring the first two time steps)
    and counts the samples whose first ``n_len`` decoded labels all equal the
    ground truth. A batch whose decoded output has fewer than ``n_len``
    columns contributes no correct samples.
    """
    val_batches = gen_data(val_csv, train_val_images_path, mode='val', batch_size=batch_size)
    n_correct = 0
    for _ in range(steps):
        [x_batch, y_true, _, _], _ = next(val_batches)
        scores = base_model.predict(x_batch)[:, 2:, :]
        n_samples, n_steps = scores.shape[0], scores.shape[1]
        decoded = K.ctc_decode(scores, input_length=np.ones(n_samples)*n_steps)[0][0]
        out = K.get_value(decoded)[:, :n_len]

        if out.shape[1] != n_len:
            continue
        n_correct += sum((y_true == out).all(axis=1))
    return n_correct / (batch_size*steps)

class Evaluator(Callback):
    """Keras callback that records exact-match validation accuracy per epoch."""

    def __init__(self):
        # Initialise the Keras base class first so the attributes it defines
        # exist before the training loop touches this callback.
        super().__init__()
        # Accuracy in percent, one entry per finished epoch.
        self.accs = []
    
    def on_epoch_end(self, epoch, logs=None):
        """Evaluate on the validation split in batches of 100 and log the result."""
        acc = evaluate(batch_size=100, steps=len(val_csv)//100)*100
        self.accs.append(acc) 
        print('\n acc: %f%%' % acc)
evaluator = Evaluator()

In [None]:
batch_size = 40
# The 'ctc' Lambda layer already outputs the CTC loss, so the compiled loss
# simply passes the model output (y_pred) through unchanged.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=Adam(lr=1e-4))
# All three callbacks watch val_loss: lower the learning rate after 3 stagnant
# epochs, keep only the best checkpoint, and stop after 6 stagnant epochs.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6, verbose=1)
model_checkpoint = ModelCheckpoint('./models/%s_%d.h5'%(BASE_MODEL_NAME, kf_id), monitor='val_loss', save_best_only=True)
earlystop = EarlyStopping(monitor='val_loss', patience=6, verbose=1, mode='auto')

In [None]:
# Uncomment to resume from the checkpoint of the current backbone / fold.
#model.load_weights('./models/%s_%d.h5'%(BASE_MODEL_NAME, kf_id))
# Train on the augmented training generator and validate on the unaugmented
# validation generator; step counts use whole batches only.
history = model.fit_generator(
    gen_data(train_csv, train_val_images_path, mode='train', batch_size=batch_size),
    validation_data=gen_data(val_csv, train_val_images_path, mode='val', batch_size=batch_size), 
    validation_steps = len(val_csv)//batch_size,
    steps_per_epoch=len(train_csv)//batch_size, 
    epochs=40, 
    callbacks=[reduce_lr, earlystop, model_checkpoint]) #evaluator,

In [None]:
kf_ids = [0, 1, 4] 
# NOTE(review): crnn_model() in this file has no active "InceptionV3" branch
# (it is commented out), so this list has to name a supported backbone before
# the cell can run.
BASE_MODEL_NAMES = ["InceptionV3"]
# NOTE: this rebinds the module-level batch_size that the training cells also use.
batch_size = 400


def _predict_rows(images, img_names):
    """Predict one batch with the current ``base_model`` and return ``[name, label]`` rows."""
    batch = preprocess_input(np.array(images))
    # Ignore the first two time steps, as the CTC loss does during training.
    y_pred = base_model.predict(batch)[:, 2:, :]
    ctc_decode = K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0])*y_pred.shape[1])[0][0]
    out = K.get_value(ctc_decode)[:, :n_len]

    rows = []
    for name, indices in zip(img_names, out):
        # NOTE(review): ctc_decode pads short sequences with -1, which
        # `characters[int(idx)]` maps to the last character ('Z'). Left as-is
        # because the voting step relies on fixed-length labels.
        label = ''.join(characters[int(idx)] for idx in indices)
        # The leading space presumably keeps pandas from parsing an all-digit
        # label as a number when the CSV is read back — confirm.
        rows.append([name, ' '+label])
    return rows


for BASE_MODEL_NAME in BASE_MODEL_NAMES:
    for kf_id in kf_ids:

        K.clear_session()
        base_model, model, rnn_length = crnn_model(base_model_name=BASE_MODEL_NAME, mode="test")
        model.load_weights('./models/%s_%d.h5'%(BASE_MODEL_NAME, kf_id))

        # Five passes over the test set, each pasting the image at a different
        # horizontal offset inside the black canvas.
        for iii in range(1,6):
            res = []
            images = []
            img_names = []
            for img_name in tqdm(test_images):
                img = cv2.imread(os.path.join(test_images_path, img_name))
                scale = img.shape[1]/img.shape[0]
                new_width = min(int(height*scale), width)
                img = cv2.resize(img, (new_width, height))
                bg = np.zeros((height, width, 3))
                rand = (width-new_width)//6*iii
                bg[:, rand:rand+new_width, :] = img
                # BGR -> RGB, then (height, width, ch) -> (width, height, ch)
                img = cv2.cvtColor(bg.astype('uint8'), cv2.COLOR_BGR2RGB).transpose(1, 0, 2)

                images.append(img)
                img_names.append(img_name)

                if len(images)==batch_size:
                    res.extend(_predict_rows(images, img_names))
                    images = []
                    img_names = []

            # Predict the trailing partial batch too; without this the last
            # len(test_images) % batch_size images would be missing from the CSV.
            if images:
                res.extend(_predict_rows(images, img_names))
                images = []
                img_names = []

            pd_data = pd.DataFrame(np.array(res),columns=['name','label'])
            print("BASE_MODEL_NAME", BASE_MODEL_NAME, "kf_id: ",kf_id, "iii: " ,iii)
            pd_data.to_csv('./results/%s_%d_%d.csv'%(BASE_MODEL_NAME, kf_id, iii), index=False)

In [None]:
# Load every per-model prediction file from ./results/.
# The voting cell writes its ensemble output into the same directory, so that
# file (and anything that is not a CSV) is excluded here; otherwise a re-run
# would count the previous ensemble result as one more voter.
ensemble_csv = 'zzz-Xecp&&IncV2&&Dense&&resnet.csv'
csv_list = sorted(
    name for name in os.listdir("./results/")
    if name.endswith(".csv") and name != ensemble_csv
)
print(len(csv_list))
for iii, sss in enumerate(csv_list):
    print(iii, sss)
#print(csv_list)
# One (n_images, 2) array of [name, label] rows per result file.
csvs = [np.array(pd.read_csv("./results/" + name)) for name in csv_list]

In [None]:
err_id = []
def update_label(labels, expected_len=11, digit_start=5):
    """Merge several predicted labels into one by per-character majority vote.

    Args:
        labels: Candidate label strings for one image.
        expected_len: Only strings of exactly this length take part in the
            per-character vote (default 11).
        digit_start: From this index on, non-digit characters are ignored as
            long as at least one voter proposes a digit (default 5).

    Returns:
        The voted string of ``expected_len`` characters. Ties go to the
        character seen first. If no candidate has the expected length, the
        most frequent whole string is returned, or ``""`` when there is no
        string candidate at all.
    """
    from collections import Counter

    # Non-string entries (e.g. NaN from an empty CSV cell) cannot vote.
    voters = [s for s in labels if isinstance(s, str) and len(s) == expected_len]
    if not voters:
        whole = [s for s in labels if isinstance(s, str)]
        return Counter(whole).most_common(1)[0][0] if whole else ""

    voted = []
    for i in range(expected_len):
        votes = [s[i] for s in voters]
        if i >= digit_start:
            # Prefer digit votes, but fall back to the raw votes instead of
            # failing when every voter proposed a non-digit here.
            digit_votes = [c for c in votes if c in "0123456789"]
            votes = digit_votes or votes
        voted.append(Counter(votes).most_common(1)[0][0])
    return "".join(voted)

# Vote across all result files, one test image (CSV row) at a time.
# Row indices are 0-based everywhere, so err_id lines up with the
# enumerate() lookup at the bottom of this cell.
labels = []
img_names = []
for i, first_row in enumerate(tqdm(csvs[0])):
    lbls = [result[i][1] for result in csvs]
    labels.append(update_label(lbls))
    # Take the file name from the result rows themselves rather than from a
    # fresh directory listing, so names and labels cannot drift apart.
    img_names.append(first_row[0])

    if len(set(lbls)) >1:
        # The models disagree on this image: remember it for manual review.
        print(i, "-----------------------")
        print(lbls)
        err_id.append(i)

res = [[name, label] for name, label in zip(img_names, labels)]

pd_data = pd.DataFrame(res, columns=['name','label'])
pd_data.to_csv('./results/zzz-Xecp&&IncV2&&Dense&&resnet.csv', index=False)

data = np.array(pd_data)

print("==============================")
print("err_ids:", len(err_id))
# Copy every disputed image into a review folder, creating it if needed.
os.makedirs("./datasets/err_test/", exist_ok=True)
disputed = set(err_id)
for i,d in enumerate(data):
    if i in disputed:
        print(i, d)
        shutil.copyfile("./datasets/test/"+d[0],"./datasets/err_test/"+d[0])