In [1]:
import os.path as path
import glob

import numpy as np
import tensorflow.gfile as gfile
import pickle

from PIL import Image
from keras import backend as K
from keras.layers import Input, Conv2D, Activation, MaxPooling2D, Flatten, Dropout, Dense, Concatenate
from keras.models import Model
from keras.utils.vis_utils import plot_model

Using TensorFlow backend.


In [2]:
# Character set the captchas are drawn from; list position is the class index.
NUMBER = list('0123456789')

# Captcha geometry: characters per captcha, image rows and image columns.
CAPTCHA_LEN = 4
CAPTCHA_HEIGHT = 60
CAPTCHA_WIDTH = 160

# Directories holding the labelled PNG files.
TRAIN_DATA_DIR = './data/train/'
TEST_DATA_DIR = './data/test/'

# Training settings; OPT and LOSS are also embedded in the output file names.
BATCH_SIZE = 100
EPOCHS = 10
OPT = 'adam'
LOSS = 'binary_crossentropy'

# Output artefacts. The directory strings end with '/' because the name
# template concatenates the directory and the file stem directly.
MODEL_DIR = './model/'
HISTORY_DIR = './history/'
FILE_NAME_FORMAT = "{}captcha_{}_{}_bs_{}_epochs_{}{}"
MODEL_FILE = FILE_NAME_FORMAT.format(
    MODEL_DIR, OPT, LOSS, BATCH_SIZE, EPOCHS, '.h5')
HISTORY_FILE = FILE_NAME_FORMAT.format(
    HISTORY_DIR, OPT, LOSS, BATCH_SIZE, EPOCHS, '.history')

In [3]:
def rgb2gray(image):
    """Collapse the last axis of an RGB(A) array into a single gray value.

    Only the first three entries of the last axis are used (any further
    channel, e.g. alpha, is ignored); they are combined as the weighted sum
    0.299 * R + 0.587 * G + 0.114 * B. All leading axes are preserved.
    """
    channel_weights = np.array([0.299, 0.587, 0.114])
    rgb = image[..., :3]
    return np.dot(rgb, channel_weights)

def char2onehot(char, charset):
    """Return a one-hot float vector marking the position of `char` in `charset`.

    The vector has `len(charset)` entries, all 0 except a 1 at
    `charset.index(char)`. Raises ValueError (from `index`) when `char` is
    not part of `charset`.
    """
    size = len(charset)
    hot = charset.index(char)
    return np.array([1.0 if pos == hot else 0.0 for pos in range(size)])

def text2vec(text, charset=NUMBER):
    """Encode `text` as the concatenation of one one-hot vector per character.

    Each character is encoded with `char2onehot` against `charset`; the
    result is a flat array of length `len(text) * len(charset)`.
    """
    encoded_chars = []
    for character in text:
        encoded_chars.append(char2onehot(character, charset))
    stacked = np.array(encoded_chars)
    return stacked.flatten()

def vec2text(vector, onehot_length=4, charset=NUMBER):
    """Decode a concatenated one-hot (or score) vector back into text.

    `vector` is reshaped into `onehot_length` rows; the arg-max of each row
    selects one character from `charset`, and the characters are joined in
    row order.

    NOTE(review): despite its name, `onehot_length` is the number of rows,
    i.e. the number of characters decoded. Its default of 4 duplicates
    CAPTCHA_LEN rather than referencing it.
    """
    per_char_scores = vector.reshape(onehot_length, -1)
    best_classes = per_char_scores.argmax(axis=1)
    return "".join(charset[class_idx] for class_idx in best_classes)

def fit_keras_channels(batch, rows=CAPTCHA_HEIGHT, cols=CAPTCHA_WIDTH):
    """Reshape a batch of single-channel images to the active Keras layout.

    The channel axis (size 1) is placed before the spatial axes when
    `K.image_data_format()` reports 'channels_first', and after them
    otherwise.

    Returns:
        (batch, input_shape): the reshaped batch and the per-sample shape
        (the batch shape without its leading sample axis).
    """
    channels_first = K.image_data_format() == 'channels_first'
    input_shape = (1, rows, cols) if channels_first else (rows, cols, 1)
    sample_count = batch.shape[0]
    batch = batch.reshape((sample_count,) + input_shape)
    return batch, input_shape

In [4]:
def read_data(data_dir):
    """Load every '*.png' file in `data_dir` together with its label.

    The label of an image is its file name without the extension.

    Args:
        data_dir: directory to search (non-recursively) for PNG files.

    Returns:
        (x, y): `x` is a list of image arrays and `y` the list of label
        strings, both in sorted file-path order. Both lists are empty when
        the directory contains no PNG files.
    """
    x = []
    y = []
    # glob yields paths in arbitrary, filesystem-dependent order; sorting keeps
    # sample indices (e.g. x_test[100]) stable across runs and machines.
    for file_path in sorted(glob.glob(path.join(data_dir, '*.png'))):
        label = path.splitext(path.basename(file_path))[0]
        # Image.open is lazy; the context manager makes sure the underlying
        # file is released once the pixel data has been copied into an array.
        with Image.open(file_path) as image:
            x.append(np.array(image))
        y.append(label)
    return x, y

In [5]:
def preprocess(x, y):
    """Turn raw images and label strings into model-ready arrays.

    Images are stacked into a float32 array, converted to gray with
    `rgb2gray`, divided by 255 and reshaped by `fit_keras_channels`. Labels
    are encoded with `text2vec` and stacked into one array. The resulting
    shapes are printed as a side effect.

    Returns:
        (x, y): the image array and the encoded label array.
    """
    pixels = np.array(x, dtype=np.float32)
    gray = rgb2gray(pixels) / 255
    x, input_shape = fit_keras_channels(gray)
    print("x.shape = {}".format(x.shape))
    print("input_shape = {}".format(input_shape))

    y = np.asarray([text2vec(label) for label in y])
    print("y.shape = {}".format(y.shape))

    return x, y

In [6]:
# Read the training PNGs and their file-name labels, then convert them into
# the arrays used for fitting (preprocess prints the resulting shapes).
x_train_raw, y_train_raw = read_data(TRAIN_DATA_DIR)
x_train, y_train = preprocess(x_train_raw, y_train_raw)

x.shape = (3976, 60, 160, 1)
input_shape = (60, 160, 1)
y.shape = (3976, 40)


In [7]:
# Same pipeline for the test directory; these arrays are later passed to
# model.fit as validation_data and used for the sample prediction.
x_test_raw, y_test_raw = read_data(TEST_DATA_DIR) 
x_test, y_test = preprocess(x_test_raw, y_test_raw)

x.shape = (959, 60, 160, 1)
input_shape = (60, 160, 1)
y.shape = (959, 40)


In [8]:
# Build the captcha CNN: three 3x3 convolution stages (the last two followed
# by 2x2 max-pooling), dropout, and one softmax head per captcha character.
input_shape = x_train.shape[1:]
inputs = Input(shape=input_shape, name='inputs')

conv_1 = Conv2D(32, (3, 3), name='conv_1')(inputs)
relu_1 = Activation('relu', name='relu_1')(conv_1)

conv_2 = Conv2D(32, (3, 3), name='conv_2')(relu_1)
relu_2 = Activation('relu', name='relu_2')(conv_2)
pool_2 = MaxPooling2D((2, 2), padding='same', name='pool_2')(relu_2)

conv_3 = Conv2D(64, (3, 3), name='conv_3')(pool_2)
relu_3 = Activation('relu', name='relu_3')(conv_3)
pool_3 = MaxPooling2D((2, 2), padding='same', name='pool_3')(relu_3)

flatten = Flatten()(pool_3)

dropout = Dropout(0.25)(flatten)

# One classification head per character position, each over the full
# character set. Sizes come from the config cell so the output width always
# matches what text2vec produces for the labels.
dense = [Dense(len(NUMBER), activation='softmax', name='fc_{}'.format(i + 1))(dropout)
         for i in range(CAPTCHA_LEN)]

# Concatenate the heads into a single vector with the same layout as the
# labels (CAPTCHA_LEN blocks of len(NUMBER) values).
outputs = Concatenate()(dense)

model = Model(inputs=inputs, outputs=outputs)
# Compile with the configured optimizer/loss so the trained model always
# matches the OPT/LOSS values embedded in MODEL_FILE and HISTORY_FILE.
# NOTE(review): with a binary_crossentropy loss Keras resolves 'accuracy' to
# element-wise binary accuracy over the concatenated outputs, not
# whole-captcha accuracy — confirm this is the metric intended.
model.compile(optimizer=OPT, loss=LOSS, metrics=['accuracy'])

In [9]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections).
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
inputs (InputLayer)             (None, 60, 160, 1)   0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 58, 158, 32)  320         inputs[0][0]                     
__________________________________________________________________________________________________
relu_1 (Activation)             (None, 58, 158, 32)  0           conv_1[0][0]                     
__________________________________________________________________________________________________
conv_2 (Conv2D)                 (None, 56, 156, 32)  9248        relu_1[0][0]                     
__________________________________________________________________________________________________
relu_2 (Ac

In [10]:
# Write a diagram of the model graph, including tensor shapes, to model.png.
plot_model(model, to_file='model.png', show_shapes=True)

In [11]:
# Train the model; the test split is used as validation data after each
# epoch. The returned History object is kept so it can be saved later.
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    validation_data=(x_test, y_test),
)

Train on 3976 samples, validate on 959 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
# Spot-check a single test sample: decode its true label and the model's
# prediction back into text and print them side by side.
sample_index = 100
# Slicing keeps the leading batch axis, so the sample already has the shape
# model.predict expects, whatever the image size or channel layout is.
sample_image = x_test[sample_index:sample_index + 1]
sample_label = y_test[sample_index]
predict_label = model.predict(sample_image)
print("sample_label = {}, predict_label = {}".format(vec2text(sample_label), vec2text(predict_label)))

sample_label = 0937, predict_label = 0933


In [13]:
# Make sure the output directory exists, then save the trained model to
# MODEL_FILE (an '.h5' path built in the config cell).
if not gfile.Exists(MODEL_DIR):
    gfile.MakeDirs(MODEL_DIR)

model.save(MODEL_FILE)

In [14]:
# Make sure the history directory exists, then pickle the history dict
# recorded by model.fit to HISTORY_FILE.
if not gfile.Exists(HISTORY_DIR):
    gfile.MakeDirs(HISTORY_DIR)

# Only the plain `history.history` attribute is pickled, not the History
# object returned by model.fit.
with open(HISTORY_FILE, 'wb') as f:
    pickle.dump(history.history, f)