In [23]:
import cv2
from keras.callbacks import *
from keras.layers import *
from keras.models import *
from keras.preprocessing import image
import numpy as np
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.utils import to_categorical
import sys
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [13]:
def read_img(file_name, color_inv_norm=True):
    """
    Read an image from the local file system as a grayscale array and scale
    its intensities by 1/255 (i.e. into [0, 1] for 8-bit images).

    :param file_name: path of the image file to load.
    :param color_inv_norm: if True, invert the intensities (``255 - pixel``)
        before scaling; if False, set every pixel darker than 50 to 0 and
        then scale.
    :return: float array holding the scaled grayscale image.
    :raises IOError: if OpenCV cannot read or decode ``file_name``.
    """
    img = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)

    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an opaque TypeError.
    if img is None:
        raise IOError('cannot read image file: %r' % (file_name,))

    if color_inv_norm:
        img = 255 - img
    else:
        # suppress faint background noise before scaling
        img[img < 50] = 0

    return img / 255.0

In [14]:
def get_data(base_path):
    """
    Load every image under ``base_path`` together with its one-hot label.

    ``base_path`` is expected to contain one sub-directory per class, named
    with the class index (``0`` ... ``9``), each holding the image files of
    that class. Entries that are not such directories (stray files, hidden
    folders like ``.ipynb_checkpoints``) are skipped instead of crashing the
    ``int()`` conversion. Classes and files are visited in sorted order so
    the result does not depend on the arbitrary order of ``os.listdir``.

    :param base_path: root directory of the dataset.
    :return: tuple ``(train_data, train_label)``: an array stacking the
        images returned by ``read_img(..., color_inv_norm=False)`` and an
        integer array with one one-hot row of length 10 per image.
    """
    # Imported locally: the notebook never imports os explicitly, it was only
    # reachable as a side effect of the keras star-imports.
    import os

    num_classes = 10
    one_hot = np.eye(num_classes, dtype=int)

    def class_index(name):
        """Return the class index encoded by a directory name, or None."""
        try:
            idx = int(name)
        except ValueError:
            return None
        if 0 <= idx < num_classes and os.path.isdir(os.path.join(base_path, name)):
            return idx
        return None

    class_dirs = sorted(
        (name for name in os.listdir(base_path) if class_index(name) is not None),
        key=int,
    )

    train_data = []
    train_label = []
    for num in tqdm(class_dirs):
        class_dir = os.path.join(base_path, num)
        label = one_hot[int(num)]
        for jpg in sorted(os.listdir(class_dir)):
            train_data.append(read_img(os.path.join(class_dir, jpg), color_inv_norm=False))
            train_label.append(label)

    return np.array(train_data), np.array(train_label)

In [15]:
# Load the whole dataset: X receives the image array and y the one-hot labels
# returned by get_data; the last line displays both shapes.
# NOTE(review): absolute, user-specific path - adjust before running elsewhere.
X, y = get_data('/home/lian19931201/datasets/num_ocr')
X.shape, y.shape

100%|██████████| 10/10 [00:05<00:00,  1.86it/s]


In [50]:
# Hold out 15% of the samples for evaluation. The fixed random_state makes the
# split, and therefore the accuracy figures computed on X_test, reproducible
# across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Append the channel axis expected by the network input: (n, h, w) -> (n, h, w, 1)
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)
X_train.shape, y_train.shape

((39545, 48, 48, 1), (39545, 10))

# Model

In [34]:
# Number of output classes of the classifier (one per one-hot label column).
num_class = 10
# Symbolic network input: 48x48 images with a single channel.
input_tensor = Input((48, 48, 1))

In [29]:
def vgg(input_tensor):
    """
    Build a VGG16 model (``include_top=False``) on top of ``input_tensor``.

    :param input_tensor: Keras tensor used as the model input.
    :return: the model object created by ``VGG16``.
    """
    return VGG16(include_top=False, input_tensor=input_tensor)

In [89]:
# def resnet_pretrain(input_tensor):
#     rn = ResNet50(include_top=False, input_tensor=input_tensor)
#     return rn

def resnet(input_tensor, units=32, kernel_size=(3, 3)):
    """
    Stack two residual blocks on ``input_tensor``.

    The first block is built with ``units`` filters and the second with
    ``2 * units``; both use ``kernel_size``.

    :param input_tensor: tensor fed to the first block.
    :param units: filter count of the first block.
    :param kernel_size: kernel size forwarded to ``res_block``.
    :return: output tensor of the second block.
    """
    widths = [units * 2 ** stage for stage in range(2)]
    out = input_tensor
    for width in widths:
        out = res_block(out, width, kernel_size=kernel_size)
    return out

In [71]:
def conv2d_bn(x, units, kernel_size, strides=(1, 1), padding='same', name=None):
    """
    Apply a ReLU-activated ``Conv2D`` followed by ``BatchNormalization``
    (axis 3) to ``x``.

    :param x: input tensor.
    :param units: number of convolution filters.
    :param kernel_size: convolution kernel size.
    :param strides: convolution strides.
    :param padding: convolution padding mode.
    :param name: optional base name; the layers are then named
        ``<name>_conv`` and ``<name>_bn``, otherwise Keras picks the names.
    :return: the batch-normalised output tensor.
    """
    conv_name = None if name is None else name + '_conv'
    bn_name = None if name is None else name + '_bn'

    activated = Conv2D(units, kernel_size, padding=padding, strides=strides,
                       activation='relu', name=conv_name)(x)
    return BatchNormalization(axis=3, name=bn_name)(activated)

In [74]:
def res_block(inpt, units, kernel_size, strides=(1, 1), with_conv_shortcut=True):
    """
    Residual block: two ``conv2d_bn`` layers whose output is added to a shortcut.

    :param inpt: input tensor of the block.
    :param units: filter count used by every convolution of the block.
    :param kernel_size: kernel size used by every convolution of the block.
    :param strides: strides of the first main-path convolution and of the
        shortcut convolution.
    :param with_conv_shortcut: if True the shortcut is ``conv2d_bn`` applied
        to ``inpt``; if False the shortcut is ``inpt`` itself.
    :return: the element-wise sum of main path and shortcut.
    """
    main = conv2d_bn(inpt, units=units, kernel_size=kernel_size, strides=strides, padding='same')
    main = conv2d_bn(main, units=units, kernel_size=kernel_size, padding='same')

    if with_conv_shortcut:
        shortcut = conv2d_bn(inpt, units=units, strides=strides, kernel_size=kernel_size)
    else:
        shortcut = inpt
    return add([main, shortcut])

In [31]:
def ordinary(x, ite=2):
    """
    Plain (non-residual) convolutional feature extractor.

    Each of the ``ite`` stages applies Conv(3x3) -> BatchNorm -> ReLU twice
    and then a 2x2 max-pooling. The first convolution of stage ``i`` has
    ``32 * 2**i`` filters; the second always has ``32 * 2**2`` (128).

    NOTE(review): the constant ``2 ** 2`` in the second convolution may have
    been meant to be ``2 ** i`` - confirm before relying on this network.

    :param x: input tensor.
    :param ite: number of stages.
    :return: output tensor of the last stage (``x`` itself when ``ite`` is 0).
    """
    def conv_bn_relu(tensor, filters):
        tensor = Conv2D(filters, (3, 3), kernel_initializer='he_normal')(tensor)
        tensor = BatchNormalization()(tensor)
        return Activation('relu')(tensor)

    for stage in range(ite):
        x = conv_bn_relu(x, 32 * 2 ** stage)
        x = conv_bn_relu(x, 32 * 2 ** 2)
        x = MaxPool2D(pool_size=(2, 2))(x)
    return x

In [90]:
# Backbone: two residual blocks. The plain convolutional stack is available as
# an alternative via `x = ordinary(input_tensor)`.
x = resnet(input_tensor, units=32, kernel_size=(3, 3))

# Classification head: flatten -> Dense(128) + BatchNorm + ReLU -> dropout ->
# softmax over the classes (sized by num_class rather than a hard-coded 10).
x = Flatten()(x)
x = Dense(128, kernel_initializer='he_normal')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.25)(x)
x = Dense(num_class, kernel_initializer='he_normal', activation='softmax')(x)

# Keras 2 spells these arguments `inputs`/`outputs`; the Keras 1 form
# `input=`/`output=` only works through a deprecation shim that emits a
# UserWarning.
base_model = Model(inputs=input_tensor, outputs=x)
# NOTE(review): `modellabels` is not used by any visible cell; it is kept so
# this cell still defines the same names as before.
modellabels = Input(name='labels', shape=[num_class], dtype='float32')
model = Model(inputs=input_tensor, outputs=x)

  if sys.path[0] == '':


In [91]:
# The network ends in a softmax and is trained on one-hot targets, so
# categorical cross-entropy is the matching loss (mean squared error does train,
# but yields weak gradients for confidently wrong predictions).
# Accuracy is tracked so fit() reports the same quantity the evaluation cells
# compute by hand.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [92]:
# Train on the training split using the Keras defaults for epochs and batch
# size (the recorded output shows a single epoch); no validation data is passed.
model.fit(X_train, y_train)

Epoch 1/1


<keras.callbacks.History at 0x7f38f6eebe90>

In [53]:
# Accuracy on the first 1000 test samples: fraction whose predicted class
# (argmax of the model output) equals the argmax of the one-hot label.
# NOTE(review): this cell's execution count (53) is lower than that of the
# model-building and training cells, so the recorded value presumably belongs
# to an earlier model - confirm by re-running top to bottom.
np.mean(np.argmax(model.predict(X_test[:1000]), axis=1) == np.argmax(y_test[:1000], axis=1))

0.948

In [78]:
# Same accuracy check on the first 1000 test samples (predicted argmax vs.
# label argmax).
# NOTE(review): execution count 78 also precedes the current training cell;
# the recorded value is presumably from a different model run - confirm.
np.mean(np.argmax(model.predict(X_test[:1000]), axis=1) == np.argmax(y_test[:1000], axis=1))

0.995

In [88]:
# Accuracy on the first 100 test samples only (predicted argmax vs. label
# argmax); with so few samples the estimate is coarse.
# NOTE(review): execution count 88 precedes the current training cell; the
# recorded value is presumably from an earlier model run - confirm.
np.mean(np.argmax(model.predict(X_test[:100]), axis=1) == np.argmax(y_test[:100], axis=1))

1.0

In [93]:
# Persist the trained model to disk (HDF5, judging by the .h5 extension).
# NOTE(review): absolute, user-specific path - adjust before running elsewhere.
model.save('/home/lian19931201/models/resnet2.h5')

In [87]:
# Sanity check: shape of a single training sample after the channel axis was added.
X_train[0].shape

(48, 48, 1)