In [1]:
import os, cv2, csv
import numpy as np
from keras.models import Sequential
from keras.models import load_model
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils  import np_utils
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard

# --- Dataset ---------------------------------------------------------------
SIZE = 2500                      # number of images loaded (1.jpg .. SIZE.jpg)
PROCESSED_FOLDER = "processed/"  # directory the training images are read from
LABEL_CSV_FILE = 'label.csv'     # CSV whose first column is each image's label text

# --- Network input geometry (used as Input((HEIGHT, WIDTH, 3))) -------------
WIDTH = 200
HEIGHT = 60

# --- Training artefacts ----------------------------------------------------
MODEL_FOLDER = "model/"          # prefix of the per-epoch checkpoint files

# Characters a label may contain; a character's position in this string is the
# index that gets set to 1 in its one-hot vector.
allowedChars = 'ACDEFGHJKLNPQRTUVXYZ2346789'

Using TensorFlow backend.


In [2]:
def one_hot_encoding(text, allowedChars):
    """Encode every character of ``text`` as a one-hot vector.

    Args:
        text: Iterable of characters (typically a label string).
        allowedChars: Ordered sequence of valid characters. The position of a
            character in this sequence is the index set to 1 in its vector.

    Returns:
        A list with one entry per character of ``text``; each entry is a list
        of ``len(allowedChars)`` integers containing a single 1.

    Raises:
        ValueError: If ``text`` contains a character missing from
            ``allowedChars``. The message names the character and the label.
    """
    vector_length = len(allowedChars)
    label_list = []
    for c in text:
        try:
            hot_index = allowedChars.index(c)
        except ValueError:
            # The bare "substring not found" from .index() gives no hint about
            # which label is bad, so re-raise with the offending values.
            raise ValueError(
                "character {!r} in label {!r} is not one of the allowed "
                "characters {!r}".format(c, text, allowedChars)
            ) from None
        onehot = [0] * vector_length
        onehot[hot_index] = 1
        label_list.append(onehot)
    return label_list

In [3]:
# Create the CNN graph: two convolution blocks followed by five softmax heads.
print('Creating CNN model...')
tensor_in = Input((HEIGHT, WIDTH, 3))

# Every block is Conv2D -> Dropout -> MaxPooling2D; only the filter count differs.
features = tensor_in
for n_filters in (32, 64):
    features = Conv2D(filters=n_filters, kernel_size=(3, 3), padding='same', activation='relu')(features)
    features = Dropout(0.25)(features)
    features = MaxPooling2D(pool_size=(2, 2))(features)

features = Flatten()(features)
features = Dropout(0.25)(features)

# One classifier per character position ('digit1' .. 'digit5'), all fed by the
# same flattened feature vector and each sized to the allowed character set.
tensor_out = [
    Dense(len(allowedChars), name='digit{}'.format(position), activation='softmax')(features)
    for position in range(1, 6)
]


W0918 01:54:39.971849 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0918 01:54:39.988924 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0918 01:54:40.007565 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0918 01:54:40.051299 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:148: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0918 01:54:40.062272 4527457728 deprecati

Creating CNN model...


In [4]:
# Tie the input tensor and the list of softmax heads into one multi-output model.
model = Model(inputs=tensor_in, outputs=tensor_out)

# A single loss name and a single metric are supplied for the whole model.
model.compile(
    optimizer='Adamax',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

W0918 01:54:40.335929 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/optimizers.py:793: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0918 01:54:40.387578 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3576: The name tf.log is deprecated. Please use tf.math.log instead.



Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 60, 200, 3)   0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 60, 200, 32)  896         input_1[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 60, 200, 32)  0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 30, 100, 32)  0           dropout_1[0][0]                  
____________________________________________________________________________________________

In [5]:
def read_train_data(filename, size):
    """Load the numbered training images ``1.jpg`` .. ``<size>.jpg`` from a directory.

    Args:
        filename: Path of the directory that holds the images (despite the
            parameter name this is a directory, not a file). A trailing path
            separator is optional.
        size: Number of images to load; files ``1.jpg`` through ``<size>.jpg``
            are read in that order.

    Returns:
        A numpy array stacking the images along a new first axis, with every
        pixel value divided by 255.0. If ``filename`` is not an existing
        directory an empty list is returned instead.

    Raises:
        OSError: If one of the expected images cannot be read.
    """
    if not os.path.isdir(filename):
        return []

    images = []
    for index in range(1, size + 1):
        # os.path.join yields the same path as plain concatenation when the
        # directory ends with a separator, and a correct one when it does not.
        image_path = os.path.join(filename, "{}.jpg".format(index))
        image = cv2.imread(image_path)
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as a confusing TypeError on division.
        if image is None:
            raise OSError("Could not read training image: {}".format(image_path))
        images.append(image / 255.0)
    return np.stack(images)

In [6]:
def read_label_data(filename, allowedChars, num_dic):
    """Read labels from a CSV file and one-hot encode them per character position.

    Args:
        filename: Path of a UTF-8 CSV file; the first column of every row is
            the label text. Blank lines are ignored.
        allowedChars: Sequence of valid label characters, forwarded unchanged
            to ``one_hot_encoding``.
        num_dic: Number of leading character positions taken from each label.

    Returns:
        A list of ``num_dic`` numpy arrays. Entry ``i`` holds, in file order,
        the one-hot vector of every label's character at position ``i``.

    Raises:
        IndexError: If a label has fewer than ``num_dic`` characters.
        Exception: Whatever ``one_hot_encoding`` raises for an invalid label.
    """
    # The context manager closes the file even when encoding a label fails;
    # newline='' is the mode the csv module documentation asks for.
    with open(filename, 'r', encoding='utf8', newline='') as traincsv:
        encoded_labels = [
            one_hot_encoding(row[0], allowedChars)
            for row in csv.reader(traincsv)
            if row  # csv.reader yields [] for blank lines; row[0] would fail
        ]

    # Regroup from "per label" to "per character position".
    per_position = [
        [encoded[index] for encoded in encoded_labels]
        for index in range(num_dic)
    ]
    return list(np.asarray(per_position))

In [7]:
# Load the images and their per-position one-hot labels used for training.
print("Reading training data...")

train_data = read_train_data(filename=PROCESSED_FOLDER, size=SIZE)
# 5 = number of character positions per label, one per 'digitN' output head.
train_label = read_label_data(
    filename=LABEL_CSV_FILE,
    allowedChars=allowedChars,
    num_dic=5,
)

print("Reading completed")

Reading training data...
Reading completed


In [8]:
# File name template for checkpoints; the placeholders are filled in with the
# epoch number and the training/validation loss (e.g. model/01-2.00-1.63.hdf5).
filepath = MODEL_FOLDER + "{epoch:02d}-{loss:.2f}-{val_loss:.2f}.hdf5"

# Make sure the checkpoint directory exists before training starts, so the
# first save at the end of epoch 1 cannot fail on a fresh checkout.
checkpoint_dir = os.path.dirname(filepath)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

# NOTE(review): with save_best_only=False the monitored metric presumably does
# not influence which epochs get saved -- confirm against the Keras docs.
checkpoint = ModelCheckpoint(filepath, monitor='val_digit4_acc', verbose=1, save_best_only=False, mode='max')
# Stop training once val_loss has gone 8 epochs without improving.
earlystop = EarlyStopping(monitor='val_loss', patience=8, verbose=1, mode='auto')
# Write TensorBoard logs (including histograms every epoch) under ./logs.
tensorBoard = TensorBoard(log_dir = 'logs', histogram_freq = 1)
callbacks_list = [tensorBoard, earlystop, checkpoint]

In [9]:
# Rebind `model` to the network stored in twse_cnn_model.hdf5. From this point
# on, `model` no longer refers to the model constructed and compiled earlier in
# the notebook, so the training call below operates on the loaded one.
# NOTE(review): the file must already exist next to the notebook; there is no
# fallback to the freshly built model when it is missing.
print('model loading...')
model = load_model("twse_cnn_model.hdf5")
print('loading completed')

model loading...


W0918 01:54:55.684997 4527457728 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


loading completed


In [None]:
# Train the model; `history` is kept so the metrics can be plotted afterwards.
history = model.fit(
    train_data,
    train_label,
    batch_size=50,
    epochs=30,
    validation_split=0.2,
    verbose=1,
    callbacks=callbacks_list,
)

W0918 01:54:56.970074 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/callbacks.py:1120: The name tf.summary.histogram is deprecated. Please use tf.compat.v1.summary.histogram instead.

W0918 01:54:57.021807 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/callbacks.py:1122: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

W0918 01:54:57.026633 4527457728 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/callbacks.py:1125: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.



Train on 2000 samples, validate on 500 samples
Epoch 1/30

Epoch 00001: saving model to model/01-2.00-1.63.hdf5
Epoch 2/30

In [None]:
import matplotlib.pyplot as plt
def show_train_history(train_history, train, validation):
    """Plot a training metric together with its validation counterpart.

    Args:
        train_history: Object exposing a ``history`` mapping from metric name
            to a sequence of values (e.g. what ``model.fit`` returns).
        train: Key of the training metric; also used as the y-axis label.
        validation: Key of the validation metric.
    """
    recorded = train_history.history
    # Draw the training curve first so the legend order below matches.
    for metric_key in (train, validation):
        plt.plot(recorded[metric_key])

    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [None]:
# Compare training and validation accuracy of the 'digit1' output head.
show_train_history(
    train_history=history,
    train='digit1_acc',
    validation='val_digit1_acc',
)