In [5]:
# Change the working directory to the project's src folder (the path suggests a
# Google Drive mount inside Colab - adjust it for other environments), then list
# the folder contents.
%cd "/content/drive/My Drive/Colab Notebooks/handwritten-text-recognition/src"
!ls -l

/content/drive/My Drive/Colab Notebooks/handwritten-text-recognition/src
total 75127
drwx------ 2 root root     4096 Jun 26 19:58 data
-rw------- 1 root root       92 Jun 26 19:10 evaluate.txt
-rw------- 1 root root 38461048 Jun 25 21:38 htr_weights1.h5
-rw------- 1 root root 38460952 Jun 26 09:49 htr_weights3.h5
-rw------- 1 root root     2194 Jun 26 19:34 main.py


In [6]:
import os
import datetime
import string

# Experiment settings: dataset identifier and number of samples per batch.
source, batch_size = "iam", 64

# Accepted characters: the first 95 entries of string.printable.
charset_base = string.printable[:95]

# Image geometry expected by the network and the cap on characters per text line.
max_text_length = 128
input_size = (1024, 128, 1)

# The dataset file is looked up as ../data/<source>.hdf5.
source_path = os.path.join("..", "data", f"{source}.hdf5")

print("source:", source_path)
print("charset:", charset_base)

source: ../data/iam.hdf5
charset: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 


In [7]:
from data.generator import DataGenerator
# Build the batch generator from the settings defined in the configuration cell.
dtgen = DataGenerator(
    source=source_path,
    batch_size=batch_size,
    charset=charset_base,
    max_text_length=max_text_length,
)

# Report how many images each partition holds.
print("\n".join([
    f"Train images: {dtgen.size['train']}",
    f"Validation images: {dtgen.size['valid']}",
    f"Test images: {dtgen.size['test']}",
]))

Train images: 5369
Validation images: 744
Test images: 1425


In [8]:
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, Bidirectional, LSTM, Dense
from tensorflow.keras.layers import Dropout, BatchNormalization, ReLU, LeakyReLU
from tensorflow.keras.layers import Input, Add, Activation, Lambda, MaxPooling2D, Reshape

# Network input. The shape is taken from the `input_size` setting so the model
# cannot drift away from the configured geometry through a second literal.
input_data = Input(name="input", shape=input_size)

# Convolutional feature extractor, described as data instead of copy-pasted code.
# Each entry: (filters, dropout before the convolution, 2x2 max-pooling after it).
# Layers are created in exactly this order, so layer ordering (and therefore
# weight files saved from the unrolled version) stays compatible.
conv_blocks = [
    (16, False, True),
    (32, False, True),
    (48, True, True),
    (64, True, False),
    (80, True, False),
]

cnn = input_data
for n_filters, use_dropout, use_pooling in conv_blocks:
    if use_dropout:
        cnn = Dropout(rate=0.2)(cnn)
    cnn = Conv2D(filters=n_filters, kernel_size=(3, 3), strides=(1, 1), padding="same")(cnn)
    cnn = BatchNormalization()(cnn)
    cnn = LeakyReLU(alpha=0.01)(cnn)
    if use_pooling:
        cnn = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(cnn)

# Merge the last two axes of the feature map so that axis 1 becomes the sequence
# axis consumed by the recurrent layers.
shape = cnn.get_shape()
blstm = Reshape((shape[1], shape[2] * shape[3]))(cnn)

# Five stacked bidirectional LSTM layers, each returning the full sequence.
for blstm_index in range(5):
    blstm = Bidirectional(LSTM(units=256, return_sequences=True, dropout=0.5))(blstm)

blstm = Dropout(rate=0.5)(blstm)

# Per-step class probabilities.
# NOTE(review): 98 presumably equals the tokenizer vocabulary plus the CTC blank;
# confirm against the tokenizer before changing the charset.
output_data = Dense(units=98, activation="softmax")(blstm)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Create the model (it is compiled in a later cell) and show its layers.
model = Model(inputs=input_data, outputs=output_data)
model.summary()


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 1024, 128, 1)]    0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 1024, 128, 16)     160       
_________________________________________________________________
batch_normalization_5 (Batch (None, 1024, 128, 16)     64        
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 1024, 128, 16)     0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 512, 64, 16)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 512, 64, 32)       4640      
_________________________________________________________________
batch_normalization_6 (Batch (None, 512, 64, 32)       128   

In [9]:
from tensorflow.keras.callbacks import ModelCheckpoint

def ctc_loss_lambda_func(y_true, y_pred):
    """Compute the batch-averaged CTC loss.

    Args:
        y_true: label indices per sample, padded with 0. Because the label
            length is counted as the number of non-zero entries, index 0 is
            treated as padding. Axes after the batch axis are flattened when
            the tensor has more than two dimensions.
        y_pred: softmax output of the model, indexed here as
            (batch, steps, classes).

    Returns:
        Scalar tensor holding the mean of the per-sample CTC costs.
    """

    if len(y_true.shape) > 2:
        # Flatten everything behind the batch axis. A bare tf.squeeze would also
        # remove the batch axis whenever a batch contains a single sample.
        y_true = tf.reshape(y_true, [tf.shape(y_true)[0], -1])

    # Every sample uses all output steps. Read that count from the tensor shape:
    # summing the softmax probabilities yields a float that can fall just below
    # the integer step count and lose a step once the backend converts the
    # lengths to integers.
    pred_shape = tf.shape(y_pred)
    input_length = tf.fill(tf.stack([pred_shape[0], 1]), pred_shape[1])

    # Labels are padded with 0, so the non-zero count is the label length.
    label_length = tf.math.count_nonzero(y_true, axis=-1, keepdims=True, dtype="int64")

    loss = K.ctc_batch_cost(y_true, y_pred, input_length, label_length)

    # Average the per-sample costs over the batch.
    return tf.reduce_mean(loss)

# Optional: uncomment to load previously saved weights before training.
# model.load_weights("htr_weights1.h5")

# Checkpoint callback: writes weights only, and only when val_loss reaches a new minimum.
filepath = "htr_weights2.h5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
callback_list = [checkpoint]

# Attach the optimizer and the CTC loss to the model.
model.compile(optimizer=optimizer, loss=ctc_loss_lambda_func)

In [None]:
# Train from the batch generators. Model.fit accepts generators directly;
# Model.fit_generator is deprecated and missing from newer Keras releases.
# `shuffle` is passed through unchanged (Keras ignores it for plain Python generators).
model_history = model.fit(dtgen.next_train_batch(),
                          epochs=80,
                          steps_per_epoch=dtgen.steps['train'],
                          validation_data=dtgen.next_valid_batch(),
                          validation_steps=dtgen.steps['valid'],
                          shuffle=True,
                          callbacks=callback_list,
                          verbose=1)

In [None]:
import matplotlib.pyplot as plt

print("model created")
# Show which series were recorded during training.
print(model_history.history.keys())

# Training and validation loss per epoch on a single set of axes.
fig, ax = plt.subplots()
ax.plot(model_history.history['loss'], label='train')
ax.plot(model_history.history['val_loss'], label='validation')
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Testing

In [11]:
import numpy as np

# Load the trained weights used for evaluation.
model.load_weights("htr_weights3.h5")

steps = dtgen.steps['test']

print("Model Predict")
# No batch_size argument: the generator already yields complete batches, and
# Keras documents that batch_size must not be given for generator input.
out = model.predict(dtgen.next_test_batch(),
                    steps=steps,
                    verbose=1)

ctc_decode = True  # not read in this cell; kept so the name stays defined
print("CTC Decode")
progbar = tf.keras.utils.Progbar(target=steps)

# Decode in `steps` chunks. A dedicated name is used so the global `batch_size`
# setting from the configuration cell is not overwritten.
decode_batch_size = int(np.ceil(len(out) / steps))
# All predictions share the same number of output steps (axis 1 of `out`).
input_length = out.shape[1]

predicts, probabilities = [], []

for steps_done in range(1, steps + 1):
    index = (steps_done - 1) * decode_batch_size
    until = index + decode_batch_size

    x_test = np.asarray(out[index:until])

    # Rounding up the chunk size can leave the final chunk(s) empty; skip those.
    if len(x_test) > 0:
        x_test_len = np.full(len(x_test), input_length)

        decode, log = K.ctc_decode(x_test,
                                   x_test_len,
                                   greedy=False,
                                   beam_width=10,
                                   top_paths=1)

        # `log` holds log-probabilities; store plain probabilities per sample.
        probabilities.extend(np.exp(np.asarray(log)))

        # One entry per returned path; each row is a sample's label sequence
        # with the -1 padding removed. Converting each tensor to an array once
        # avoids iterating tensors element by element.
        paths = [[[int(p) for p in row if p != -1] for row in np.asarray(path)]
                 for path in decode]

        # Regroup to one entry per sample ([path_0, ...]). zip copes with the
        # ragged sequence lengths that cannot form a regular NumPy array.
        predicts.extend(list(sample_paths) for sample_paths in zip(*paths))

    progbar.update(steps_done)

Model Predict
CTC Decode


In [12]:
# Convert the first decoded path of every sample back into text.
predict = [dtgen.tokenizer.decode(x[0]) for x in predicts]

# Print each ground-truth line followed by its prediction and a blank line.
# The two sequences are paired directly; the image array is not iterated just
# to obtain an index.
for ground_truth, predicted in zip(dtgen.dataset['test']['gt'], predict):
    print(ground_truth)
    print(predicted, "\n")

quite unable to explain why he should feel
quite unable to explain why he should feel 

meet the Deanes , and as soon as Guy had
met the Seanes , and as scon as buy had 

the horses and drank enough to cure our
the hasses and cramk grough to cure owr 

you not killed ? ' ' Because we know all things , ' the
yu not killed . " ' Because we know all things , ' the 

with the possibility of faulty design . " He held
with the possibility of faulty design . " He held 

all due deference , Miss Deane - come off it !
all due deference , Miss beane - come off it ! 

It would have been acceptable to all concerned
It would have been acceptable to all concerned 

to make you understand just what happened
to make you understard just that happened 

course of action should be . First , to avoid the
course of action should be . First , to avoid the 

( Stamp Department ) while Sally sulked at home .
I stamp Repartment ) while sally sulked at home . 

Bill is good man , and Bueno Buck is raised on lak

In [17]:
from data import evaluation

# Score the decoded text against the reference transcriptions.
evaluate = evaluation.ocr_metrics(
    predicts=predict,
    ground_truth=dtgen.dataset['test']['gt'],
)

# Three-line report; both rates are shown as percentages with three decimals.
report_lines = [
    f"Total test images:    {dtgen.size['test']}",
    f"Character Error Rate: {evaluate[0]*100:.3f}",
    f"Word Error Rate:      {evaluate[1]*100:.3f}"
]
e_corpus = "\n".join(report_lines)

# Save the report in the working directory and echo it to the cell output.
with open("evaluate.txt", "w") as report_file:
    report_file.write(e_corpus)
    print(e_corpus)

Total test images:    1425
Character Error Rate: 7.044
Word Error Rate:      21.892
