# Baseline Model for Handwritten recognition

## Imports

In [1]:
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from jiwer import wer
import difflib
import os
import cv2
from PIL import Image
import numpy as np
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
import pandas as pd
import json
from sklearn.model_selection import train_test_split

2023-06-25 20:27:01.161437: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-25 20:27:01.788426: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-06-25 20:27:01.792740: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Loading Data




In [2]:
folder_path = 'resize'  # Replace 'folder_name' with the actual folder name

images = []
for file_name in os.listdir(folder_path):
  image_path = os.path.join(folder_path, file_name)
  image = np.array(Image.open(image_path))
  images.append(image)

images = np.array(images)
images.shape

(13352, 64, 512)

In [3]:
labels_head = pd.read_csv('labels.csv', sep='\t', header=None)
labels = labels_head.iloc[:,1].to_numpy()
labels.shape

(13352,)

In [4]:
train_images, test_images, train_labels, test_labels = train_test_split(images, labels, test_size=0.2, random_state=1)

In [5]:
def label_preprocessing(labels_sentence, vocabulary):
    '''
    Takes in a single label as a string (sentence matching the content of the image) and
    preprocesses it so that the label can interpreted by the model
    Param: label_sentence: the label a string
    Returns: The preprocessed label as a sequence of indexes
    '''
    max_sequence_length = max(len(sentence) for sentence in labels_sentence )

    labels_indexes = [[vocabulary[char] for char in sentence] for sentence in labels_sentence]
    preprocesed_label = pad_sequences(labels_indexes, maxlen=max_sequence_length, padding='post')

    return preprocesed_label, max_sequence_length


def get_vocabulary(train_labels):
    """
    creates a vocabulary for the translation of the prediction
    """
    vocabulary = sorted(set(''.join(train_labels)))
    vocabulary_dict = {char: index for index, char in enumerate(vocabulary)}
    return vocabulary_dict

In [15]:
vocabulary = get_vocabulary(train_labels)
train_labels_preprocessed, max_sequence_length = label_preprocessing(train_labels, vocabulary)

## Metric Functions

In [7]:
def word_error_rate(y_true, y_pred):
    """
    returns calculation of Word Error Rate for a prediction.
    """
    wer_value = wer(y_true, y_pred)
    return wer_value

def character_error_rate(y_true, y_pred):
    """
    returns calculation of Character Error Rate for a prediction.
    """
    matcher = difflib.SequenceMatcher(None, y_true, y_pred)
    cer_value = 1 - matcher.ratio()
    return cer_value

## Baseline Model implementation

In [8]:
#loading configuration file
with open("config.json") as f:
    config = json.load(f)

In [9]:
# Hyperparameters:
input_shape = (train_images.shape[1],
               train_images.shape[2],
               1)

optimizer = Adam(learning_rate=config["LR"])
loss = SparseCategoricalCrossentropy()

In [10]:
# Define the CNN-GRU model
model = Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(64,activation='relu', kernel_initializer='he_normal'))
model.add(layers.RepeatVector(max_sequence_length))
model.add(layers.GRU(128, return_sequences=True, kernel_initializer='he_normal', name='gru1'))
model.add(layers.GRU(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='gru1_b'))
model.add(layers.Dense(80,activation='softmax', kernel_initializer='he_normal',name='dense2'))

# Compile the model
model.compile(optimizer=optimizer, loss=loss, metrics=config["METRIC"])

2023-06-25 20:27:37.412245: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-25 20:27:37.416102: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-25 20:27:37.418776: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

In [11]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 62, 510, 32)       320       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 31, 255, 32)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 29, 253, 64)       18496     
                                                                 
 batch_normalization (BatchN  (None, 29, 253, 64)      256       
 ormalization)                                                   
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 14, 126, 64)      0         
 2D)                                                             
                                                        

In [12]:
# Train the model
model.fit(train_images, train_labels_preprocessed,
          validation_split = config["VAL_SPLIT"],
          epochs=config["EPOCHS"],
          batch_size=config["BATCH_SIZE"])

2023-06-25 20:27:38.213324: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 279969792 exceeds 10% of free system memory.


Epoch 1/5


2023-06-25 20:27:39.566695: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-25 20:27:39.574911: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-25 20:27:39.578853: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus



2023-06-25 20:42:24.833845: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2023-06-25 20:42:24.838962: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2023-06-25 20:42:24.843355: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fac03300d90>

## Prediction

In [16]:
preds = model.predict(test_images)
index_to_char = {v: k for k, v in vocabulary.items()}
cer_sum = 0
wer_sum = 0

for i, pred in enumerate(preds):
    indices = np.argmax(pred, axis=-1)

    characters = ''.join([index_to_char[idx] for idx in indices])
    cer_sum += character_error_rate(test_labels[i], characters)
    wer_sum += word_error_rate(test_labels[i], characters)

print('CER mean: ', cer_sum / len(test_labels))
print('WER mean: ', wer_sum / len(test_labels))

CER mean:  0.8966858046624254
WER mean:  0.9569918690505834
