In [66]:
# Common
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import clear_output as cls

# Data
from glob import glob
from tqdm import tqdm
import tensorflow.data as tfd
import cv2
# Data Visualization
import matplotlib.pyplot as plt

# Model
from tensorflow import keras
from tensorflow.keras import callbacks
from tensorflow.keras import layers

import warnings
# Ignore all warnings
warnings.filterwarnings('ignore')


from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Dense, Bidirectional, LSTM, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import ctc_batch_cost
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow.keras.backend as K

In [67]:
train_path = "/kaggle/input/handwriting-recognitionocr/CSV/written_name_train.csv"
test_path = "/kaggle/input/handwriting-recognitionocr/CSV/written_name_test.csv"
val_path = "/kaggle/input/handwriting-recognitionocr/CSV/written_name_validation.csv"

In [68]:
train_imgs[0].shape

(256, 32, 1)

In [4]:
train = pd.read_csv(train_path)
train.head()

Unnamed: 0,FILENAME,IDENTITY
0,TRAIN_00001.jpg,BALTHAZAR
1,TRAIN_00002.jpg,SIMON
2,TRAIN_00003.jpg,BENES
3,TRAIN_00004.jpg,LA LOVE
4,TRAIN_00005.jpg,DAPHNE


In [69]:
train_imgs_path = '/kaggle/input/handwriting-recognitionocr/train_v2/train/'
test_imgs_path = '/kaggle/input/handwriting-recognitionocr/test_v2/test/'
val_imgs_path = '/kaggle/input/handwriting-recognitionocr/validation_v2/validation/'

## Image Preprocessing 

In [70]:
def preprocess_image(image_path, target_size=(32, 256)):
    
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, target_size)
    img = img.astype('float32') / 255.0
    img = np.expand_dims(img, axis=-1)
    
    return img

In [71]:
def read_data(csv_path, images_path, number_of_samples):
    labels = []
    imgs = []
    
    csv = pd.read_csv(csv_path)[:number_of_samples]
    
    for index, row in tqdm(csv.iterrows(), total=csv.shape[0], desc="Processing Images"):
        labels.append(row["IDENTITY"])
        img_path = images_path + row["FILENAME"]
        img = preprocess_image(img_path)
        imgs.append(img)
    return imgs, labels

In [72]:
train_size = 16000
val_size = 8000
test_size = 1600

In [74]:
print("Train \n")
train_imgs, train_labels = read_data(train_path, train_imgs_path, train_size)
print(f"Number of images: {len(train_imgs)}\nNumber of labels {len(train_labels)}")

print("\n Test\n")
test_imgs, test_labels = read_data(test_path, test_imgs_path, test_size)
print(f"Number of images: {len(test_imgs)}\nNumber of labels {len(test_labels)}")

print("\n Validation\n")
val_imgs, val_labels = read_data(val_path, val_imgs_path, val_size)
print(f"Number of images: {len(val_imgs)}\nNumber of labels {len(train_labels)}")

train 



Processing Images: 100%|██████████| 16000/16000 [00:33<00:00, 484.78it/s]


Number of images: 16000
Number of labels 16000
Test



Processing Images: 100%|██████████| 1600/1600 [00:03<00:00, 461.29it/s]


Number of images: 1600
Number of labels 1600
Validation



Processing Images: 100%|██████████| 8000/8000 [00:18<00:00, 439.87it/s]

Number of images: 8000
Number of labels 16000





In [56]:
train_labels = [str(label) for label in train_labels]
test_labels = [str(label) for label in test_labels]
val_labels = [str(label) for label in val_labels]

In [58]:
unique_chars = set(char for word in train_labels for char in word)
n_classes = len(unique_chars)

print(f"Total number of unique characters : {n_classes}")
print(f"Unique Characters : \n{unique_chars}")

Total number of unique characters : 41
Unique Characters : 
{'u', 'K', 'G', 'r', 'F', 'H', 'y', 'Y', "'", 'V', 'c', 'E', 'X', 'J', 'L', 'Q', 'C', 'I', 'M', 'U', 's', ' ', 'n', 'p', 'a', 'l', 't', 'N', 'O', 'D', 'e', 'P', 'A', 'S', 'T', '-', 'B', 'W', 'o', 'R', 'Z'}


In [59]:
## conver char to numerical and vice versa 
char_to_num = {char: idx for idx, char in enumerate(unique_chars)}
num_to_char = {idx: char for char, idx in char_to_num.items()}

In [60]:
def encode_labels(labels):
    encoded = [np.array([char_to_num[char] for char in label]) for label in labels]
    return encoded

## model building 

In [61]:
def ocr(input_shape, output_dim):
    input_img = Input(shape=input_shape, name='image_input')
    
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    new_shape = ((input_shape[0] // 8), (input_shape[1] // 8) * 128)
    x = Reshape(target_shape=new_shape)(x)
    
    x = Bidirectional(LSTM(256, return_sequences=True))(x)
    x = Bidirectional(LSTM(256, return_sequences=True))(x)
    
    output = Dense(output_dim, activation='softmax', name='dense_output')(x)
    
    labels = Input(shape=(None,), name='labels', dtype='float32')
    input_length = Input(shape=(1,), name='input_length', dtype='int64')
    label_length = Input(shape=(1,), name='label_length', dtype='int64')
    
    def ctc_loss_lambda(inputs):
        y_true, y_pred, input_length, label_length = inputs
        return K.ctc_batch_cost(y_true, y_pred, input_length, label_length)
    
    loss_out = Lambda(ctc_loss_lambda, output_shape=(1,))([labels, output, input_length, label_length])
    
    model = Model(inputs=[input_img, labels, input_length, label_length], outputs=loss_out)
    model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)
    
    return model

In [64]:
train_input_length = np.full((len(train_imgs), 1), train_imgs.shape[1] // 8, dtype=int)
val_input_length = np.full((len(val_imgs), 1), val_imgs.shape[1] // 8, dtype=int)

train_label_length = np.array([len(label) for label in train_labels_encoded], dtype=int).reshape(-1, 1)
val_label_length = np.array([len(label) for label in val_labels_encoded], dtype=int).reshape(-1, 1)

In [46]:
input_shape = train_imgs[0].shape
output_dim = n_classes + 1
model = ocr(input_shape, output_dim)
model.summary()

In [36]:
train_labels_encoded = encode_labels(train_labels)
test_labels_encoded = encode_labels(test_labels)
val_labels_encoded = encode_labels(val_labels)

In [47]:
print(f"Number of training images: {len(train_imgs)}")
print(f"Number of training labels: {len(train_labels_encoded)}")

Number of training images: 16000
Number of training labels: 16000


In [48]:
train_imgs = np.array(train_imgs)
val_imgs = np.array(val_imgs)

print(f"Shape of training images: {train_imgs.shape}")
print(f"Shape of validation images: {val_imgs.shape}")
print(f"Shape of a single image: {train_imgs[0].shape}")

Shape of training images: (16000, 256, 32, 1)
Shape of validation images: (8000, 256, 32, 1)
Shape of a single image: (256, 32, 1)


In [49]:
max_label_length = max(len(label) for label in train_labels_encoded)
max_label_length

24

In [51]:
train_labels_encoded_padded = pad_sequences(train_labels_encoded, maxlen=max_label_length, padding='post')
val_labels_encoded_padded = pad_sequences(val_labels_encoded, maxlen=max_label_length, padding='post')

In [52]:
print(f"Shape of padded training labels: {train_labels_encoded_padded.shape}")
print(f"First few padded training labels: {train_labels_encoded_padded[:5]}")
print(f"Shape of padded validation labels: {val_labels_encoded_padded.shape}")
print(f"First few padded validation labels: {val_labels_encoded_padded[:5]}")

Shape of padded training labels: (16000, 24)
First few padded training labels: [[36 32 14 34  5 32 40 32 39  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [33 17 18 28 27  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [36 11 27 11 33  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [14 32 21 14 28  9 11  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [29 32 31  5 27 11  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]
Shape of padded validation labels: (8000, 24)
First few padded validation labels: [[36 17 14 11 14  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [14 32 19 18 17 28 27 17 11 39  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [14 11 32  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [13 11 32 27 35 39 28 16  5  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [39 19 31 31  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]


In [65]:
history = model.fit(
    x={'image_input': train_imgs, 
       'labels': train_labels_encoded_padded, 
       'input_length': train_input_length, 
       'label_length': train_label_length},
    y=np.zeros(len(train_imgs)),
    epochs=20,
    batch_size=64,
    validation_data=(
        {'image_input': val_imgs, 
         'labels': val_labels_encoded_padded, 
         'input_length': val_input_length, 
         'label_length': val_label_length},
        np.zeros(len(val_imgs))
    )
)

Epoch 1/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 77ms/step - loss: 25.0831 - val_loss: 20.2184
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 71ms/step - loss: 19.7667 - val_loss: 19.0614
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 71ms/step - loss: 18.8101 - val_loss: 18.7302
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 71ms/step - loss: 18.5414 - val_loss: 18.5621
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 71ms/step - loss: 18.4231 - val_loss: 18.3501
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 71ms/step - loss: 18.2545 - val_loss: 18.2712
Epoch 7/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 71ms/step - loss: 17.9243 - val_loss: 18.0756
Epoch 8/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 71ms/step - loss: 17.8792 - val_loss: 18.0217
Epoch 9/