Step 1: Install Dependencies

In [32]:
# Install required libraries
!pip install opencv-python Pillow tensorflow keras pyspellchecker



Step 2: Import Python Libraries

In [53]:
#Import python libraries
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf
from tensorflow import keras
import string
import glob

from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dropout


Step 3: Upload and Extract IAM Dataset

In [34]:
# Upload IAM ZIP dataset (with .jpg images and gt_test.txt)
from google.colab import files
uploaded = files.upload()

# Extract the zip into a directory
import zipfile
zip_filename = list(uploaded.keys())[0]
extract_path = "/content/data/"
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall(extract_path)
print("✅ Dataset extracted successfully!")

Saving archive.zip to archive (3).zip
✅ Dataset extracted successfully!


Step 4: Parse gt_test.txt to Get Labels

In [35]:
# Load the ground-truth labels into a dictionary
label_dict = {}
with open('/content/data/IAM/gt_test.txt', 'r') as f:
    for line in f:
        if line.strip() == "":
            continue
        parts = line.strip().split('\t')
        if len(parts) == 2:
            image_id, label = parts
            label_dict[image_id] = label.lower()

Step 5: Match Labels to Image Paths and Remove Empty Labels

In [36]:
# Match images with labels
image_paths = glob.glob('/content/data/IAM/**/*.jpg', recursive=True)
valid_images = []
labels = []

for path in image_paths:
    filename = os.path.basename(path)
    if filename in label_dict:
        label = label_dict[filename].strip()
        if label:  # Remove empty labels
            valid_images.append(path)
            labels.append(label)

print(f"✅ Matched {len(valid_images)} images with non-empty labels.")

✅ Matched 2915 images with non-empty labels.


Step 6: Preprocess All Images

In [37]:
# Resize to (128x32), normalize to [0, 1], and expand dims for CNN input
def preprocess_image(image_path, img_size=(128, 32)):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, img_size)
    img = img / 255.0
    img = np.expand_dims(img, axis=-1)
    return img

# Apply preprocessing
images = np.array([preprocess_image(p) for p in valid_images])
print("✅ Preprocessed", len(images), "images. Shape:", images.shape)

✅ Preprocessed 2915 images. Shape: (2915, 32, 128, 1)


Step 7: Character Mapping for Labels

In [38]:
# Define allowed characters (a-z, 0-9, space)
all_characters = string.ascii_lowercase + string.digits + ' '

# Create dictionary mapping characters to integers
char_to_int = {ch: i for i, ch in enumerate(all_characters)}
int_to_char = {i: ch for i, ch in enumerate(all_characters)}

print("✅ Character mapping ready. Total characters:", len(all_characters))

✅ Character mapping ready. Total characters: 37


Step 8: Convert Text Labels to Integer Sequences

In [39]:
# Convert text to sequence of integers
def text_to_labels(text):
    return [char_to_int[char] for char in text if char in char_to_int]

label_sequences = [text_to_labels(label) for label in labels]

Step 9: Final Filter — Remove Too-Long or Empty Sequences

In [52]:
# CTC rule: max label length = (output steps - 1) // 2, so 64 -> 31
max_label_len = 31  # CTC-safe max length for 64 time steps

filtered_images = []
filtered_label_seqs = []

for img, seq in zip(images, label_sequences):
    if 0 < len(seq) <= max_label_len:
        filtered_images.append(img)
        filtered_label_seqs.append(seq)

images = np.array(filtered_images)
label_sequences = filtered_label_seqs
label_lengths = [len(seq) for seq in label_sequences]
label_length = np.array(label_lengths).reshape(-1, 1)
input_length = np.ones((len(images), 1)) * 64  # 🔁 updated to match model output

max_len = max(label_lengths)
label_padded = np.zeros((len(label_sequences), max_len))
for i, seq in enumerate(label_sequences):
    label_padded[i, :len(seq)] = seq

print("✅ Final filtered dataset size:", len(images))

✅ Final filtered dataset size: 357


Step 10: Build CNN + BiLSTM + CTC Model

In [59]:
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

def build_ctc_model(input_shape=(32, 128, 1), output_dim=len(all_characters) + 1):
    inputs = Input(name='image_input', shape=input_shape)

    # CNN feature extractor
    x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Reshape for time-distributed input to RNN
    x = Reshape(target_shape=(64, -1))(x)  # 🔁 time steps = 64

    # BiLSTM with Dropout

    # First BiLSTM layer
    x = Bidirectional(LSTM(128, return_sequences=True))(x)
    x = Dropout(0.3)(x)

    # Second BiLSTM layer
    x = Bidirectional(LSTM(64, return_sequences=True))(x)  # 👈 Added extra layer
    x = Dropout(0.3)(x)

    # Final output layer
    x = Dense(output_dim, activation='softmax')(x)

    # CTC loss inputs
    labels = Input(name='ground_truth_labels', shape=(None,))
    input_length = Input(name='input_length', shape=(1,))
    label_length = Input(name='label_length', shape=(1,))

    # Loss output
    loss_output = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [x, labels, input_length, label_length]
    )

    model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_output)
    prediction_model = Model(inputs=inputs, outputs=x)
    return model, prediction_model

# Build and compile
model, prediction_model = build_ctc_model()
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=Adam())
model.summary()

Step 11: Prepare Inputs for Training

In [42]:
# Inputs for training
inputs = {
    'image_input': images,
    'ground_truth_labels': label_padded,
    'input_length': input_length,
    'label_length': label_length
}

# Dummy output for CTC loss (loss is computed inside the Lambda layer)
outputs = np.zeros([len(images)])

Step 12: Train the Model

In [60]:
# Train the model
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='loss', patience=8, restore_best_weights=True)

history = model.fit(
    x=inputs,
    y=outputs,
    batch_size=16,
    epochs=120,  # 🔁 longer training
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/120
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 407ms/step - loss: 112.9604 - val_loss: 75.7815
Epoch 2/120
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 401ms/step - loss: 70.7162 - val_loss: 73.1082
Epoch 3/120
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 306ms/step - loss: 70.5764 - val_loss: 72.9001
Epoch 4/120
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 308ms/step - loss: 71.1568 - val_loss: 74.9151
Epoch 5/120
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 307ms/step - loss: 70.5291 - val_loss: 72.7970
Epoch 6/120
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 333ms/step - loss: 70.4915 - val_loss: 73.4802
Epoch 7/120
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 365ms/step - loss: 70.0080 - val_loss: 73.1211
Epoch 8/120
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 365ms/step - loss: 70.4062 - val_loss: 74.5768
Epoch 9/

Step 13: Beam Search Decoding

In [61]:
def decode_prediction_beam(pred):
    decoded, _ = K.ctc_decode(
        pred, input_length=np.ones(pred.shape[0]) * pred.shape[1],
        greedy=False, beam_width=10, top_paths=1
    )
    return K.get_value(decoded[0])

# Run prediction
y_preds = prediction_model.predict(images[:5])
decoded = decode_prediction_beam(y_preds)

# Convert to readable text
def labels_to_text(label_seq):
    return ''.join([int_to_char.get(i, '') for i in label_seq if i in int_to_char])

for i, seq in enumerate(decoded):
    print(f"Sample {i+1}: {labels_to_text(seq)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 719ms/step
Sample 1: the a i l 
Sample 2: er ti o i hit
Sample 3: thu wa an e t tein
Sample 4: hod a e he ha
Sample 5: fo a pe in s o 


Step 14: Post-Process Using NLP Spell Correction

In [62]:
from spellchecker import SpellChecker

# Initialize spell checker
spell = SpellChecker()

# Function to clean up decoded text
def post_process_text(text):
    words = text.split()
    corrected = [spell.correction(w) if w.isalpha() else w for w in words]
    return ' '.join(corrected)

In [63]:
# Decode prediction using beam search (as before)
def decode_prediction_beam(pred):
    decoded, _ = K.ctc_decode(
        pred, input_length=np.ones(pred.shape[0]) * pred.shape[1],
        greedy=False, beam_width=10, top_paths=1
    )
    return K.get_value(decoded[0])

# Predict
y_preds = prediction_model.predict(images[:5])
decoded = decode_prediction_beam(y_preds)

# Convert to text + apply spell correction
print("✅ Spell-Corrected Predictions:")
for i, seq in enumerate(decoded):
    raw = ''.join([int_to_char.get(x, '') for x in seq if x in int_to_char])
    corrected = post_process_text(raw)
    print(f"Sample {i+1}: {corrected}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
✅ Spell-Corrected Predictions:
Sample 1: the a i l
Sample 2: er ti o i hit
Sample 3: thu a an e t ten
Sample 4: hod a e he ha
Sample 5: to a pe in s o


Step 15: Saving the prediction model

In [64]:
prediction_model.save('/content/handwriting_prediction_model.h5')
print("✅ Prediction model saved successfully.")



✅ Prediction model saved successfully.


In [65]:
from google.colab import files
files.download('/content/handwriting_prediction_model.h5')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>