In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
# Import required libraries
import tensorflow as tf
import keras
from keras import layers, models
import cv2
import matplotlib.pyplot as plt
from keras.utils import Sequence
from keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Layer
from tensorflow.keras.backend import ctc_batch_cost

In [4]:
# Load and preprocess the dataset
train_df = pd.read_csv('/kaggle/input/handwriting-recognition/written_name_train_v2.csv')
val_df = pd.read_csv('/kaggle/input/handwriting-recognition/written_name_validation_v2.csv')
train_df.dropna(inplace=True)
train_df['IDENTITY'] = train_df['IDENTITY'].str.upper()

In [5]:
# Define unique character sets for label encoding
characters = sorted(set(''.join(train_df['IDENTITY'].values)))
char_to_label = {char: idx for idx, char in enumerate(characters)}
label_to_char = {idx: char for char, idx in char_to_label.items()}

In [9]:
# Define DataGenerator class for efficient data loading and augmentation
class DataGenerator(Sequence):
    def __init__(self, df, path, char_map, batch_size=64, img_size=(128, 32), max_text_len=29, shuffle=True):
        self.df = df
        self.path = path
        self.char_map = char_map
        self.batch_size = batch_size
        self.img_size = img_size
        self.max_text_len = max_text_len
        self.shuffle = shuffle
        self.indices = np.arange(len(df))
        self.on_epoch_end()

    def __len__(self):
        return len(self.df) // self.batch_size

    def __getitem__(self, idx):

        print(f"Processing batch {idx + 1} / {len(self)}")

        
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_images = np.ones((self.batch_size, *self.img_size, 1), dtype=np.float32)
        batch_labels = np.ones((self.batch_size, self.max_text_len), dtype=np.int64)

        for i, batch_idx in enumerate(batch_indices):
            img_path = os.path.join(self.path, self.df.iloc[batch_idx]['FILENAME'])
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (self.img_size[1], self.img_size[0]))
            img = img.astype("float32") / 255.0
            batch_images[i, :, :, 0] = img

            # Encode labels
            text = self.df.iloc[batch_idx]['IDENTITY']
            # label = [self.char_map[char] for char in text if char in self.char_map]
            if isinstance(text, str):  # Check if text is a string
                label = [self.char_map[char] for char in text if char in self.char_map]
            else:
                label = [] 

            if len(label) > self.max_text_len:
                label = label[:self.max_text_len]  # Truncate to max_text_len
            else:
                label = label + [0] * (self.max_text_len - len(label))
            
            batch_labels[i, :len(label)] = label

        return {'input_data': batch_images, 'input_label': batch_labels}, np.zeros((self.batch_size,))

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

In [10]:
# Create model architecture with CNN + BiLSTM + CTC
def create_model(input_shape, num_classes):
    input_img = layers.Input(shape=input_shape, name='input_data')
    labels = layers.Input(name='input_label', shape=(None,), dtype='float32')

    # CNN layers
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)

    # Reshape and BiLSTM layers
    new_shape = ((input_shape[0] // 4), (input_shape[1] // 4) * 64)
    x = layers.Reshape(target_shape=new_shape)(x)
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
    x = layers.Dense(num_classes + 1, activation='softmax')(x)  # num_classes + 1 for CTC blank

    # CTC loss layer
    output = CTCLayer(name='ctc_loss')(labels, x)

    model = models.Model(inputs=[input_img, labels], outputs=output)
    return model

In [11]:
# Custom CTC layer
class CTCLayer(layers.Layer):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.loss_fn = ctc_batch_cost

    def call(self, y_true, y_pred):
        batch_size = tf.shape(y_pred)[0]
        input_length = tf.ones((batch_size, 1), dtype=tf.float32) * tf.cast(tf.shape(y_pred)[1], tf.float32)
        label_length = tf.ones((batch_size, 1), dtype=tf.float32) * tf.cast(tf.shape(y_true)[1], tf.float32)
        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)
        
        # return self.loss_fn(y_true, y_pred, input_length, label_length)

        return loss

In [19]:
# Model initialization and compilation
input_shape = (128, 32, 1)
num_classes = len(characters)
model = create_model(input_shape, num_classes)
# model.compile(optimizer='adam', loss=None)

from tensorflow.keras.optimizers import Adam

# Lower the learning rate
optimizer = Adam(learning_rate=1e-4, clipvalue=1.0)

# Recompile your model with the adjusted optimizer
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

In [20]:
#works - does all
# train_gen = DataGenerator(
#     train_df, 
#     '/kaggle/input/handwriting-recognition/train_v2/train', 
#     char_to_label, 
#     img_size=(128, 32), 
#     batch_size=32
# )

# val_gen = DataGenerator(
#     val_df, 
#     '/kaggle/input/handwriting-recognition/validation_v2/validation', 
#     char_to_label, 
#     img_size=(128, 32), 
#     batch_size=32
# )

# es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# history = model.fit(
#     train_gen, 
#     validation_data=val_gen, 
#     epochs=1, 
#     callbacks=[es],
#     steps_per_epoch=len(train_df) // 32,
#     validation_steps=len(val_df) //32
# )

# Use only a subset of the training and validation datasets
train_subset = train_df.sample(frac=0.01, random_state=42)  # Use 10% of the training data
val_subset = val_df.sample(frac=0.01, random_state=42)      # Use 10% of the validation data

# Create new DataGenerators for the subsets
train_gen_subset = DataGenerator(
    train_subset, 
    '/kaggle/input/handwriting-recognition/train_v2/train', 
    char_to_label, 
    img_size=(128, 32), 
    batch_size=32
)

val_gen_subset = DataGenerator(
    val_subset, 
    '/kaggle/input/handwriting-recognition/validation_v2/validation', 
    char_to_label, 
    img_size=(128, 32), 
    batch_size=32
)

es = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model using the subset generators
history = model.fit(
    train_gen_subset, 
    validation_data=val_gen_subset, 
    epochs=1, 
    callbacks=[es],
    steps_per_epoch=len(train_subset) // 32,
    validation_steps=len(val_subset) // 32
)


Processing batch 1 / 103
Processing batch 2 / 103
Processing batch 14 / 103
Processing batch 40 / 103
Processing batch 98 / 103
Processing batch 23 / 103
Processing batch 72 / 103
Processing batch 41 / 103
Processing batch 13 / 103
Processing batch 39 / 103
Processing batch 75 / 103
[1m  1/103[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10:22[0m 6s/step - accuracy: 0.0000e+00 - loss: nanProcessing batch 78 / 103
[1m  2/103[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 142ms/step - accuracy: 0.2500 - loss: nan   Processing batch 6 / 103
[1m  3/103[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 146ms/step - accuracy: 0.3889 - loss: nanProcessing batch 69 / 103
[1m  4/103[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 143ms/step - accuracy: 0.4792 - loss: nanProcessing batch 32 / 103
[1m  5/103[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 144ms/step - accuracy: 0.5433 - loss: nanProcessing batch 1 / 103
[1m  6/103[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m14s[0m 145ms/step - accurac

In [10]:
from tensorflow.keras.backend import ctc_decode

def decode_predictions(preds, char_map):
    # Decode predictions using CTC
    input_length = np.ones(preds.shape[0]) * preds.shape[1]  # All sequences assumed to have full input length
    results = ctc_decode(preds, input_length=input_length, greedy=True)[0][0].numpy()
    
    # Convert numeric results to text
    output_texts = []
    for res in results:
        output_text = ''.join([char_map[idx] for idx in res if idx != -1])  # Ignore blank tokens (-1)
        output_texts.append(output_text)
    
    return output_texts


In [11]:
#Option 1 

# def decode_predictions(preds, char_map):
#     results = keras.backend.ctc_decode(preds, input_length=np.ones(preds.shape[0])*preds.shape[1])[0][0]
#     output_texts = []
#     for res in results:
#         output_text = ''.join([char_map[idx] for idx in res if idx in char_map])
#         output_texts.append(output_text)
#     return output_texts

In [12]:
#Option 2 - more robust than 1 

# def decode_predictions(preds, char_map):

#     # Decode the predictions using CTC
#     results = keras.backend.ctc_decode(preds, input_length=np.ones(preds.shape[0]) * preds.shape[1])[0][0]
    
#     output_texts = []
#     for res in results:
#         # Convert each predicted index sequence to text using the char_map
#         output_text = ''.join([char_map[idx] for idx in res if idx != -1])  # Ignore blank token (-1)
#         output_texts.append(output_text)
    
#     return output_texts


In [13]:
# Create an inference model (uses only input_data)
inference_model = models.Model(inputs=model.input[0], outputs=model.layers[-2].output)


In [None]:
from sklearn.metrics import accuracy_score


# Define a function to convert true labels to text
def true_labels_to_text(labels, char_map):
    texts = []
    for label_seq in labels:
        text = ''.join([char_map[idx] for idx in label_seq if idx != 0])  # Ignore padding (0)
        texts.append(text)
    return texts

# Initialize lists to store predictions and true labels
all_true_texts = []
all_pred_texts = []

# Iterate through the validation data generator
for batch_data, _ in val_gen_subset:
    # Get the input data and true labels
    input_data = batch_data['input_data']
    input_labels = batch_data['input_label']
    
    # Make predictions with the inference model
    preds = inference_model.predict(input_data)  # Only `input_data` is used for inference
    
    # Decode predictions
    pred_texts = decode_predictions(preds, label_to_char)
    
    # Convert true labels to text
    true_texts = true_labels_to_text(input_labels, label_to_char)
    
    # Store true and predicted labels for accuracy calculation
    all_true_texts.extend(true_texts)
    all_pred_texts.extend(pred_texts)

# Define a custom accuracy metric
def sequence_accuracy(true_texts, pred_texts):
    correct = sum(1 for t, p in zip(true_texts, pred_texts) if t == p)
    return correct / len(true_texts)

# Calculate and display accuracy
accuracy = sequence_accuracy(all_true_texts, all_pred_texts)
print(f"Model Accuracy: {accuracy * 100:.2f}%")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5

In [None]:


# # Initialize lists to store predictions and true labels
# all_true_labels = []
# all_pred_labels = []

# # Iterate through the validation data generator
# for batch_data, _ in val_gen:
#     # Get the input data and true labels
#     input_data = batch_data['input_data']
#     input_labels = batch_data['input_label']
    
#     # Create a dummy tensor for the second input (labels) since it's not needed for inference
#     # We pass a tensor of zeros because the model expects two inputs.
#     dummy_labels = np.zeros_like(input_labels)
    
#     # Make predictions with the model
#     preds = model.predict([input_data, dummy_labels])  # Pass both inputs as a list
    
#     # Decode predictions and true labels
#     pred_texts = decode_predictions(preds, label_to_char)
#     true_texts = decode_predictions(input_labels, label_to_char)
    
#     # Store true and predicted labels for accuracy calculation
#     all_true_labels.extend(true_texts)
#     all_pred_labels.extend(pred_texts)

# # Now calculate the accuracy
# accuracy = accuracy_score(all_true_labels, all_pred_labels)
# print(f"Model Accuracy: {accuracy * 100:.2f}%")


In [None]:
# # Sample Prediction
# sample_batch = next(iter(val_gen))

# # Unpack the tuple
# batch_data, _ = sample_batch  # batch_data is the dict, _ is the dummy labels

# # Access input data
# input_data = batch_data['input_data']
# input_labels = batch_data['input_label']  # Optional: For comparing predictions

# # Make predictions with the model
# preds = model.predict(input_data)

# # Decode the predictions to text
# pred_texts = decode_predictions(preds, label_to_char)

# # (Optional) Decode the true labels for comparison
# true_texts = decode_predictions(input_labels, label_to_char)

# # Print or visualize the results
# for i in range(len(pred_texts)):
#     print(f"Predicted: {pred_texts[i]} | True: {true_texts[i]}")