In [11]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

In [12]:
def load_iam_dataset(data_path, img_height, img_width):
    images = []
    labels = []
    
    with open(os.path.join(data_path, 'words.txt')) as f:
        for line in f:
            if line.startswith('#'):
                continue
            parts = line.strip().split(' ')
            part1 = parts[0].split('-')
            file_path = os.path.join(data_path, part1[0])
            file_path = os.path.join(file_path, part1[0]+'-'+part1[1])
            file_path = os.path.join(file_path, parts[0] + '.png')
            if not os.path.exists(file_path):
                continue
            # print(file_path)
            img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            # print(img_width,img_height)
            try:
                img = cv2.resize(img, (img_width, img_height))
                images.append(img)
                labels.append(parts[-1])
            except Exception as e:
                print(e)

    images = np.array(images).reshape(-1, img_height, img_width, 1).astype('float32') / 255
    return images, labels

In [13]:
iam_data_path = "words"

# Set your desired image height and width
img_height = 64
img_width = 128

# Load and preprocess the dataset
images, labels = load_iam_dataset(iam_data_path, img_height, img_width)

OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\resize.cpp:4052: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'



In [14]:
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
num_classes = len(label_encoder.classes_)

In [15]:
x_train, x_test, y_train, y_test = train_test_split(images, encoded_labels, test_size=0.2, random_state=42)

In [57]:
from tensorflow.keras.layers import Dense, Softmax
from tensorflow.keras.models import Sequential
def create_model(input_shape, num_classes):
    input_data = layers.Input(shape=input_shape, name='input')
    
    # CNN layers
    cnn = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(input_data)
    cnn = layers.MaxPooling2D((2, 2))(cnn)
    cnn = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(cnn)
    cnn = layers.MaxPooling2D((2, 2))(cnn)
    
    # Prepare output for RNN
    shape = cnn.get_shape().as_list()
    rnn_input = layers.Reshape(target_shape=(shape[1], shape[2] * shape[3]))(cnn)
    
    # RNN layers
    rnn = layers.Bidirectional(layers.GRU(128, return_sequences=True, dropout=0.25))(rnn_input)
    rnn = layers.Bidirectional(layers.GRU(128, return_sequences=True, dropout=0.25))(rnn)
    
    # Output layer
    output = layers.Dense(num_classes + 1, activation='softmax', name='output')(rnn)

    model = models.Model(inputs=input_data, outputs=output)
    return model


input_shape = (img_height, img_width, 1)
model = create_model(input_shape, num_classes)
num_classes = np.max(y_train) + 1
model.add(Dense(num_classes, activation='softmax'))

AttributeError: 'Functional' object has no attribute 'add'

In [38]:
def ctc_loss(y_true, y_pred):
    # Calculate label and logit lengths
    print("y_true shape:", y_true.shape)
    print("y_pred shape:", y_pred.shape)

    label_length = tf.reduce_sum(tf.cast(tf.math.not_equal(y_true, -1), tf.int32), axis=-1)
    logit_length = tf.fill([tf.shape(y_pred)[0]], tf.shape(y_pred)[1])

    return tf.nn.ctc_loss(
        labels=y_true,
        logits=y_pred,
        label_length=label_length,
        logit_length=logit_length,
        logits_time_major=False,
        blank_index=-1
    )

def train_step(x_batch, y_batch):
    with tf.GradientTape() as tape:
        logits = model(x_batch, training=True)
        loss_value = ctc_loss(y_batch, logits)

    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    return loss_value


In [56]:
epochs = 10
batch_size = 32
optimizer = Adam()

# Assuming y_train is a NumPy array of shape (num_samples,)
# max_label_length = np.max([len(sequence) for sequence in y_train])
y_train_padded = np.full((y_train.shape[0], len(y_train)), -1)

for i, sequence in enumerate(y_train):
    print(i,sequence)
    y_train_padded[i, :len(sequence)] = sequence


# Prepare the dataset for training
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train_padded)).batch(batch_size)


# Train the model
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    for step, (x_batch, y_batch) in enumerate(train_dataset):
        print(x_batch.shape)
        print(y_batch.shape)
        loss_value = train_step(x_batch, y_batch)
        print(f"Step {step + 1}: loss = {loss_value.numpy()}")

0 2901


TypeError: object of type 'numpy.int64' has no len()

In [None]:
model.save("my_handwriting_model.h5")

In [61]:
from tensorflow.keras import Model
iam_model_pred = Model()
iam_model_pred.load_weights(filepath='/my_handwriting_model.h5')

ValueError: Unable to load weights saved in HDF5 format into a subclassed Model which has not created its variables yet. Call the Model first, then load the weights.

In [None]:

correct_predictions = np.sum(y_test == predicted_words)
total_predictions = len(y_test)
accuracy = correct_predictions / total_predictions

print(f"Accuracy: {accuracy * 100:.2f}%")

In [None]:
num_samples = 10

for i in range(num_samples):
    plt.imshow(x_test[i].reshape(img_height, img_width), cmap="gray")
    plt.title(f"True: {y_test[i]}\nPredicted: {predicted_words[i]}")
    plt.axis("off")
    plt.show()