# Task 5: Handwritten Text Generation

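Implement a character-level recurrent neural network (RNN) to generate handwritten-like text. Train the model on a dataset of handwritten text examples, and let it generate new text based on the learned patterns.

**Note:** the implementation below tokenizes the corpus into words (the Keras `Tokenizer` default) and trains an LSTM to predict the next word, so generation is word-level rather than character-level. The handwritten look comes from the per-character glyph images pasted together in the final section.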

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from PIL import Image

# Data Preprocessing

In [3]:
# Load the whole training corpus into memory as a single string.
corpus_path = "C:/Users/mahi/OneDrive/Documents/generation.txt"
with open(corpus_path, mode='r', encoding='utf-8') as corpus_file:
    text = corpus_file.read()

In [6]:
# Build the word -> integer-id vocabulary from the full corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
# Tokenizer ids start at 1 (0 is reserved, and is the value pad_sequences
# fills with by default), so the number of output classes is one more than
# the number of distinct words.
total_words = 1 + len(tokenizer.word_index)

In [8]:
# For every line of the corpus, emit each prefix of its token ids that is at
# least two tokens long: [t0, t1], [t0, t1, t2], ... up to the full line.
# Lines that encode to fewer than two tokens contribute nothing.
encoded_lines = (tokenizer.texts_to_sequences([raw_line])[0] for raw_line in text.split('\n'))
input_sequences = [
    encoded[:prefix_len]
    for encoded in encoded_lines
    for prefix_len in range(2, len(encoded) + 1)
]

In [10]:
# Left-pad every n-gram to the length of the longest one so that they stack
# into a single rectangular array.
max_sequence_len = max(map(len, input_sequences))
input_sequences = np.array(
    pad_sequences(input_sequences, padding="pre", maxlen=max_sequence_len)
)

In [12]:
# Split each padded sequence into its context (every token but the last) and
# the token to predict (the last one), then one-hot encode the target over
# the vocabulary.
X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=total_words)

# Model Creation

In [14]:
# Next-word predictor: embed the token ids, summarise the context with an
# LSTM, then output a probability for every vocabulary index.
model = Sequential()
# Declare the input width explicitly. Embedding's `input_length` argument is
# deprecated in Keras 3, and an Input layer also builds the model so that
# summary() can report output shapes and parameter counts.
model.add(tf.keras.Input(shape=(max_sequence_len - 1,)))
model.add(Embedding(input_dim=total_words, output_dim=100))
model.add(LSTM(150))
model.add(Dense(total_words, activation='softmax'))
# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the cell output.
model.summary()



None


In [16]:
# Configure training: Adam optimiser, categorical cross-entropy loss (the
# targets are one-hot encoded), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.0000e+00 - loss: 2.4891
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.2000 - loss: 2.4757
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - accuracy: 0.4000 - loss: 2.4622
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.5000 - loss: 2.4481
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.5000 - loss: 2.4325
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - accuracy: 0.6000 - loss: 2.4145
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.6000 - loss: 2.3932
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.5000 - loss: 2.3674
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x1e83983ba70>

# Model Training

In [6]:
# Train on the prepared context / next-token pairs for 100 epochs, showing
# the per-epoch progress bar.
model.fit(
    x=X,
    y=y,
    epochs=100,
    verbose=1,
)

NameError: name 'model' is not defined

# Text Generation

In [18]:
# Generate text by repeatedly predicting the most likely next word and
# appending it to the seed.
seed_text = "He chuckled to"
next_words = 3

for _ in range(next_words):
    # Encode the text so far and left-pad / truncate it to the model's input width.
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
    # predict() returns one probability row per input; reduce the single row
    # to a plain int so it can be used directly as a dictionary key.
    probabilities = model.predict(token_list, verbose=0)
    predicted = int(np.argmax(probabilities, axis=-1)[0])
    # index_word is the tokenizer's id -> word mapping; this replaces a
    # linear scan over word_index on every step.
    output_word = tokenizer.index_word.get(predicted, "")
    if not output_word:
        # No word maps to this index (e.g. the padding index 0). The input
        # would not change on the next step, so stop instead of appending
        # bare spaces.
        break
    seed_text += " " + output_word

print(seed_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
He chuckled to himself and rubbed


In [20]:
# Save the generated text. The encoding is set to UTF-8 to match how the
# corpus was read; the platform default encoding may be unable to represent
# every character and would raise UnicodeEncodeError.
with open("output.txt", 'w', encoding='utf-8') as file:
    file.write(seed_text)

# Image Creation

In [22]:
# Render text onto the background sheet by pasting one pre-drawn glyph image
# per character, left to right, wrapping to a new row at the right edge.
# Glyph files are looked up by the character's code point (e.g. "97.png").
# NOTE(review): this renders the corpus file (generation.txt), not the
# generated text saved to output.txt -- confirm which one is intended.
BG = Image.open("C:/Users/mahi/Pictures/index/write.png")
sheet_width = BG.width
gap, ht = 0, 0  # x / y position where the next glyph will be pasted
line_height = 0  # height of the tallest glyph pasted on the current row

with open("C:/Users/mahi/OneDrive/Documents/generation.txt", 'r', encoding='utf-8') as file:
    lines = file.readlines()

for line in lines:
    # Use a dedicated name here: rebinding `text` would overwrite the corpus
    # string that the preprocessing cells read.
    line_text = line.strip()
    for char in line_text:
        char_image_path = "C:/Users/mahi/Pictures/index/{}.png".format(ord(char))
        try:
            char_image = Image.open(char_image_path)
        except FileNotFoundError:
            # Best effort: report the missing glyph and carry on with the rest.
            print(f"Character image for '{char}' not found at path: {char_image_path}")
            continue
        # The context manager closes the glyph file once it has been pasted.
        with char_image:
            # Wrap *before* pasting, using this glyph's own width, so that a
            # glyph never overflows the right edge of the sheet.
            if gap > 0 and gap + char_image.width > sheet_width:
                gap = 0
                ht += line_height
                line_height = 0
            BG.paste(char_image, (gap, ht))
            gap += char_image.width
            line_height = max(line_height, char_image.height)

BG.show()