A simple implementation of RNN for text prediction

Code from Geekforgeeks: https://www.geeksforgeeks.org/machine-learning/introduction-to-recurrent-neural-network/

Python packages to import
np
tensorflow

In [25]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

In [None]:
# Choose between the two options for text data

# You can use this sample text for training, you can add more to it
text = "A bomb threat is any communication—whether by phone, email, " \
"letter, or other means—that warns of an explosive device intended to cause " \
"harm or disrupt normal activities. Regardless of whether the threat is " \
"genuine or a hoax, such messages are always taken seriously due to the " \
"potential risk to life and property. Bomb threats may originate for " \
"several reasons: the person may want to genuinely warn about a danger, " \
"create panic and anxiety to disrupt business or school operations, or " \
"seek to undermine confidence in leadership. When someone receives a " \
"bomb threat, authorities recommend noting as many details as possible " \
"(such as the caller’s exact words, time, and background noises) and " \
"following established protocols, which often include evacuating the " \
"area and avoiding use of electronic devices that might trigger explosive " \
"materials. Handling a bomb threat with seriousness and caution not only minimizes risk but also ensures appropriate investigation and response by emergency personnel."

# It is better to load a larger text corpus for better results
# Take note, the bigger the content of the text, the longer the training time
with open("reign_of_greed.txt", "r", encoding='utf-8') as file:
    text += file.read()

# Data Preprocessing
# Creating character to index and index to character mappings
chars = sorted(list(set(text)))
char_to_index = {char: i for i, char in enumerate(chars)}
index_to_char = {i: char for i, char in enumerate(chars)}

In [27]:
# Getting the sequences and labels
# or Time Window
seq_length = 3

# get the sequence from the input
sequences = []

# label for the sequence
# next character following the sequence
labels = []

for i in range(len(text) - seq_length):
    seq = text[i:i + seq_length]
    label = text[i + seq_length]
    sequences.append([char_to_index[char] for char in seq])
    labels.append(char_to_index[label])

X = np.array(sequences)
y = np.array(labels)

In [28]:
X_one_hot = tf.one_hot(X, len(chars))
y_one_hot = tf.one_hot(y, len(chars))

In [29]:
# Building the RNN Model
model = Sequential()
model.add(SimpleRNN(50, input_shape=(seq_length, len(chars)), activation='relu'))
model.add(Dense(len(chars), activation='softmax'))

  super().__init__(**kwargs)


In [30]:
# Configuring and Training the Model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Setting up the epochs and training the model
model.fit(X_one_hot, y_one_hot, epochs=100)

Epoch 1/100
[1m21563/21563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 1ms/step - accuracy: 0.3735 - loss: 2.2023
Epoch 2/100
[1m21563/21563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 2ms/step - accuracy: 0.4170 - loss: 2.0068
Epoch 3/100
[1m 3050/21563[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m28s[0m 2ms/step - accuracy: 0.4249 - loss: 1.9675

KeyboardInterrupt: 

In [None]:
# Script for testing the model
start_seq = "A bomb"
generated_text = start_seq

for i in range(50):
    x = np.array([[char_to_index[char] for char in generated_text[-seq_length:]]])
    x_one_hot = tf.one_hot(x, len(chars))
    prediction = model.predict(x_one_hot)
    next_index = np.argmax(prediction)
    next_char = index_to_char[next_index]
    generated_text += next_char

print("Generated Text:")
print(generated_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2