In [10]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Step 1: Load a Simple Text Dataset
data = [
    "Hello world",
    "This is a simple RNN example",
    "We are learning about sequences",
    "RNNs are great for sequence prediction",
    "Let's build a simple model"
]

# Convert to DataFrame for easier manipulation (optional)
df = pd.DataFrame(data, columns=["text"])

# Step 2: Tokenize the Text Data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])
sequences = tokenizer.texts_to_sequences(df['text'])

# Create input/output pairs
max_sequence_length = 5  # Number of words in each input sequence
input_sequences = []
output_words = []

for seq in sequences:
    for i in range(1, len(seq)):
        input_seq = seq[:i]  # All words up to the i-th word
        output_word = seq[i]  # The i-th word as output
        
        # Pad input sequences to ensure uniform length
        input_sequences.append(pad_sequences([input_seq], maxlen=max_sequence_length)[0])
        output_words.append(output_word)

# Convert to numpy arrays for training
X = np.array(input_sequences)
y = np.array(output_words)

# Step 3: Define the RNN Model
vocab_size = len(tokenizer.word_index) + 1  # Plus one for padding
embedding_dim = 8  # You can adjust this value

model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_sequence_length))
model.add(SimpleRNN(units=16))  # You can adjust the number of units
model.add(Dense(vocab_size, activation='softmax'))

# Step 4: Compile the Model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 5: Train the Model
model.fit(X, y, epochs=100, verbose=1)

# Summary of the model architecture (optional)
model.summary()




Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.0000e+00 - loss: 3.1222
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 3.1156
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0000e+00 - loss: 3.1091
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0526 - loss: 3.1029
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0526 - loss: 3.0968
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.0526 - loss: 3.0909
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.0526 - loss: 3.0852
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.0526 - loss: 3.0795
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━

In [11]:
import numpy as np

def generate_text(model, tokenizer, seed_text, next_words, max_sequence_length):
    """
    Generate text using a trained RNN model.

    Args:
    model: Trained RNN model.
    tokenizer: Keras Tokenizer used for text preprocessing.
    seed_text: Initial text to start generating from.
    next_words: Number of words to generate.
    max_sequence_length: Maximum length of input sequences.

    Returns:
    Generated text.
    """
    generated_text = seed_text
    for _ in range(next_words):
        # Tokenize the input sequence
        token_list = tokenizer.texts_to_sequences([generated_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_length - 1, padding='pre')

        # Predict the next word
        predicted = model.predict(token_list, verbose=0)
        predicted_word_index = np.argmax(predicted, axis=-1)[0]

        # Convert index back to word
        output_word = tokenizer.index_word[predicted_word_index]
        
        # Append the predicted word to the generated text
        generated_text += " " + output_word

    return generated_text

# Example usage:
seed_text = "This is"
next_words = 5
generated = generate_text(model, tokenizer, seed_text, next_words, max_sequence_length)
print(generated)


This is model simple rnn example sequence
