## Next-word generator with simple RNN

#### Making arrangements

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

#### Pre-process function and Shakespeare's text

In [2]:
# Lightweight text corpus
def load_lightweight_text():
    text = """
    hello world this is a test
    machine learning is fun
    deep learning with tensorflow
    natural language processing with python
    python is a versatile language
    deep learning is deeper than machine learning
    """
    return text.lower()

text = load_lightweight_text()

#### Tokenize the text

In [3]:
vocab_size = 100  # Limit the vocabulary size
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts([text])
total_words = len(tokenizer.word_index) + 1

In [4]:
total_words

21

#### Create input sequences

In [5]:
input_sequences = []
for line in text.split("\n"):
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i + 1]
        input_sequences.append(n_gram_sequence)

#### Pad sequences

In [6]:
max_seq_length = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_seq_length, padding='pre')

#### Split into predictors and label

In [7]:
predictors, labels = input_sequences[:, :-1], input_sequences[:, -1]
labels = to_categorical(labels, num_classes=total_words)

#### Build the RNN model

In [8]:
model = Sequential([
    Embedding(total_words, 128, input_length=max_seq_length - 1),
    SimpleRNN(128, return_sequences=False),
    Dense(total_words, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



#### Train the model

In [9]:
epochs = 35
model.fit(predictors, labels, epochs=epochs, batch_size=16, verbose=1)

Epoch 1/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.0742 - loss: 3.0195
Epoch 2/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.2167 - loss: 2.8819  
Epoch 3/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.2700 - loss: 2.7402  
Epoch 4/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.2225 - loss: 2.6077
Epoch 5/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.2167 - loss: 2.4643  
Epoch 6/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.1958 - loss: 2.3880
Epoch 7/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.1750 - loss: 2.3446  
Epoch 8/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.4125 - loss: 2.1644  
Epoch 9/35
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

<keras.src.callbacks.history.History at 0x170a0aab490>

#### Function to generate text

In [10]:
def generate_text(seed_text, next_words, model, tokenizer, max_seq_length):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_seq_length - 1, padding='pre')
        predicted = np.argmax(model.predict(token_list), axis=-1)

        for word, index in tokenizer.word_index.items():
            if index == predicted:
                seed_text += " " + word
                break

    return seed_text

#### Test the model

In [11]:
seed_text = "deep learning is deeper than"
generated_text = generate_text(seed_text, 10, model, tokenizer, max_seq_length)
print("Generated Text:", generated_text)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 171ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Generated Text: deep learning is deeper than machine learning is fun than machine learning is fun than
