In [13]:
# Simple RNN for Text Classification and Next Word Generation

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np


In [14]:
# Dataset for Classification
texts = [
    "Photosynthesis helps plants make food",     # Science
    "Pythagoras theorem applies to triangles",   # Math
    "The Renaissance was a historic period",     # History
    "Nouns and verbs are parts of speech"        # English
]
labels = [1, 0, 2, 3]  # 0=Math,1=Science,2=History,3=English

In [15]:
# Tokenizer and preprocessing
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
max_len = max(len(seq) for seq in sequences)
X = pad_sequences(sequences, maxlen=max_len, padding='post')
y = tf.keras.utils.to_categorical(labels, num_classes=4)

In [16]:
# Build Classification Model
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=8, input_length=max_len),
    SimpleRNN(8),
    Dense(4, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [17]:
# Train model
model.fit(X, y, epochs=30, verbose=0)

<keras.src.callbacks.history.History at 0x7edb4c401010>

In [18]:
# Evaluate
loss, accuracy = model.evaluate(X, y, verbose=0)
print(f"Classification Accuracy: {accuracy:.2f}")

Classification Accuracy: 1.00


In [19]:
# Next Word Generation Data (simple corpus)
corpus = "Photosynthesis is how plants make food. The Pythagorean theorem is useful in geometry."

In [20]:
# Prepare sequences
tokenizer.fit_on_texts([corpus])
total_words = len(tokenizer.word_index) + 1

input_sequences = []
for line in corpus.split('.'):
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i+1])

max_seq_len = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre')

X_gen = input_sequences[:, :-1]
y_gen = input_sequences[:, -1]
y_gen = tf.keras.utils.to_categorical(y_gen, num_classes=total_words)


In [21]:
# Build Generation Model
gen_model = Sequential([
    Embedding(total_words, 10, input_length=max_seq_len-1),
    SimpleRNN(20),
    Dense(total_words, activation='softmax')
])
gen_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train Generation Model
gen_model.fit(X_gen, y_gen, epochs=100, verbose=0)

<keras.src.callbacks.history.History at 0x7edb4c366550>

In [22]:
# Function to generate words
def generate_text(seed_text, next_words=10):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')
        predicted = np.argmax(gen_model.predict(token_list, verbose=0), axis=1)[0]
        output_word = ''
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text

In [23]:
# Generate sample text
print("\nGenerated Text:")
print(generate_text("Photosynthesis is"))


Generated Text:
Photosynthesis is is is make food was plants plants in geometry pythagorean
