In [None]:
# Set A, Question 1
# Create a simple RNN model for generating text from text data, and predict text using the pre-trained model. (Use the ReLU and Softmax activation functions.) (Use the shakespeare.txt file.)
# --- Step 1: Imports ---
import os
import requests
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Activation

# --- Step 2: Download dataset if not available ---
# The corpus is cached next to the script so later runs work offline.
file_path = "shakespeare.txt"
if not os.path.exists(file_path):
    url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
    # A timeout keeps the run from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors; otherwise an error page would be written to
    # disk and silently reused as the "dataset" on every later run.
    response.raise_for_status()
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(response.text)
    print("✅ Dataset downloaded!")
else:
    print("📂 Dataset already exists.")

# --- Step 3: Load text data ---
# Read the whole corpus into memory as a single string.
with open(file_path, encoding="utf-8") as f:
    text = f.read()

print(f"Dataset length: {len(text)} characters")

# --- Step 4: Tokenization ---
# The vocabulary is the sorted set of distinct characters in the corpus; a
# character's id is its position in that sorted list.
vocab = sorted(set(text))
vocab_size = len(vocab)
print("Vocabulary size:", vocab_size)

# Forward (char -> id) and reverse (id -> char) lookup tables.
char_to_idx = dict(zip(vocab, range(vocab_size)))
idx_to_char = np.array(vocab)

# Encode the full corpus as an array of character ids.
text_as_int = np.array(list(map(char_to_idx.__getitem__, text)))

# --- Step 5: Create training sequences ---
seq_length = 40  # number of input characters per training example
examples_per_epoch = text_as_int.size // (seq_length + 1)

# Stream the encoded corpus one character id at a time, then cut that stream
# into windows of seq_length + 1 ids; a trailing partial window is dropped.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

def split_input_target(chunk):
    """Split one window into an (input, target) pair shifted by one position.

    The input is every element of ``chunk`` except the last, and the target is
    every element except the first, so ``target[i]`` is the element that
    follows ``input[i]`` in the original window.
    """
    return chunk[:-1], chunk[1:]

# Shuffle and batch
BATCH_SIZE = 64
BUFFER_SIZE = 10000

# Pipeline: window -> (input, target) pair -> shuffle -> fixed-size batches.
# drop_remainder=True discards a final batch smaller than BATCH_SIZE.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

# --- Step 6: Build model ---
# The exercise statement at the top of this file asks for ReLU and Softmax:
# ReLU is used as the recurrent layer's activation, and Softmax turns the
# per-character scores of the final Dense layer into probabilities.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=64),
    # return_sequences=True emits one output per timestep, which is required
    # because the targets contain one next-character label per input position.
    SimpleRNN(128, activation='relu', return_sequences=True),
    Dense(vocab_size),
    Activation('softmax')
])

# Build explicitly so that model.summary() can report layer shapes.
model.build(input_shape=(None, seq_length))

# Compile
# ReLU activations are unbounded, so the recurrence can blow up where tanh
# would saturate; clipping the gradient norm is a cheap safeguard against
# that. Adam keeps its default learning rate, same as optimizer='adam'.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(clipnorm=1.0),
    metrics=['accuracy'],
)

model.summary()


# --- Step 7: Train ---
EPOCHS = 5
history = model.fit(dataset, epochs=EPOCHS)


import numpy as np

# --- Step 8: Text Generation Function ---
def generate_text(model, seed_text, char_to_int, idx_to_char, length=200):
    """Generate text by repeatedly sampling the model's next-character prediction.

    Args:
        model: Object exposing ``predict(batch, verbose=0)``. For a batch of
            shape (1, timesteps) it must return an array of shape
            (1, timesteps, vocab) with next-character probabilities for
            every input position.
        seed_text: Starting string. It is also the sliding context window, so
            its length fixes how many characters the model sees per step.
        char_to_int: Mapping from character to integer id. Characters missing
            from the mapping are encoded as id 0.
        idx_to_char: Sequence (or mapping) giving the character for each id.
        length: Number of characters to generate.

    Returns:
        ``seed_text`` followed by the generated characters, as a plain ``str``.

    Raises:
        ValueError: If ``seed_text`` is empty while ``length`` is positive;
            the model needs at least one character of context.
    """
    if length > 0 and not seed_text:
        raise ValueError("seed_text must contain at least one character")

    window = seed_text
    # Collect the pieces and join once instead of growing a string with +=.
    pieces = [seed_text]
    for _ in range(length):
        # Encode the current window as a batch containing a single sequence.
        encoded = np.array([[char_to_int.get(c, 0) for c in window]])

        # Predictions cover every timestep; only the last one describes the
        # character that follows the window.
        preds = np.asarray(model.predict(encoded, verbose=0))

        # Softmax output is typically float32 and may not sum to exactly 1;
        # widen to float64 and renormalize so np.random.choice accepts it.
        probs = preds[0, -1].astype(np.float64)
        probs = probs / probs.sum()

        # Sample from the distribution rather than taking argmax, which tends
        # to get stuck repeating the same characters.
        next_index = np.random.choice(len(probs), p=probs)
        # str() turns a numpy string scalar into a plain Python str.
        next_char = str(idx_to_char[next_index])

        pieces.append(next_char)
        # Slide the window: drop the oldest character, append the new one.
        window = window[1:] + next_char
    return "".join(pieces)


# --- Step 9: Try it out ---
# Relies on 'text', 'seq_length', 'char_to_idx', 'idx_to_char' and the trained
# 'model' from the earlier steps, so run those first.
seed = text[:seq_length]  # the opening seq_length characters of the corpus
print("Seed:\n", seed)
print("\nGenerated Text:\n")
print(
    generate_text(
        model=model,
        seed_text=seed,
        char_to_int=char_to_idx,
        idx_to_char=idx_to_char,
        length=300,
    )
)



✅ Dataset downloaded!
Dataset length: 1115394 characters
Vocabulary size: 65


Epoch 1/5
425/425 ━━━━━━━━━━━━━━━━━━━━ 16s 32ms/step - accuracy: 0.2517 - loss: 2.8595
Epoch 2/5
425/425 ━━━━━━━━━━━━━━━━━━━━ 15s 32ms/step - accuracy: 0.4015 - loss: 2.0820
Epoch 3/5
425/425 ━━━━━━━━━━━━━━━━━━━━ 20s 32ms/step - accuracy: 0.4370 - loss: 1.9314
Epoch 4/5
425/425 ━━━━━━━━━━━━━━━━━━━━ 15s 33ms/step - accuracy: 0.4604 - loss: 1.8470
Epoch 5/5
425/425 ━━━━━━━━━━━━━━━━━━━━ 21s 34ms/step - accuracy: 0.4746 - loss: 1.7912
Seed:
 First Citizen:
Before we proceed any fur

Generated Text:

First Citizen:
Before we proceed any furrevatuse himy centies be exice,
She's wild like hast cound! I shook knoe
jequagertter
As to to shall, whop kree
Marrapam to more our thene than what ammonter besforied, to the my grothing of my charstion and wish the cool.
Why, I canse?

SABIO:
Ye
would Lefort not,
For is lor! Which ip do