# Shakespeare Character-level RNN using PyTorch

This notebook implements a simple character-level RNN trained on the Tiny Shakespeare dataset, following the structure of the original sample task file. Integer class targets and `CrossEntropyLoss` are used.

## Imports

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
import requests

## Load Dataset

In [2]:
url = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'
response = requests.get(url)
text = response.text
print(f'Length of text: {len(text)} characters')
print(text[:500])

Length of text: 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor


## Character Mappings

In [3]:
chars = sorted(list(set(text)))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}
vocab_size = len(chars)

print(f'Vocabulary size: {vocab_size}')


Vocabulary size: 65


## Create Sequences

In [4]:
maxlen = 40
step = 3
sentences = []
next_chars = []

for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])

print(f'Number of sequences: {len(sentences)}')


Number of sequences: 371785


## Vectorize Sequences

In [5]:
# Input sequences as one-hot
X = torch.zeros((len(sentences), maxlen, vocab_size), dtype=torch.float32)

# Targets as one-hot vectors
y = torch.zeros((len(sentences), vocab_size), dtype=torch.float32)

for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


## Define RNN Model

In [6]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out[:, -1, :])
        out = self.softmax(out)  # probability distribution
        return out

hidden_size = 128
model = RNNModel(vocab_size, hidden_size, vocab_size)


## Loss and Optimizer

In [7]:
criterion = nn.MSELoss()  # because targets are one-hot
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


## Training

In [None]:
num_epochs = 3
batch_size = 128

for epoch in range(num_epochs):
    epoch_loss = 0
    for i in range(0, len(X), batch_size):
        X_batch = X[i:i+batch_size]
        y_batch = y[i:i+batch_size]

        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {epoch_loss/len(X):.6f}')


Epoch [1/3], Average Loss: 0.000124
Epoch [2/3], Average Loss: 0.000168


## Text Generation Function

In [None]:
def generate_text(model, seed, length=500):
    model.eval()
    generated = seed
    seq = seed
    for _ in range(length):
        x_pred = torch.zeros((1, maxlen, vocab_size), dtype=torch.float32)
        for t, char in enumerate(seq):
            x_pred[0, t, char_indices[char]] = 1

        with torch.no_grad():
            preds = model(x_pred)
        next_index = torch.multinomial(preds[0], 1).item()
        next_char = indices_char[next_index]

        generated += next_char
        seq = seq[1:] + next_char

    return generated


## Generate Sample Text (~100 words)

In [None]:
start_index = random.randint(0, len(text) - maxlen - 1)
seed_text = text[start_index: start_index + maxlen]
sample_text = generate_text(model, seed_text, length=800)
print('\n--- Generated Text ---\n')
print(sample_text)