In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import requests
import os
import math

# ----------------------------
# Device (MPS)
# ----------------------------
# Use the "mps" backend when this PyTorch build reports it as available;
# otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print("Using device:", device)

# ----------------------------
# Download Tiny Shakespeare
# ----------------------------
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
if not os.path.exists("shakespeare.txt"):
    print("Downloading dataset...")
    # A timeout keeps the cell from hanging forever on a stalled connection.
    r = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx so an HTTP error page is never cached as the corpus.
    r.raise_for_status()
    # Explicit encoding: the file must round-trip identically on every platform.
    with open("shakespeare.txt", "w", encoding="utf-8") as f:
        f.write(r.text)

# Read the cached copy through a context manager so the handle is closed promptly.
with open("shakespeare.txt", encoding="utf-8") as f:
    text = f.read()
print("Dataset length:", len(text))


Using device: mps
Dataset length: 1115394


In [7]:

# ----------------------------
# Build Vocabulary
# ----------------------------
# Every distinct character of the corpus, as a sorted list.
chars = sorted(set(text))
vocab_size = len(chars)
print("Vocab size:", vocab_size)

# Lookup tables: integer id -> character, and the inverse character -> id.
itos = dict(enumerate(chars))
stoi = {ch: idx for idx, ch in itos.items()}

# The whole corpus encoded as a 1-D int64 tensor of character ids.
data = torch.tensor([stoi[ch] for ch in text], dtype=torch.long)


Vocab size: 65


In [9]:
# Display the sorted vocabulary list so the character set can be inspected.
chars

['\n',
 ' ',
 '!',
 '$',
 '&',
 "'",
 ',',
 '-',
 '.',
 '3',
 ':',
 ';',
 '?',
 'A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T',
 'U',
 'V',
 'W',
 'X',
 'Y',
 'Z',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [10]:

# ----------------------------
# Hyperparameters
# ----------------------------
# Number of sequences sampled per batch by get_batch().
batch_size = 128
# Number of characters in each training sequence.
seq_len = 128
# Passed to CharRNN, where it sets both the embedding width and the GRU hidden size.
hidden_dim = 512
# Number of stacked GRU layers.
num_layers = 2
# Learning rate handed to the AdamW optimizer.
lr = 3e-4
# Number of optimisation steps the training loop runs.
max_iters = 5000
# NOTE(review): not referenced by any visible cell — the training loop reports
# every 100 steps via a literal. Confirm whether this was meant to drive that cadence.
eval_interval = 500

# ----------------------------
# Batch Loader (Truncated BPTT)
# ----------------------------
def get_batch():
    """Sample a random batch of input/target windows from the encoded corpus.

    Draws ``batch_size`` start offsets uniformly at random and, for each one,
    takes ``seq_len`` consecutive ids from ``data`` as the input and the same
    window shifted one position to the right as the target.

    Returns:
        A tuple ``(x, y)`` of ``(batch_size, seq_len)`` tensors moved to
        ``device``; ``y[b, t]`` is the id that follows ``x[b, t]`` in ``data``.
    """
    starts = torch.randint(len(data) - seq_len - 1, (batch_size,))
    # Row b of `positions` lists the corpus indices starts[b] .. starts[b] + seq_len - 1.
    positions = starts.unsqueeze(1) + torch.arange(seq_len)
    x = data[positions]
    y = data[positions + 1]
    return x.to(device), y.to(device)

# ----------------------------
# Model
# ----------------------------
class CharRNN(nn.Module):
    """Character-level language model: embedding -> multi-layer GRU -> linear head.

    Args:
        vocab_size: Number of distinct token ids; also the size of the output logits.
        hidden_dim: Width of the embedding vectors and of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, vocab_size, hidden_dim, num_layers):
        super().__init__()
        # Submodules are created in this order and under these names on purpose:
        # both determine the state-dict keys and the order of random initialisation.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=hidden_dim,
        )
        self.rnn = nn.GRU(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x, hidden=None):
        """Run the model over a batch of token-id sequences.

        Args:
            x: Integer token ids laid out batch-first, i.e. ``(batch, time)``.
            hidden: Optional GRU state to continue from; ``None`` lets the GRU
                start from its default initial state.

        Returns:
            ``(logits, hidden)`` where ``logits`` has one ``vocab_size``-wide score
            vector per input position and ``hidden`` is the GRU state after the
            last position.
        """
        embedded = self.embedding(x)
        features, hidden = self.rnn(embedded, hidden)
        return self.fc(features), hidden

# Build the network from the hyperparameters above and move it to the selected device.
model = CharRNN(
    vocab_size=vocab_size,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)
model = model.to(device)

# Loss for next-character prediction, and AdamW over every model parameter.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=lr)


In [11]:

# ----------------------------
# Text Generation
# ----------------------------
@torch.no_grad()
def generate(model, start="ROMEO:", length=300, temperature=1.0):
    """Sample text from ``model`` one character at a time, seeded with ``start``.

    The prompt is fed through the model once to prime the recurrent state; after
    that each sampled character is fed back in together with the carried state.

    Args:
        model: Callable as ``model(ids, hidden) -> (logits, hidden)``.
        start: Non-empty prompt; every character must be a key of ``stoi``.
        length: Number of characters to sample after the prompt.
        temperature: Positive divisor applied to the logits before the softmax.
            ``1.0`` (the default) samples from the model's own distribution.

    Returns:
        ``start`` followed by the ``length`` sampled characters.

    Raises:
        ValueError: If ``start`` is empty or ``temperature`` is not positive.
        KeyError: If ``start`` contains a character that is not in ``stoi``.
    """
    if not start:
        # An empty prompt would yield a zero-length sequence with no last
        # position to sample from.
        raise ValueError("start must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Remember the caller's mode so sampling from an eval-mode model does not
    # silently flip it back to training mode.
    was_training = model.training
    model.eval()
    try:
        context = torch.tensor(
            [[stoi[c] for c in start]], dtype=torch.long, device=device
        )
        hidden = None
        pieces = [start]

        for _ in range(length):
            logits, hidden = model(context, hidden)
            # Only the scores at the final position predict the next character.
            probs = torch.softmax(logits[:, -1, :] / temperature, dim=-1)
            next_char = torch.multinomial(probs, num_samples=1)

            pieces.append(itos[next_char.item()])
            # The hidden state already summarises the history, so only the
            # newly sampled id is fed in on the next iteration.
            context = next_char
    finally:
        # Restore the original mode even if sampling raised part-way through.
        model.train(was_training)

    return "".join(pieces)


In [12]:

# ----------------------------
# Training Loop
# ----------------------------
print("Starting training...")

for step in range(max_iters):
    # Fresh random mini-batch: x holds the input ids, y the same windows shifted by one.
    x, y = get_batch()

    # Drop gradients from the previous step before computing new ones.
    optimizer.zero_grad()

    # Forward pass; the returned hidden state is discarded, so every batch
    # starts from the GRU's default initial state.
    logits, _ = model(x)

    # The criterion is fed one row of scores per character, so the batch and
    # time dimensions are merged for both the logits and the targets.
    loss = criterion(logits.flatten(0, 1), y.flatten())

    loss.backward()
    optimizer.step()

    # Report only on every 100th step (step 0 included).
    if step % 100 != 0:
        continue
    print(f"Step {step}, Loss: {loss.item():.4f}")
    print(generate(model))
    print("=" * 50)


Starting training...
Step 0, Loss: 4.1854
ROMEO:O$ghHRaoOT-gxJDgBPB fgTMwLfQ Hv!Imorols$SDM.':H??OKZznVqh-VVHPgudrwazIWCLU-,&oVQAQSDPDkJuq
Y!GvLPfmFlPMMqR
MXk?C?u.oWn
Qav,Q?RXc.A; ntfKY?NFIFFimzPnP!xp,AaQzdJHfhgnazSpkQ!&gvs;RJV3dTlXmsEN?lJds
g
utM,H!fkMPpZ$Z-j&H?Rd-zYhpmky?;
ZMtO'Ozd.RdXZZJj'n,luggInd'sry.sUcL?waIB rmAhle$tjtJ;TkWrbUNG G h!v?OFKv
Step 100, Loss: 2.1121
ROMEO:
And yo.

CEOIHM ham yoy cum.

MENGE:
Nark's sut hentyefrerepot as mus;
And and?

KLEONO!.

TIdLA, diul, Gtrumkn the thas, lond titen,
Favey st it ind now nod dut ove, blabnels beath whalt ; was, led you famady; dire Heve will lon? now tore theet; then thake, he nit nRemT:
ho mull bead, heres-
DRiTh
Step 200, Loss: 1.8169
ROMEO:
Trents un, I have the sexply dal
To drence, shall be adrays his by the common of like ome moken not his with and inienquends, you hasje:
Whoud Mardues me them brot!
Noat sir.

GOOFCERSTOR:
Stay, a Handrest Cleus!

SRUSE VICULET:
Oun to from how
And grant coman, we it on netteng purse-gntoin