# RNN / LSTM / GRU

## setup

In [166]:
import requests
import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from tqdm.notebook import tqdm

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if t.cuda.is_available() else 'cpu'

## utils

In [171]:
# Download "Alice's Adventures in Wonderland" (Project Gutenberg ebook #11).
url = 'https://www.gutenberg.org/cache/epub/11/pg11.txt'
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently training on an error page.
response.raise_for_status()
book = response.content
# Drop every non-ASCII byte so the character vocabulary only contains ASCII.
book = book.decode('ascii', 'ignore')
vocab = set(book)
d_vocab = len(vocab)
d_hidden = 100
# NOTE(review): presumably larger than the number of training sequences, which
# would make every epoch a single full-batch step -- confirm this is intended.
d_batch = 1000000
# Enumerate the characters in sorted order: the iteration order of a set of
# strings depends on the per-process hash seed, so without sorting the
# char <-> id mapping would change on every kernel restart.
atoi = {a: i for i, a in enumerate(sorted(vocab))}
itoa = {i: a for a, i in atoi.items()}

In [172]:
def to_dataloader(text, seq_len=25, batch_size=d_batch):
    """Turn ``text`` into a shuffled DataLoader of next-character pairs.

    The text is cut into non-overlapping windows of ``seq_len`` characters.
    For every window ``x`` the target ``y`` is the same window shifted one
    character to the right, so ``y[k]`` is the character following ``x[k]``.
    Characters are mapped to integer ids through the module-level ``atoi``.

    Args:
        text: string whose characters are all keys of ``atoi``.
        seq_len: number of characters per training sequence.
        batch_size: batch size handed to the DataLoader.

    Returns:
        A ``DataLoader`` over a ``TensorDataset(x, y)`` of id tensors,
        with ``shuffle=True``.

    Raises:
        KeyError: if ``text`` contains a character that is not in ``atoi``.
    """
    # A window starting at i needs text[i : i+seq_len+1] to exist, so the last
    # valid start is len(text)-seq_len-1; the exclusive stop is therefore
    # len(text)-seq_len (the previous stop dropped that final window).
    starts = range(0, len(text) - seq_len, seq_len)
    x = t.tensor([[atoi[a] for a in text[i:i + seq_len]] for i in starts], dtype=t.long)
    y = t.tensor([[atoi[a] for a in text[i + 1:i + seq_len + 1]] for i in starts], dtype=t.long)
    dataset = TensorDataset(x, y)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Build the training DataLoader from the full book text, using the default
# seq_len and batch_size of to_dataloader.
dataloader = to_dataloader(book)

## Recurrent Neural Networks (RNN)

### model

In [145]:
class RNN(nn.Module):
    """Minimal recurrent network built from three linear layers.

    At every time step the hidden state is updated as
    ``memory = tanh(embed(x) + hidden(memory))`` and ``unembed(memory)`` is
    emitted as the output for that step.
    """

    def __init__(self, d_in=10, d_hidden=20, d_out=30):
        """Create the layers.

        Args:
            d_in: size of each input vector.
            d_hidden: size of the recurrent hidden state.
            d_out: size of each output vector.
        """
        super().__init__()
        self.embed = nn.Linear(d_in, d_hidden)       # input  -> hidden
        self.hidden = nn.Linear(d_hidden, d_hidden)  # hidden -> hidden
        self.unembed = nn.Linear(d_hidden, d_out)    # hidden -> output

    def forward(self, xs, memory=None, return_memory=False):
        """Run the network over a batch of sequences.

        Args:
            xs: tensor of shape (batch, d_context, d_in).
            memory: optional initial hidden state of shape (batch, d_hidden);
                zeros are used when it is ``None``.
            return_memory: when true, also return the final hidden state.

        Returns:
            A tensor of shape (batch, d_context, d_out) holding one output per
            time step, or the tuple ``(outputs, memory)`` when
            ``return_memory`` is true. For an empty sequence
            (``d_context == 0``) the outputs are an empty
            (batch, 0, d_out) tensor and ``memory`` is returned unchanged.
        """
        batch, d_context, _ = xs.shape
        if memory is None:
            # Allocate directly on the input's device and in the parameters'
            # dtype, so the module also works after .double() / .half().
            memory = t.zeros(
                batch,
                self.hidden.in_features,
                device=xs.device,
                dtype=self.hidden.weight.dtype,
            )
        # The input projection does not depend on the recurrent state, so it
        # is computed for all time steps at once instead of inside the loop.
        embedded = self.embed(xs)
        outs = []
        for i in range(d_context):
            memory = t.tanh(embedded[:, i] + self.hidden(memory))
            outs.append(self.unembed(memory))
        if outs:
            out = t.stack(outs, dim=1)
        else:
            # t.stack rejects an empty list; return an empty sequence instead.
            out = memory.new_zeros(batch, 0, self.unembed.out_features)
        if return_memory:
            return out, memory
        return out

# Character-level model: inputs and outputs are both d_vocab wide, with a
# hidden state of size d_hidden.
model = RNN(d_vocab, d_hidden, d_vocab)

### train

In [161]:
@t.no_grad()
def sample(model, text='A', d_sample=100):
    """Generate text by sampling from ``model`` one character at a time.

    The prompt ``text`` is fed through the model first; afterwards each
    sampled character is fed back in as the next input while the hidden
    state is carried along. Sampling continues while ``len(text) <= d_sample``,
    so a prompt no longer than ``d_sample`` yields ``d_sample + 1`` characters
    in total (prompt included).

    Args:
        model: module moved to ``device`` and called as
            ``model(x, memory=..., return_memory=True)``; must expose
            ``model.hidden.in_features``.
        text: prompt string; every character must be a key of ``atoi``.
        d_sample: length threshold that stops the generation loop.

    Returns:
        The prompt followed by the sampled characters.
    """
    model = model.to(device)
    state = t.zeros(1, model.hidden.in_features).to(device)
    prompt_ids = t.tensor([[atoi[ch] for ch in text]])
    step_input = F.one_hot(prompt_ids, num_classes=d_vocab).float().to(device)
    generated = text
    while not len(generated) > d_sample:
        logits, state = model(step_input, memory=state, return_memory=True)
        # Only the prediction at the last position decides the next character.
        char_probs = logits[0, -1].softmax(dim=0)
        next_id = t.multinomial(char_probs, num_samples=1)
        generated += itoa[next_id.item()]
        # (1,) -> one-hot (1, d_vocab) -> (1, 1, d_vocab): a one-step sequence.
        next_one_hot = F.one_hot(next_id, num_classes=d_vocab).float().to(device)
        step_input = next_one_hot[:, None, :]
    return generated

# sample(model)

In [186]:
def train(model, dataloader, epochs=2000, d_vocab=d_vocab, opt=None, lr=3e-4):
    """Fit ``model`` to next-character prediction with cross-entropy loss.

    Args:
        model: module mapping one-hot inputs of shape (batch, seq, d_vocab)
            to logits of shape (batch, seq, d_vocab).
        dataloader: iterable yielding ``(xs, ys)`` batches of integer ids.
        epochs: number of passes over ``dataloader``.
        d_vocab: number of classes used for the one-hot encoding.
        opt: optimizer to use; a fresh Adam over ``model.parameters()`` is
            created when it is ``None``.
        lr: learning rate for the default Adam optimizer (unused when ``opt``
            is given).

    Prints the loss of the last batch whenever ``epoch % 50 == 0`` and a text
    sample from ``sample(model)`` whenever ``epoch % 999 == 0``.
    """
    model = model.to(device)
    optimizer = t.optim.Adam(model.parameters(), lr=lr) if opt is None else opt
    for epoch in tqdm(range(epochs)):
        for token_ids, targets in dataloader:
            inputs = F.one_hot(token_ids, num_classes=d_vocab).float().to(device)
            logits = model(inputs)
            # cross_entropy expects the class dimension second: (batch, class, seq).
            loss = F.cross_entropy(logits.permute(0, 2, 1), targets.to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if not epoch % 50:
            print(f'loss={loss.item():.4f}')
        if not epoch % 999:
            print(sample(model))

# Train for 10,000 epochs with the default Adam optimizer and learning rate.
train(model, dataloader, epochs=10000)

  0%|          | 0/10000 [00:00<?, ?it/s]

loss=1.2869
Alices Cthink beated associce-extref toINBUOF.
TeOlm EINBE THE
E Dery how
brea what it in the feat
loss=1.2870
loss=1.2868
loss=1.2867
loss=1.2867
loss=1.2867
loss=1.2867
loss=1.2866
loss=1.2866
loss=1.2866
loss=1.2865
loss=1.2865
loss=1.2865
loss=1.2864
loss=1.2864
loss=1.2864
loss=1.2863
loss=1.2863
loss=1.2862
loss=1.2862
Archive Fgescclt! or shouldrons very repailate or dreadfurer nime, wated so certem! said the Quevery
loss=1.2861
loss=1.2861
loss=1.2862
loss=1.2860
loss=1.2861
loss=1.2862
loss=1.2860
loss=1.2859
loss=1.2860
