# RNN / LSTM / GRU

## setup

In [22]:
import requests
import torch as t
import torch.nn as nn
import torch.nn.functional as F

## utils

In [16]:
# download alice in wonderland
url = 'https://www.gutenberg.org/cache/epub/11/pg11.txt'
book = requests.get(url).content
book = book.decode('ascii', 'ignore')
alphabet = ['<eof>'] + list(set(book))

## Recurrent Neural Networks (RNN)

In [35]:
class RNN(nn.Module):
    def __init__(self, d_in=10, d_hidden=20, d_out=30):
        super().__init__()
        self.embed = nn.Linear(d_in, d_hidden)
        self.hidden = nn.Linear(d_hidden, d_hidden)
        self.unembed = nn.Linear(d_hidden, d_out)

    def forward(self, xs, memory=None):
        # xs: (batch, d_context, d_vocab)
        batch, d_context, _ = xs.shape
        outs = []
        if memory is None: memory = t.zeros(batch, self.hidden.in_features)
        for i in range(d_context):
            x = xs[:, i]
            memory = F.tanh(self.embed(x) + self.hidden(memory))
            out = self.unembed(memory)
            outs.append(out)
        return out

model = RNN(1, 5, 1)
opt = t.optim.Adam(model.parameters(), lr=3e-4)

In [37]:
ds = t.tensor([
    [0, 1, 0, 1, 0, 1, 0, 1, 0],
    [1, 0, 1, 0, 1, 0, 1, 0, 1],
    [0, 0, 1, 0, 0, 1, 0, 0, 1],
    [0, 1, 0, 0, 1, 0, 0, 1, 0],
    [1, 0, 0, 1, 0, 0, 1, 0, 0],
], dtype=t.float)
xs = ds[:, :-1, None]
ys = ds[:, -1:]

for epoch in range(1500):
    out = model(xs)
    loss = F.mse_loss(out, ys)
    opt.zero_grad()
    loss.backward()
    opt.step()
    if epoch % 100:
        print(f'loss={loss.item():.4f}')

loss=0.0504
loss=0.0503
loss=0.0502
loss=0.0501
loss=0.0499
loss=0.0498
loss=0.0497
loss=0.0496
loss=0.0495
loss=0.0493
loss=0.0492
loss=0.0491
loss=0.0490
loss=0.0489
loss=0.0487
loss=0.0486
loss=0.0485
loss=0.0484
loss=0.0483
loss=0.0481
loss=0.0480
loss=0.0479
loss=0.0478
loss=0.0477
loss=0.0475
loss=0.0474
loss=0.0473
loss=0.0472
loss=0.0471
loss=0.0469
loss=0.0468
loss=0.0467
loss=0.0466
loss=0.0465
loss=0.0463
loss=0.0462
loss=0.0461
loss=0.0460
loss=0.0459
loss=0.0457
loss=0.0456
loss=0.0455
loss=0.0454
loss=0.0453
loss=0.0451
loss=0.0450
loss=0.0449
loss=0.0448
loss=0.0446
loss=0.0444
loss=0.0443
loss=0.0442
loss=0.0440
loss=0.0439
loss=0.0438
loss=0.0437
loss=0.0435
loss=0.0434
loss=0.0433
loss=0.0432
loss=0.0430
loss=0.0429
loss=0.0428
loss=0.0427
loss=0.0425
loss=0.0424
loss=0.0423
loss=0.0422
loss=0.0420
loss=0.0419
loss=0.0418
loss=0.0417
loss=0.0415
loss=0.0414
loss=0.0413
loss=0.0411
loss=0.0410
loss=0.0409
loss=0.0408
loss=0.0406
loss=0.0405
loss=0.0404
loss=0.0402
loss

In [48]:
# predict
model(t.tensor([[0, 1, 0, 1, 0, 1, 0, 1]])[:, :, None].float())

tensor([[0.0107]], grad_fn=<AddmmBackward0>)