In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn

In [2]:
torch.manual_seed(0)

<torch._C.Generator at 0x7f9cd92dd570>

## RNN

$$
h_t = tanh(\mathbf{W}_{ih}x_t + \mathbf{W}_{hh}h_{t-1} + b_{h})\\
y_t = \mathbf{W}_{hy} h_t + b_y
$$
<br>
<img src='img/rnn.png' width=50% height=50%/>

## Dimension check

In [3]:
input_size = 4
hidden_size = 2

h = [1, 0, 0, 0]
e = [0, 1, 0, 0]
l = [0, 0, 1, 0]
o = [0, 0, 0, 1]

input_data_np = np.array([[h, e, l, l, o], [e, o, l, l, l], [l, l, e, e, l]], dtype=np.float32)
input_data = torch.Tensor(input_data_np)

input_data.shape # (batch_size, sequence_length, input_size)

torch.Size([3, 5, 4])

In [4]:
rnn = torch.nn.RNN(input_size, hidden_size, batch_first=True)

In [5]:
for n, w in rnn.named_parameters():
    print(n,':', w.shape)

weight_ih_l0 : torch.Size([2, 4])
weight_hh_l0 : torch.Size([2, 2])
bias_ih_l0 : torch.Size([2])
bias_hh_l0 : torch.Size([2])


> Second bias vector included for CuDNN compatibility. Only one
  bias vector is needed in standard definition.

In [6]:
outputs, last_hidden = rnn(input_data)
print(outputs)
print(outputs.size()) # batch_size, sequence_length, output_size

tensor([[[-0.7497, -0.6135],
         [-0.5753, -0.0070],
         [-0.9077, -0.3205],
         [-0.9141, -0.2142],
         [-0.8996,  0.3307]],

        [[-0.5282, -0.2473],
         [-0.9052,  0.2597],
         [-0.8944, -0.2902],
         [-0.9133, -0.2209],
         [-0.9109, -0.2263]],

        [[-0.9136, -0.4269],
         [-0.9173, -0.1989],
         [-0.5134, -0.0288],
         [-0.5086, -0.1379],
         [-0.9126, -0.3170]]], grad_fn=<TransposeBackward1>)
torch.Size([3, 5, 2])


In [7]:
torch.all(outputs[:,-1] == last_hidden)

tensor(True)

## Charseq

In [8]:
sample = " learned deep learning"

### integer encoding

In [9]:
char_list = list(set(sample))
char_dic = {c: i for i, c in enumerate(char_list)}
print(char_dic)

{'e': 0, 'g': 1, 'n': 2, 'i': 3, 'd': 4, 'r': 5, 'l': 6, ' ': 7, 'p': 8, 'a': 9}


In [10]:
sample_idx = [char_dic[c] for c in sample]
x_data = [sample_idx[:-1]]

### one-hot encoding

In [11]:
x_one_hot = [np.eye(len(char_dic))[x] for x in x_data]
y_data = sample_idx[1:]
y_data

[6, 0, 9, 5, 2, 0, 4, 7, 4, 0, 0, 8, 7, 6, 0, 9, 5, 2, 3, 2, 1]

### training with rnn

In [12]:
X = torch.FloatTensor(x_one_hot)
Y = torch.LongTensor(y_data)

In [13]:
dic_size = len(char_dic)
hidden_size = len(char_dic)
n_epochs = 50
lr = 0.5

rnn = torch.nn.RNN(dic_size, hidden_size, batch_first=True)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(rnn.parameters(), lr=lr, momentum=0.9)

In [14]:
for epoch in range(n_epochs):

    outputs, _status = rnn(X)
    outputs = outputs.view(-1, dic_size)
    loss = criterion(outputs, Y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    result = outputs.detach().numpy().argmax(axis=1)
    result_str = ''.join([char_list[c] for c in result])
    if (epoch+1) % 5 == 0:
        print(f'epoch {epoch+1:<3}  loss:{loss.item():.3f}  prediction:{result_str}')

epoch 5    loss:2.006  prediction:eeeeeeeeeeeeeeeeeeeee
epoch 10   loss:1.523  prediction:eearnepeeeep eearnene
epoch 15   loss:1.218  prediction:learned eeep eearneng
epoch 20   loss:1.099  prediction:learnid deep learning
epoch 25   loss:1.036  prediction:learnid leep learning
epoch 30   loss:0.998  prediction:learnid leep learning
epoch 35   loss:0.974  prediction:learnid leep learning
epoch 40   loss:0.952  prediction:learnid leep learning
epoch 45   loss:0.939  prediction:learnid leep learning
epoch 50   loss:0.929  prediction:learnid leep learning
