# Lecture 12: RNN1 - Basics
## Exercise 12-4: Implement RNN

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

# reproducibility
torch.manual_seed(777)
if device =='cuda':
    torch.cuda.manual_seed_all(777)

cuda


## (1) Data preparation

In [3]:
idx2char = ['h', 'i', 'e', 'l', 'o']

# Teach "hihell" -> "ihello"
x_data = [[0, 1, 0, 2, 3, 3]] # "hihell"
one_hot_lookup = [[[1, 0, 0, 0, 0],  # 0
                   [0, 1, 0, 0, 0],  # 1
                   [0, 0, 1, 0, 0],  # 2
                   [0, 0, 0, 1, 0],  # 3
                   [0, 0, 0, 0, 1]]] # 4

y_data = [1, 0, 2, 3, 3, 4] # "ihello"
x_one_hot = [[one_hot_lookup[0][x] for x in x_data[0]]]

# As we have one batch of samples, we will change them to variables only once
inputs = Variable(torch.Tensor(x_one_hot)).to(device)
labels = Variable(torch.LongTensor(y_data)).to(device)

In [4]:
x_one_hot

[[[1, 0, 0, 0, 0],
  [0, 1, 0, 0, 0],
  [1, 0, 0, 0, 0],
  [0, 0, 1, 0, 0],
  [0, 0, 0, 1, 0],
  [0, 0, 0, 1, 0]]]

## (2) Parameters

In [5]:
epochs = 100

num_classes = 5
input_size = 5      # one-hot size
hidden_size = 5     # ouput from the LSTM. 5 to directly predict one-hot
batch_size = 1      # one sentence
output_size = 6     # |ihello| == 6
num_layers = 1      # one-layer rnn

## (3) Our model

메모리 셀에서 은닉 상태를 계산하는 식을 다음과 같이 정의하였음

$a_{t} = Ux_{t} + Wh_{t−1} + b$<br>
$h_{t} = \tanh{(a_{t})} = \tanh{(Ux_{t} + Wh_{t−1} + b)}$<br>
$o_{t} = Vh_{t} + c$<br>
$\hat{y_{t}}=\textrm{softmax}(o_{t})$<br>



In [6]:
class RNNcell(nn.Module):

    def __init__(self, input_size, hidden_size):
        super(RNNcell, self).__init__()

        # self.input_size = input_size
        self.hidden_size = hidden_size

        self.U = nn.Linear(hidden_size, input_size, bias=True)  # hidden to input (128, 24), bias:b1
        self.W = nn.Linear(hidden_size, hidden_size, bias=True) # hidden to hidden (128, 128), bias:b2
        self.V = nn.Linear(input_size, hidden_size, bias=True)  # input to hidden (24, 128), bias:c
    
        # self.reset_parameters()


    def reset_parameters(self):
        std = 1.0 / np.sqrt(self.hidden_size)

        # Initialize each parameter with numbers sampled from the continuous uniform distribution
        for parameter in self.parameters():
            # type(parameter.data) : Tensor
            parameter.data.uniform_(-std, std)


    def forward(self, x_t, h_prev):
        a_t = self.U(x_t) + self.W(h_prev) # U * x_t + W * h_(t-1) + b
        h_t = torch.tanh(a_t) # tanh{U * x_t + W * h_(t-1) + b}
        o_t = self.V(h_t) # V * h_t + c

        return o_t, h_t # out, hidden


class VanillaRNN(nn.Module):

    def __init__(self, input_size, hidden_size, output_size, num_classes, num_layers=1):
        super(VanillaRNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_classes = num_classes
        self.num_layers = num_layers

        self.rnn = RNNcell(input_size=input_size, hidden_size=hidden_size)


    def forward(self, x):
        h_0 = self.init_hidden(x)

        # Reshape input
        x.view(x.size(0), self.output_size, self.input_size)

        # Propagate input through RNN
        # Input: (batch, seq_len, input_size)
        # h_0: (num_layers * num_directions, batch, hidden_size)

        out, hidden = self.rnn(x, h_0)
        out = out.view(-1, num_classes)
        return out


    def init_hidden(self, x):
        # Initialize hidden and cell states
        return Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).to(device)

In [7]:
class RNNfromPyTorch(nn.Module):

    def __init__(self, input_size, hidden_size, output_size, num_classes, num_layers=1):
        super(RNNfromPyTorch, self).__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.rnn = nn.RNN(input_size=5, hidden_size=5, batch_first=True)


    def forward(self, x):
        h_0 = self.init_hidden(x)

        # Reshape input
        x.view(x.size(0), self.output_size, self.input_size)

        # Propagate input through RNN
        # Input: (batch, seq_len, input_size)
        # h_0: (num_layers * num_directions, batch, hidden_size)

        out, hidden = self.rnn(x, h_0)
        out = out.view(-1, num_classes)
        return out

        
    def init_hidden(self, x):
        # Initialize hidden and cell states
        # (num_layers * num_directions, batch, hidden_size) for batch_first=True
        return Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size)).to(device)

## (4) Loss & Training

### VanillaRNN

In [8]:
# Instantiate RNN model
model = VanillaRNN(input_size, hidden_size, output_size, num_classes, num_layers).to(device)
# model = RNNfromPyTorch(input_size, hidden_size, output_size, num_classes, num_layers).to(device)
print(model)

# Set loss and optimizer function
# CrossEntropyLoss = LogSoftmax + NLLLoss
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.1)

VanillaRNN(
  (rnn): RNNcell(
    (U): Linear(in_features=5, out_features=5, bias=True)
    (W): Linear(in_features=5, out_features=5, bias=True)
    (V): Linear(in_features=5, out_features=5, bias=True)
  )
)


In [9]:
# Train the model
for epoch in range(1, epochs + 1):
    outputs = model(inputs).to(device)

    optimizer.zero_grad()
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.cpu().max(1)
    idx = idx.data.numpy()
    result_str = "".join([idx2char[c] for c in idx.squeeze()])
    print(f"Epoch: {epoch}, Loss: {loss.data.item()}")
    print(f"Predicted string: {result_str}")

Epoch: 1, Loss: 1.6119270324707031
Predicted string: ololll
Epoch: 2, Loss: 1.457548975944519
Predicted string: llllll
Epoch: 3, Loss: 1.3519129753112793
Predicted string: llllll
Epoch: 4, Loss: 1.2186530828475952
Predicted string: llllll
Epoch: 5, Loss: 1.0769691467285156
Predicted string: elelll
Epoch: 6, Loss: 0.970247209072113
Predicted string: eheloo
Epoch: 7, Loss: 0.8818891644477844
Predicted string: eheloo
Epoch: 8, Loss: 0.7949978709220886
Predicted string: ihiloo
Epoch: 9, Loss: 0.7192606329917908
Predicted string: ihiloo
Epoch: 10, Loss: 0.6587330102920532
Predicted string: ihilll
Epoch: 11, Loss: 0.6083053350448608
Predicted string: ihilll
Epoch: 12, Loss: 0.5648816227912903
Predicted string: ihilll
Epoch: 13, Loss: 0.5310947299003601
Predicted string: eheloo
Epoch: 14, Loss: 0.5079692006111145
Predicted string: eheloo
Epoch: 15, Loss: 0.49220970273017883
Predicted string: eheloo
Epoch: 16, Loss: 0.4814983308315277
Predicted string: eheloo
Epoch: 17, Loss: 0.475664705038070

### RNNfromPyTorch

In [10]:
# Instantiate RNN model
model = RNNfromPyTorch(input_size, hidden_size, output_size, num_classes, num_layers).to(device)
print(model)

# Set loss and optimizer function
# CrossEntropyLoss = LogSoftmax + NLLLoss
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.1)

RNNfromPyTorch(
  (rnn): RNN(5, 5, batch_first=True)
)


In [11]:
# Train the model
for epoch in range(1, epochs + 1):
    outputs = model(inputs).to(device)

    optimizer.zero_grad()
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.cpu().max(1)
    idx = idx.data.numpy()
    result_str = "".join([idx2char[c] for c in idx.squeeze()])
    print(f"Epoch: {epoch}, Loss: {loss.data.item()}")
    print(f"Predicted string: {result_str}")

Epoch: 1, Loss: 1.7460657358169556
Predicted string: iilihh
Epoch: 2, Loss: 1.5093475580215454
Predicted string: iillll
Epoch: 3, Loss: 1.3484634160995483
Predicted string: illlll
Epoch: 4, Loss: 1.23129141330719
Predicted string: illlll
Epoch: 5, Loss: 1.134954571723938
Predicted string: ihllll
Epoch: 6, Loss: 1.0518126487731934
Predicted string: ihlllo
Epoch: 7, Loss: 0.9598258137702942
Predicted string: ihlllo
Epoch: 8, Loss: 0.9119052290916443
Predicted string: ihehll
Epoch: 9, Loss: 0.8687078952789307
Predicted string: ihehll
Epoch: 10, Loss: 0.7842835783958435
Predicted string: ihelll
Epoch: 11, Loss: 0.730978786945343
Predicted string: ihelll
Epoch: 12, Loss: 0.7004899978637695
Predicted string: ihelll
Epoch: 13, Loss: 0.6720936894416809
Predicted string: ihelll
Epoch: 14, Loss: 0.6466363668441772
Predicted string: ihelll
Epoch: 15, Loss: 0.625917911529541
Predicted string: ihelll
Epoch: 16, Loss: 0.6092942357063293
Predicted string: ihelll
Epoch: 17, Loss: 0.5957011580467224
Pr