# RNNのさらなる工夫

## Bidirectional RNN

- 前に作成したmyRNNを、bidirectional RNNに対応させる
    - bidirection引数をつかし、TrueならBiRNNとする
    - backwardは省略
    - 返り値(bidirectional=Trueの時)
        - outputs: [batch_size, seq_len, hidden_size*2]
        - h_n: [2, batch_size, hidden_size]

In [1]:
import math
import torch
class myRNN:
    def __init__(self, input_size, hidden_size, bidirectional=False):
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional

        init_range = 1.0/math.sqrt(hidden_size)

        # 順方向の重みとバイアス
        self.W_in = torch.empty(hidden_size, input_size).uniform_(-init_range, init_range).requires_grad_(True)
        self.W_h = torch.empty(hidden_size, hidden_size).uniform_(-init_range, init_range).requires_grad_(True)
        self.b_in = torch.empty(hidden_size).uniform_(-init_range, init_range).requires_grad_(True)
        self.b_h = torch.empty(hidden_size).uniform_(-init_range, init_range).requires_grad_(True)

        # 逆方向の重みとバイアス
        self.W_in_backward = torch.empty(hidden_size, input_size).uniform_(-init_range, init_range).requires_grad_(True)
        self.W_h_backward = torch.empty(hidden_size, hidden_size).uniform_(-init_range, init_range).requires_grad_(True)
        self.b_in_backward = torch.empty(hidden_size).uniform_(-init_range, init_range).requires_grad_(True)
        self.b_h_backward = torch.empty(hidden_size).uniform_(-init_range, init_range).requires_grad_(True)
        
    def forward(self, input, h_0=None):
        # input: [batch_size, seq_len, input_size]
        self.input = input
        self.h_0 = h_0
        batch_size, self.seq_len, _ = input.size()

        if h_0 is None:
            self.h_0 = torch.zeros(1, batch_size, self.hidden_size)#.to(device)

        # 順方向の処理
        h = self.h_0 # [1, batch_size, hidden_size]
        outputs = []
        for i in range(self.seq_len):
            # [batch_size, hidden_size]
            h = torch.tanh(input[:, i]@self.W_in.T + self.b_in + h.squeeze(0)@self.W_h.T + self.b_h)
            outputs.append(h.unsqueeze(1))# [batch_size, hidden_size] -> # [batch_size, 1, hidden_size]
        self.output_seq = torch.cat(outputs, dim=1)

        # 逆方向の処理(双方向の場合)
        if self.bidirectional:
            h_backward = self.h_0
            outputs_backward = []
            for i in reversed(range(self.seq_len)):
                # [batch_size, hidden_size]
                h_backward = torch.tanh(input[:, i]@self.W_in_backward.T + self.b_in_backward + h_backward.squeeze(0)@self.W_h_backward.T + self.b_h_backward)
                outputs_backward.append(h_backward.unsqueeze(1))# [batch_size, hidden_size] -> # [batch_size, 1, hidden_size]
            # 順方向と逆方向の隠れ状態を同じステップで結合するようにする
            outputs_backward = outputs_backward[::-1 ]
            self.output_seq_backward = torch.cat(outputs_backward, dim=1) 
            self.output_seq = torch.cat((self.output_seq, self.output_seq_backward), dim=2)
            # biRNNでは，h_n[0]に順方向の最後(t=T)の隠れ状態を保持し，h_n[1]に逆方向の最後(つまりシーケンス上ではt=1)の隠れ状態とする
            h_n = torch.cat((h.unsqueeze(0), h_backward.unsqueeze(0)), dim=0) # [batch_size, hidden_size] -> [2, batch_size, hidden_size]        
        else:
            h_n = h.unsqueeze(0)

        return self.output_seq, h_n

In [2]:
import torch
# BiRNNのテスト
input_size = 10
hidden_size = 3
batch_size = 8
seq_len = 5

# サンプルのTensor
input_tensor = torch.randn(batch_size, seq_len, input_size)
birnn = myRNN(input_size, hidden_size, bidirectional=True)
output_seq, h_n = birnn.forward(input_tensor)

In [3]:
output_seq.shape

torch.Size([8, 5, 6])

## PyotorchのBiRNNを使う

nn.RNN
- 前に作成したModelクラスをBidirectional対応に変更する
- bidirectional=TrueにBiLSTMを使用することができる

In [4]:
import torch.nn as nn
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, rnn_type='LSTM', bidirectional=False):

        super().__init__()
        self.hidden_size = hidden_size
        self.num_directions = 2 if bidirectional else 1
        
        if rnn_type == 'RNN':
            self.rnn = nn.RNN(input_size, hidden_size, batch_first=True, bidirectional=bidirectional)
        elif rnn_type == 'LSTM':
            self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=bidirectional)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(input_size, hidden_size, batch_first=True, bidirectional=bidirectional)
        # elif rnn_type == 'UGRNN':
        #     self.rnn = UGRNN(input_size, hidden_size, batch_first=True)
        else:
            raise ValueError('Unsupported RNN type. Choose from ["LSTM", "RNN", "GRU", "UGRNN"]')
            
        self.fc = nn.Linear(hidden_size*self.num_directions, output_size)

    def forward(self, x):
        output_seq, _ = self.rnn(x)
        # output_seq: [batch_size, seq_len, hidden_size]
        output_seq = output_seq[:, -1, :]
        out = self.fc(output_seq)
        return out

In [5]:
rnn = nn.RNN(input_size, hidden_size, bidirectional=True)
output_seq, hn = rnn(input_tensor)

In [6]:
output_seq.shape

torch.Size([8, 5, 6])

In [8]:
output_size = 3
model = Model(input_size, hidden_size, output_size, rnn_type='LSTM', bidirectional=True)
out = model(input_tensor)

In [9]:
model.num_directions

2

In [10]:
out.shape

torch.Size([8, 3])

## Deep RNN

- 以前作成したModelクラスをDeep対応に変更する
- nn.RNN()のnum_layers引数を使用する
    - nn.RNNの隠れ状態のサイズは[num_layes*num_directions, batch_size, hidden_size]となることに注意する

In [11]:
num_layers_list = [1, 2, 3]

for num_layers in num_layers_list:
    rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, bidirectional=True)
    output_seq, h_n = rnn(input_tensor)

    print(f"Num Layers: {num_layers}")
    print(f"Output Shape :{output_seq.shape}")
    print(f"Last Hidden State Shape :{h_n.shape}")

Num Layers: 1
Output Shape :torch.Size([8, 5, 6])
Last Hidden State Shape :torch.Size([2, 5, 3])
Num Layers: 2
Output Shape :torch.Size([8, 5, 6])
Last Hidden State Shape :torch.Size([4, 5, 3])
Num Layers: 3
Output Shape :torch.Size([8, 5, 6])
Last Hidden State Shape :torch.Size([6, 5, 3])


In [12]:
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1, rnn_type='LSTM', bidirectional=False):

        super().__init__()
        self.hidden_size = hidden_size
        self.num_directions = 2 if bidirectional else 1
        
        if rnn_type == 'RNN':
            self.rnn = nn.RNN(input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=bidirectional)
        elif rnn_type == 'LSTM':
            self.rnn = nn.LSTM(input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=bidirectional)
        elif rnn_type == 'GRU':
            self.rnn = nn.GRU(input_size, hidden_size, num_layers=num_layers, batch_first=True, bidirectional=bidirectional)
        # elif rnn_type == 'UGRNN':
        #     self.rnn = UGRNN(input_size, hidden_size, batch_first=True)
        else:
            raise ValueError('Unsupported RNN type. Choose from ["LSTM", "RNN", "GRU", "UGRNN"]')
            
        self.fc = nn.Linear(hidden_size*self.num_directions, output_size)

    def forward(self, x):
        output_seq, _ = self.rnn(x)
        # output_seq: [batch_size, seq_len, hidden_size]
        output_seq = output_seq[:, -1, :]
        out = self.fc(output_seq)
        return out

In [13]:
model = Model(input_size, hidden_size, output_size, num_layers=2, rnn_type='LSTM', bidirectional=True)
out = model(input_tensor)
print(out.shape)

torch.Size([8, 3])
