# 双方向LSTMを計算するためのTimeBiLSTMクラスを実装する

In [1]:
import numpy as np

try:
    from google.colab import files
    print('Google Colab. 上での実行です')
    print('「ファイルを選択」から、notebook/commonフォルダのfunctions.py, layers.py, time_layers.pyを選択し、アップロードしてください')
    print('===========')
    files.upload()
    !mkdir common
    !mv *.py ./common
except:
    print('ローカル環境での実行です')


from common.time_layers import TimeLSTM


Google Colab. 上での実行です
「ファイルを選択」から、notebook/commonフォルダのfunctions.py, layers.py, time_layers.pyを選択し、アップロードしてください


Saving functions.py to functions.py
Saving layers.py to layers.py
Saving time_layers.py to time_layers.py


### [演習]
* 以下のTimeBiLSTMクラスを完成させましょう

In [2]:
class TimeBiLSTM:
    """
    Bidirectional LSTM layer assembled from two TimeLSTM layers.

    One TimeLSTM reads the sequence in its given time order; the other reads
    it with the time axis (axis 1) reversed. Their outputs are concatenated
    along axis 2.
    """
    def __init__(self, Wx1, Wh1, b1, Wx2, Wh2, b2, stateful=False):
        """
        Wx1, Wh1, b1 : parameters handed to the forward-direction TimeLSTM
        Wx2, Wh2, b2 : parameters handed to the backward-direction TimeLSTM
        stateful     : forwarded unchanged to both TimeLSTM layers
        """
        # One sub-layer per reading direction.
        self.forward_lstm = TimeLSTM(Wx1, Wh1, b1, stateful)
        self.backward_lstm = TimeLSTM(Wx2, Wh2, b2, stateful)

        # Expose the parameters and gradients of both sub-layers as single
        # sequences, forward direction first.
        fwd, bwd = self.forward_lstm, self.backward_lstm
        self.params = fwd.params + bwd.params
        self.grads = fwd.grads + bwd.grads

    def forward(self, xs):
        """
        Forward pass.

        xs : input data; indexed with three axes, axis 1 being the one that
             is reversed for the backward direction

        Returns the two directions' outputs joined along axis 2.
        """
        # Forward direction: sequence as given.
        left_to_right = self.forward_lstm.forward(xs)

        # Backward direction: feed the time-reversed sequence.
        right_to_left = self.backward_lstm.forward(xs[:, ::-1, :])

        # Undo the reversal so both outputs line up step by step, then join.
        return np.concatenate((left_to_right, right_to_left[:, ::-1, :]), axis=2)

    def backward(self, dhs):
        """
        Backward pass.

        dhs : gradient with respect to the output of forward()

        The gradient is cut at the midpoint of axis 2, so both directions are
        assumed to contribute the same number of output features.

        Returns the sum of the input gradients of the two sub-layers.
        """
        half = dhs.shape[2] // 2

        # First half of the features belongs to the forward-direction layer.
        grad_from_fwd = self.forward_lstm.backward(dhs[:, :, :half])

        # Second half belongs to the backward-direction layer, which saw the
        # sequence reversed: flip time going in and flip it back coming out.
        grad_from_bwd = self.backward_lstm.backward(dhs[:, ::-1, half:])

        return grad_from_fwd + grad_from_bwd[:, ::-1]

In [3]:
# Smoke test of TimeBiLSTM on small random data.

# Vocabulary size (inputs are integers drawn from [0, V))
V = 3
# Input dimension after embedding
D = 3
# Number of hidden-layer nodes per direction
H = 4
# Number of samples in the batch
N = 3
# Number of words (time steps) per sample
T = 5

# Fix the seed so the printed values are the same after Restart & Run All.
np.random.seed(0)

# Random weights scaled by 1/sqrt(number of input rows); zero biases.
rn = np.random.randn
Wx1 = rn(D, 4 * H) / np.sqrt(D)
Wh1 = rn(H, 4 * H) / np.sqrt(H)
b1 = np.zeros(4 * H)
Wx2 = rn(D, 4 * H) / np.sqrt(D)
Wh2 = rn(H, 4 * H) / np.sqrt(H)
b2 = np.zeros(4 * H)

# Build the model
tb = TimeBiLSTM(Wx1, Wh1, b1, Wx2, Wh2, b2)


xs = np.random.randint(0, V, N*T*D).reshape(N, T, D)
print("xs=", xs)
print()

# Forward pass
out = tb.forward(xs)
# Both directions are concatenated on the last axis, hence 2 * H features.
assert out.shape == (N, T, 2 * H)
print("out=", out)
print()

# Backward pass
dhs = np.random.randn(N*T*H*2).reshape(N, T, H*2)
dxs = tb.backward(dhs)
# The gradient with respect to the input must match the input's shape.
assert dxs.shape == xs.shape
print("dxs=", dxs)
print()

xs= [[[0 2 2]
  [1 1 0]
  [2 0 2]
  [1 1 0]
  [0 2 0]]

 [[1 1 1]
  [1 1 2]
  [0 0 1]
  [0 1 0]
  [0 1 0]]

 [[1 2 2]
  [1 0 0]
  [0 0 1]
  [1 2 2]
  [1 0 2]]]

out= [[[ 0.04388869 -0.24097503 -0.01112616 -0.03458454 -0.63121025
    0.10570155 -0.05498259 -0.09154819]
  [ 0.09051628 -0.29445408 -0.15954031  0.06681726 -0.37965853
   -0.12158112 -0.05227416 -0.23105509]
  [-0.018382   -0.61683243 -0.12045827  0.01922328 -0.53212843
    0.17405054 -0.16463892 -0.35854852]
  [ 0.06013785 -0.57150427 -0.17893964  0.15197467 -0.41033046
   -0.1594824  -0.03217959 -0.10854902]
  [ 0.09459214 -0.58635268 -0.0571309   0.08334434 -0.33713878
   -0.23994227 -0.06203373 -0.05749415]]

 [[ 0.02378141 -0.21899862 -0.0664496  -0.00900007 -0.34358414
   -0.00693723 -0.1658528  -0.23048123]
  [ 0.00611647 -0.47293286 -0.06520579 -0.02151121 -0.25518883
    0.19089004 -0.12988479 -0.20612444]
  [ 0.00260054 -0.21515314 -0.11809846 -0.04089735 -0.18246492
    0.05394794 -0.08050362 -0.15466651]
  [ 0.12