In [2]:
import numpy as np
import pandas as pd

## 【問題1】SimpleRNNのフォワードプロパゲーション実装
SimpleRNNのクラスSimpleRNNを作成してください。基本構造はFCクラスと同じになります。
フォワードプロパゲーションの数式は以下のようになります。ndarrayのshapeがどうなるかを併記しています。
バッチサイズをbatch_size、入力の特徴量数をn_features、RNNのノード数をn_nodesとして表記します。活性化関数はtanhとして進めますが、これまでのニューラルネットワーク同様にReLUなどに置き換えられます。

$$
\begin{aligned}
a_t &= x_t \cdot W_x + h_{t-1} \cdot W_h + B \\
h_t &= \tanh(a_t)
\end{aligned}
$$

- $x_t$ : 時刻tの入力 (batch_size, n_features)
- $W_x$ : 入力に対する重み (n_features, n_nodes)
- $h_{t-1}$ : 時刻t-1の状態 (batch_size, n_nodes)
- $W_h$ : 状態に対する重み (n_nodes, n_nodes)
- $B$ : バイアス項 (n_nodes,)
- $a_t$, $h_t$ : 時刻tの活性化関数を通す前の状態と出力 (batch_size, n_nodes)

## 【問題2】小さな配列でのフォワードプロパゲーションの実験
小さな配列でフォワードプロパゲーションを考えてみます。


入力x、初期状態h、重みw_xとw_h、バイアスbを次のようにします。


ここで配列xの軸はバッチサイズ、系列数、特徴量数の順番です。

In [149]:
# Toy inputs for the forward-propagation experiment (every value is scaled by 1/100).
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes)
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]) / 100  # (n_nodes, n_nodes)
batch_size, n_sequences, n_features = x.shape  # 1, 3, 2
n_nodes = w_x.shape[1]  # 4
h = np.zeros((batch_size, n_nodes))  # initial hidden state, (batch_size, n_nodes)
b = np.array([1, 1, 1, 1])  # bias, (n_nodes,)

In [150]:
# Show each variable's name immediately followed by its shape (or value), one per line.
print(
    *(
        label + str(value)
        for label, value in [
            ('x', x.shape),
            ('w_x', w_x.shape),
            ('w_h', w_h.shape),
            ('batch_size', batch_size),
            ('n_sequences', n_sequences),
            ('n_features', n_features),
            ('n_nodes', n_nodes),
            ('h', h.shape),
            ('b', b.shape),
        ]
    ),
    sep='\n',
)

x(1, 3, 2)
w_x(2, 4)
w_h(4, 4)
batch_size1
n_sequences3
n_features2
n_nodes4
h(1, 4)
b(4,)


### 手計算

In [151]:
# X * W_x + H * W_h + B

In [152]:
# Hidden state before the first time step: the all-zero array h.
hs = h

In [153]:
# Input-to-hidden term for the first time step of the single sample.
xw1 = x[0, 0].dot(w_x)

In [154]:
# Pre-activation of the first step: x_t . W_x + B + h_{t-1} . W_h.
# The previous state has to be multiplied by w_h like in the later steps; it is
# all zeros here, so the numbers are the same as when hs was added directly.
a1 = xw1 + b + np.dot(hs, w_h)

In [155]:
# Hidden state after the first time step.
hs1 = np.tanh(a1)

In [156]:
# Input-to-hidden term for the second time step.
xw2 = x[0, 1].dot(w_x)

In [157]:
# Pre-activation of the second step: input term + bias + recurrent term.
a2 = xw2 + b + hs1.dot(w_h)

In [158]:
# Hidden state after the second time step.
hs2 = np.tanh(a2)

In [159]:
# Input-to-hidden term for the third (last) time step.
xw3 = x[0, 2].dot(w_x)

In [160]:
# Pre-activation of the third step: input term + bias + recurrent term.
a3 = xw3 + b + hs2.dot(w_h)

In [161]:
# Hidden state after the third time step; displayed as the cell output.
np.tanh(a3)

array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

In [162]:
# 正解([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

In [163]:
# Unpack the three axes of x: batch size, number of time steps, features per step.
batch, t, features = x.shape

In [164]:
# Re-create the toy inputs so the loop version starts from a known state.
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes)
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]) / 100  # (n_nodes, n_nodes)
batch_size, n_sequences, n_features = x.shape  # 1, 3, 2
n_nodes = w_x.shape[1]  # 4
h = np.zeros((batch_size, n_nodes))  # initial hidden state, (batch_size, n_nodes)
b = np.array([1, 1, 1, 1])  # bias, (n_nodes,)

In [165]:
# Run the recurrence a_t = x_t . W_x + B + h_{t-1} . W_h, h_t = tanh(a_t) over all steps.
batch, t, features = x.shape
hs = h  # initial hidden state, (batch, n_nodes)
for i in range(t):
    # Slice step i for every sample; indexing x[0][i] would feed only the first
    # sample and broadcast it over the whole batch.
    xw = np.dot(x[:, i], w_x)  # (batch, n_nodes)
    a = xw + b + np.dot(hs, w_h)
    hs = np.tanh(a)

In [166]:
# Hidden state left by the loop after the last time step.
hs

array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

### 関数化

In [167]:
# Re-create the toy inputs used to exercise the class-based implementation.
x = np.array([[[1, 2], [2, 3], [3, 4]]]) / 100  # (batch_size, n_sequences, n_features)
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100  # (n_features, n_nodes)
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]) / 100  # (n_nodes, n_nodes)
batch_size, n_sequences, n_features = x.shape  # 1, 3, 2
n_nodes = w_x.shape[1]  # 4
h = np.zeros((batch_size, n_nodes))  # initial hidden state, (batch_size, n_nodes)
b = np.array([1, 1, 1, 1])  # bias, (n_nodes,)

In [168]:
class SimpleFC:
    """Forward-only simple RNN layer.

    Starting from an all-zero hidden state, every time step evaluates

        a_t = x_t . w_x + b + h_{t-1} . w_h
        h_t = activater(a_t)

    Parameters
    ----------
    w_x : ndarray
        Input-to-hidden weights, multiplied from the right onto each
        ``(batch, features)`` slice of the input. The default is whatever
        module-level ``w_x`` exists when the class is defined.
    w_h : ndarray
        Hidden-to-hidden weights; its length sets the number of nodes. The
        default is the module-level ``w_h`` at class-definition time.
    activater : callable, optional
        Element-wise activation applied to ``a_t``. ``None`` selects
        ``np.tanh`` instead of failing when ``forward`` is called.
    b : ndarray, optional
        Bias added to every pre-activation. ``None`` selects a vector of ones
        of length ``n_nodes``, the value that used to be hard-coded.
    """

    def __init__(self, w_x = w_x, w_h = w_h, activater = None, b = None):
        self.activater = np.tanh if activater is None else activater
        self.w_x = w_x
        self.w_h = w_h
        self.n_nodes = len(self.w_h)
        self.b = np.ones(self.n_nodes) if b is None else b

    def forward(self, X):
        """Run the recurrence over every time step of ``X``.

        Parameters
        ----------
        X : ndarray, shape (batch, time steps, features)
            Input sequences.

        Returns
        -------
        hs : ndarray, shape (batch, n_nodes)
            Hidden state after the last time step (also stored on ``self.hs``).
        Z : ndarray
            ``Softmax_with_Loss`` applied to the last pre-activation. That
            function is looked up at module level when ``forward`` runs.

        Raises
        ------
        ValueError
            If ``X`` is not 3-dimensional or contains no time step.
        """
        X = np.asarray(X)
        self.batch, self.t, self.features = X.shape
        if self.t == 0:
            raise ValueError("X must contain at least one time step")
        self.hs = np.zeros([self.batch, self.n_nodes])

        # Use the argument and its own length, not the notebook globals x / t,
        # and slice step i for the whole batch rather than for sample 0 only.
        for i in range(self.t):
            xw = np.dot(X[:, i], self.w_x)
            a = xw + self.b + np.dot(self.hs, self.w_h)
            self.hs = self.activater(a)

        # Only the last step's softmax is returned, so compute it once.
        Z = Softmax_with_Loss(a)

        return self.hs, Z
    

In [169]:
def Relu(A):
    """Rectified linear unit.

    Returns a copy of ``A`` in which every entry that is <= 0 has been
    replaced by 0; ``A`` itself is not modified.
    """
    rectified = A.copy()
    rectified[rectified <= 0] = 0
    return rectified

def Tanh(A):
    """Hyperbolic-tangent activation: the element-wise ``tanh`` of ``A``."""
    activated = np.tanh(A)
    return activated

def Softmax_with_Loss(A):
    """Row-wise softmax of a 2-D array.

    Despite its name this function computes no loss; it only normalises each
    row of ``A`` into probabilities.

    Parameters
    ----------
    A : array-like, shape (n_rows, n_columns)
        Scores to normalise.

    Returns
    -------
    ndarray, shape (n_rows, n_columns)
        ``exp(A)`` divided by its row sums, so every row sums to 1.
    """
    A = np.asarray(A)
    # exp() of a large score overflows to inf and inf / inf is nan. Subtracting
    # the row maximum leaves the softmax unchanged and keeps every exponent <= 0.
    shifted = A - A.max(axis=1, keepdims=True)
    exp_shifted = np.exp(shifted)

    return exp_shifted / exp_shifted.sum(axis=1, keepdims=True)


In [170]:
# Forward pass with the tanh activation; the hidden state should match the hand calculation.
sf = SimpleFC(w_x=w_x, w_h=w_h, activater=Tanh)
sf.forward(x)

(array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]]),
 array([[0.2259324 , 0.24163788, 0.2584351 , 0.27399462]]))

In [171]:
# Same forward pass with ReLU swapped in as the activation.
sf = SimpleFC(w_x=w_x, w_h=w_h, activater=Relu)
sf.forward(x)

(array([[1.12744024, 1.2264713 , 1.32550236, 1.41149812]]),
 array([[0.21497755, 0.23735684, 0.26206583, 0.28559979]]))