<a href="https://colab.research.google.com/github/libra3910/diveintocode-ml/blob/master/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 【問題1】SimpleRNNのフォワードプロパゲーション実装

In [None]:
import numpy as np
# Initializerクラス
# SimpleInitializer 【問題2】初期化方法のクラス化
class SimpleInitializer:
    """Provide the fixed toy parameters for the small SimpleRNN experiment.

    The weights are hard-coded rather than sampled, so ``sigma`` is stored
    but does not influence any returned value.

    Parameters
    ----------
    sigma : float
        Standard deviation for a Gaussian initialisation (stored only).
    seed : int, default 0
        Value passed to ``np.random.seed`` when the object is created.
    """

    def __init__(self, sigma, seed=0):
        self.sigma = sigma
        self.seed = seed
        # NOTE: this seeds NumPy's *global* random generator.
        np.random.seed(self.seed)

    def Wx(self):
        """Return the input-to-hidden weights, shape (2, 4), dtype float."""
        return np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100

    def Wh(self):
        """Return the hidden-to-hidden weights, shape (4, 4), dtype float."""
        return np.array(
            [[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]]
        ) / 100

    def B(self):
        """Return the bias vector, shape (4,), dtype float.

        The bias is created as floating point so that an optimizer can
        update it in place (``B -= lr * grad``); an integer array would make
        that statement raise a casting error.
        """
        return np.array([1, 1, 1, 1], dtype=float)

In [None]:
# Optimizerクラス
# SGD
class SGD:
    """Plain stochastic gradient descent.

    Parameters
    ----------
    lr : float
        Learning rate.
    """

    def __init__(self, lr):
        self.lr = lr

    def update(self, layer):
        """Apply one in-place gradient step to the parameters of ``layer``.

        Two layer layouts are handled:

        * a layer exposing ``W``, ``B``, ``X`` and ``dA`` (single weight
          matrix) -- updated exactly as before;
        * a recurrent layer exposing ``Wx``, ``Wh``, ``B``, ``X``, ``ht1``
          and ``dA`` -- for ``A = X @ Wx + ht1 @ Wh + B`` the gradients are
          ``X.T @ dA``, ``ht1.T @ dA`` and the batch sum of ``dA``.

        All parameters are modified in place, so they must be floating-point
        arrays.

        Parameters
        ----------
        layer : object
            Layer whose cached inputs and gradient drive the update.
        """
        if hasattr(layer, "W"):
            layer.W -= self.lr * np.dot(layer.X.T, layer.dA)
            layer.B -= self.lr * np.sum(layer.dA, axis=0)
            return

        # Promote 1-D caches to a single-row batch so the transposes below
        # produce proper outer products.
        grad_a = np.atleast_2d(layer.dA)
        inputs = np.atleast_2d(layer.X)
        prev_state = np.atleast_2d(layer.ht1)

        layer.Wx -= self.lr * np.dot(inputs.T, grad_a)
        layer.Wh -= self.lr * np.dot(prev_state.T, grad_a)
        layer.B -= self.lr * np.sum(grad_a, axis=0)

In [None]:
# 【問題2】小さな配列でのフォワードプロパゲーションの実験
# Layerクラス
# FC
class FC:
    """Recurrent fully connected layer: ``A = X @ Wx + ht1 @ Wh + B``.

    Parameters
    ----------
    initializer : object
        Must provide ``Wx()``, ``Wh()`` and ``B()`` returning the initial
        parameter arrays.
    optimizer : object
        Stored on the layer; not called by this class itself.
    """

    def __init__(self, initializer, optimizer):
        self.initializer = initializer
        self.optimizer = optimizer

        # Parameters are taken from the initializer's Wx(), Wh() and B().
        self.Wx = self.initializer.Wx()
        self.Wh = self.initializer.Wh()
        self.B = self.initializer.B()

        # Caches filled by forward() / backward().
        self.X = None
        self.ht1 = None
        self.dA = None

    def forward(self, X, ht1):
        """Compute the pre-activation for one time step.

        Parameters
        ----------
        X : ndarray
            Input at the current time step (last axis matches ``Wx`` rows).
        ht1 : ndarray
            Hidden state from the previous time step.

        Returns
        -------
        ndarray
            ``X @ Wx + ht1 @ Wh + B``.
        """
        self.X = X
        self.ht1 = ht1
        return np.dot(self.X, self.Wx) + np.dot(self.ht1, self.Wh) + self.B

    # [Problem 3] (advanced) backpropagation
    def backward(self, dA, y):
        """Combine a direct gradient with one propagated through ``Wh``.

        The original code referenced ``self.W``, which this layer never
        defines; ``Wh`` is the only weight whose transpose yields a result
        that can be added to ``dA``.

        Parameters
        ----------
        dA : ndarray
            Gradient added directly; also cached on ``self.dA``.
        y : ndarray
            Array multiplied by ``Wh.T`` -- presumably the gradient arriving
            from the following time step (TODO confirm against the caller).

        Returns
        -------
        ndarray
            ``dA + y @ Wh.T``.
        """
        self.dA = dA
        return dA + np.dot(y, self.Wh.T)


In [None]:
# Tanh
class Tanh:
    """Hyperbolic-tangent activation with a cached output for backprop."""

    def __init__(self):
        # Output of the most recent forward() call.
        self.Z = None

    def forward(self, X):
        """Return ``tanh(X)`` element-wise and cache it on ``self.Z``.

        ``np.tanh`` is used instead of the explicit exponential formula,
        which overflows to ``inf / inf = nan`` for large ``|X|``.
        """
        self.Z = np.tanh(X)
        return self.Z

    def backward(self, dA):
        """Propagate ``dA`` through the activation.

        Parameters
        ----------
        dA : ndarray
            Gradient with respect to the activation output.

        Returns
        -------
        ndarray
            ``dA * (1 - tanh(X)**2)``, using the output cached by
            ``forward``.
        """
        return dA * (1.0 - self.Z ** 2)

In [None]:
# Calculates log(sum(exp(x)))
class logsumexp:
    """Row-wise ``log(sum(exp(x)))`` computed in a numerically stable way."""

    def __init__(self):
        # Result of the most recent forward() call.
        self.Z = None

    def forward(self, X):
        """Return ``log(sum(exp(X), axis=1))`` with shape (n_rows, 1).

        The row maximum is subtracted before exponentiating and added back
        afterwards so large inputs do not overflow.
        """
        row_max = X.max(axis=1, keepdims=True)
        self.Z = np.log(np.exp(X - row_max).sum(axis=1, keepdims=True)) + row_max
        return self.Z

    def backward(self, Z3, log_Z3, y):
        """Return ``exp((Z3 - one_hot(y)) / n_samples)``.

        The original implementation returned an undefined name and raised
        ``NameError``; the arithmetic itself is unchanged.

        Parameters
        ----------
        Z3 : ndarray
            Array compared against the one-hot labels; must broadcast with
            shape (n_samples, 10).
        log_Z3 : any
            Ignored -- the value passed in is never read.
            NOTE(review): confirm whether it was meant to be used.
        y : ndarray
            Integer class labels; the number of classes is fixed at 10.

        Returns
        -------
        ndarray
            Element-wise exponential of the scaled difference.
        """
        y_one_hot = (y.reshape(-1, 1) == np.arange(10))
        scaled_diff = (Z3 - y_one_hot) / y_one_hot.shape[0]
        return np.exp(scaled_diff)

In [None]:
# Softmax
class Softmax:
    """Softmax activation combined with the cross-entropy loss."""

    def __init__(self):
        pass

    def forward(self, X):
        """Return row-wise softmax probabilities of ``X``.

        The row maximum is subtracted first; this leaves the result
        unchanged mathematically but prevents ``exp`` from overflowing.
        """
        shifted = X - np.max(X, axis=1, keepdims=True)
        exp_x = np.exp(shifted)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def backward(self, Z3, y):
        """Compute the gradient, cross-entropy loss and number of hits.

        Parameters
        ----------
        Z3 : ndarray, shape (n_samples, n_classes)
            Softmax probabilities.
        y : ndarray
            Integer class labels, one per sample.

        Returns
        -------
        dza : ndarray, shape (n_samples, n_classes)
            ``(Z3 - one_hot(y)) / n_samples``.
        L : float
            Mean cross-entropy ``-sum(one_hot * log(Z3)) / n_samples``.
        C : int
            Number of samples whose arg-max prediction equals the label.
        """
        labels = np.asarray(y).reshape(-1)
        n_samples = labels.shape[0]
        # One column per class actually present in Z3 (was fixed at 10).
        y_one_hot = labels.reshape(-1, 1) == np.arange(Z3.shape[1])

        dza = (Z3 - y_one_hot) / n_samples
        # [Problem 3] cross-entropy error; the small constant guards log(0).
        L = -np.sum(y_one_hot * np.log(Z3 + 1e-7)) / n_samples
        # Accuracy is judged on the predictions, not on the gradient.
        C = np.sum(Z3.argmax(axis=1) == labels)

        return dza, L, C

In [None]:
# Trainer class
class SimpleRNN:
    """Trainer that runs the forward pass of a single-layer simple RNN.

    Parameters
    ----------
    seed : int, default 0
        Seed handed to the initializer.
    verbose : bool, default True
        When true, intermediate values are printed during ``fit``.
    verbose2 : bool, default False
        Stored only; not read by this class.
    """

    # sigma : standard deviation handed to the initializer
    # lr    : learning rate handed to the optimizer
    sigma = 0.01
    lr = 0.01

    def __init__(self, seed=0, verbose = True, verbose2 = False):
        self.seed = seed
        self.verbose = verbose
        self.verbose2 = verbose2

    def fit(self, x, y):
        """Run forward propagation over every time step of ``x``.

        The final hidden state is kept on ``self.h``.

        Parameters
        ----------
        x : ndarray, shape (batch_size, n_sequences, n_features)
            Input sequences.
        y : ndarray
            Accepted for interface compatibility; not used by the forward
            pass.
        """
        batch_size, n_sequences = x.shape[0], x.shape[1]
        if self.verbose:
            print("x.shape={}".format(x.shape))

        # Initializer, optimizer, activations and the recurrent layer.
        initializer = SimpleInitializer(self.sigma, self.seed)
        optimizer = SGD(self.lr)
        self.activation1 = Tanh()  # used for every step but the last
        self.activation2 = Tanh()  # used for the last step only
        self.FC1 = FC(initializer, optimizer)

        # Take the hidden size from the layer instead of hard-coding it.
        n_nodes = self.FC1.Wh.shape[0]
        h = np.zeros((batch_size, n_nodes))

        # Training: forward propagation.
        for i in range(n_sequences):
            # Feed step i of *every* sample; indexing x[0][i] would reuse
            # the first sample for the whole batch.
            A = self.FC1.forward(x[:, i, :], h)
            if self.verbose:
                print("A={}".format(A))

            if i < (n_sequences - 1):
                h = self.activation1.forward(A)
                if self.verbose:
                    print("ha={}".format(h))
            else:
                h = self.activation2.forward(A)

        self.h = h
        if self.verbose:
            print("Forward Propergation h={}".format(h))

        # TODO: back propagation through time is not implemented yet.


In [None]:
# Toy batch: one sample, three time steps, two features per step.
x = np.true_divide([[[1, 2], [2, 3], [3, 4]]], 100)
# Companion array of shape (1, 3, 1) holding the values 1, 2, 3.
y = np.arange(1, 4).reshape(1, 3, 1)

In [None]:
# Build the trainer with its default settings and run it on the toy batch.
rnn = SimpleRNN()
rnn.fit(x, y)

x.shape=(1, 3, 2)
A=[[1.0007 1.0013 1.0019 1.0023]]
ha=[[0.76188798 0.76213958 0.76239095 0.76255841]]
A=[[1.07733574 1.13931527 1.20129481 1.25535044]]
ha=[[0.792209   0.8141834  0.83404912 0.84977719]]
A=[[1.08471832 1.15192269 1.21912707 1.27759095]]
Forward Propergation h=[[0.79494228 0.81839002 0.83939649 0.85584174]]
