In [104]:
import copy
import numpy as np

In [1]:
class SimpleInitializer:
    """
    ガウス分布によるシンプルな初期化
    Parameters
    ----------
    sigma : float
    n_nodes1:
    n_nodes2:
    ガウス分布の標準偏差
    """
    
    def __init__(self, sigma=0.01):
        self.sigma = sigma
        
    def W(self, n_nodes1, n_nodes2):
        """
        重みの初期化
        Parameters
        ----------
        Returns
        ----------
        W :
        """
        W = self.sigma * np.random.randn(n_nodes1, n_nodes2)
        return W
    
    
    def B(self, n_nodes2):
        """
        バイアスの初期化
        Parameters
        ----------
        n_nodes2 : int
        後の層のノード数
        Returns
        ----------
        B :
        """
        B = self.sigma * np.random.randn(n_nodes2)[np.newaxis, :]
        
        return B

In [2]:
class SGD:
    """
    確率的勾配降下法
    Parameters
    ----------
    lr : 学習率
    """
    def __init__(self, lr):
        self.lr = lr
        
    def update(self, layer):
        """
        ある層の重みやバイアスの更新
        Parameters
        ----------
        layer : 更新前の層のインスタンス
        
        Returns
        ----------
        layer : 更新後の層のインスタンス
        """
        layer.W -= self.lr * layer.dW
        layer.B -= self.lr * layer.dB
        
        return layer

In [3]:
class Rnn_SGD:
    """
    確率的勾配降下法
    Parameters
    ----------
    lr : 学習率
    """
    def __init__(self, lr):
        self.lr = lr
        
    def update(self, layer):
        """
        ある層の重みやバイアスの更新
        Parameters
        ----------
        layer : 更新前の層のインスタンス
        
        Returns
        ----------
        layer : 更新後の層のインスタンス
        """
        layer.W -= self.lr * layer.dW
        layer.B -= self.lr * layer.dB
        leyer.W_h = self.lr * layer.dW_h
        
        return layer

In [3]:
class Sigmoid:
    """
    sigmoid関数の処理と導関数の算出
    """
    
    def __init__(self):
        pass
        
    def forward(self, A):
        self.A  = A
        Z = 1/1+np.exp(-self.A)
        
        return Z
    
    def backward(self, dout):
        Z = self.forward(self.A)
        dout_sig = Z*(1-Z)*dout
        
        return dout_sig
    
class Tanh:
    """
    tanh関数の処理と導関数の算出
    """
    def __init__(self):
        pass
    
    def forward(self, A):
        self.A = A
        Z = (np.exp(self.A)-np.exp(-self.A)) / (np.exp(self.A)+np.exp(-self.A))
        
        return Z
    
    def backward(self, dout):
        Z = self.forward(self.A)
        dout_tanh = (1-Z**2)*dout
        
        return dout_tanh
    
class Relu:
    """
    relu関数の処理と導関数の算出
    """
    
    def __init__(self):
        pass
    
    def forward(self, A):
        self.A = A
        Z = np.where(self.A<=0, 0, self.A)
        
        return Z
    
    def backward(self, dout):
        dout_relu = np.where(self.A<=0, 0, 1)*dout
        
        return dout_relu
    
class Softmax:
    """
    relu関数の処理とsoftmax_with_cross_entropyの導関数の算出
    """
    def __init__(self):
        self.Z = None
    
    def forward(self, A):
        #if A.ndim == 2:
           # A = A.T
           # A = A - np.max(A, axis=0)
           # y = np.exp(A) / np.sum(np.exp(A), axis=0)
           # return y.T
        A = A - np.max(A)
        Z = np.exp(A) / np.sum(np.exp(A),axis=1, keepdims=True)
        self.Z = Z
        
        return Z
    
    
    def backward(self, y):
        dout_soft_max = self.Z - y
        
        return dout_soft_max

In [4]:
class FC:
    """
    ノード数n_nodes1からn_nodes2への全結合層
    Parameters
    ----------
    n_nodes1 : int
      前の層のノード数
    n_nodes2 : int
      後の層のノード数
    initializer : 初期化方法のインスタンス
    optimizer : 最適化手法のインスタンス
    """
    def __init__(self, n_nodes1, n_nodes2, initializer, optimizer):
        self.n_nodes1 = n_nodes1
        self.n_nodes2 = n_nodes2
        self.initializer = initializer
        self.optimizer = optimizer
        # initializerのメソッドを使い、self.Wとself.Bを初期化する
        self.W = self.initializer.W(self.n_nodes1, self.n_nodes2)
        self.B = self.initializer.B(self.n_nodes2)
    
    def forward(self, X):
        """
        フォワード
        Parameters
        ----------
        X : 次の形のndarray, shape (batch_size, n_nodes1)
            入力
        Returns
        ----------
        A : 次の形のndarray, shape (batch_size, n_nodes2)
            出力
        """
        # XとWの内積をとり、biasを加える
        self.Z = copy.deepcopy(X) # Xが更新されないように
        A = np.dot(X, self.W) + self.B
        
        return A
    
    
    def backward(self, dA):
        """
        バックワード
        Parameters
        ----------
        dA : 次の形のndarray, shape (batch_size, n_nodes2)
            後ろから流れてきた勾配
        Returns
        ----------
        dZ : 次の形のndarray, shape (batch_size, n_nodes1)
            前に流す勾配
        """
        # 勾配を算出する
        self.dB = np.average(dA)
        self.dW = np.dot(self.Z.T, dA)/dA.shape[0]
        
        dZ = np.dot(dA, self.W.T)
        
        self = self.optimizer.update(self)# FCクラスのself.W, self.B, self.dW, self.dBを用いて更新
        
        return dZ


In [5]:
class Dropout:
    """
    randomに生成したWと同じ配列の要素でdroput_ratioを以下のものをFalseとして格納
    """
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None
        
    def forward(self, X, train_flag=True):
        if train_flag:
            self.mask = np.random.rand(*X.shape) > self.dropout_ratio # *X.shapeはXと同じshapeにするため
            return X*self.mask
        
        else:
            return X*(1-self.dropout_ratio)
        
    def backward(self, dout):
        
        return dout * self.mask

In [6]:
class Loss:
    
    def __init__(self):
        pass
        
    def cross_entropy_error(self, y_pred, y):
        cross_entropy_error = np.sum(-1*y*np.log(y_pred+1e-10),axis=1)
        
        return cross_entropy_error

【問題1】SimpleRNNのフォワードプロパゲーション実装

【問題3】（アドバンス課題）バックプロパゲーションの実装

In [4]:
class RNN:
    def __init__(self,batch,n_sequences, n_features, n_nodes, initializer,optimizer):
        self.batch = batch
        self.sequences = n_sequences
        self.n_features = n_features
        self.n_nodes = n_nodes
        self.initializer = initializer
        self.optimizer = optimizer
        self.h = np.zeros((batch, n_nodes))


        self.W_x = self.initializer.W(self.n_features, self.n_nodes)
        self.W_h = self.initializer.W(self.n_nodes, self.n_nodes)
        self.B = np.array([1])
        
        self.dh = np.zeros_like(self.batch_size, self.n_nodes)
        self.dX = None
        self.dW_x = None
        self.dW_h = None
        self.dh = None
        self.dB = None
        
    def forward(self, X):
        """
        X 次の形のndarray (batch_size, n_sequens, n_features)
        W_x : 入力に対する重み (n_features, n_nodes)
        self.h : 次の形のndarray (batch_size, n_nodoes)
        W_h : 時刻t-1の状態（前の時刻から伝わる順伝播） (batch_size, n_nodes)
        """
        output_list = []
        self.X = X
        for t in range(self.sequences):
            a = np.dot(X[:,t,:], self.W_x) + np.dot(self.h, self.W_h) + b
            
            tanh = Tanh()
            z = tanh.forward(a)
            self.h = z
            output_list += [z]
        
        output_array = np.array(output_list).reshape(self.batch, self.sequences, self.n_nodes)
        self.output_array = output_array
        
        return output_array
    
    def backward(self, dz):
        """
        dz: Affinからの勾配(batch_size, self.sequences, self.nodes)
        dh: RNNからの勾配(batch_size, n_nodes)
        self.dX: ndarray(batch_size, n_sequences, n_features)
        self.dW_x: ndarray (n_features, n_nodes)
        self.dh: ndarray(batch_size, n_nodes)
        self.dW_h: ndarray(n_nodes, n_nodes)
        self.dB: ndarray(1)
        
        
        """
        self.dX = np.zeros(self.batch, self.sequences, self.features)
        
        for t in reversed(range(self.n_sequences)):
            d = dz[:,t,:] + self.dh
            da = d*(1-(self.output_array[:,t,:])**2)
            self.dW_x = np.dot(self.X[:,t,:].T, da)/self.batch_size
            self.dh = np.dot(da, self.W_h.T)/self.batch_size
            self.dW_h = np.dot(self.h.T, da)/self.batch_size
            self.dB = np.average(da)
            self.dX[:,t,:] = np.dot(da, self.W_x.T)
            dh = self.dh
            
            
            self = self.optimizer.update(layer)
            
            
        

In [166]:
x = np.array([[[1, 2], [2, 3], [3, 4]]])/100
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]])/100
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]])/100
batch_size = x.shape[0] # 1
n_sequences = x.shape[1] # 3
n_features = x.shape[2] # 2
n_nodes = w_x.shape[1] # 4
h = np.zeros((batch_size, n_nodes))
b = np.array([1])

In [169]:
rnn = RNN(batch_size, n_features, n_nodes, SimpleInitializer(), SGD(lr=0.01))

In [170]:
out = rnn.forward(x)
out.shape

(1, 2)
(1, 2)
(1, 2)


(1, 3, 2)

【問題2】小さな配列でのフォワードプロパゲーションの実験

In [195]:
x = np.array([[[1, 2], [2, 3], [3, 4]]])/100
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]])/100
w_h = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]])/100
batch_size = x.shape[0] # 1
n_sequences = x.shape[1] # 3
n_features = x.shape[2] # 2
n_nodes = w_x.shape[1] # 4
h = np.zeros((batch_size, n_nodes))
b = np.array([1])

In [196]:
x.shape

(1, 3, 2)

In [198]:
x[:,2,:].shape

(1, 2)

In [193]:
for t in range(n_sequences):
    a = np.dot(x[:,t,:], w_x) + np.dot(h, w_h) + b
    tanh = Tanh()
    z = tanh.forward(a)
    h = z
h

[[0. 0. 0. 0.]]
[[0.76188798 0.76213958 0.76239095 0.76255841]]
[[0.792209   0.8141834  0.83404912 0.84977719]]


array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])