# Sprint 22 - Scratch Recurrent Neural Network

## Problem 1

In [None]:
import numpy as np
def forward(x, wx=None, wh=None, bias=None):
    """Run a simple tanh RNN over a batch of sequences.

    Sizes are taken from the arguments themselves rather than from notebook
    globals, so the function works for any batch size / sequence length.

    Parameters
    ----------
    x : ndarray
        Input indexed as ``x[:, t, :]`` per time step, i.e.
        (batch_size, n_sequences, n_features).
    wx : ndarray, optional
        Input-to-hidden weights, (n_features, n_nodes). Defaults to the
        module-level ``w_x``.
    wh : ndarray, optional
        Hidden-to-hidden weights, (n_nodes, n_nodes). Defaults to the
        module-level ``w_h``.
    bias : ndarray, optional
        Bias added at every step, broadcast over the batch. Defaults to the
        module-level ``b``.

    Returns
    -------
    ndarray
        Hidden state after the last time step, (batch_size, n_nodes).
    """
    # Fall back to the notebook-level parameters only when none are passed,
    # which keeps the original ``forward(x)`` call working unchanged.
    wx = w_x if wx is None else wx
    wh = w_h if wh is None else wh
    bias = b if bias is None else bias

    n_batch, n_steps = x.shape[0], x.shape[1]
    # The initial hidden state is all zeros.
    h = np.zeros((n_batch, wx.shape[1]))
    for t in range(n_steps):
        h = np.tanh(x[:, t, :] @ wx + h @ wh + bias)
    return h

## Problem 2

In [None]:
# Sample input: one sequence of three time steps with two features each.
x = np.array(
    [[[1, 2],
      [2, 3],
      [3, 4]]]
) / 100  # (batch_size, n_sequences, n_features)

# Input-to-hidden weights.
w_x = np.array(
    [[1, 3, 5, 7],
     [3, 5, 7, 8]]
) / 100  # (n_features, n_nodes)

# Hidden-to-hidden weights.
w_h = np.array(
    [[1, 3, 5, 7],
     [2, 4, 6, 8],
     [3, 5, 7, 8],
     [4, 6, 8, 10]]
) / 100  # (n_nodes, n_nodes)

# Sizes are read off the arrays above: 1, 3, 2 and 4 respectively.
batch_size, n_sequences, n_features = x.shape
n_nodes = w_x.shape[1]

# Zero initial hidden state and an all-ones bias.
h = np.zeros((batch_size, n_nodes))  # (batch_size, n_nodes)
b = np.array([1, 1, 1, 1])  # (n_nodes,)

In [None]:
# Run the forward pass on the sample input x; the value it returns is shown below.
forward(x)

array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

**The forward propagation output matches the expected values given in the problem statement.**

## Problem 3

In [None]:
class SimpleInitializer:
    """Gaussian parameter initializer.

    Every value is drawn from ``np.random.randn`` and multiplied by ``sigma``.

    Parameters
    ----------
    sigma : float
        Scale applied to the standard-normal samples.
    """

    def __init__(self, sigma):
        self.sigma = sigma

    def _scaled_normal(self, rows, cols):
        """Return a (rows, cols) array of standard-normal samples times sigma."""
        return self.sigma * np.random.randn(rows, cols)

    def W(self, n_nodes1, n_nodes2):
        """Return a random weight matrix of shape (n_nodes1, n_nodes2)."""
        return self._scaled_normal(n_nodes1, n_nodes2)

    def B(self, n_nodes2):
        """Return a random bias row of shape (1, n_nodes2)."""
        return self._scaled_normal(1, n_nodes2)

In [None]:
class SGD:
    """Plain stochastic gradient descent.

    Parameters
    ----------
    lr : float
        Learning rate multiplied into every gradient.
    """

    def __init__(self, lr):
        self.lr = lr

    def update(self, layer):
        """Subtract ``lr * gradient`` from the layer's ``Wx``, ``Wh`` and ``B``.

        The layer must expose each parameter together with its gradient under
        the same name prefixed with ``d`` (``dWx``, ``dWh``, ``dB``). The
        subtraction is an augmented assignment, so array parameters are
        modified in place. Returns the same ``layer`` object.
        """
        for name in ("Wx", "Wh", "B"):
            param = getattr(layer, name)
            param -= self.lr * getattr(layer, "d" + name)
            setattr(layer, name, param)
        return layer

In [None]:
class Tanh:
    """Hyperbolic-tangent activation that remembers its last input."""

    def forward(self, A):
        """Store ``A`` for the backward pass and return ``tanh(A)``."""
        self.A = A
        return np.tanh(A)

    def backward(self, dZ):
        """Return ``dZ`` scaled by the tanh derivative at the stored input.

        The derivative is ``1 - tanh(A)**2``, evaluated on the ``A`` saved by
        the most recent ``forward`` call.
        """
        activation = np.tanh(self.A)
        local_grad = 1 - activation ** 2
        return dZ * local_grad

In [None]:
class ScratchSimpleRNNClassifier:
    """Single-layer tanh RNN with hard-coded demo weights.

    ``forward`` runs the recurrence ``h_t = act(x_t @ Wx + h_{t-1} @ Wh + b)``
    starting from a zero hidden state and returns the last hidden state.
    ``backward`` backpropagates a gradient on that last hidden state through
    every time step and fills ``dWx``, ``dWh`` and ``dB``.

    Attributes
    ----------
    Wx : ndarray, (n_features, n_nodes)
    Wh : ndarray, (n_nodes, n_nodes)
    b : ndarray, (n_nodes,)
        Also reachable as ``B``, the name ``SGD.update`` uses.
    dWx, dWh, dB
        Gradients from the most recent ``backward`` call (0 before the first).
    h_his : list of ndarray
        Hidden state after each time step of the most recent ``forward`` call.
    A_his : list of ndarray
        Pre-activation values for each time step of that same call.
    """

    def __init__(self,):
        self.Wx = np.array([[1, 3, 5, 7], [3, 5, 7, 8]])/100
        self.Wh = np.array([[1, 3, 5, 7], [2, 4, 6, 8], [3, 5, 7, 8], [4, 6, 8, 10]])/100
        # Float bias: an integer array would make the optimizer's in-place
        # ``-=`` with a float gradient raise a casting error.
        self.b = np.array([1.0, 1.0, 1.0, 1.0])
        self.dB = 0
        self.dWx = 0
        self.dWh = 0
        self.activator = Tanh()
        self.optimizer = SGD(lr=0.001)
        self.h_his = []
        self.A_his = []

    @property
    def B(self):
        """Alias of ``b`` so that ``SGD.update`` (which uses ``layer.B``) works."""
        return self.b

    @B.setter
    def B(self, value):
        self.b = value

    def forward(self, x):
        """Run the RNN over ``x`` and return the final hidden state.

        Parameters
        ----------
        x : ndarray, (batch_size, n_sequences, n_features)

        Returns
        -------
        ndarray, (batch_size, n_nodes)
        """
        self.x = x.copy()
        batch_size, self.n_sequences, self.n_features = x.shape
        # Use this layer's own weights for the size, not a notebook global.
        n_nodes = self.Wx.shape[1]
        h = np.zeros((batch_size, n_nodes))
        # Start fresh histories so repeated calls do not pile up stale states.
        self.h_his = []
        self.A_his = []
        for n in range(self.n_sequences):
            A = x[:, n, :] @ self.Wx + h @ self.Wh + self.b
            h = self.activator.forward(A)
            self.A_his.append(A)
            self.h_his.append(h)
        return h

    def backward(self, dh):
        """Backpropagate through time from a gradient on the last hidden state.

        Parameters
        ----------
        dh : ndarray, (batch_size, n_nodes)
            Gradient of the loss with respect to the value ``forward`` returned.

        Returns
        -------
        dx : ndarray, (batch_size, n_sequences, n_features)
            Gradient with respect to the input passed to ``forward``.
        dh : ndarray, (batch_size, n_nodes)
            Gradient with respect to the (zero) initial hidden state.

        Raises
        ------
        RuntimeError
            If ``forward`` has not been called first.
        """
        if not self.A_his:
            raise RuntimeError("forward() must be called before backward()")

        # Gradients are recomputed from scratch on every call.
        self.dB = np.zeros_like(self.b)
        self.dWx = np.zeros_like(self.Wx)
        self.dWh = np.zeros_like(self.Wh)
        dx = np.zeros(self.x.shape)

        for n in reversed(range(self.n_sequences)):
            # The activator only caches its latest input, so replay step n's
            # pre-activation before asking for the derivative at that step.
            self.activator.forward(self.A_his[n])
            dA = self.activator.backward(dh)

            # State that entered step n: the previous output, or zeros at n == 0.
            h_prev = self.h_his[n - 1] if n > 0 else np.zeros_like(self.h_his[0])

            self.dB += np.sum(dA, axis=0)
            self.dWx += self.x[:, n, :].T @ dA
            self.dWh += h_prev.T @ dA

            dx[:, n, :] = dA @ self.Wx.T
            # Only the last step receives an external gradient; earlier steps
            # get theirs solely through the recurrent connection.
            dh = dA @ self.Wh.T
        return dx, dh

Let's test the class by running its forward pass on the same input `x` as in Problem 2.

In [None]:
# Build the classifier (its weights are hard-coded in __init__) and run the forward pass on x.
test = ScratchSimpleRNNClassifier()
test.forward(x)

array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

**The class-based forward pass produces the same output as the function in Problem 1.**