## **Recurrent neural network from scratch**

---

### **`[Problem 1] Simple forward propagation implementation of an RNN`**

---



In [51]:
#Library
import numpy as np

In [52]:
class ScratchSimpleRNNClassifier:
    """Minimal simple-RNN layer implementing forward propagation only.

    For every time step ``t`` the hidden state is updated as::

        h_t = tanh(x_t @ w_x + h_{t-1} @ w_h + b)

    with ``h_0`` set to zeros.

    Parameters
    ----------
    x : ndarray, shape (batch_size, n_sequences, n_features)
        Sample input; only its shape is used, to size the layer.
    w_x : ndarray, shape (n_features, n_nodes)
        Input-to-hidden weights.
    w_h : ndarray, shape (n_nodes, n_nodes)
        Hidden-to-hidden weights.
    """

    def __init__(self, x, w_x, w_h):
        self.w_x = w_x
        self.w_h = w_h
        self.batch_size = x.shape[0]
        self.n_sequences = x.shape[1]
        self.n_features = x.shape[2]
        self.n_nodes = w_x.shape[1]
        # Hidden state, shape (batch_size, n_nodes); starts at zero.
        self.h = np.zeros((self.batch_size, self.n_nodes))
        # Bias of ones sized from the weights instead of a fixed length of 4,
        # so the layer works for any number of nodes.
        self.b = np.ones(self.n_nodes)

    def forward(self, x):
        """Run the recurrence over every time step of ``x``.

        The hidden state is reset to zeros at the start of each call, so
        calling ``forward`` repeatedly on the same input returns the same
        result.

        Parameters
        ----------
        x : ndarray, shape (batch_size, n_sequences, n_features)

        Returns
        -------
        ndarray, shape (batch_size, n_nodes)
            Hidden state after the last time step.
        """
        self.x = x
        # Take batch and sequence sizes from the actual input rather than
        # from the array that was passed to the constructor.
        batch_size, n_sequences = x.shape[0], x.shape[1]
        self.h = np.zeros((batch_size, self.n_nodes))
        for t in range(n_sequences):
            self.h = np.tanh(x[:, t, :] @ self.w_x + self.h @ self.w_h + self.b)
        return self.h

    def backward(self, dA):
        """Placeholder for backpropagation; not implemented in this class."""
        pass

### **`[Problem 2] Forward propagation experiment with a small sequence`**

---



In [53]:
# Toy data for checking the forward pass by hand; every array is scaled by 1/100.

# One sample, three time steps, two features per step:
# (batch_size, n_sequences, n_features)
x = np.array([
    [[1, 2],
     [2, 3],
     [3, 4]],
]) / 100

# Input-to-hidden weights: (n_features, n_nodes)
w_x = np.array([
    [1, 3, 5, 7],
    [3, 5, 7, 8],
]) / 100

# Hidden-to-hidden weights: (n_nodes, n_nodes)
w_h = np.array([
    [1, 3, 5, 7],
    [2, 4, 6, 8],
    [3, 5, 7, 8],
    [4, 6, 8, 10],
]) / 100

rnn = ScratchSimpleRNNClassifier(x=x, w_x=w_x, w_h=w_h)

# Final hidden state after the last time step (displayed as the cell output).
rnn.forward(x)

array([[0.79494228, 0.81839002, 0.83939649, 0.85584174]])

### **`[Problem 3] (Advanced assignment) Implementation of backpropagation`**

---



In [54]:
import numpy as np
from numpy.random import randn

class ScratchSimpleRNNClassifier:
    """Simple RNN layer with forward propagation and backpropagation through time.

    For every time step ``t`` the hidden state is updated as::

        h_t = tanh(x_t @ w_x + h_{t-1} @ w_h + by)

    with ``h_0`` set to zeros. There is no separate output layer: the
    layer's output is the hidden state after the last time step.

    Parameters
    ----------
    x : ndarray, shape (batch_size, n_sequences, n_features)
        Sample input; only its shape is used, to size the layer.
    w_x : ndarray, shape (n_features, n_nodes)
        Initial input-to-hidden weights.
    w_h : ndarray, shape (n_nodes, n_nodes)
        Initial hidden-to-hidden weights.

    Attributes
    ----------
    w_x, w_h : ndarray
        Trainable weights (float copies of the constructor arguments).
    by : ndarray, shape (n_nodes,)
        Trainable bias added inside the tanh; initialised to ones.
    bh : ndarray, shape (batch_size, n_nodes)
        Current hidden state (not a trainable parameter, despite the name).
    last_inputs : ndarray or None
        Input of the most recent ``forward`` call.
    last_hs : dict or None
        Hidden states of the most recent ``forward`` call, keyed by step
        (key 0 is the initial zero state).
    """

    def __init__(self, x, w_x, w_h):
        # Float copies: ``backprop`` updates the weights in place, which must
        # neither modify the caller's arrays nor fail on integer input.
        self.w_x = np.array(w_x, dtype=float)
        self.w_h = np.array(w_h, dtype=float)
        self.batch_size = x.shape[0]
        self.n_sequences = x.shape[1]
        self.n_features = x.shape[2]
        self.n_nodes = self.w_x.shape[1]
        self.bh = np.zeros((self.batch_size, self.n_nodes))
        # Float bias sized from the weights rather than a fixed length of 4.
        self.by = np.ones(self.n_nodes)
        self.last_inputs = None
        self.last_hs = None

    def forward(self, inputs):
        """Run the recurrence over all time steps and cache states for backprop.

        Parameters
        ----------
        inputs : ndarray, shape (batch_size, n_sequences, n_features)

        Returns
        -------
        ndarray, shape (batch_size, n_nodes)
            Hidden state after the last time step.
        """
        inputs = np.asarray(inputs)
        batch_size, n_steps = inputs.shape[0], inputs.shape[1]

        self.last_inputs = inputs
        # Start every pass from a zero hidden state.
        self.bh = np.zeros((batch_size, self.n_nodes))
        self.last_hs = {0: self.bh}

        # Iterate over the time axis (axis 1), not over the batch.
        for t in range(n_steps):
            self.bh = np.tanh(
                inputs[:, t, :] @ self.w_x + self.bh @ self.w_h + self.by
            )
            self.last_hs[t + 1] = self.bh

        return self.bh

    def backprop(self, d_y, learn_rate=2e-2):
        """Backpropagate through time and apply one gradient-descent step.

        Parameters
        ----------
        d_y : ndarray, shape (batch_size, n_nodes)
            Gradient of the loss with respect to the value returned by the
            last ``forward`` call (the final hidden state).
        learn_rate : float
            Step size of the gradient-descent update.

        Raises
        ------
        RuntimeError
            If ``forward`` has not been called yet.
        """
        if self.last_hs is None or self.last_inputs is None:
            raise RuntimeError("forward() must be called before backprop()")

        n_steps = self.last_inputs.shape[1]

        d_w_h = np.zeros_like(self.w_h)
        d_w_x = np.zeros_like(self.w_x)
        d_by = np.zeros_like(self.by)

        # dL/dh for the last hidden state is the incoming gradient itself.
        d_h = np.asarray(d_y, dtype=float)

        for t in reversed(range(n_steps)):
            # Gradient at the tanh pre-activation: dL/dh_t * (1 - h_t^2).
            d_a = (1 - self.last_hs[t + 1] ** 2) * d_h

            # The bias is shared across the batch, so sum over the batch axis.
            d_by += d_a.sum(axis=0)
            # dL/dw_h accumulates h_{t-1}^T @ d_a.
            d_w_h += self.last_hs[t].T @ d_a
            # dL/dw_x accumulates x_t^T @ d_a.
            d_w_x += self.last_inputs[:, t, :].T @ d_a
            # Gradient flowing into the previous hidden state.
            d_h = d_a @ self.w_h.T

        # Clip to prevent exploding gradients.
        for grad in (d_w_x, d_w_h, d_by):
            np.clip(grad, -1, 1, out=grad)

        # Update weights and bias using gradient descent.
        self.w_h -= learn_rate * d_w_h
        self.w_x -= learn_rate * d_w_x
        self.by -= learn_rate * d_by