### BLG561E Assignment 4.1: Recurrent Neural Networks


In this assignment, we will use the same API as in Assignment 1. You have already implemented most of the required layers. You will now add the RNN layer under `blg561/layer/layers_with_weights`.

In [1]:
from blg561.layer.layers_with_weights import RNNLayer
from blg561.checks import rel_error, grad_check
from blg561.layer.layer import Tanh
import numpy as np
# Load IPython's autoreload extension.
%load_ext autoreload
# Mode 2: presumably chosen so that edits to the blg561 layer code are picked up
# without restarting the kernel (see the IPython autoreload documentation).
%autoreload 2
# NOTE(review): the extension is already loaded two lines above, so this reload
# looks redundant — confirm before removing.
%reload_ext autoreload

### RNN Layer: Forward step (5 points)
First, implement the forward step of the RNN layer in the RNNLayer class, then call it and check whether the error is below 1e-6.

In [2]:
# Problem sizes: N samples, D input features, H hidden units.
N, D, H = 3, 10, 4
# Size the layer from D and H (same argument order as the other cells) so its
# dimensions agree with the weights assigned below, instead of the unrelated
# hard-coded (3, 3).
rnn = RNNLayer(D, H)

# Deterministic, evenly spaced inputs make the expected output reproducible.
x = np.linspace(-0.4, 0.7, num=N*D).reshape(N, D)
prev_h = np.linspace(-0.2, 0.5, num=N*H).reshape(N, H)

# Overwrite the layer's parameters with fixed values for the check.
rnn.W_ih = np.linspace(-0.1, 0.9, num=D*H).reshape(D, H)
rnn.W_hh = np.linspace(-0.3, 0.7, num=H*H).reshape(H, H)
rnn.b = np.linspace(-0.2, 0.4, num=H)

# Run a single forward step and compare against the reference values.
next_h = rnn.forward_step(x, prev_h)
expected_next_h = np.array([
  [-0.58172089, -0.50182032, -0.41232771, -0.31410098],
  [ 0.66854692,  0.79562378,  0.87755553,  0.92795967],
  [ 0.97934501,  0.99144213,  0.99646691,  0.99854353]])

print('next_h error: ', rel_error(expected_next_h, next_h))

next_h error:  6.292421426471037e-09


### RNN Forward Pass (5 points)
Now, under RNNLayer, implement the forward method. It processes the whole series, i.e. all time steps in the sequence.

In [3]:
# Problem sizes: N samples, T time steps, D input features, H hidden units.
N, T, D, H = 2, 3, 4, 5
# Build the layer from the declared sizes rather than repeating the literals.
rnn = RNNLayer(D, H)

# Deterministic, evenly spaced inputs make the expected output reproducible.
x = np.linspace(-0.1, 0.3, num=N*T*D).reshape(N, T, D)
prev_h = np.linspace(-0.3, 0.1, num=N*H).reshape(N, H)

# Overwrite the layer's parameters with fixed values for the check.
rnn.W_ih = np.linspace(-0.2, 0.4, num=D*H).reshape(D, H)
rnn.W_hh = np.linspace(-0.4, 0.1, num=H*H).reshape(H, H)
rnn.b = np.linspace(-0.7, 0.1, num=H)

# Run the forward pass over the whole sequence.
h = rnn.forward(x, prev_h)
expected_h = np.array([
  [
    [-0.42070749, -0.27279261, -0.11074945,  0.05740409,  0.22236251],
    [-0.39525808, -0.22554661, -0.0409454,   0.14649412,  0.32397316],
    [-0.42305111, -0.24223728, -0.04287027,  0.15997045,  0.35014525],
  ],
  [
    [-0.55857474, -0.39065825, -0.19198182,  0.02378408,  0.23735671],
    [-0.27150199, -0.07088804,  0.13562939,  0.33099728,  0.50158768],
    [-0.51014825, -0.30524429, -0.06755202,  0.17806392,  0.40333043]]])
# Compare every sample in the batch; checking only index 0 would leave the
# second sample's hidden states unverified.
print('h error: ', rel_error(expected_h, h))

h error:  3.533790822429694e-08


### RNN Layer: Backward step (5 points)
First, implement the backward step of the RNN layer in the RNNLayer class, then call it and check whether the errors are below 1e-6.

In [4]:
# Seed NumPy's global generator before the layer is constructed.
np.random.seed(1)
N, D, H = 4, 5, 6
rnn = RNNLayer(D, H)

# Deterministic inputs and previous hidden state.
x = np.linspace(-0.4, 0.7, num=N * D).reshape(N, D)
prev_H = np.linspace(-0.2, 0.5, num=N * H).reshape(N, H)

# Fixed parameters for the check.
rnn.W_ih = np.linspace(-0.1, 0.9, num=D * H).reshape(D, H)
rnn.W_hh = np.linspace(-0.3, 0.7, num=H * H).reshape(H, H)
rnn.b = np.linspace(-0.2, 0.4, num=H)

# The input is stored on the layer before the forward step is run.
rnn.x = [x]
out = rnn.forward_step(x, prev_H)

# Upstream gradient with respect to the next hidden state.
dnext_h = np.linspace(-0.2, 0.4, num=N * H).reshape(N, H)

# Analytic gradients: dx is returned, the rest are read from the layer.
dx = rnn.backward_step(prev_H, dnext_h)
dprev_h, dW_ih, dW_hh, db = rnn.dprev_H, rnn.dW_ih, rnn.dW_hh, rnn.db


def f(_):
    """Re-run the forward step; the argument supplied by grad_check is ignored."""
    return rnn.forward_step(x, prev_H)


# Numerical gradients, evaluated in the same order as the analytic ones.
dx_num = grad_check(f, x, dnext_h)
dprev_h_num = grad_check(f, prev_H, dnext_h)
dW_ih_num = grad_check(f, rnn.W_ih, dnext_h)
dW_hh_num = grad_check(f, rnn.W_hh, dnext_h)
db_num = grad_check(f, rnn.b, dnext_h)

# Report the relative error between each numerical/analytic pair.
for label, numeric, analytic in (
    ('dx error: ', dx_num, dx),
    ('dprev_h error: ', dprev_h_num, dprev_h),
    ('dWx error: ', dW_ih_num, dW_ih),
    ('dWh error: ', dW_hh_num, dW_hh),
    ('db error: ', db_num, db),
):
    print(label, rel_error(numeric, analytic))

dx error:  8.933803417802173e-11
dprev_h error:  8.162318324366429e-11
dWx error:  2.7611508291536214e-11
dWh error:  5.4289594216460855e-11
db error:  3.2647691588646005e-10


### RNNLayer: Backward pass (5 points)
Now, under RNNLayer, implement the backward method. It processes the whole series, i.e. all time steps in the sequence.

In [5]:
# Seed NumPy's global generator before the layer is constructed.
np.random.seed(1)

N, D, T, H = 2, 10, 5, 5
rnn = RNNLayer(D, H)

# Deterministic input sequence and initial hidden state.
x = np.linspace(-0.4, 0.7, num=N * D * T).reshape(N, T, D)
h = np.linspace(-0.2, 0.5, num=N * H).reshape(N, H)

# Fixed parameters for the check.
rnn.W_ih = np.linspace(-0.1, 0.9, num=D * H).reshape(D, H)
rnn.W_hh = np.linspace(-0.3, 0.7, num=H * H).reshape(H, H)
rnn.b = np.linspace(-0.2, 0.4, num=H)

# The forward pass over the full sequence is run before calling backward.
out = rnn.forward(x, h)

# Upstream gradient for every time step.
dout = np.linspace(-0.2, 0.4, num=N * H * T).reshape(N, T, H)

dx, dh, dWx, dWh, db = rnn.backward(dout)

# Reference gradients stored on disk, loaded in the order they are compared.
dx_num, dh0_num, dWx_num, dWh_num, db_num = (
    np.load(path)
    for path in (
        'blg561/expected/dx.npy',
        'blg561/expected/dh0.npy',
        'blg561/expected/dW_ih.npy',
        'blg561/expected/dW_hh.npy',
        'blg561/expected/db.npy',
    )
)

# Report the relative error between each reference/computed pair.
for label, reference, computed in (
    ('dx error: ', dx_num, dx),
    ('dh0 error: ', dh0_num, dh),
    ('dWx error: ', dWx_num, dWx),
    ('dWh error: ', dWh_num, dWh),
    ('db error: ', db_num, db),
):
    print(label, rel_error(reference, computed))

dx error:  6.3568026948154245e-15
dh0 error:  4.282551415282429e-16
dWx error:  2.0852706256201176e-16
dWh error:  4.888728652742765e-16
db error:  1.538241775454289e-15
