<h2>Recurrent Neural Network from Scratch Practice</h2>

In [8]:
import numpy as np

<h3>Utility Functions</h3>

In [9]:
def sigmoid(x):
    """Logistic sigmoid, applied element-wise.

    Args:
        x: scalar or NumPy array of pre-activations.

    Returns:
        ``1 / (1 + exp(-x))`` evaluated for every element of ``x``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def softmax(x):
    """Softmax along axis 0 (each column is normalized independently).

    The maximum is subtracted per column before exponentiating. Shifting by
    a single global maximum leaves the mathematical result unchanged, but a
    column whose values sit far below that global maximum underflows to all
    zeros and then produces 0/0 = NaN; the per-column shift avoids this.

    Args:
        x: NumPy array of scores; normalization runs over the first axis.

    Returns:
        Array with the same shape as ``x`` whose entries along axis 0 sum to 1.
    """
    # Per-column shift keeps the largest exponent in every column at exp(0) = 1.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)

<h3>Initialize weights and biases</h3>

In [10]:
# Fixed seed so the random fixtures below are reproducible.
np.random.seed(42)

# Random inputs for a single RNN step: 3 input features, 5 hidden units,
# 10 columns (one per example).
x_input = np.random.randn(3, 10)
a_prev = np.random.randn(5, 10)

# NOTE: the entries are drawn in exactly this order; reordering them would
# change the seeded values.
parameters = {
    'Waa': np.random.randn(5, 5),
    'Wax': np.random.randn(5, 3),
    'ba': np.random.randn(5, 1),
    'Wya': np.random.randn(2, 5),
    'by': np.random.randn(2, 1),
}


In [11]:
def rnn_cell(x, a, parameters):
    """Run a single time step of a vanilla RNN.

    Args:
        x: input for the current step; multiplied on the left by ``Wax``.
        a: hidden state from the previous step; multiplied on the left by ``Waa``.
        parameters: dict holding 'Waa', 'Wax', 'Wya', 'ba' and 'by'.

    Returns:
        Tuple ``(a_next, y_pred, cache)``: the new hidden state, the softmax
        output computed from it, and ``cache = (a_next, y_pred, x, parameters)``.
    """
    Waa, Wax, Wya, ba, by = (
        parameters[key] for key in ('Waa', 'Wax', 'Wya', 'ba', 'by')
    )

    # Hidden-state update: tanh(Waa . a + Wax . x + ba)
    recurrent_term = np.dot(Waa, a)
    input_term = np.dot(Wax, x)
    a_next = np.tanh(recurrent_term + input_term + ba)

    # Output for this step: softmax(Wya . a_next + by)
    logits = np.dot(Wya, a_next) + by
    y_pred = softmax(logits)

    return a_next, y_pred, (a_next, y_pred, x, parameters)

In [12]:
# Smoke-test one RNN step on the random fixtures and show the hidden-state shape.
a_next, yt_pred, cache = rnn_cell(x=x_input, a=a_prev, parameters=parameters)
print(a_next.shape)

(5, 10)


<h3>RNN Forward</h3>

In [13]:
def rnn_forward(x, a0, parameters):
    """Unroll ``rnn_cell`` over every time step of ``x``.

    Args:
        x: 3-D input array whose shape is unpacked as (n_x, m, T_x); the
            slice ``x[:, :, t]`` is fed to the cell at step ``t``.
        a0: initial hidden state handed to the first step.
        parameters: dict forwarded to ``rnn_cell``; the shape of 'Wya' is
            unpacked as (n_y, n_a) to size the output buffers.

    Returns:
        Tuple ``(a, y_pred, caches)``: hidden states of shape (n_a, m, T_x),
        predictions of shape (n_y, m, T_x), and ``caches`` as the pair
        (list of per-step caches, x).
    """
    _, batch_size, num_steps = x.shape
    output_dim, hidden_dim = parameters['Wya'].shape

    # Pre-allocate the per-step result buffers.
    a = np.zeros((hidden_dim, batch_size, num_steps))
    y_pred = np.zeros((output_dim, batch_size, num_steps))

    step_caches = []
    hidden = a0
    for t in range(num_steps):
        hidden, step_pred, step_cache = rnn_cell(x[:, :, t], hidden, parameters)
        a[:, :, t] = hidden
        y_pred[:, :, t] = step_pred
        step_caches.append(step_cache)

    return a, y_pred, (step_caches, x)

In [14]:
np.random.seed(42)

# Fixtures for the unrolled RNN. The names now match the roles in
# rnn_forward(x, a0, parameters): previously the input sequence was called
# `a` and the initial hidden state was called `x`.
x_seq = np.random.randn(3, 10, 5)  # (n_x, m, T_x)
a0 = np.random.randn(5, 10)        # (n_a, m)

parameters = {}
parameters['Waa'] = np.random.randn(5, 5)
parameters['Wax'] = np.random.randn(5, 3)
parameters['Wya'] = np.random.randn(2, 5)
parameters['ba'] = np.random.randn(5, 1)
parameters['by'] = np.random.randn(2, 1)

a, y, cache = rnn_forward(x_seq, a0, parameters)

print(a.shape)

(5, 10, 5)


<h3>LSTM Implementation</h3>

In [15]:
def lstm_cell(xt, a_prev, c_prev, parameters):
    """Run a single time step of an LSTM.

    Args:
        xt: input for the current step; its shape is unpacked as (n_x, m).
        a_prev: previous hidden state, copied into the first n_a rows of the
            stacked gate input.
        c_prev: previous memory cell state.
        parameters: dict holding the gate weights and biases 'Wf', 'bf', 'Wi',
            'bi', 'Wo', 'bo', 'Wc', 'bc' and the output layer 'Wy', 'by'.
            The shape of 'Wy' is unpacked as (n_y, n_a).

    Returns:
        Tuple ``(a_next, c_next, yt_pred, cache)`` where ``cache`` is
        ``(a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters)``.
    """
    Wf, bf, Wi, bi, Wo, bo, Wc, bc, Wy, by = (
        parameters[key]
        for key in ('Wf', 'bf', 'Wi', 'bi', 'Wo', 'bo', 'Wc', 'bc', 'Wy', 'by')
    )

    n_x, m = xt.shape
    _, n_a = Wy.shape

    # Stack [a_prev; xt] so each gate needs a single matrix product.
    stacked = np.zeros((n_a + n_x, m))
    stacked[:n_a] = a_prev
    stacked[n_a:] = xt

    # Gates: sigmoid(W . [a_prev; xt] + b)
    forget_gate = sigmoid(np.dot(Wf, stacked) + bf)
    update_gate = sigmoid(np.dot(Wi, stacked) + bi)
    output_gate = sigmoid(np.dot(Wo, stacked) + bo)

    # Candidate memory and the new cell / hidden states.
    candidate = np.tanh(np.dot(Wc, stacked) + bc)
    c_next = forget_gate * c_prev + update_gate * candidate
    a_next = output_gate * np.tanh(c_next)

    yt_pred = softmax(np.dot(Wy, a_next) + by)

    cache = (
        a_next, c_next, a_prev, c_prev,
        forget_gate, update_gate, candidate, output_gate,
        xt, parameters,
    )
    return a_next, c_next, yt_pred, cache


In [17]:
np.random.seed(42)

# Random fixtures for one LSTM step: n_x = 3, n_a = 5, m = 10, n_y = 2.
xt_tmp = np.random.randn(3, 10)
a_prev_tmp = np.random.randn(5, 10)
c_prev_tmp = np.random.randn(5, 10)

parameters_tmp = {}
parameters_tmp['Wf'] = np.random.randn(5, 5+3)
parameters_tmp['Wi'] = np.random.randn(5, 5+3)
parameters_tmp['Wo'] = np.random.randn(5, 5+3)
parameters_tmp['Wc'] = np.random.randn(5, 5+3)
parameters_tmp['Wy'] = np.random.randn(2, 5)
parameters_tmp['bf'] = np.random.randn(5, 1)
parameters_tmp['bi'] = np.random.randn(5, 1)
parameters_tmp['bo'] = np.random.randn(5, 1)
parameters_tmp['bc'] = np.random.randn(5, 1)
parameters_tmp['by'] = np.random.randn(2, 1)


a_next_tmp, c_next_tmp, yt_tmp, cache_tmp = lstm_cell(xt_tmp, a_prev_tmp, c_prev_tmp, parameters_tmp)

# Print the LSTM result itself; the earlier code printed `a_next`, a leftover
# from the RNN cell demo, so this cell never inspected its own output.
print(a_next_tmp.shape)

(5, 10)


In [None]:
def lstm_forward(x, a0, parameters):
    """Unroll ``lstm_cell`` over every time step of ``x``.

    Args:
        x: 3-D input array whose shape is unpacked as (n_x, m, T_x); the
            slice ``x[:, :, t]`` is fed to the cell at step ``t``.
        a0: initial hidden state handed to the first step.
        parameters: dict forwarded to ``lstm_cell``; the shape of 'Wy' is
            unpacked as (n_y, n_a) to size the output buffers.

    Returns:
        Tuple ``(a, y, c, caches)``: hidden states of shape (n_a, m, T_x),
        predictions of shape (n_y, m, T_x), memory cell states of shape
        (n_a, m, T_x), and ``caches`` as the pair (list of per-step caches, x).
    """
    caches = []

    Wy = parameters['Wy']

    n_x, m, T_x = x.shape
    n_y, n_a = Wy.shape

    a = np.zeros([n_a, m, T_x])
    c = np.zeros([n_a, m, T_x])
    y = np.zeros([n_y, m, T_x])

    a_next = a0
    # The memory cell has no caller-supplied initial value; start it at zero.
    c_next = np.zeros([n_a, m])

    for t in range(T_x):
        a_next, c_next, yt_pred, cache = lstm_cell(x[:, :, t], a_next, c_next, parameters)
        a[:, :, t] = a_next
        c[:, :, t] = c_next
        y[:, :, t] = yt_pred
        caches.append(cache)

    # Same cache layout as rnn_forward: (per-step caches, full input).
    caches = (caches, x)
    return a, y, c, caches

In [None]:
# Fixed seed so the random fixtures below are reproducible.
np.random.seed(42)

# Input sequence (n_x = 3, m = 10, T_x = 7) and initial hidden state (n_a = 5, m = 10).
x_tmp = np.random.randn(3, 10, 7)
a0_tmp = np.random.randn(5, 10)

# NOTE: entries are drawn left to right in exactly this order (gate weights,
# Wy, gate biases, by); reordering them would change the seeded values.
parameters_tmp = {
    **{name: np.random.randn(5, 5 + 3) for name in ('Wf', 'Wi', 'Wo', 'Wc')},
    'Wy': np.random.randn(2, 5),
    **{name: np.random.randn(5, 1) for name in ('bf', 'bi', 'bo', 'bc')},
    'by': np.random.randn(2, 1),
}

a_tmp, y_tmp, c_tmp, caches_tmp = lstm_forward(x_tmp, a0_tmp, parameters_tmp)
print(a_tmp.shape)