In [3]:
import numpy as np
from rnn_utils import *

In [4]:
def rnn_cell_forward(xt, a_prev, parameters):
    """
    Run a single time step of a basic RNN cell.

    Arguments:
    xt -- input for the current time step; left-multiplied by Wax
          (presumably shaped (n_x, m) -- confirm against callers)
    a_prev -- hidden state from the previous step; left-multiplied by Waa
              (presumably shaped (n_a, m) -- confirm against callers)
    parameters -- dict that must contain the keys "Wax", "Waa", "Wya",
                  "ba" and "by"

    Returns:
    a_next -- tanh(Waa . a_prev + Wax . xt + ba)
    yt_pred -- softmax(Wya . a_next + by), using the `softmax` helper that
               is already in scope in this notebook
    cache -- the tuple (a_next, a_prev, xt, parameters)
    """
    # Look the weights up in the same order as before so that a missing key
    # raises the same KeyError.
    input_weights = parameters["Wax"]
    recurrent_weights = parameters["Waa"]
    output_weights = parameters["Wya"]
    hidden_bias = parameters["ba"]
    output_bias = parameters["by"]

    # Hidden-state update.
    pre_activation = np.dot(recurrent_weights, a_prev) + np.dot(input_weights, xt) + hidden_bias
    a_next = np.tanh(pre_activation)

    # Output for this step.
    logits = np.dot(output_weights, a_next) + output_bias
    yt_pred = softmax(logits)

    return a_next, yt_pred, (a_next, a_prev, xt, parameters)

In [5]:
# Seed the global RNG so the values printed by this cell are repeatable.
np.random.seed(1)

# The draws must happen in this order (xt, a_prev, Waa, Wax, Wya, ba, by);
# reordering them would change every sampled value.
xt_tmp = np.random.randn(3, 10)
a_prev_tmp = np.random.randn(5, 10)
parameters_tmp = {
    'Waa': np.random.randn(5, 5),
    'Wax': np.random.randn(5, 3),
    'Wya': np.random.randn(2, 5),
    'ba': np.random.randn(5, 1),
    'by': np.random.randn(2, 1),
}

# Run one cell step and inspect one row of each result plus the shapes.
a_next_tmp, yt_pred_tmp, cache_tmp = rnn_cell_forward(
    xt_tmp, a_prev_tmp, parameters_tmp
)
print("a_next[4] = \n", a_next_tmp[4])
print("a_next.shape = \n", a_next_tmp.shape)
print("yt_pred[1] =\n", yt_pred_tmp[1])
print("yt_pred.shape = \n", yt_pred_tmp.shape)

a_next[4] = 
 [ 0.59584544  0.18141802  0.61311866  0.99808218  0.85016201  0.99980978
 -0.18887155  0.99815551  0.6531151   0.82872037]
a_next.shape = 
 (5, 10)
yt_pred[1] =
 [0.9888161  0.01682021 0.21140899 0.36817467 0.98988387 0.88945212
 0.36920224 0.9966312  0.9982559  0.17746526]
yt_pred.shape = 
 (2, 10)


In [6]:
# Scratch cell: draws a (3, 10) sample from the standard normal distribution
# and displays it without assigning it to a name. As a side effect it advances
# NumPy's global RNG state.
np.random.randn(3,10)

array([[-1.14434139,  0.80186103,  0.0465673 , -0.18656977, -0.10174587,
         0.86888616,  0.75041164,  0.52946532,  0.13770121,  0.07782113],
       [ 0.61838026,  0.23249456,  0.68255141, -0.31011677, -2.43483776,
         1.0388246 ,  2.18697965,  0.44136444, -0.10015523, -0.13644474],
       [-0.11905419,  0.01740941, -1.12201873, -0.51709446, -0.99702683,
         0.24879916, -0.29664115,  0.49521132, -0.17470316,  0.98633519]])

In [7]:
def rnn_forward(x, a0, parameters):
    """
    Unroll the basic RNN cell over every time step of x.

    Arguments:
    x -- 3-D input array; its shape is unpacked as (n_x, m, T_x) and the
         last axis is iterated as the time axis
    a0 -- initial hidden state passed to the first cell step
    parameters -- dict forwarded unchanged to rnn_cell_forward; the shape of
                  parameters["Wya"] is read as (n_y, n_a)

    Returns:
    a -- array of shape (n_a, m, T_x) holding the hidden state of every step
    y_pred -- array of shape (n_y, m, T_x) holding the prediction of every step
    caches -- the tuple (list of per-step caches, x)
    """
    # Unpacking into three names also enforces that x is 3-D.
    n_x, m, T_x = x.shape
    n_y, n_a = parameters["Wya"].shape

    # Pre-allocate the outputs; each time step fills one slice of the last axis.
    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))

    step_caches = []
    hidden = a0
    for t in range(T_x):
        # Feed the slice for step t together with the latest hidden state.
        hidden, y_t, step_cache = rnn_cell_forward(x[:, :, t], hidden, parameters)
        a[..., t] = hidden
        y_pred[..., t] = y_t
        step_caches.append(step_cache)

    return a, y_pred, (step_caches, x)
        

In [8]:
# Seed the global RNG so the values printed by this cell are repeatable.
np.random.seed(1)

# Draw order is significant: x, a0, Waa, Wax, Wya, ba, by.
x = np.random.randn(3, 10, 4)
a0 = np.random.randn(5, 10)
Waa, Wax, Wya = np.random.randn(5, 5), np.random.randn(5, 3), np.random.randn(2, 5)
ba, by = np.random.randn(5, 1), np.random.randn(2, 1)
parameters = {
    "Waa": Waa,
    "Wax": Wax,
    "Wya": Wya,
    "ba": ba,
    "by": by,
}

a, y_pred, caches = rnn_forward(x, a0, parameters)

# `caches` is the 2-tuple (per-step cache list, x), so caches[1][1][3] reads
# x[1][3] and len(caches) is 2.
print("a[4][1] = ", a[4][1])
print("a.shape = ", a.shape)
print("y_pred[1][3] =", y_pred[1][3])
print("y_pred.shape = ", y_pred.shape)
print("caches[1][1][3] =", caches[1][1][3])
print("len(caches) = ", len(caches))

a[4][1] =  [-0.99999375  0.77911235 -0.99861469 -0.99833267]
a.shape =  (5, 10, 4)
y_pred[1][3] = [0.79560373 0.86224861 0.11118257 0.81515947]
y_pred.shape =  (2, 10, 4)
caches[1][1][3] = [-1.1425182  -0.34934272 -0.20889423  0.58662319]
len(caches) =  2


In [9]:
# Display the full y_pred array produced by the rnn_forward call in the
# preceding cell.
y_pred

array([[[9.99129368e-01, 8.90772592e-01, 7.04206315e-01, 9.19730000e-01],
        [9.99394165e-01, 3.26947813e-01, 9.98789616e-01, 9.88219303e-01],
        [9.94277463e-01, 9.95999371e-01, 9.97939529e-01, 2.76624090e-01],
        [2.04396268e-01, 1.37751394e-01, 8.88817431e-01, 1.84840534e-01],
        [7.42084036e-01, 4.16030656e-01, 5.96207274e-02, 9.56452021e-01],
        [3.43688280e-01, 9.98867515e-01, 9.89766361e-01, 8.99137589e-01],
        [9.39252776e-01, 7.07585502e-01, 8.85151052e-01, 3.34768716e-01],
        [9.35504646e-01, 9.85731426e-01, 9.37994449e-01, 6.45782938e-01],
        [6.39183022e-01, 2.35008185e-02, 8.20174082e-01, 3.42431253e-01],
        [6.07750160e-01, 8.98666718e-01, 9.99114486e-01, 9.90187623e-01]],

       [[8.70631878e-04, 1.09227408e-01, 2.95793685e-01, 8.02699998e-02],
        [6.05834882e-04, 6.73052187e-01, 1.21038427e-03, 1.17806974e-02],
        [5.72253732e-03, 4.00062909e-03, 2.06047094e-03, 7.23375910e-01],
        [7.95603732e-01, 8.62248606e

In [23]:
# Scratch cell for an indexing demo. NOTE: this rebinds `x`, which earlier held
# the (3, 10, 4) input built for rnn_forward, to a new (2, 3, 4) random array.
x = np.random.randn(2,3,4)

In [24]:
# Display the whole (2, 3, 4) scratch array.
x

array([[[-2.09424782, -0.22876583,  1.61336137, -0.37480469],
        [-0.74996962,  2.0546241 ,  0.05340954, -0.4791571 ],
        [ 0.35016716,  0.01716473, -0.42914228,  1.20845633]],

       [[ 1.1157018 ,  0.84086156, -0.10288722,  1.14690038],
        [-0.04970258,  0.46664327,  1.03368687,  0.80884436],
        [ 1.78975468,  0.45128402, -1.68405999, -1.1601701 ]]])

In [25]:
# Take index 2 along the last axis, giving a (2, 3) array. This is the same
# slicing pattern rnn_forward uses to pick out one time step (x[:,:,t]).
x[:,:,2]

array([[ 1.61336137,  0.05340954, -0.42914228],
       [-0.10288722,  1.03368687, -1.68405999]])

# Long Short-Term Memory (LSTM) network

In [26]:
def lstm_cell_forward(xt, a_prev, c_prev, parameters):
    """
    Implements a single forward step of the LSTM-cell
    """