### Importing dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Input, Dense, Flatten, SimpleRNN
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam




**Notation you should know by heart:**

**N** = Number of samples<br>
**T** = Sequence length (number of time steps)<br>
**D** = Number of input features<br>
**M** = Number of hidden units<br>
**K** = Number of output units

### Make some data

In [2]:
# Problem dimensions: samples, time steps, input features, output units.
N, T, D, K = 1, 10, 3, 2

# One batch of standard-normal noise shaped (N, T, D).
X = np.random.standard_normal(size=(N, T, D))

In [3]:
X

array([[[-1.43623521,  0.47876681, -0.84119578],
        [ 0.444773  ,  0.66736841,  0.55960827],
        [-0.8710904 , -1.25502943, -1.89549606],
        [ 0.76475397,  0.18554303,  1.6659261 ],
        [ 0.48443908,  0.44171948, -0.31515384],
        [ 0.46547723,  1.24103132, -1.57995658],
        [ 0.84239627, -0.75920677, -0.44038738],
        [-1.30888582, -0.31479177,  0.71211389],
        [ 0.05097859, -0.55301796,  0.89242714],
        [-0.47924422, -0.07483556, -1.75579194]]])

### Make RNN

In [4]:
M = 5  # number of hidden units in the recurrent layer

# Functional-API graph: (T, D) sequence -> SimpleRNN(M) -> Dense(K).
i = Input(shape=(T, D))
rnn_out = SimpleRNN(units=M)(i)
x = Dense(units=K)(rnn_out)

model = Model(inputs=i, outputs=x)




In [5]:
# Forward pass through the model as built (it is never compiled or fitted in
# this notebook, so the weights are the initial ones). Result has shape (N, K).
Yhat = model.predict(X)



In [6]:
print(Yhat)

[[-1.2232696  1.0081006]]


The output shape is (1, 2), i.e. (N, K).

In [7]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 5)                 45        
                                                                 
 dense (Dense)               (None, 2)                 12        
                                                                 
Total params: 57 (228.00 Byte)
Trainable params: 57 (228.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [8]:
# layers[0] is the InputLayer, so layers[1] is the SimpleRNN layer.
# Its weights come back as [input->hidden, hidden->hidden, hidden bias].
model.layers[1].get_weights()

[array([[ 0.13595778,  0.46882063,  0.39732057, -0.42152104,  0.84948987],
        [ 0.42945594,  0.01535523, -0.2985717 ,  0.41874665, -0.03360796],
        [ 0.61703366, -0.80372256,  0.5747407 ,  0.5906499 , -0.3564192 ]],
       dtype=float32),
 array([[ 0.21126842,  0.40024236,  0.78311306,  0.3360117 , -0.26268223],
        [-0.02324926, -0.57324743,  0.5938863 , -0.32787365,  0.45896092],
        [-0.6799957 ,  0.5610718 ,  0.1010887 , -0.13071303,  0.44215316],
        [ 0.59040684,  0.24186799, -0.15305823,  0.25316545,  0.71091497],
        [-0.37927657, -0.371338  , -0.01968719,  0.8357131 ,  0.13947557]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [9]:
# Shapes of the three SimpleRNN weight arrays, in the order Keras returns them:
#   a -> input-to-hidden weights
#   b -> hidden-to-hidden (recurrent) weights
#   c -> hidden bias (vector of length M)
rnn_weights = model.layers[1].get_weights()
a, b, c = rnn_weights
tuple(w.shape for w in (a, b, c))

((3, 5), (5, 5), (5,))

These shapes are (D, M), (M, M) and (M,), respectively.

### Computing manually

In [10]:
# Grab the current parameters of the recurrent layer and of the output layer.
rnn_layer = model.layers[1]
dense_layer = model.layers[2]

Wx, Wh, bh = rnn_layer.get_weights()  # input->hidden, hidden->hidden, hidden bias
Wo, bo = dense_layer.get_weights()  # Dense kernel and bias

In [11]:
# Manual forward pass of the SimpleRNN -> Dense stack for the single sample.
# The result is kept in `y_manual` so that `Yhat` (the Keras prediction) is not
# overwritten and the two can still be compared side by side.
h_last = np.zeros(M)  # initial hidden state
x = X[0]  # the one and only sample, shape (T, D)

for t in range(T):
    # Recurrence: h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
    h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)

    # Important: carry h forward as the previous state of the next step
    h_last = h

# The model only feeds the last hidden state into the Dense layer
# (SimpleRNN output shape is (None, M)), so project once, after the loop.
y_manual = h_last.dot(Wo) + bo

# Print the final output
print(y_manual)

[-1.2232696   1.00810052]


### Bonus exercise: Calculate the output for multiple samples at once (N>1)

In [12]:
# Problem dimensions for the batched case: samples, time steps, features, outputs.
N, T, D, K = 3, 10, 4, 2

# Standard-normal inputs shaped (N, T, D).
X = np.random.standard_normal(size=(N, T, D))

**Using API**

In [13]:
M = 5  # number of hidden units in the recurrent layer

# Same architecture as before, rebuilt for the new input width D.
i = Input(shape=(T, D))
rnn_out = SimpleRNN(units=M)(i)
x = Dense(units=K)(rnn_out)

model = Model(inputs=i, outputs=x)

In [14]:
# Predict for the whole batch in one call; the result has one row per sample.
Yhat = model.predict(X)



In [15]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 10, 4)]           0         
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 5)                 50        
                                                                 
 dense_1 (Dense)             (None, 2)                 12        
                                                                 
Total params: 62 (248.00 Byte)
Trainable params: 62 (248.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [16]:
print(Yhat)

[[-1.4181659  -1.0003536 ]
 [ 0.13516168  0.62432885]
 [-0.02000346 -1.0792183 ]]


**Manually**

In [17]:
# Grab the current parameters of the recurrent layer and of the output layer.
rnn_layer = model.layers[1]
dense_layer = model.layers[2]

Wx, Wh, bh = rnn_layer.get_weights()  # input->hidden, hidden->hidden, hidden bias
Wo, bo = dense_layer.get_weights()  # Dense kernel and bias

In [18]:
# Batched manual forward pass: all N samples advance through time together,
# so the hidden state is an (N, M) matrix instead of a single length-M vector.
# `Yhat` (the Keras prediction) is left untouched for comparison.
h_last = np.zeros((N, M))  # initial hidden state, one row per sample

for t in range(T):
    # X[:, t] is the (N, D) slice holding time step t of every sample.
    # Recurrence: h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh); bh broadcasts over rows.
    h = np.tanh(X[:, t].dot(Wx) + h_last.dot(Wh) + bh)

    # Important: carry h forward as the previous state of the next step
    h_last = h

# Only the last hidden state feeds the Dense layer, so project once after the
# loop. The result has shape (N, K): one row of outputs per sample.
Yhat_all_samples = h_last.dot(Wo) + bo

# Print the final outputs for all samples
print(Yhat_all_samples)

[[-1.41816593 -1.00035351]
 [ 0.13516156  0.62432858]
 [-0.02000353 -1.07921828]]
