In [1]:
import tensorflow as tf
# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.3.1


In [2]:
import numpy as np

In [3]:
# Record the NumPy version this notebook was executed with.
print(np.__version__)

1.18.5


In [4]:
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Things you should automatically know and have memorized
# N - number of samples
# T - sequence length
# D - number of input features
# M - number of hidden units
# K - number of output units

In [19]:
# Make some data: N random sequences, each with T time steps of D features.
# K is the number of output units of the model built from these dimensions.
SEED = 0  # fixed so the input tensor is identical on every Restart & Run All
N = 3
T = 10
D = 3
K = 2

# A dedicated seeded Generator makes X reproducible without touching NumPy's
# global random state. Note: this seed only covers the input data; it does not
# control how the model weights are initialised.
rng = np.random.default_rng(SEED)
X = rng.standard_normal((N, T, D))

In [20]:
# Build the network: a (T, D) sequence goes through a SimpleRNN with M hidden
# units, and the resulting vector goes through a Dense layer with K units.
M = 5  # size of the recurrent hidden state
i = Input(shape=(T, D))
x = SimpleRNN(units=M)(i)
x = Dense(units=K)(x)

model = Model(inputs=i, outputs=x)

In [21]:
# Get the output
# predict() runs the forward pass on every sequence in X; the printed Yhat has
# one row per sample (N rows) and one column per output unit (K columns).
Yhat = model.predict(X)
print(Yhat)

[[-0.78490984 -0.30868205]
 [-0.85483146  1.1177346 ]
 [ 0.84584373  1.2946328 ]]


In [22]:
# The parameter counts reported below are consistent with the model dimensions:
#   SimpleRNN: D*M + M*M + M = 3*5 + 5*5 + 5 = 45
#   Dense:     M*K + K       = 5*2 + 2       = 12
model.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 5)                 45        
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Inspect the SimpleRNN layer's parameters (layers[0] is the Input layer,
# layers[1] the SimpleRNN). get_weights() returns a list of three arrays.
model.layers[1].get_weights()

[array([[-0.3041228 ,  0.14652735, -0.44635728, -0.38041073, -0.23696923],
        [ 0.12936872,  0.6421047 ,  0.42220384,  0.39694065, -0.80862695],
        [ 0.81851965,  0.62728566, -0.28555894,  0.57325965, -0.16135174]],
       dtype=float32),
 array([[-0.16342914,  0.567871  ,  0.21201554,  0.27873752,  0.72675174],
        [-0.4855685 , -0.75011915,  0.01169065,  0.23025516,  0.38521472],
        [-0.3521136 ,  0.04743075,  0.16041875, -0.9024999 ,  0.18310122],
        [-0.6888422 ,  0.24663594,  0.3463769 ,  0.2340455 , -0.5384356 ],
        [ 0.3728625 , -0.22751871,  0.8995563 ,  0.00213601, -0.00162007]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [24]:
# Check the SimpleRNN weight shapes against the model dimensions.
# get_weights() returns three arrays, in this order:
#   a - input  -> hidden weights, shape (D, M)
#   b - hidden -> hidden weights, shape (M, M)
#   c - hidden bias term,         shape (M,)

a, b, c = model.layers[1].get_weights()
print(a.shape, b.shape, c.shape)

# N and D are both 3 here, so the printed (3, 5) alone cannot show which
# dimension the leading 3 refers to. Assert against the named constants
# instead of relying on an eyeball check.
assert a.shape == (D, M), a.shape
assert b.shape == (M, M), b.shape
assert c.shape == (M,), c.shape

(3, 5) (5, 5) (5,)


In [25]:
# Pull the parameters out of the SimpleRNN layer (layers[1]) and the Dense
# layer (layers[2]) in one nested unpacking:
#   Wx, Wh, bh - input->hidden weights, hidden->hidden weights, hidden bias
#   Wo, bo     - output weights and output bias
(Wx, Wh, bh), (Wo, bo) = (layer.get_weights() for layer in model.layers[1:3])

In [26]:
# Reproduce the model's forward pass by hand for a single sequence, using the
# weights extracted from the SimpleRNN and Dense layers.
h_last = np.zeros(M) # initial hidden state
x = X[0] # the first of the N samples: a (T, D) sequence
Yhats = []

for t in range(T):
    # recurrent update: h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
    h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)
    y = h.dot(Wo) + bo # we only care about this value on the last iteration
    Yhats.append(y)

    # important: assign h to h_last
    h_last = h

# print the final output
print(Yhats[-1])

# The output after the last time step must agree with the model's prediction
# for sample 0. The tolerance absorbs the small differences between this
# NumPy computation and the float32 values returned by the model.
np.testing.assert_allclose(Yhats[-1], Yhat[0], rtol=1e-4, atol=1e-6)

[-0.78490976 -0.30868203]
