In [1]:
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Things to memorize:
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

In [3]:
# Make some data
N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
K = 2   # number of output units

# Draw the batch from a dedicated, seeded generator so X is identical on every
# Restart & Run All. Note this pins only the input data; the Keras layer
# weights created later are still initialised randomly on each run.
SEED = 42
rng = np.random.default_rng(SEED)
X = rng.standard_normal((N, T, D))  # shape (N, T, D), float64

In [4]:
# Build a tiny functional-API network: (T, D) sequence -> SimpleRNN -> Dense
M = 5  # number of hidden units

# Create the layers first (same creation order as before: recurrent, then
# dense), then wire them together.
rnn_layer = SimpleRNN(M)
dense_layer = Dense(K)

i = Input(shape=(T, D))
x = dense_layer(rnn_layer(i))  # the RNN emits only its last hidden state

model = Model(inputs=i, outputs=x)

In [5]:
# Get the output: run a forward pass of the model on the random batch X.
# No training has been run in the cells above, so this just reflects the
# initial weights. The result has one row per sample and K columns.
Yhat = model.predict(X)
print(Yhat)

[[-0.59715766  0.16662745]]


In [6]:
# Print the layer-by-layer summary. Expected parameter counts:
#   SimpleRNN: D*M + M*M + M = 3*5 + 5*5 + 5 = 45
#   Dense:     M*K + K       = 5*2 + 2       = 12
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 5)                 45        
                                                                 
 dense (Dense)               (None, 2)                 12        
                                                                 
Total params: 57 (228.00 Byte)
Trainable params: 57 (228.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [7]:
# Inspect the raw parameter arrays of the recurrent layer
# (index 0 is the input layer, index 1 the SimpleRNN).
rnn_layer = model.layers[1]
rnn_layer.get_weights()

[array([[ 0.73569435, -0.6403835 ,  0.5940667 ,  0.7657934 ,  0.41452748],
        [-0.18677288,  0.08738357,  0.49622422,  0.23452121, -0.1081962 ],
        [-0.07746834,  0.7913403 ,  0.11536545, -0.31224066, -0.5586885 ]],
       dtype=float32),
 array([[-0.9571347 , -0.18040596,  0.2072014 , -0.03925599, -0.08290668],
        [-0.07325457,  0.79138553,  0.16751225, -0.54874855, -0.1978822 ],
        [ 0.20249067, -0.5102903 ,  0.13154027, -0.4808842 , -0.6708568 ],
        [-0.09500375, -0.2135387 , -0.55248874, -0.63801825,  0.48276737],
        [ 0.16881606, -0.1875304 ,  0.7787559 , -0.24293405,  0.52043813]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [9]:
# The SimpleRNN layer exposes three arrays, in this order:
#   a: input>hidden weights
#   b: hidden>hidden weights
#   c: bias term (vector of length M)
rnn_weights = model.layers[1].get_weights()
a, b, c = rnn_weights
print(*(w.shape for w in rnn_weights))

(3, 5) (5, 5) (5,)


In [14]:
# Pull the trained-or-initial parameters out of the model so the forward pass
# can be reproduced by hand.
recurrent_layer = model.layers[1]
readout_layer = model.layers[2]

# Wx: input>hidden, Wh: hidden>hidden, bh: hidden bias
Wx, Wh, bh = recurrent_layer.get_weights()
# Wo: hidden>output, bo: output bias
Wo, bo = readout_layer.get_weights()

In [15]:
# Re-implement the model's forward pass with plain NumPy.
h_last = np.zeros(M)  # initial hidden state: all zeros
x = X[0]  # the one and only sample, one row per time step
Yhats = []  # where we store the output computed at every time step

for x_t in x:
  # Simple (Elman) recurrence: h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
  h = np.tanh(x_t.dot(Wx) + h_last.dot(Wh) + bh)
  # Dense read-out. The model only applies it to the final hidden state, so
  # only the value from the last iteration is comparable to model.predict.
  y = h.dot(Wo) + bo
  Yhats.append(y)

  # important: carry the new state into the next time step
  h_last = h

# print the final output
print(Yhats[-1])

# Check the claim instead of comparing printed numbers by eye: the manual
# result must match what Keras computes for the same sample. The tolerance is
# loose because Keras runs in float32 (possibly reduced precision on some
# accelerators) while this loop runs in float64.
np.testing.assert_allclose(
  Yhats[-1], model.predict(X, verbose=0)[0], rtol=1e-3, atol=1e-3
)

[-0.5971576   0.16662737]
