In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf

from keras.models import Model
from keras.layers import Input, Dense, SimpleRNN, Flatten
from keras.optimizers import Adam, SGD


This tutorial is all about understanding the shape of the data as it passes through an RNN. The notation we need to recognize and always remember is: 

- $N = $ number of samples
- $T = $ sequence length 
- $D = $ number of input features 
- $M = $ number of hidden units 
- $K = $ number of output units

In [4]:
# Prepare some fake data

SEED = 42  # fixed seed so the same X is generated on every run

N = 1   # number of samples
T = 10  # sequence length
D = 3   # number of input features
K = 2   # number of output units

# Standard-normal noise laid out as (samples, time steps, features).
rng = np.random.default_rng(SEED)
X = rng.standard_normal((N, T, D))

In [5]:
# Make an RNN

M = 5 # number of hidden units

# Create the two layers up front, then wire them onto the input tensor.
recurrent = SimpleRNN(units=M)  # recurrent layer with M hidden units
readout = Dense(units=K)        # linear output layer with K units

i = Input(shape=(T, D))         # one sequence: T time steps of D features
x = readout(recurrent(i))

model = Model(inputs=i, outputs=x)

In [7]:
# get the output

# verbose=0 turns off the Keras progress bar so that only the prediction
# itself ends up in the saved cell output.
y_hat = model.predict(X, verbose=0)

# One row of K outputs per input sample.
assert y_hat.shape == (N, K), y_hat.shape

print(y_hat)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[[ 0.29978862 -1.2921464 ]]


In [8]:
# see if we can replicate the output above by pulling the weights of the model
# First step: print the layer-by-layer summary, which reports each layer's
# parameter count, so the weight arrays pulled later can be checked against it.

model.summary()

In [13]:
# index is 1 corresponding to the RNN layer
# get_weights() hands back that layer's parameters as a list of NumPy arrays;
# leaving the call as the last expression displays them as the cell output.

model.layers[1].get_weights()

[array([[ 0.6722953 , -0.77071685, -0.06604749, -0.15009606,  0.44120008],
        [ 0.01340675, -0.7135534 , -0.06368726, -0.7971151 ,  0.21534473],
        [-0.73105025, -0.5743897 , -0.32175505, -0.27217633, -0.05720061]],
       dtype=float32),
 array([[-0.12651193,  0.30750299,  0.01868686, -0.5189194 ,  0.7872803 ],
        [ 0.9530391 ,  0.09475023, -0.11890485, -0.2570137 , -0.0504427 ],
        [-0.17462526, -0.450994  ,  0.09798197, -0.7860484 , -0.37234125],
        [-0.17991573,  0.21707349, -0.93639606, -0.1203839 , -0.17082024],
        [-0.11334688,  0.80370885,  0.31476077, -0.17972888, -0.45806962]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [14]:
# check their shapes

# Fetch the RNN layer's weight list once and reuse it below.
rnn_weights = model.layers[1].get_weights()
a, b, c = rnn_weights  # unpacking fails unless there are exactly three arrays

# Print all three shapes on a single line, separated by spaces.
print(*(w.shape for w in rnn_weights))

(3, 5) (5, 5) (5,)


The first array is $D \times M$ (the input-to-hidden weights), the second is $M \times M$ (the hidden-to-hidden weights), and the last is a vector of size $M$ (the hidden biases). Note that the total number of parameters in this layer is 45, as reported by the `.summary()` method: $(3 \times 5) + (5 \times 5) + 5 = 45$.

In [16]:
# Name the two weighted layers first so the unpacking below reads clearly.
rnn_layer = model.layers[1]
output_layer = model.layers[2]

# RNN layer: input-to-hidden weights, hidden-to-hidden weights, hidden biases
wx, wh, bh = rnn_layer.get_weights()

# output layer: weights and biases
wo, bo = output_layer.get_weights()

In [20]:
# Manual RNN calculation


def simple_rnn_forward(x_seq, wx, wh, bh, wo, bo):
    """Replay the SimpleRNN + Dense forward pass for one sequence in NumPy.

    Starting from an all-zero hidden state, each time step computes

        h_t = tanh(x_t @ wx + h_{t-1} @ wh + bh)
        y_t = h_t @ wo + bo

    Parameters
    ----------
    x_seq : array of shape (T, D)
        A single input sequence.
    wx : array of shape (D, M)
        Input-to-hidden weights.
    wh : array of shape (M, M)
        Hidden-to-hidden weights.
    bh : array of shape (M,)
        Hidden biases.
    wo : array of shape (M, K)
        Output-layer weights.
    bo : array of shape (K,)
        Output-layer biases.

    Returns
    -------
    list of arrays of shape (K,)
        The output computed at every time step, in order.
    """
    h_last = np.zeros(wh.shape[0])  # initial hidden state, one entry per hidden unit
    outputs = []

    for x_t in x_seq:
        # the new hidden state replaces the previous one for the next step
        h_last = np.tanh(x_t.dot(wx) + h_last.dot(wh) + bh)
        outputs.append(h_last.dot(wo) + bo)

    return outputs


# the only sample we have (recall N = 1)
y_hats = simple_rnn_forward(X[0], wx, wh, bh, wo, bo)

# model.predict returned a single output row for this sample, so compare it with
# the output of the final time step. The tolerance absorbs the float32 (Keras)
# versus float64 (NumPy) rounding difference.
np.testing.assert_allclose(y_hats[-1], y_hat[0], rtol=1e-4, atol=1e-5)

print(y_hats[-1])

[ 0.29978868 -1.29214649]


In [None]:
# TODO: Calculate the output for multiple samples at once (N > 1) to mimic a real-world example.