<a href="https://colab.research.google.com/github/navidyamini/TensorFlow2.0_Codes/blob/main/code/TF2_0_RNN_Shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Things you should automatically know and have memorized
# N = number of samples
# T = sequence length
# D = number of input features
# M = number of hidden units
# K = number of output units

In [3]:
# Build a tiny random dataset: N sequences, each with T time steps of D features
N, T, D = 1, 10, 3  # samples, sequence length, input features
K = 2  # output units
# Drawn from a standard normal; no seed is set, so the values differ on every run
X = np.random.standard_normal(size=(N, T, D))

In [4]:
# Build the network: (T, D) input sequence -> SimpleRNN with M units -> Dense with K outputs
M = 5  # number of hidden units
i = Input(shape=(T, D))
x = SimpleRNN(units=M)(i)  # output shape is (None, M): one hidden vector per sample
x = Dense(units=K)(x)  # no activation is specified for the output layer

model = Model(inputs=i, outputs=x)

In [5]:
# Get the output
# Run the untrained model on X; the result has one row per sample and K columns.
# This is the reference value the manual NumPy computation further down should match.
Yhat = model.predict(X)
print(Yhat)

[[0.39179695 0.9910832 ]]


In [6]:
# See if we can replicate this output by hand with NumPy
# Get the weights first: the summary lists each layer and its parameter count
#   SimpleRNN: D*M + M*M + M = 15 + 25 + 5 = 45
#   Dense:     M*K + K       = 10 + 2      = 12
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn (SimpleRNN)      (None, 5)                 45        
                                                                 
 dense (Dense)               (None, 2)                 12        
                                                                 
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [7]:
# See what's returned
# layers[1] is the SimpleRNN layer (layers[0] is the InputLayer);
# get_weights() gives back a list of NumPy arrays
model.layers[1].get_weights()

[array([[ 0.84328073, -0.23984814,  0.05741078, -0.40568924, -0.80407953],
        [ 0.16030937, -0.7396547 , -0.58631384,  0.5258549 ,  0.18614727],
        [-0.6951524 , -0.40779138,  0.8603342 ,  0.71884805, -0.48174158]],
       dtype=float32),
 array([[-0.03332174,  0.0871714 , -0.32234648,  0.9274769 , -0.16483305],
        [-0.30357674, -0.75577044, -0.32552475,  0.03215745,  0.47921985],
        [-0.19151309,  0.34593374, -0.8392166 , -0.35277325, -0.12214409],
        [ 0.06703057, -0.5325512 , -0.06404306, -0.11784123, -0.83301145],
        [ 0.9303549 , -0.13390756, -0.28590283, -0.02041596,  0.18534054]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [8]:
# Unpack the three SimpleRNN parameter arrays and confirm their dimensions:
#   a: input  > hidden weights, expected shape (D, M)
#   b: hidden > hidden weights, expected shape (M, M)
#   c: bias term, a vector of length M
a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c)))

(3, 5) (5, 5) (5,)


In [10]:
# Give the parameters conventional names for the manual forward pass below
rnn_layer = model.layers[1]
dense_layer = model.layers[2]

# Recurrent layer: input > hidden weights, hidden > hidden weights, hidden bias
Wx, Wh, bh = rnn_layer.get_weights()
# Output layer: hidden > output weights and output bias
Wo, bo = dense_layer.get_weights()

In [11]:
# Manual forward pass of the RNN, one time step at a time
x = X[0]  # the one and only sample
h_last = np.zeros(M)  # the hidden state starts at all zeros
Yhats = []  # output computed at every time step

for t in range(T):
  # new hidden state from the current input and the previous hidden state
  pre_activation = np.dot(x[t], Wx) + np.dot(h_last, Wh) + bh
  h = np.tanh(pre_activation)

  # the output layer is applied at every step, but only the final one matters
  y = np.dot(h, Wo) + bo
  Yhats.append(y)

  # important: carry the hidden state over to the next time step
  h_last = h

# the last entry is the one to compare with model.predict(X)
print(Yhats[-1])

[0.39179697 0.99108318]
