# Import
---

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

# Data
---

In [2]:
# Things you should automatically know and have memorized
# N = num of samples
# T = sequence length
# D = num of input features
# M = num of hidden units
# K = num of output units

In [4]:
# Make some data
N = 1
T = 10
D = 3
K = 2
X = np.random.randn(N, T, D)

# Model
---

In [6]:
M = 5  # hidden layers
i = Input(shape=(T, D))
x = SimpleRNN(M)(i)
x = Dense(K)(x)

model = Model(i, x)

In [8]:
# (numbers were random)
Yhat = model.predict(X)
print(Yhat)

[[0.28018332 0.1876842 ]]


In [9]:
model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 5)                 45        
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [14]:
# See whats stored in there
model.layers[1].get_weights()

[array([[ 0.03491932,  0.843633  , -0.11389434,  0.01984614,  0.0483017 ],
        [ 0.72304946, -0.58804214,  0.11798382, -0.8160446 ,  0.40371102],
        [-0.3917046 ,  0.42196244,  0.28473157,  0.42220742,  0.03064734]],
       dtype=float32),
 array([[ 0.24056554,  0.76600975, -0.06483824,  0.38337   , -0.45186365],
        [ 0.37622082, -0.38937205, -0.7429483 ,  0.39304847, -0.01970199],
        [-0.6474078 , -0.13595973, -0.32072902, -0.15433273, -0.66007   ],
        [-0.19561543, -0.3614092 ,  0.49246085,  0.7526049 , -0.14895071],
        [-0.5858189 ,  0.3354353 , -0.31375042,  0.32910126,  0.58099264]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [17]:
# Check their shapes
# Should make sense
# First output is input > hidden
# Second output is hidden > hidden
# Third output is bias term (vector of length M)
a, b, c = model.layers[1].get_weights()
print(a.shape, b.shape, c.shape)

(3, 5) (5, 5) (5,)


In [18]:
Wx, Wh, bh = model.layers[1].get_weights()
Wo, bo = model.layers[2].get_weights()

In [27]:
# Manual RNN calculation
h_last = np.zeros(M)  # initial hidden state
x = X[0]  # the one and only sample
Yhats = []  # where we store outputs

for t in range(T):
  h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)
  y = h.dot(Wo) + bo  # we only care about this value on the last iteration
  Yhats.append(y)

  # important: assign h to h_last
  h_last = h

# print the final output
print(Yhats[-1])

[0.2801833  0.18768427]


In [26]:
print(X)

[[[ 0.4521015   2.64066084 -0.65752532]
  [-0.13992527 -1.38958305 -0.73630676]
  [-1.50714784  0.29715531 -0.06107451]
  [-0.40738244 -0.40446759 -0.75944071]
  [ 1.11804398  1.77259477 -0.49043924]
  [-0.11965677  0.34006346  1.02437767]
  [-0.35778184  0.12298961  0.4556761 ]
  [ 1.47522809 -0.49510367  0.27932768]
  [ 1.30738724  0.52371629 -0.50936613]
  [-0.32359711 -0.34216495 -0.35966886]]]


In [None]:
# TODO: improve this code