In [1]:
import tensorflow as tf
# Print the installed TensorFlow version so the run environment is on record
print(tf.__version__)

2.0.1


In [2]:
import numpy as np

In [3]:
# Print the installed NumPy version so the run environment is on record
print(np.__version__)

1.19.5


In [4]:
from tensorflow.keras.layers import Input, SimpleRNN, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Things you should automatically know and have memorized
# N - number of samples
# T - sequence length
# D - number of input features
# M - number of hidden units
# K - number of output units

In [12]:
# Make some data
# N samples, each a sequence of T time steps with D features per step;
# K is the number of values the model will output per sample.
N = 1
T = 10
D = 3
K = 2

# Draw the inputs from a seeded, local generator so that
# Restart Kernel -> Run All reproduces exactly the same X. A local Generator
# is used (instead of np.random.seed) so the global NumPy RNG state that
# other cells may rely on is left untouched.
SEED = 42
rng = np.random.default_rng(SEED)
X = rng.standard_normal((N, T, D))  # standard-normal float64 array, shape (N, T, D)

In [13]:
# Show the generated input; with N = 1 this is a single T x D sequence
print(X)

[[[ 0.38366184  0.01094198  1.47895739]
  [ 0.67794095 -0.13159957  0.90285573]
  [ 1.62457324  0.14661692  0.54412575]
  [-0.99039354  0.75640654 -1.10643689]
  [ 0.04077221  0.01235557  0.98824049]
  [-0.97973791  0.23021623  0.64429543]
  [ 2.02305551 -0.53803181 -0.15367733]
  [ 1.07045457 -0.81538872  0.30191102]
  [-0.71226991 -0.35243245  1.49462956]
  [-1.41499577 -0.22338629  1.55433856]]]


In [14]:
# Build the network: input sequence -> SimpleRNN -> Dense
M = 5  # size of the recurrent hidden state

# Each sample is a sequence of T steps with D features per step
i = Input(shape=(T, D))

# return_sequences is not requested, so the recurrent layer passes on a
# single M-vector per sample (the summary reports its output as (None, 5))
x = SimpleRNN(units=M)(i)

# Read-out to K values; no activation is specified on this layer
x = Dense(units=K)(x)

model = Model(inputs=i, outputs=x)

In [15]:
# Get the output
# No compile/fit step has run above, so this is a forward pass through the
# freshly initialized weights. One row of K values is returned per sample.
Yhat = model.predict(X)
print(Yhat)

[[-0.726123  -0.6260365]]


In [16]:
# List each layer with its output shape and parameter count
model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 10, 3)]           0         
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 5)                 45        
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 12        
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Raw weight arrays of the SimpleRNN layer (layers[0] is the input layer)
model.layers[1].get_weights()

[array([[-0.14862329,  0.72479063, -0.47879082, -0.5859579 ,  0.80050415],
        [-0.38984877, -0.16076511,  0.7640466 ,  0.34628493, -0.23794585],
        [-0.8213527 , -0.76283234, -0.6451081 , -0.43207404, -0.31746614]],
       dtype=float32),
 array([[-0.07037139, -0.2172454 , -0.3694741 ,  0.8120384 , -0.389788  ],
        [-0.43037954,  0.8119925 , -0.01127151, -0.01424125, -0.39384237],
        [-0.06989142, -0.21958931,  0.8734252 ,  0.13755926, -0.40632713],
        [ 0.52839124,  0.4880439 ,  0.27456495,  0.4947525 ,  0.40305078],
        [ 0.72507864,  0.08406198, -0.15844387, -0.27692634, -0.6044848 ]],
       dtype=float32),
 array([0., 0., 0., 0., 0.], dtype=float32)]

In [18]:
# Sanity-check the shapes of the SimpleRNN weights.
# get_weights() hands back three arrays, in this order:
#   1. input  -> hidden kernel, shape (D, M)
#   2. hidden -> hidden kernel, shape (M, M)
#   3. hidden bias,             shape (M,)

a, b, c = model.layers[1].get_weights()
print(*(w.shape for w in (a, b, c)))

(3, 5) (5, 5) (5,)


In [19]:
# Recurrent layer: input->hidden kernel, hidden->hidden kernel, hidden bias
Wx, Wh, bh = model.layers[1].get_weights()
# Dense layer: hidden->output kernel and output bias
Wo, bo = model.layers[2].get_weights()

In [20]:
# Re-implement the model's forward pass by hand with NumPy:
#   h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
#   y_t = h_t . Wo + bo
h_last = np.zeros(M) # initial hidden state
x = X[0] # the one and only sample, shape (T, D)
Yhats = []

for t in range(T):
    h = np.tanh(x[t].dot(Wx) + h_last.dot(Wh) + bh)
    y = h.dot(Wo) + bo # we only care about this value on the last iteration
    Yhats.append(y)

    # important: assign h to h_last
    h_last = h

# print the final output
print(Yhats[-1])

# Verify the hand-rolled recurrence against Keras programmatically instead of
# comparing printed numbers by eye. The Keras weights are float32 while X is
# float64, so the two results agree only up to float32 rounding; hence the
# loose tolerances.
np.testing.assert_allclose(Yhats[-1], Yhat[0], rtol=1e-4, atol=1e-5)

[-0.72612306 -0.62603655]
