In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Input, SimpleRNN, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

# Things we need to memorize:
N : Number of Samples

T : Sequence length

D : Number of Input Features

M : Number of Hidden Units

K : Number of Output Units

Note: K can be greater than 1 for a regression problem. For example, it might be a multidimensional regression (such as predicting latitude and longitude). So K > 1 does not always mean that it is a classification problem.

In [38]:
# Toy problem sizes: N samples, T time steps, D input features, K outputs.
N, T, D, K = 1, 10, 3, 2

# Random Gaussian input batch of shape (N, T, D).
X = np.random.randn(N, T, D)

X

array([[[-0.65743571,  0.06261316,  2.30478886],
        [-1.47074087,  0.09752981,  1.34757659],
        [-0.70496749,  0.94331793, -0.7078629 ],
        [-1.20547946, -0.84801168,  0.11094414],
        [ 1.05520462,  0.90278798, -0.67210647],
        [ 0.04262162,  0.92023336, -1.14153366],
        [-1.60084256,  0.29722629,  0.61521752],
        [ 0.14962499, -0.69196894, -1.1627263 ],
        [-0.02739404,  0.15243377, -1.18359365],
        [ 2.0353315 ,  0.05343955, -1.39577274]]])

In [39]:
M = 5  # size of the recurrent hidden state

# Functional-API graph: (T, D) sequence -> SimpleRNN -> Dense.
i = Input(shape=(T, D))
x = SimpleRNN(M)(i)  # default activation is tanh; output is the final hidden state, shape (None, M)
x = Dense(K)(x)      # no activation: this is a linear regression head

model = Model(inputs=i, outputs=x)

# Mean-squared-error loss with the Adam optimizer.
model.compile(optimizer=Adam(learning_rate=0.1), loss="mse")

In [40]:
# Print each layer's output shape and parameter count.
# SimpleRNN: D*M + M*M + M = 15 + 25 + 5 = 45 parameters; Dense: M*K + K = 10 + 2 = 12.
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, 5)                 45        
                                                                 
 dense_3 (Dense)             (None, 2)                 12        
                                                                 
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [41]:
# Keras forward pass. With K = 2 outputs and a single sample, Yhat has shape (1, 2).
Yhat = model.predict(X)
print(Yhat, Yhat.shape, sep="\n")
# Next, let's see whether we can replicate this output by hand.

[[ 0.19519468 -1.15848   ]]
(1, 2)


In [42]:
# The model has three layers: Input, SimpleRNN and Dense (the output layer).
print(model.layers)

# The SimpleRNN layer's get_weights() returns a list of three arrays.
print(model.layers[1].get_weights())

[<keras.engine.input_layer.InputLayer object at 0x7fd8e56ed360>, <keras.layers.rnn.simple_rnn.SimpleRNN object at 0x7fd8e56edc00>, <keras.layers.core.dense.Dense object at 0x7fd8f0329750>]
[array([[ 0.47612423, -0.03806829, -0.5027425 , -0.22551346, -0.7348896 ],
       [ 0.01746541,  0.03113955, -0.78103095, -0.16630036, -0.03100222],
       [ 0.06622034,  0.81962675,  0.48380178, -0.4241633 , -0.37538385]],
      dtype=float32), array([[ 0.31518042,  0.21609305,  0.33874065,  0.85756296, -0.06169206],
       [-0.51879835,  0.7194001 ,  0.20803568, -0.10152966, -0.39965564],
       [ 0.47911096, -0.19680247,  0.34863043, -0.31560996, -0.71454084],
       [-0.606075  , -0.59837914,  0.48711663,  0.17527023, -0.08132207],
       [-0.18610655, -0.19742969, -0.69509   ,  0.35206285, -0.5650554 ]],
      dtype=float32), array([0., 0., 0., 0., 0.], dtype=float32)]


In [43]:
# Check that the three SimpleRNN weight arrays have the shapes we expect:
#   1st array: input  -> hidden kernel, shape (D, M)
#   2nd array: hidden -> hidden (recurrent) kernel, shape (M, M)
#   3rd array: hidden bias, a vector of length M

a, b, c = model.layers[1].get_weights()
print(f"Shape of A: {a.shape}   Shape of B: {b.shape}   Shape of C:{c.shape}")

Shape of A: (3, 5)   Shape of B: (5, 5)   Shape of C:(5,)


In [44]:
# Unpack the parameters needed to redo the forward pass by hand.
Wx, Wh, bh = model.layers[1].get_weights() # SimpleRNN: input kernel, recurrent kernel, bias
Wo, bo = model.layers[2].get_weights() # Dense (output) layer: kernel and bias

In [49]:
print(X.shape) 
# X holds a single sample, so index the first (batch) axis to get its (T, D) sequence.
print(X[0])
print(X[0].shape)

(1, 10, 3)
[[-0.65743571  0.06261316  2.30478886]
 [-1.47074087  0.09752981  1.34757659]
 [-0.70496749  0.94331793 -0.7078629 ]
 [-1.20547946 -0.84801168  0.11094414]
 [ 1.05520462  0.90278798 -0.67210647]
 [ 0.04262162  0.92023336 -1.14153366]
 [-1.60084256  0.29722629  0.61521752]
 [ 0.14962499 -0.69196894 -1.1627263 ]
 [-0.02739404  0.15243377 -1.18359365]
 [ 2.0353315   0.05343955 -1.39577274]]
(10, 3)


In [50]:
# Replay the SimpleRNN + Dense forward pass by hand for the single sample in X.
x = X[0]                # drop the batch axis: x.shape == (10, 3)
h_last = np.zeros(M)    # the hidden state starts at zero
Yhats = []              # output computed at every time step

for t in range(T):
    # h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
    h = np.tanh(np.dot(x[t], Wx) + np.dot(h_last, Wh) + bh)
    y = np.dot(h, Wo) + bo
    Yhats.append(y)
    h_last = h

# The last time step's output is the one to compare with model.predict.
print(Yhats[-1])

[ 0.19519453 -1.15848006]


In [51]:
# Now let's compare the two results to see whether our hand-written RNN matches the Keras model
# RNN model (Keras): [[ 0.19519468 -1.15848   ]]
# RNN by hand:       [ 0.19519453 -1.15848006]
# :)

# Let's see what happens if we have multiple samples

In [52]:
# Same problem sizes as before, but now with a batch of three samples.
N, T, D, K = 3, 10, 3, 2

# Random Gaussian input batch of shape (N, T, D).
X = np.random.randn(N, T, D)

X

array([[[-3.43366980e-02, -7.33101831e-01, -1.76265733e+00],
        [ 2.58046522e-01, -1.29257791e-01, -4.22432809e-01],
        [-7.74991060e-01, -3.18559289e-01,  6.08223643e-01],
        [-9.53358923e-01, -9.92842516e-02,  1.18159310e+00],
        [ 2.88956442e-01, -2.12619199e+00,  9.61480896e-01],
        [ 8.32612665e-04,  1.35463220e+00, -8.91001352e-01],
        [ 4.32643042e-01,  8.73607735e-01,  9.95925001e-01],
        [ 4.03689221e-01, -9.89846212e-01, -2.44236348e-01],
        [ 1.42479075e-01, -1.58384636e+00, -1.34948878e+00],
        [-6.94475676e-01,  1.18375062e+00, -4.27060233e-01]],

       [[ 1.10456827e+00,  1.49650441e+00,  6.04608147e-01],
        [ 3.15554423e-01, -1.48905318e+00,  3.47419855e-01],
        [-7.67496744e-01, -5.10920113e-01, -3.81827688e-01],
        [-1.27676622e+00, -2.87279349e-01, -1.11079563e-01],
        [-3.49028288e-01, -1.13600008e+00, -4.61107452e-01],
        [-1.69454012e-01,  1.28233189e+00, -1.28497187e+00],
        [-2.43666972e+

In [53]:
M = 5  # size of the recurrent hidden state

# Functional-API graph: (T, D) sequence -> SimpleRNN -> Dense.
i = Input(shape=(T, D))
x = SimpleRNN(M)(i)  # default activation is tanh; output is the final hidden state, shape (None, M)
x = Dense(K)(x)      # no activation: this is a linear regression head

model = Model(inputs=i, outputs=x)

# Mean-squared-error loss with the Adam optimizer.
model.compile(optimizer=Adam(learning_rate=0.1), loss="mse")

In [54]:
# Print each layer's output shape and parameter count.
# SimpleRNN: D*M + M*M + M = 15 + 25 + 5 = 45 parameters; Dense: M*K + K = 10 + 2 = 12.
model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 10, 3)]           0         
                                                                 
 simple_rnn_4 (SimpleRNN)    (None, 5)                 45        
                                                                 
 dense_4 (Dense)             (None, 2)                 12        
                                                                 
Total params: 57
Trainable params: 57
Non-trainable params: 0
_________________________________________________________________


In [55]:
# Keras forward pass. With K = 2 outputs and 3 samples, Yhat has shape (3, 2).
Yhat = model.predict(X)
print(Yhat, Yhat.shape, sep="\n")
# Next, let's see whether we can replicate this output by hand.

[[-0.2906301  -0.52476156]
 [ 0.11569875 -0.01596417]
 [ 0.74665457  0.01347425]]
(3, 2)


In [56]:
# The model has three layers: Input, SimpleRNN and Dense (the output layer).
print(model.layers)

# The SimpleRNN layer's get_weights() returns a list of three arrays.
print(model.layers[1].get_weights())

[<keras.engine.input_layer.InputLayer object at 0x7fd8e5635210>, <keras.layers.rnn.simple_rnn.SimpleRNN object at 0x7fd8e5635db0>, <keras.layers.core.dense.Dense object at 0x7fd8e5637b80>]
[array([[ 0.25955623, -0.24849886, -0.67392355, -0.18745673,  0.56701416],
       [ 0.61479336, -0.18606699, -0.81998175,  0.03805548, -0.27226388],
       [ 0.35964972, -0.0975073 , -0.31581432, -0.09368545,  0.14148206]],
      dtype=float32), array([[-0.34136176, -0.482692  , -0.21747243, -0.429175  ,  0.64729834],
       [-0.173593  , -0.02289428,  0.7202057 , -0.61318886, -0.27321154],
       [ 0.24835353,  0.20202789,  0.58430916,  0.30529636,  0.68035454],
       [ 0.76526767, -0.62767065,  0.05431936, -0.08399855, -0.10192448],
       [-0.45389813, -0.57593286,  0.2994024 ,  0.58270633, -0.18190505]],
      dtype=float32), array([0., 0., 0., 0., 0.], dtype=float32)]


In [59]:
# Check that the three SimpleRNN weight arrays have the shapes we expect:
#   1st array (Wx): input  -> hidden kernel, shape (D, M)
#   2nd array (Wh): hidden -> hidden (recurrent) kernel, shape (M, M)
#   3rd array (bh): hidden bias, a vector of length M

a, b, c = model.layers[1].get_weights()
print(f"Shape of A (which is Wx): {a.shape}   Shape of B (Which is Wh): {b.shape}   Shape of C (Which is bh):{c.shape}")

Shape of A (which is Wx): (3, 5)   Shape of B (Which is Wh): (5, 5)   Shape of C (Which is bh):(5,)


In [58]:
# Unpack the parameters needed to redo the forward pass by hand.
Wx, Wh, bh = model.layers[1].get_weights() # SimpleRNN: input kernel, recurrent kernel, bias
Wo, bo = model.layers[2].get_weights() # Dense (output) layer: kernel and bias

In [60]:
print(X.shape) 
# X now holds 3 samples; X[0] selects the first sample's (T, D) sequence.
print(X[0])
print(X[0].shape)

(3, 10, 3)
[[-3.43366980e-02 -7.33101831e-01 -1.76265733e+00]
 [ 2.58046522e-01 -1.29257791e-01 -4.22432809e-01]
 [-7.74991060e-01 -3.18559289e-01  6.08223643e-01]
 [-9.53358923e-01 -9.92842516e-02  1.18159310e+00]
 [ 2.88956442e-01 -2.12619199e+00  9.61480896e-01]
 [ 8.32612665e-04  1.35463220e+00 -8.91001352e-01]
 [ 4.32643042e-01  8.73607735e-01  9.95925001e-01]
 [ 4.03689221e-01 -9.89846212e-01 -2.44236348e-01]
 [ 1.42479075e-01 -1.58384636e+00 -1.34948878e+00]
 [-6.94475676e-01  1.18375062e+00 -4.27060233e-01]]
(10, 3)


In [71]:
# By-hand forward pass for the first sample, X[0].
h_last = np.zeros(M)    # the hidden state starts at zero
Yhats = []              # output computed at every time step

for t in range(T):
    # h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
    h = np.tanh(np.dot(X[0][t], Wx) + np.dot(h_last, Wh) + bh)
    y = np.dot(h, Wo) + bo
    Yhats.append(y)
    h_last = h

# The last time step's output is the one to compare with the first row of model.predict(X).
print(Yhats[-1])

[-0.29063011 -0.5247616 ]


In [82]:
# By-hand forward pass for the second sample, X[1].
h_last = np.zeros(M)    # the hidden state starts at zero
Yhats = []              # output computed at every time step

for t in range(T):
    # h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
    h = np.tanh(np.dot(X[1][t], Wx) + np.dot(h_last, Wh) + bh)
    y = np.dot(h, Wo) + bo
    Yhats.append(y)
    h_last = h

# The last time step's output is the one to compare with the second row of model.predict(X).
print(Yhats[-1])

[ 0.11569876 -0.01596417]


In [73]:
# By-hand forward pass for the third sample, X[2].
h_last = np.zeros(M)    # the hidden state starts at zero
Yhats = []              # output computed at every time step

for t in range(T):
    # h_t = tanh(x_t . Wx + h_{t-1} . Wh + bh)
    h = np.tanh(np.dot(X[2][t], Wx) + np.dot(h_last, Wh) + bh)
    y = np.dot(h, Wo) + bo
    Yhats.append(y)
    h_last = h

# The last time step's output is the one to compare with the third row of model.predict(X).
print(Yhats[-1])

[0.74665465 0.0134742 ]


In [None]:
# Re-print the Keras predictions so they sit next to the by-hand results.
# This cell used to contain the printed matrix pasted in as raw text, which is
# not valid Python and stops a Restart & Run All with a SyntaxError.
print(Yhat)

In [84]:
def simple_rnn_forward(sequence, Wx, Wh, bh, Wo, bo):
    """Replicate the SimpleRNN -> Dense forward pass for one sequence.

    Parameters
    ----------
    sequence : array of shape (T, D)
        One input sample; each row is the feature vector of one time step.
    Wx : array of shape (D, M)
        Input-to-hidden kernel of the recurrent layer.
    Wh : array of shape (M, M)
        Hidden-to-hidden (recurrent) kernel.
    bh : array of shape (M,)
        Hidden bias.
    Wo : array of shape (M, K)
        Kernel of the output layer.
    bo : array of shape (K,)
        Bias of the output layer.

    Returns
    -------
    array of shape (K,)
        The output layer applied to the hidden state after the last time step.
    """
    h = np.zeros(Wh.shape[0])  # the hidden state starts at zero
    for x_t in sequence:
        h = np.tanh(x_t.dot(Wx) + h.dot(Wh) + bh)
    # The model only emits an output for the final hidden state, so the output
    # projection is applied once here rather than at every time step.
    return h.dot(Wo) + bo


# One prediction per sample, kept as a list of length-K arrays.
new_y = [simple_rnn_forward(sample, Wx, Wh, bh, Wo, bo) for sample in X]

In [85]:
# Display the by-hand predictions: one length-K array per sample.
new_y

[array([-0.29063011, -0.5247616 ]),
 array([ 0.11569876, -0.01596417]),
 array([0.74665465, 0.0134742 ])]

In [86]:
# Our values match !