In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [6]:
# The function called 'create_RNN' returns a model that includes a SimpleRNN layer and a Dense layer for
# learning sequential data. The input_shape specifies the parameter(time_steps, x features).
# We will simplify everything and use univariate data,i.e., one feature only; the time steps are discussed
# below.

def create_RNN(hidden_units, dense_units, input_shape, activation):
    model = Sequential()
    model.add(SimpleRNN(hidden_units, input_shape=input_shape, activation=activation[0]))
    model.add(Dense(dense_units, activation=activation[1]))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model              

In [7]:
demo_model = create_RNN(hidden_units=2, dense_units=1, input_shape=(3,1), activation=['linear', 'linear'])
demo_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_1 (SimpleRNN)    (None, 2)                 8         
                                                                 
 dense (Dense)               (None, 1)                 3         
                                                                 
Total params: 11
Trainable params: 11
Non-trainable params: 0
_________________________________________________________________


##### If we have m hidden units(m=2 in this case), then:
- Input : $x \in R$<br>
- Hidden unit : $h \in R^{m}$<br>
- Weights for the input units : $w_{x} \in R^{m}$<br>
- Weights for the hidden units : $w_{h} \in R^{m*m}$<br>
- Bias for the hidden units : $b_{h} \in R^{m}$<br>
- Weight for the dense layer : $w_{y} \in R^{m}$<br>
- Bias for the dense layer : $b_{y} \in R$

In [12]:
# Let's look at the above weights. Note: As the weights are randomly initialized, the results posted here
# will be different from yours. The important thing is to learn what the structure of each object being used
# looks like and how it interacts with others to produce the final output.

wx = demo_model.get_weights()[0]
wh = demo_model.get_weights()[1]
bh = demo_model.get_weights()[2]
wy = demo_model.get_weights()[3]
by = demo_model.get_weights()[4]

print(f"wx={wx},\nwh={wh},\nbh={bh},\nwy={wy},\nby={by}")

wx=[[ 0.677106  -0.6184709]],
wh=[[ 0.5515145  0.8341653]
 [-0.8341653  0.5515145]],
bh=[0. 0.],
wy=[[-0.46552944]
 [ 0.9985508 ]],
by=[0.]


In [15]:
# We will input x for 3 time steps and let the network generate an output. The values of the hidden units
# at time steps 1,2,and 3 will be computed. h0 is initialized to the zero vector. The output o3 is computed
# from h3 and Wy. An activation function is not required as we are using linear units.

x = np.array([1,2,3])
# Reshape the input to the required sample_size * time_steps * features
x_input = x.reshape(-1, len(x), 1)
y_pred_model = demo_model.predict(x_input)

m = 2
h0 = np.zeros(m)
h1 = np.dot(x[0], wx) + h0 + bh
h2 = np.dot(x[1], wx) + np.dot(h1, wh) + bh
h3 = np.dot(x[2], wx) + np.dot(h2, wh) + bh
o3 = np.dot(h3, wy) + by

print(f"h0 = {h0},\nh1 = {h1},\nh2 = it{h3},\nh3 = {h3}")
print("Prediction from network ", y_pred_model)
print("prediction from our computation ", o3)

h0 = [0. 0.],
h1 = [[ 0.67710602 -0.61847091]],
h2 = [[ 4.11386222 -0.54272395]],
h3 = [[ 4.11386222 -0.54272395]]
Prediction from network  [[-2.4570615]]
prediction from our computation  [[-2.4570614]]
