In [1]:
import tensorflow as tf
# Fix TensorFlow's global random seed so that randomly initialised values
# (such as the layer weights created below) are repeatable on a fresh run.
tf.random.set_seed(seed=1)


Creates a SimpleRNN layer with 2 units (neurons), uses bias, and returns the full sequence of outputs for each timestep.

In [2]:
# Configuration of the recurrent layer:
#   units=2               -> number of units (neurons) in the RNN layer
#   use_bias=True         -> the layer adds a bias vector
#   return_sequences=True -> return the output for every timestep of the input
#                            sequence (True) instead of only the output at the
#                            last timestep (False)
rnn_layer = tf.keras.layers.SimpleRNN(2, use_bias=True, return_sequences=True)


Builds the RNN layer with a specified input shape of (batch_size, timesteps, input_features). Here, None indicates that the batch size and number of timesteps can be variable, and each input has 5 features.

In [3]:
# Create the layer's weights for inputs of shape (batch_size, timesteps, features):
# `None` leaves the batch size and the sequence length unspecified, 5 is the
# number of input features.
# The shape is passed positionally because the parameter is named differently
# across Keras versions (`input_shape` in Keras 2, `sequences_shape` in Keras 3),
# so a keyword call only works on one of them.
rnn_layer.build((None, None, 5))

In [4]:
# Unpack the layer's three variables in the order the layer reports them:
# input kernel, recurrent kernel, bias.
w_xh, w_oo, b_h = rnn_layer.weights

# report the shapes: the two weight matrices first, then the bias vector
for label, variable in (
    ('w_xh shape:', w_xh),
    ('w_oo shape:', w_oo),
    ('b_h shape:', b_h),
):
    print(label, variable.shape)

w_xh shape: (5, 2)
w_oo shape: (2, 2)
b_h shape: (2,)


In [5]:
# Display the three layer variables (name, shape, dtype and current values)
# as the cell's output: input kernel, recurrent kernel, bias.
w_xh, w_oo, b_h

(<tf.Variable 'simple_rnn_cell/kernel:0' shape=(5, 2) dtype=float32, numpy=
 array([[-0.8766506 ,  0.9172727 ],
        [-0.37667286,  0.6936799 ],
        [-0.44308627, -0.71843755],
        [-0.627252  ,  0.8343315 ],
        [-0.00902045,  0.08299422]], dtype=float32)>,
 <tf.Variable 'simple_rnn_cell/recurrent_kernel:0' shape=(2, 2) dtype=float32, numpy=
 array([[ 0.57300806, -0.8195498 ],
        [ 0.8195498 ,  0.573008  ]], dtype=float32)>,
 <tf.Variable 'simple_rnn_cell/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>)

In [6]:
# Print each variable's values under its label; `sep='\n'` puts the array on
# the line after the label.
# w_xh: input kernel, w_oo: recurrent kernel (applied to the previous output),
# b_h: bias vector.
print('The weights of input to hidden layer:w_xh', w_xh.numpy(), sep='\n')
print('\nThe weights of output to output layer:w_oo', w_oo.numpy(), sep='\n')
print('\nThe weights of bias of hidden layer:b_h', b_h.numpy(), sep='\n')


The weights of input to hidden layer:w_xh
[[-0.8766506   0.9172727 ]
 [-0.37667286  0.6936799 ]
 [-0.44308627 -0.71843755]
 [-0.627252    0.8343315 ]
 [-0.00902045  0.08299422]]

The weights of output to output layer:w_oo
[[ 0.57300806 -0.8195498 ]
 [ 0.8195498   0.573008  ]]

The weights of bias of hidden layer:b_h
[0. 0.]


Creates a single input tensor of shape (3, 5): one sequence of three timesteps, where each timestep is a vector of 5 features.

In [7]:
# One input sequence of three timesteps; the 5 features of each timestep are
# all set to the same value (1.0, 2.0 and 3.0 respectively).
x_seq = tf.convert_to_tensor(
    [[value] * 5 for value in (1.0, 2.0, 3.0)],
    dtype=tf.float32,
)

print('Input:', x_seq, sep='\n')

Input:
tf.Tensor(
[[1. 1. 1. 1. 1.]
 [2. 2. 2. 2. 2.]
 [3. 3. 3. 3. 3.]], shape=(3, 5), dtype=float32)


Passes the reshaped input sequence through the SimpleRNN layer to obtain the output. The reshape is done to add a batch dimension since the RNN layer expects input in the shape (batch_size, timesteps, input_features).

In [8]:
# output of SimpleRNN
# The layer expects (batch_size, timesteps, features), so the single sequence of
# 3 timesteps with 5 features gets a leading batch axis of size 1: (3, 5) -> (1, 3, 5).
output = rnn_layer(tf.expand_dims(x_seq, axis=0))

In [9]:
# Display the layer output: one row per timestep, one column per unit,
# wrapped in a batch axis of size 1.
output

<tf.Tensor: shape=(1, 3, 2), dtype=float32, numpy=
array([[[-0.98134404,  0.94781566],
        [-0.9997276 ,  0.999903  ],
        [-0.99999744,  0.9999977 ]]], dtype=float32)>

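A loop iterates over each timestep of the input sequence and reproduces the SimpleRNN computation by hand.
For each timestep, it prints the input, the input weights, and the bias.
Computes the input contribution ht = xt·w_xh + b_h (this is not yet the final hidden state).
Adds the recurrent contribution prev_o·w_oo, where prev_o is the output of the previous timestep (zeros at the first timestep) and w_oo is the layer's recurrent kernel.
Applies the hyperbolic tangent (tanh) activation function to the sum to obtain the output ot.
Prints the manually computed output for each timestep and, at the end, the SimpleRNN layer's output for comparison.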

In [22]:
# Reproduce the SimpleRNN forward pass by hand, one timestep at a time, and
# verify the result against the layer's own output:
#   ht = xt @ w_xh + b_h              (input contribution)
#   ot = tanh(ht + prev_o @ w_oo)     (add recurrent contribution, then activate)
# `prev_o` is the output of the previous timestep; it is all zeros at t = 0.

# initialize an empty list that collects the manual output of every timestep
out_man = []
for t in range(len(x_seq)):

    print('\n\n\ntimestep t=', t)
    # the input at timestep t is treated as a batch of size 1 with 5 features
    xt = tf.reshape(x_seq[t], (1, 5))
    print('Time step {}=>'.format(t))
    print('Input xt= :', xt.numpy())
    print('\n w_xh :', w_xh.numpy())
    print('\nb_h :', b_h.numpy())

    # input contribution: project the current input and add the bias
    ht = tf.matmul(xt, w_xh) + b_h
    print('\nht=xt*w_xh+b_h :', ht.numpy())

    if t > 0:
        # feed back the output computed at the previous timestep
        prev_o = out_man[-1]
        print('\nprev_o=', prev_o.numpy())
    else:
        # no previous output exists at the first timestep: start from zeros
        prev_o = tf.zeros_like(ht)

    print('\nht =', ht.numpy())
    # recurrent contribution: previous output times the recurrent kernel
    ot = ht + tf.matmul(prev_o, w_oo)
    print('\not = ht+prev_o*w_oo before tanh =', ot.numpy())
    ot = tf.math.tanh(ot)
    out_man.append(ot)
    print(f'\noutput (manual)@ timestep {t} :', ot.numpy())

print('\n\nSimpleRNN output:')
print(output[0].numpy())

# Fail loudly instead of relying on a visual comparison: if `output` or the
# weights come from a different (stale) run of the earlier cells, the manual
# result will not match the layer output.
tf.debugging.assert_near(
    tf.concat(out_man, axis=0),  # stack the per-timestep (1, 2) outputs
    output[0],                   # layer output for the single sequence in the batch
    rtol=1e-5,
    atol=1e-5,
    message='manual RNN computation does not match the SimpleRNN layer output; '
            're-run the notebook from the top',
)





timestep t= 0
Time step 0=>
Input xt= : [[1. 1. 1. 1. 1.]]

 w_xh : [[ 0.9245993   0.33459878]
 [-0.26637006 -0.61695325]
 [ 0.43761563 -0.33505726]
 [ 0.03919727 -0.5819472 ]
 [-0.904947   -0.5740896 ]]

b_h : [0. 0.]

ht=xt*w_xh+b_h : [[ 0.23009521 -1.7734485 ]]

ht = [[ 0.23009521 -1.7734485 ]]

ot = ht+prev_o*w_oo before tanh = [[ 0.23009521 -1.7734485 ]]

output (manual)@ timestep 0 : [[ 0.22611868 -0.9439862 ]]



timestep t= 1
Time step 1=>
Input xt= : [[2. 2. 2. 2. 2.]]

 w_xh : [[ 0.9245993   0.33459878]
 [-0.26637006 -0.61695325]
 [ 0.43761563 -0.33505726]
 [ 0.03919727 -0.5819472 ]
 [-0.904947   -0.5740896 ]]

b_h : [0. 0.]

ht=xt*w_xh+b_h : [[ 0.46019042 -3.546897  ]]

prev_o= [[ 0.22611868 -0.9439862 ]]

ht = [[ 0.46019042 -3.546897  ]]

ot = ht+prev_o*w_oo before tanh = [[ 0.9734181 -2.7229815]]

output (manual)@ timestep 1 : [[ 0.75020254 -0.9914097 ]]



timestep t= 2
Time step 2=>
Input xt= : [[3. 3. 3. 3. 3.]]

 w_xh : [[ 0.9245993   0.33459878]
 [-0.26637006 -0.61