# Tutorial: Recurrent neural networks (RNN)


In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
from matplotlib import colors
%matplotlib inline

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, RNN, LSTM, GRU, Bidirectional
from tensorflow.keras.models import Sequential, Model

tf.__version__

2022-06-02 15:17:30.850175: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-06-02 15:17:30.850213: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


'2.9.1'

## I. Presenting the API

We will use a batch of two samples: the first one has four time steps and the second one has two. Each time step has three features.

In [3]:
# Two variable-length samples; every time step carries 3 features.
first_sample = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [2, 2, 1]]  # 4 time steps
second_sample = [[2, 3, 4], [5, 6, 7]]                       # 2 time steps

# Ragged nested list indexed as [sample][time step][feature].
test_in = [first_sample, second_sample]


test_in

[[[1, 2, 3], [4, 5, 6], [7, 8, 9], [2, 2, 1]], [[2, 3, 4], [5, 6, 7]]]

In [4]:
# Pad the sequences so that every sample has the same number of time steps:
# pad_sequences finds the longest sequence and fills the shorter ones with
# zeros, placed at the beginning (padding='pre').
# float32 is requested directly: the default output dtype is int32, which
# would silently truncate fractional feature values before any later cast.
test_in = tf.keras.utils.pad_sequences(test_in, dtype='float32', padding='pre')

# (samples, time steps, features); reused below to build the models.
test_shape = test_in.shape

print('shape:', test_shape)

test_in

shape: (2, 4, 3)


array([[[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.],
        [2., 2., 1.]],

       [[0., 0., 0.],
        [0., 0., 0.],
        [2., 3., 4.],
        [5., 6., 7.]]], dtype=float32)

### I.1. LSTM with only the last state

In [5]:
# Parameter count of an LSTM layer with 2 units fed 3 features per time step:
#   - 4 weight sets (input, forget and output gates + cell candidate), 2 neurons in each
#   - every neuron weighs x (3 values) and h (2 values) and has 1 bias -> 6 parameters
#   - total: 4 * 2 * 6 = 48
lstm_model = Sequential(name='lstm1_exp')
lstm_model.add(LSTM(2, activation='relu', use_bias=True, name='lstm1'))

# Batch size is left unspecified (None); time steps and features come from the padded input.
lstm_model.build(input_shape=(None, *test_shape[1:]))

lstm_model.summary()

Model: "lstm1_exp"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm1 (LSTM)                (None, 2)                 48        
                                                                 
Total params: 48
Trainable params: 48
Non-trainable params: 0
_________________________________________________________________


2022-06-02 15:17:32.957049: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-06-02 15:17:32.957083: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-06-02 15:17:32.957102: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kariminf-pc): /proc/driver/nvidia/version does not exist
2022-06-02 15:17:32.957322: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
# Forward pass through the model built above (it has not been trained).
test_out = lstm_model.predict(x=test_in)

# Without return_sequences only the last state of the LSTM is returned:
# one row per sample (2), one column per unit (2).
print(np.shape(test_out))

test_out

(2, 2)


array([[4.7866642e-01, 2.7942717e-02],
       [1.9537210e-02, 2.7138586e-04]], dtype=float32)

### I.2. LSTM which returns intermediate states

In [7]:
# Same layer size as in I.1 (2 units), but return_sequences=True makes the layer
# return the state of every time step instead of only the last one.
# Note: no activation is passed here, so the Keras default (tanh) is used, not relu.
#
# The parameter count is unchanged:
#   - 4 weight sets, 2 neurons in each
#   - every neuron weighs x (3) and h (2) and has 1 bias -> 6 parameters
#   - total: 4 * 2 * 6 = 48
lstm2_model = Sequential(name='lstm2_exp')
lstm2_model.add(LSTM(2, return_sequences=True, name='lstm2'))

# Batch size is left unspecified (None); time steps and features come from the padded input.
lstm2_model.build(input_shape=(None, *test_shape[1:]))

lstm2_model.summary()

Model: "lstm2_exp"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm2 (LSTM)                (None, 4, 2)              48        
                                                                 
Total params: 48
Trainable params: 48
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Forward pass through the sequence-returning LSTM (it has not been trained).
test_out = lstm2_model.predict(x=test_in)

# With return_sequences=True every time step yields a state:
# shape = [2 samples, 4 states, 2 values (one per unit)]
print(np.shape(test_out))

test_out

(2, 4, 2)


array([[[-0.01813751,  0.05000282],
        [ 0.44618767,  0.3395636 ],
        [ 0.8624286 ,  0.310429  ],
        [ 0.5880492 ,  0.32558584]],

       [[ 0.        ,  0.        ],
        [ 0.        ,  0.        ],
        [ 0.14251402,  0.23120478],
        [ 0.6533775 ,  0.343485  ]]], dtype=float32)

### I.3. GRU

In [9]:
# GRU layer with the Keras defaults (activation='tanh', use_bias=True).
# Parameter count for 2 units fed 3 features per time step:
#   - 3 weight sets (update gate, reset gate, candidate state), 2 neurons in each
#   - the x weights and the h weights each get their own bias
#     (Keras default reset_after=True), so every neuron has
#     x (3) + h (2) + two biases (2) = 7 parameters
#   - total: 3 * 2 * 7 = 42
gru_model = Sequential(name='gru1_exp')
gru_model.add(GRU(2, name='gru1'))

# Batch size is left unspecified (None); time steps and features come from the padded input.
gru_model.build(input_shape=(None, *test_shape[1:]))

gru_model.summary()

Model: "gru1_exp"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru1 (GRU)                  (None, 2)                 42        
                                                                 
Total params: 42
Trainable params: 42
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Forward pass through the GRU model (it has not been trained).
test_out = gru_model.predict(x=test_in)

# Only the last state of the GRU is returned:
# one row per sample (2), one column per unit (2).
print(np.shape(test_out))

test_out

(2, 2)


array([[ 0.705231  , -0.29595575],
       [ 0.511054  , -0.14981261]], dtype=float32)

### I.4. Bidirectional LSTM

In [11]:
# Bidirectional wraps the LSTM and runs two copies of it over the sequence,
# one forward and one backward.
# Parameter count:
#   - one LSTM: 4 weight sets * 2 neurons * (x (3) + h (2) + bias (1)) = 4 * 2 * 6 = 48
#   - two directions, each with its own weights: 48 * 2 = 96
bilstm_model = Sequential(name='bilstm1_exp')
bilstm_model.add(Bidirectional(LSTM(2, name='bilstm1')))

# Batch size is left unspecified (None); time steps and features come from the padded input.
bilstm_model.build(input_shape=(None, *test_shape[1:]))

bilstm_model.summary()

Model: "bilstm1_exp"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 4)                96        
 l)                                                              
                                                                 
Total params: 96
Trainable params: 96
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Forward pass through the bidirectional LSTM (it has not been trained).
test_out = bilstm_model.predict(x=test_in)

# Each sample (2) gets the last state of both directions:
# number of units (2) * 2 (forward + backward) = 4 values per sample.
print(np.shape(test_out))

test_out

(2, 4)


array([[ 0.2246958 ,  0.16578221,  0.35787773,  0.0792452 ],
       [-0.00085884,  0.02932756,  0.05654396,  0.08873055]],
      dtype=float32)

### I.5. Bidirectional LSTM with intermediate states

In [13]:
# Bidirectional LSTM that returns the state of every time step.
# The layer and the model get their own names ('bilstm2', 'bilstm2_exp') so that
# summaries do not confuse this model with the bidirectional model of I.4.
bilstm2_model = Sequential([
    Bidirectional(LSTM(2, return_sequences=True, name='bilstm2'))
], name='bilstm2_exp')

# Parameter count (return_sequences does not add any parameter):
#   - one LSTM: 4 weight sets * 2 neurons * (x (3) + h (2) + bias (1)) = 4 * 2 * 6 = 48
#   - two directions, each with its own weights: 48 * 2 = 96
bilstm2_model.build(input_shape=(None, test_shape[1], test_shape[2]))

bilstm2_model.summary()

Model: "bilstm1_exp"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_1 (Bidirectio  (None, 4, 4)             96        
 nal)                                                            
                                                                 
Total params: 96
Trainable params: 96
Non-trainable params: 0
_________________________________________________________________


In [14]:
# Forward pass through the sequence-returning bidirectional LSTM (it has not been trained).
test_out = bilstm2_model.predict(x=test_in)

# Every time step (4) of every sample (2) yields a state,
# and each state holds number of units (2) * 2 directions = 4 values:
# shape = [2 samples, 4 states, 4 values]
print(np.shape(test_out))

test_out

(2, 4, 4)


array([[[ 4.2398680e-02, -2.8261372e-01, -2.2900328e-01, -3.0911764e-02],
        [ 4.8246598e-03, -5.1124692e-01, -5.9111363e-01, -2.3039220e-02],
        [ 2.4122061e-04, -6.8491870e-01, -7.2512102e-01, -2.5331583e-02],
        [ 7.2909750e-02, -4.7772995e-01, -3.8514832e-01, -1.7888796e-02]],

       [[ 0.0000000e+00,  0.0000000e+00, -9.8676279e-02, -3.7043985e-02],
        [ 0.0000000e+00,  0.0000000e+00, -1.3407968e-01, -3.4704108e-02],
        [ 1.9683568e-02, -2.9378051e-01, -3.6276820e-01, -3.1188214e-03],
        [ 1.7498329e-03, -5.2954406e-01, -6.7740488e-01, -1.6986907e-05]]],
      dtype=float32)

## II. 