# 1. Importing libs

In [None]:
import os

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

In [None]:
# Optional GPU setup, intentionally left disabled.
# When enabled, it asks TensorFlow to grow memory on the first visible GPU on demand
# rather than reserving it up front.
# NOTE: physical_devices[0] raises IndexError when no GPU is visible, so guard the
# lookup (e.g. `if physical_devices:`) before enabling these lines.
# physical_devices = tf.config.list_physical_devices("GPU")
# tf.config.experimental.set_memory_growth(physical_devices[0], True)

# 2. Defining RNNs

In [None]:
# Fetch the MNIST train/test split through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast both image arrays to float32 and divide by 255.0; the label arrays are left as loaded.
x_train, x_test = (
    images.astype("float32") / 255.0 for images in (x_train, x_test)
)

# One shape per line, in the order: train images, train labels, test images, test labels.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape, sep="\n")

(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


## Since ours is an image dataset, we treat each image as a sequence: every 28-pixel row is fed to the RNN (a sequence model) as one time step

## 2.1 Simple RNN with `relu`

In [None]:
# Two stacked SimpleRNN layers (ReLU) followed by a Dense layer with 10 outputs.
model = keras.Sequential()
# (None, 28): any number of time steps, 28 features per step.
model.add(keras.Input(shape=(None, 28)))
# return_sequences=True hands the full output sequence to the next recurrent layer.
model.add(layers.SimpleRNN(512, return_sequences=True, activation="relu"))
model.add(layers.SimpleRNN(512, activation="relu"))
# No activation on the head: it emits raw logits, matching from_logits=True in the loss.
model.add(layers.Dense(10))

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the cell output.
model.summary()

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)
# Last expression of the cell: the notebook displays the returned [loss, accuracy] list.
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, None, 512)         276992    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 512)               524800    
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 806,922
Trainable params: 806,922
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
938/938 - 265s - loss: 0.3002 - accuracy: 0.9072 - 265s/epoch - 282ms/step
Epoch 2/10
938/938 - 266s - loss: 0.1410 - accuracy: 0.9611 - 266s/epoch - 284ms/step
Epoch 3/10
938/938 - 259s - loss: 0.1208 - accuracy: 0.9671 - 259s/epoch - 277ms/step
Epoch 4/10
938/938 - 260s - loss: 0.1009 - accuracy

[0.07316961884498596, 0.9828000068664551]

## 2.2 Simple RNN with `tanh`

In [None]:
# Two stacked SimpleRNN layers followed by a Dense layer with 10 outputs.
model = keras.Sequential()
# (None, 28): any number of time steps, 28 features per step.
model.add(keras.Input(shape=(None, 28)))
# return_sequences=True hands the full output sequence to the next recurrent layer.
model.add(layers.SimpleRNN(256, return_sequences=True, activation="tanh"))
# No activation argument here, so this layer uses SimpleRNN's default activation.
model.add(layers.SimpleRNN(256))
# No activation on the head: it emits raw logits, matching from_logits=True in the loss.
model.add(layers.Dense(10))

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the cell output.
model.summary()

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)
# Last expression of the cell: the notebook displays the returned [loss, accuracy] list.
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_2 (SimpleRNN)    (None, None, 256)         72960     
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, 256)               131328    
                                                                 
 dense_1 (Dense)             (None, 10)                2570      
                                                                 
Total params: 206,858
Trainable params: 206,858
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
938/938 - 92s - loss: 0.3000 - accuracy: 0.9111 - 92s/epoch - 99ms/step
Epoch 2/10
938/938 - 91s - loss: 0.1824 - accuracy: 0.9457 - 91s/epoch - 97ms/step
Epoch 3/10
938/938 - 93s - loss: 0.1582 - accuracy: 0.9532 - 93s/epoch - 99ms/step
Epoch 4/10
938/938 - 95s - loss: 0.1503 - accuracy: 0.9564

[0.21362753212451935, 0.9359999895095825]

# 3. Using GRU

In [None]:
# Two stacked GRU layers followed by a Dense layer with 10 outputs.
model = keras.Sequential()
# (None, 28): any number of time steps, 28 features per step.
model.add(keras.Input(shape=(None, 28)))
# NOTE(review): per the Keras GRU docs the fused cuDNN kernel requires the default
# tanh activation, so activation="relu" likely falls back to the slower generic
# implementation on GPU -- confirm this is intended.
model.add(layers.GRU(256, return_sequences=True, activation="relu"))
model.add(layers.GRU(256))
# No activation on the head: it emits raw logits, matching from_logits=True in the loss.
model.add(layers.Dense(10))

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the cell output.
model.summary()

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)
# Last expression of the cell: the notebook displays the returned [loss, accuracy] list.
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, None, 256)         219648    
                                                                 
 gru_1 (GRU)                 (None, 256)               394752    
                                                                 
 dense_2 (Dense)             (None, 10)                2570      
                                                                 
Total params: 616,970
Trainable params: 616,970
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
938/938 - 244s - loss: 0.2891 - accuracy: 0.9032 - 244s/epoch - 260ms/step
Epoch 2/10
938/938 - 247s - loss: 0.0701 - accuracy: 0.9784 - 247s/epoch - 264ms/step
Epoch 3/10
938/938 - 247s - loss: 0.0492 - accuracy: 0.9852 - 247s/epoch - 263ms/step
Epoch 4/10
938/938 - 245s - loss: 0.0395 - accura

[0.03067385032773018, 0.9908999800682068]

# 4. Using LSTM

In [None]:
# Two stacked bidirectional LSTM layers followed by a Dense layer with 10 outputs.
# An earlier variant (bidirectional first layer + plain LSTM second layer) used to be
# built here and was immediately overwritten by this model; that dead construction
# has been removed so only the model that is actually trained gets built.
model = keras.Sequential()
# (None, 28): any number of time steps, 28 features per step.
model.add(keras.Input(shape=(None, 28)))
# NOTE(review): per the Keras LSTM docs the fused cuDNN kernel requires the default
# tanh activation, so activation="relu" likely falls back to the slower generic
# implementation on GPU -- confirm this is intended.
model.add(
    layers.Bidirectional(layers.LSTM(256, return_sequences=True, activation="relu"))
)
# The name is given to the wrapped LSTM, not to the Bidirectional wrapper itself.
model.add(layers.Bidirectional(layers.LSTM(256, name="lstm_layer2")))
# No activation on the head: it emits raw logits, matching from_logits=True in the loss.
model.add(layers.Dense(10))

# summary() prints the table itself and returns None, so wrapping it in print()
# only adds a stray "None" line to the cell output.
model.summary()

model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=["accuracy"],
)

# This section uses a larger batch (128) and fewer epochs (5) than the earlier ones.
model.fit(x_train, y_train, batch_size=128, epochs=5, verbose=2)
# Last expression of the cell: the notebook displays the returned [loss, accuracy] list.
model.evaluate(x_test, y_test, batch_size=128, verbose=2)

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_7 (Bidirectio  (None, None, 512)        583680    
 nal)                                                            
                                                                 
 bidirectional_8 (Bidirectio  (None, 512)              1574912   
 nal)                                                            
                                                                 
 dense_8 (Dense)             (None, 10)                5130      
                                                                 
Total params: 2,163,722
Trainable params: 2,163,722
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/5
469/469 - 632s - loss: 0.3130 - accuracy: 0.8966 - 632s/epoch - 1s/step
Epoch 2/5
469/469 - 624s - loss: 0.0816 - accuracy: 0.9752 - 624s/epoch - 1s/step
Epoch 3

[0.0495150052011013, 0.9854999780654907]