In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist


In [2]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all
# up front. Every visible GPU is configured (the setting has to be the same on
# all of them), and a machine without a GPU simply skips the loop instead of
# failing on physical_devices[0].
physical_devices = tf.config.list_physical_devices('GPU')
try:
    for gpu in physical_devices:
        tf.config.experimental.set_memory_growth(gpu, enable=True)
except RuntimeError as err:
    # Memory growth can only be set before the GPUs are initialised; this is
    # hit when the cell is re-run in a kernel that has already used the GPU.
    print(f"Could not change GPU memory growth: {err}")

In [3]:
# True when TensorFlow can see at least one GPU (an empty device list is falsy).
bool(tf.config.list_physical_devices('GPU'))

True

### Load MNIST data

In [4]:
# mnist.load_data() returns a (train, test) pair, each an (images, labels) tuple.
(
    (X_train, y_train),
    (X_test, y_test),
) = mnist.load_data()

In [6]:
# Report the array shapes of both splits, one line per array.
print(
    f"Shape of X_train: {X_train.shape}",
    f"Shape of y_train: {y_train.shape}",
    f"Shape of X_test: {X_test.shape}",
    f"Shape of y_test: {y_test.shape}",
    sep="\n",
)

Shape of X_train: (60000, 28, 28)
Shape of y_train: (60000,)
Shape of X_test: (10000, 28, 28)
Shape of y_test: (10000,)


In [5]:
def scale_pixels(images):
    """Return `images` as float32 values scaled from 0-255 down to 0-1.

    Only integer-typed arrays are rescaled. An array that is already floating
    point is returned unchanged, so re-running this cell does not divide the
    data by 255 a second time.
    """
    if images.dtype.kind in ("u", "i"):
        return images.astype("float32") / 255.0
    return images


X_train = scale_pixels(X_train)
X_test = scale_pixels(X_test)

### Simple RNN

In [7]:
# Two stacked SimpleRNN layers followed by a linear 10-way output layer.
# Each sample is consumed as a sequence of 28-feature vectors; the number of
# time steps is left unspecified (None).
# NOTE(review): the summary output saved with this notebook lists 512 units per
# SimpleRNN layer, not the 256 configured here — re-run to refresh the outputs.
model = keras.Sequential(
    [
        layers.Input(shape=(None, 28)),
        # Emits one vector per time step so the next recurrent layer still
        # receives a sequence. tanh is the default activation.
        layers.SimpleRNN(256, return_sequences=True, activation="tanh"),
        # Emits a single vector per sample.
        layers.SimpleRNN(256, activation="tanh"),
        # No activation: the 10 outputs are raw scores (logits).
        layers.Dense(10),
    ]
)

In [8]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, None, 512)         276992    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 512)               524800    
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 806,922
Trainable params: 806,922
Non-trainable params: 0
_________________________________________________________________
None


In [9]:
# The model outputs raw logits, so the loss is told to apply softmax itself
# (from_logits=True). The sparse variant of the loss is used with the 1-D
# label arrays loaded above.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [12]:
# Train the SimpleRNN model: 5 epochs over the training split, 64 samples per batch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2603fe36f70>

In [13]:
# Score the SimpleRNN model on the test split; the displayed list is [loss, accuracy].
model.evaluate(
    x=X_test,
    y=y_test,
    batch_size=64,
    verbose=1,
)



[0.08153832703828812, 0.9789999723434448]

### GRU (slightly better than the simple RNN)

In [14]:
# Same layout as the SimpleRNN model, with GRU cells as the recurrent layers.
# Input is a sequence of 28-feature vectors with an unspecified number of
# time steps (None).
model_gru = keras.Sequential(
    [
        layers.Input(shape=(None, 28)),
        # Keeps the per-time-step outputs for the next recurrent layer.
        # tanh is the default activation.
        layers.GRU(256, return_sequences=True, activation="tanh"),
        # Reduces the sequence to one 256-wide vector per sample.
        layers.GRU(256, activation="tanh"),
        # Linear output layer producing 10 logits.
        layers.Dense(10),
    ]
)

In [15]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model_gru.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, None, 256)         219648    
                                                                 
 gru_1 (GRU)                 (None, 256)               394752    
                                                                 
 dense_1 (Dense)             (None, 10)                2570      
                                                                 
Total params: 616,970
Trainable params: 616,970
Non-trainable params: 0
_________________________________________________________________
None


In [16]:
# Same training setup as the SimpleRNN model: Adam at 1e-3, sparse categorical
# cross-entropy computed from logits, accuracy as the reported metric.
model_gru.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [17]:
# Train the GRU model: 5 epochs over the training split, 64 samples per batch.
model_gru.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x2604e093f70>

In [18]:
# Score the GRU model on the test split; the displayed list is [loss, accuracy].
model_gru.evaluate(
    x=X_test,
    y=y_test,
    batch_size=64,
    verbose=1,
)



[0.03606480360031128, 0.9886999726295471]

### LSTM (comparable to the GRU; slightly lower test accuracy in this run)

In [19]:
# Same layout again, now with LSTM cells as the recurrent layers.
# Input is a sequence of 28-feature vectors with an unspecified number of
# time steps (None).
model_lstm = keras.Sequential(
    [
        layers.Input(shape=(None, 28)),
        # Returns the whole output sequence for the second LSTM to consume.
        # tanh is the default activation.
        layers.LSTM(256, return_sequences=True, activation="tanh"),
        # Returns one 256-wide vector per sample.
        layers.LSTM(256, activation="tanh"),
        # Linear output layer producing 10 logits.
        layers.Dense(10),
    ]
)

In [20]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model_lstm.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, None, 256)         291840    
                                                                 
 lstm_1 (LSTM)               (None, 256)               525312    
                                                                 
 dense_2 (Dense)             (None, 10)                2570      
                                                                 
Total params: 819,722
Trainable params: 819,722
Non-trainable params: 0
_________________________________________________________________
None


In [21]:
# Same training setup as the earlier models: Adam at 1e-3, sparse categorical
# cross-entropy computed from logits, accuracy as the reported metric.
model_lstm.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [22]:
# Train the LSTM model: 5 epochs over the training split, 64 samples per batch.
model_lstm.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x25f82f90a00>

In [23]:
# Score the LSTM model on the test split; the displayed list is [loss, accuracy].
model_lstm.evaluate(
    x=X_test,
    y=y_test,
    batch_size=64,
    verbose=1,
)



[0.04458455368876457, 0.9858999848365784]

### Bidirectional LSTM (slightly better than the LSTM)

In [25]:
# LSTM stack with each recurrent layer wrapped in Bidirectional, so the
# sequence is processed in both directions. Input is a sequence of 28-feature
# vectors with an unspecified number of time steps (None).
model_bi_lstm = keras.Sequential(
    [
        layers.Input(shape=(None, 28)),
        # Returns the full sequence for the next bidirectional layer.
        # tanh is the default activation.
        layers.Bidirectional(
            layers.LSTM(256, return_sequences=True, activation="tanh")
        ),
        # Returns a single vector per sample.
        layers.Bidirectional(layers.LSTM(256, activation="tanh")),
        # Linear output layer producing 10 logits.
        layers.Dense(10),
    ]
)

In [26]:
# summary() prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model_bi_lstm.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, None, 512)        583680    
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 512)              1574912   
 nal)                                                            
                                                                 
 dense_3 (Dense)             (None, 10)                5130      
                                                                 
Total params: 2,163,722
Trainable params: 2,163,722
Non-trainable params: 0
_________________________________________________________________
None


In [27]:
# Same training setup as the earlier models: Adam at 1e-3, sparse categorical
# cross-entropy computed from logits, accuracy as the reported metric.
model_bi_lstm.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

In [28]:
# Train the bidirectional LSTM model: 5 epochs over the training split,
# 64 samples per batch.
model_bi_lstm.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x25f8a801b80>

In [29]:
# Score the bidirectional LSTM model on the test split; the displayed list is
# [loss, accuracy].
model_bi_lstm.evaluate(
    x=X_test,
    y=y_test,
    batch_size=64,
    verbose=1,
)



[0.03538480028510094, 0.989300012588501]