In [1]:
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.datasets import mnist

# Discover the GPUs TensorFlow can see and report them.
physical_devices = tf.config.list_physical_devices('GPU')
print('Physical Devices', physical_devices)

# Enable memory growth on every detected GPU. Iterating (instead of indexing
# element 0) keeps this cell runnable on CPU-only machines, where the list is
# empty and physical_devices[0] would raise IndexError, and applies the same
# setting to all GPUs when more than one is present.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

Physical Devices [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


### Load MNIST Data

In [2]:
# Load the MNIST train and test splits; x_* hold the images and y_* the labels
# later passed to model.fit as targets.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Inspect the dimensions of the training images array.
x_train.shape

(60000, 28, 28)

In [4]:
# Cast both image arrays to float32 and divide by 255
# (presumably mapping 0-255 pixel intensities into [0, 1] -- confirm dtype).
# NOTE: this cell is not idempotent; re-running it divides by 255 again.
x_train, x_test = (
    split.astype('float32') / 255.
    for split in (x_train, x_test)
)

### RNN Model with Sequential API

In [5]:
# Start an empty Sequential model; layers are appended in the cells below.
model = keras.Sequential()

In [6]:
# Input shape is (timesteps, features):
#   - None leaves the number of timesteps unspecified, so any sequence length is accepted.
#   - 28 is the size of the feature vector fed to the RNN at each timestep.
model.add(keras.Input(shape=(None, 28)))

In [7]:
# First recurrent layer: return_sequences=True emits an output per timestep so
# the next recurrent layer receives a full sequence.
model.add(layers.SimpleRNN(units=512, activation='relu', return_sequences=True))

In [8]:
# Second recurrent layer: without return_sequences it yields a single
# 512-wide vector per sample (see the summary output shape).
model.add(layers.SimpleRNN(units=512, activation='relu'))

In [9]:
# Classification head: 10 outputs with no activation, i.e. raw logits
# (the loss is configured with from_logits=True).
model.add(layers.Dense(units=10))

In [10]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, None, 512)         276992    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 512)               524800    
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 806,922
Trainable params: 806,922
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Configure training: sparse categorical cross-entropy on raw logits
# (the Dense head has no activation), Adam optimizer, accuracy metric.
# `learning_rate` is used instead of the deprecated `lr` alias, which emits a
# warning on construction and is rejected by newer Keras releases.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

  super(Adam, self).__init__(name, **kwargs)


In [12]:
# Train for 10 epochs in mini-batches of 64; verbose=2 logs one line per epoch.
model.fit(x_train, y_train, batch_size=64, epochs=10, verbose=2)

Epoch 1/10
938/938 - 53s - loss: 0.3033 - accuracy: 0.9072 - 53s/epoch - 57ms/step
Epoch 2/10
938/938 - 52s - loss: 0.1461 - accuracy: 0.9603 - 52s/epoch - 55ms/step
Epoch 3/10
938/938 - 51s - loss: 0.1154 - accuracy: 0.9684 - 51s/epoch - 54ms/step
Epoch 4/10
938/938 - 50s - loss: 0.0933 - accuracy: 0.9748 - 50s/epoch - 53ms/step
Epoch 5/10
938/938 - 50s - loss: 0.0911 - accuracy: 0.9752 - 50s/epoch - 54ms/step
Epoch 6/10
938/938 - 51s - loss: 0.0852 - accuracy: 0.9773 - 51s/epoch - 54ms/step
Epoch 7/10
938/938 - 51s - loss: 0.0731 - accuracy: 0.9803 - 51s/epoch - 54ms/step
Epoch 8/10
938/938 - 53s - loss: 0.0697 - accuracy: 0.9807 - 53s/epoch - 56ms/step
Epoch 9/10
938/938 - 55s - loss: 0.0663 - accuracy: 0.9822 - 55s/epoch - 59ms/step
Epoch 10/10
938/938 - 52s - loss: 0.0682 - accuracy: 0.9812 - 52s/epoch - 55ms/step


<keras.callbacks.History at 0x7fdbd03cdf40>

### GRU Model with Sequential API

In [17]:
# Two stacked GRU layers followed by a 10-way logit head.
model = keras.Sequential([
    # (timesteps, features): None accepts any sequence length; 28 features per step.
    keras.Input(shape=(None, 28)),
    # Emits an output per timestep so the second GRU receives a full sequence.
    layers.GRU(units=512, activation='tanh', return_sequences=True),
    # Emits one 512-wide vector per sample.
    layers.GRU(units=512, activation='tanh'),
    # Raw logits, one per class.
    layers.Dense(units=10),
])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_2 (GRU)                 (None, None, 512)         832512    
                                                                 
 gru_3 (GRU)                 (None, 512)               1575936   
                                                                 
 dense_2 (Dense)             (None, 10)                5130      
                                                                 
Total params: 2,413,578
Trainable params: 2,413,578
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Configure training: sparse categorical cross-entropy on raw logits
# (the Dense head has no activation), Adam optimizer, accuracy metric.
# `learning_rate` is used instead of the deprecated `lr` alias, which emits a
# warning on construction and is rejected by newer Keras releases.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [19]:
# Train for 10 epochs in mini-batches of 64; verbose=2 logs one line per epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    verbose=2,
)

Epoch 1/10
938/938 - 20s - loss: 0.2401 - accuracy: 0.9212 - 20s/epoch - 21ms/step
Epoch 2/10
938/938 - 12s - loss: 0.0604 - accuracy: 0.9817 - 12s/epoch - 13ms/step
Epoch 3/10
938/938 - 12s - loss: 0.0465 - accuracy: 0.9859 - 12s/epoch - 13ms/step
Epoch 4/10
938/938 - 12s - loss: 0.0352 - accuracy: 0.9885 - 12s/epoch - 13ms/step
Epoch 5/10
938/938 - 12s - loss: 0.0275 - accuracy: 0.9915 - 12s/epoch - 13ms/step
Epoch 6/10
938/938 - 12s - loss: 0.0238 - accuracy: 0.9925 - 12s/epoch - 13ms/step
Epoch 7/10
938/938 - 12s - loss: 0.0236 - accuracy: 0.9926 - 12s/epoch - 13ms/step
Epoch 8/10
938/938 - 12s - loss: 0.0192 - accuracy: 0.9940 - 12s/epoch - 13ms/step
Epoch 9/10
938/938 - 12s - loss: 0.0174 - accuracy: 0.9947 - 12s/epoch - 13ms/step
Epoch 10/10
938/938 - 12s - loss: 0.0161 - accuracy: 0.9950 - 12s/epoch - 13ms/step


<keras.callbacks.History at 0x7fdb38046700>

### LSTM Model with Sequential API

In [20]:
# Two stacked LSTM layers followed by a 10-way logit head.
model = keras.Sequential([
    # (timesteps, features): None accepts any sequence length; 28 features per step.
    keras.Input(shape=(None, 28)),
    # Emits an output per timestep so the second LSTM receives a full sequence.
    layers.LSTM(units=512, activation='tanh', return_sequences=True),
    # Emits one 512-wide vector per sample.
    layers.LSTM(units=512, activation='tanh'),
    # Raw logits, one per class.
    layers.Dense(units=10),
])
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, None, 512)         1107968   
                                                                 
 lstm_1 (LSTM)               (None, 512)               2099200   
                                                                 
 dense_3 (Dense)             (None, 10)                5130      
                                                                 
Total params: 3,212,298
Trainable params: 3,212,298
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Configure training: sparse categorical cross-entropy on raw logits
# (the Dense head has no activation), Adam optimizer, accuracy metric.
# `learning_rate` is used instead of the deprecated `lr` alias, which emits a
# warning on construction and is rejected by newer Keras releases.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

In [22]:
# Train for 10 epochs in mini-batches of 64; verbose=2 logs one line per epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    verbose=2,
)

Epoch 1/10
938/938 - 18s - loss: 0.2880 - accuracy: 0.9060 - 18s/epoch - 19ms/step
Epoch 2/10
938/938 - 15s - loss: 0.0808 - accuracy: 0.9750 - 15s/epoch - 16ms/step
Epoch 3/10
938/938 - 14s - loss: 0.0557 - accuracy: 0.9830 - 14s/epoch - 15ms/step
Epoch 4/10
938/938 - 15s - loss: 0.0420 - accuracy: 0.9869 - 15s/epoch - 15ms/step
Epoch 5/10
938/938 - 14s - loss: 0.0374 - accuracy: 0.9886 - 14s/epoch - 15ms/step
Epoch 6/10
938/938 - 15s - loss: 0.0288 - accuracy: 0.9912 - 15s/epoch - 16ms/step
Epoch 7/10
938/938 - 15s - loss: 0.0258 - accuracy: 0.9920 - 15s/epoch - 16ms/step
Epoch 8/10
938/938 - 15s - loss: 0.0216 - accuracy: 0.9932 - 15s/epoch - 16ms/step
Epoch 9/10
938/938 - 15s - loss: 0.0206 - accuracy: 0.9937 - 15s/epoch - 16ms/step
Epoch 10/10
938/938 - 15s - loss: 0.0180 - accuracy: 0.9943 - 15s/epoch - 16ms/step


<keras.callbacks.History at 0x7fdb1c0eb4f0>

### Bidirectional LSTM with Sequential API

In [23]:
# Two stacked bidirectional LSTM layers followed by a 10-way logit head.
model = keras.Sequential([
    # (timesteps, features): None accepts any sequence length; 28 features per step.
    keras.Input(shape=(None, 28)),
    # Bidirectional wrapper around an LSTM that emits an output per timestep.
    layers.Bidirectional(
        layers.LSTM(units=512, activation='tanh', return_sequences=True)
    ),
    # Bidirectional wrapper around an LSTM that emits only its final output.
    layers.Bidirectional(
        layers.LSTM(units=512, activation='tanh')
    ),
    # Raw logits, one per class.
    layers.Dense(units=10),
])
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, None, 1024)       2215936   
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 1024)             6295552   
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 10)                10250     
                                                                 
Total params: 8,521,738
Trainable params: 8,521,738
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Configure training: sparse categorical cross-entropy on raw logits
# (the Dense head has no activation), Adam optimizer, accuracy metric.
# `learning_rate` is used instead of the deprecated `lr` alias, which emits a
# warning on construction and is rejected by newer Keras releases.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    metrics=['accuracy'],
)

  super(Adam, self).__init__(name, **kwargs)


In [25]:
# Train for 10 epochs in mini-batches of 64; verbose=2 logs one line per epoch.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=10,
    verbose=2,
)

Epoch 1/10
938/938 - 38s - loss: 0.2531 - accuracy: 0.9178 - 38s/epoch - 40ms/step
Epoch 2/10
938/938 - 31s - loss: 0.0751 - accuracy: 0.9773 - 31s/epoch - 33ms/step
Epoch 3/10
938/938 - 31s - loss: 0.0535 - accuracy: 0.9840 - 31s/epoch - 33ms/step
Epoch 4/10
938/938 - 30s - loss: 0.0405 - accuracy: 0.9874 - 30s/epoch - 32ms/step
Epoch 5/10
938/938 - 31s - loss: 0.0331 - accuracy: 0.9898 - 31s/epoch - 33ms/step
Epoch 6/10
938/938 - 31s - loss: 0.0279 - accuracy: 0.9915 - 31s/epoch - 33ms/step
Epoch 7/10
938/938 - 30s - loss: 0.0228 - accuracy: 0.9930 - 30s/epoch - 32ms/step
Epoch 8/10
938/938 - 31s - loss: 0.0195 - accuracy: 0.9937 - 31s/epoch - 33ms/step
Epoch 9/10
938/938 - 30s - loss: 0.0179 - accuracy: 0.9945 - 30s/epoch - 32ms/step
Epoch 10/10
938/938 - 30s - loss: 0.0152 - accuracy: 0.9948 - 30s/epoch - 32ms/step


<keras.callbacks.History at 0x7fdadc316fd0>