In [0]:
%matplotlib inline

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout,BatchNormalization
from keras.layers.recurrent import SimpleRNN, LSTM, GRU 
from keras.utils import np_utils
from keras.datasets import mnist
from keras import initializers
import keras


## Getting Mnist data and reshaping for RNN & LSTM

In [18]:
batch_size = 128
num_classes = 10
epochs = 15

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x, y), (x_test, y_test) = mnist.load_data()
x = x.reshape(-1,28,28)
x_test = x_test.reshape(-1,28,28)
#print(x1_train.shape,x1_test.shape)



x = x.astype('float32')
x_test = x_test.astype('float32')

x_train = x[0:50000]
x_val = x[50000::]


x_train /= 255
x_test /= 255
print(x.shape)
print(x_train.shape, 'train samples')
print(x_val.shape, 'validation samples')
print(x_test.shape, 'test samples')

# convert class vectors to binary class matrices
y = keras.utils.to_categorical(y, num_classes)
y_train = y[0:50000]
y_val=y[50000::]
y_test = keras.utils.to_categorical(y_test, num_classes)


(60000, 28, 28)
(50000, 28, 28) train samples
(10000, 28, 28) validation samples
(10000, 28, 28) test samples


## RNN without batch normalization and dropout

In [0]:
 def RNN(batchsize,lr):
    model = Sequential()
    model.add(SimpleRNN(units=512, activation='relu', input_shape=(28,28)))
    model.add(Dense(num_classes, activation='softmax'))
    sgd = keras.optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=False)

    model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    history = model.fit(x_train, y_train,
                        batch_size=batchsize,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_val, y_val))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('test loss:', score[0])
    print('test accuracy:', score[1])

## Learning rate =0.01 and Batchsize =128

In [4]:
RNN(batchsize=128,lr=0.01)

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_1 (SimpleRNN)     (None, 512)               276992    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5130      
Total params: 282,122
Trainable params: 282,122
Non-trainable params: 0
_________________________________________________________________
Instructions for updating:
Use tf.cast instead.
Train on 50000 samples, validate on 10000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
test loss: 0.06476740560053149
test accuracy: 0.9791


### Training Accuracy achieved : 98.6% 
### Testing acuracy achieved : 97.9%
### we will try batch normalization for the convergence speed and dropout to improve accuracy

## RNN with batchnormalisation and dropout layers

In [0]:
 def RNN(batchsize,lr):
    model = Sequential()
    model.add(SimpleRNN(units=512, activation='relu', input_shape=(28,28)))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))
    sgd = keras.optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=False)

    model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    history = model.fit(x_train, y_train,
                        batch_size=batchsize,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_val, y_val))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('test loss:', score[0])
    print('test accuracy:', score[1])

In [6]:
RNN(batchsize=128,lr=0.01)

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_2 (SimpleRNN)     (None, 512)               276992    
_________________________________________________________________
batch_normalization_1 (Batch (None, 512)               2048      
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 10)                5130      
Total params: 284,170
Trainable params: 283,146
Non-trainable params: 1,024
_________________________________________________________________
Train on 50000 samples, validate on 10000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15


### with batch normalization the convergence is fast we achieved previous training accuracy 98% in 8 iteration itself

### With dropout based regularization can be used at the times of overfitting

### Learning rate =0.1 Batchsize = 128

In [32]:
RNN(batchsize=128,lr=0.1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_3 (SimpleRNN)     (None, 512)               276992    
_________________________________________________________________
batch_normalization_2 (Batch (None, 512)               2048      
_________________________________________________________________
dropout_15 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 10)                5130      
Total params: 284,170
Trainable params: 283,146
Non-trainable params: 1,024
_________________________________________________________________
Train on 50000 samples, validate on 10000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
test loss: 0.06093739502064418
test accurac

### Changing learning rate to 0.1 provides fast gradient update
### Training accuracy : 99.17%
### Testing accuracy : 98.21%

## Learning rate =0.1 Batchsize=1

In [42]:
RNN(batchsize=1,lr=0.1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_4 (SimpleRNN)     (None, 512)               276992    
_________________________________________________________________
batch_normalization_6 (Batch (None, 512)               2048      
_________________________________________________________________
dropout_24 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_20 (Dense)             (None, 10)                5130      
Total params: 284,170
Trainable params: 283,146
Non-trainable params: 1,024
_________________________________________________________________
Train on 50000 samples, validate on 10000 samples
Epoch 1/15

KeyboardInterrupt: ignored

### With batch size of 1 the convergence will be very slow
### Batch size of 1 is taking more time so due to compute restriction we stopped the training

## LSTM with one layer

In [0]:
def LSTMnet(batchsize,lr):
    model = Sequential()
    model.add(LSTM(128, input_shape=(28,28), activation='relu', return_sequences=False))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Dense(10, activation='softmax'))
    sgd = keras.optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=False)

    model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    history = model.fit(x_train, y_train,
                        batch_size=batchsize,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_val, y_val))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('test loss:', score[0])
    print('test accuracy:', score[1])

In [47]:
LSTMnet(batchsize=128,lr=0.01)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_17 (LSTM)               (None, 128)               80384     
_________________________________________________________________
batch_normalization_8 (Batch (None, 128)               512       
_________________________________________________________________
dropout_26 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 10)                1290      
Total params: 82,186
Trainable params: 81,930
Non-trainable params: 256
_________________________________________________________________
Train on 50000 samples, validate on 10000 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
test loss: 0.1474612377241254
test accuracy: 0.

### Training accuracy achieved : 97%
### Testing accuracy achieved: 95.3%

### Lets improve accuracy using two more hidden layer

## LSTM with two layers

In [0]:
def LSTMnet(batchsize,lr):
    model = Sequential()
    model.add(LSTM(128, input_shape=(28,28), activation='relu', return_sequences=True))
    model.add(BatchNormalization())
    model.add(Dropout(0.4))
    model.add(LSTM(128, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='softmax'))
    sgd = keras.optimizers.SGD(lr=lr, decay=1e-6, momentum=0.9, nesterov=False)

    model.summary()

    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    history = model.fit(x_train, y_train,
                        batch_size=batchsize,
                        epochs=epochs,
                        verbose=1,
                        validation_data=(x_val, y_val))
    score = model.evaluate(x_test, y_test, verbose=0)
    print('test loss:', score[0])
    print('test accuracy:', score[1])

In [40]:
LSTMnet(batchsize=128,lr=0.01)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_12 (LSTM)               (None, 28, 128)           80384     
_________________________________________________________________
batch_normalization_4 (Batch (None, 28, 128)           512       
_________________________________________________________________
dropout_18 (Dropout)         (None, 28, 128)           0         
_________________________________________________________________
lstm_13 (LSTM)               (None, 128)               131584    
_________________________________________________________________
dropout_19 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 128)               16512     
_________________________________________________________________
dropout_20 (Dropout)         (None, 128)               0         
__________

### With addition of two hidden layer the accuract has improved by 2%
### Test accuracy : 97.5%
### Train accuracy : 96.7%

##  Learning rate =0.1 and Batchsize =128

In [41]:
LSTMnet(batchsize=128,lr=0.1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_14 (LSTM)               (None, 28, 128)           80384     
_________________________________________________________________
batch_normalization_5 (Batch (None, 28, 128)           512       
_________________________________________________________________
dropout_21 (Dropout)         (None, 28, 128)           0         
_________________________________________________________________
lstm_15 (LSTM)               (None, 128)               131584    
_________________________________________________________________
dropout_22 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 128)               16512     
_________________________________________________________________
dropout_23 (Dropout)         (None, 128)               0         
__________

### With learning rate of 0.1 the gradient update is faster and testing accuracy improved by 0.5%
### Testing accuracy 97.7%
### Traing accuracy 96.7#

In [49]:
LSTMnet(batchsize=1,lr=0.1)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_19 (LSTM)               (None, 128)               80384     
_________________________________________________________________
batch_normalization_10 (Batc (None, 128)               512       
_________________________________________________________________
dropout_28 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_24 (Dense)             (None, 10)                1290      
Total params: 82,186
Trainable params: 81,930
Non-trainable params: 256
_________________________________________________________________
Train on 50000 samples, validate on 10000 samples
Epoch 1/15

KeyboardInterrupt: ignored

### With batch size of 1 the convergence will be very slow
### Batch size of 1 is taking more time so due to compute restriction we stopped the training