# PART 1: Warm-Up - Running baseline model

In [1]:
import os

# Pin Python's hash seed as part of the notebook's reproducibility setup.
# NOTE(review): CPython reads PYTHONHASHSEED when the interpreter starts, so
# this assignment is inherited by child processes but presumably does not
# re-seed hashing in the already-running kernel -- export the variable before
# launching Jupyter if hash ordering matters.
os.environ.update({'PYTHONHASHSEED': '0'})

In [2]:
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, LSTM, Activation
from keras.optimizers import RMSprop
import pandas as pd
from keras.optimizers import SGD
import numpy as np
import matplotlib.pyplot as plt
import random
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
import math
from keras.callbacks import LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau

Using TensorFlow backend.


In [3]:
# Seed every random number generator the notebook relies on -- NumPy's global
# RNG, Python's `random` module and TensorFlow -- each with its own fixed value.
for seed_rng, seed_value in (
    (np.random.seed, 19),
    (random.seed, 100),
    (tf.random.set_seed, 200),
):
    seed_rng(seed_value)

In [4]:
# Load MNIST. `load_data()` returns a (train, test) pair, and each split is
# itself an (images, labels) pair.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [5]:
# Add a trailing channel axis so every sample has shape (28, 28, 1), matching
# the `input_shape` the Conv2D model below is built with.
# -1 lets NumPy infer the number of samples instead of hard-coding 60000/10000,
# so the cell keeps working if either split is subsampled.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

In [6]:
# Cast the pixel arrays to float32 and divide by 255 to rescale them.
# NOTE: this overwrites x_train/x_test, so running the cell a second time
# divides the already-scaled values by 255 again.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

In [7]:
# Number of target classes used for the one-hot encoding and the output layer.
num_classes = 10

# Turn the class vectors of both splits into binary (one-hot) class matrices
# with `num_classes` columns.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [8]:
# Baseline model for MNIST: a fully connected network trained for 25 epochs.

batch_size = 128
epochs = 25

# Flatten every image into a 784-long vector for the dense layers.
# -1 lets NumPy infer the sample count instead of hard-coding 60000/10000,
# so the cell also works on subsampled splits.
x_train = x_train.reshape(-1, 784)
x_test = x_test.reshape(-1, 784)

print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Two 512-unit ReLU layers, each followed by 20% dropout, then a softmax
# layer with one output per class.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# NOTE: the test split is passed as validation data, so the per-epoch val_*
# metrics recorded in `history` are measured on the test set.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))

# Loss and accuracy of the final-epoch weights on the test split.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               401920    
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________
Train on 60000 samples, validate on 10000 samples
Epoch 1/25
Epoch 2/25
Epo

In [9]:
# Highest validation accuracy recorded in any epoch of the training run above.
val_accuracy_per_epoch = history.history['val_accuracy']
max(val_accuracy_per_epoch)

0.9850999712944031

### After 25 epochs, the baseline reaches a maximum val_accuracy of **98.5%**. By this point the accuracy has stopped improving significantly, so training for more epochs without changing the model would only lead to overfitting; to increase performance on the dataset we need to modify the network's layers.

# PART 2: Increasing accuracy by modifying network

In [12]:
# Beating the baseline test accuracy on MNIST with a small convolutional network.

# Restore the (28, 28, 1) image layout that Conv2D expects (the baseline cell
# flattened the images). -1 lets NumPy infer the sample count instead of
# hard-coding 60000/10000, so the cell also works on subsampled splits.
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

num_classes = 10

batch_size = 128
epochs = 30

# Two conv -> max-pool -> dropout stages (64 then 32 filters, 3x3 kernels,
# 'same' padding), followed by a 128-unit dense layer and a softmax output.
model = Sequential([
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same',
           input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=RMSprop(),
              metrics=['accuracy'])

# NOTE: the test split is passed as validation data, so the per-epoch val_*
# metrics recorded in `history` are measured on the test set.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_data=(x_test, y_test))

# Loss and accuracy of the final-epoch weights on the test split.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 28, 28, 64)        640       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 14, 14, 32)        18464     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 7, 7, 32)          0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 7, 7, 32)          0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 1568)             

### After experimenting with different setups, the final model I've built reaches a validation accuracy of close to **99.2%** after 30 epochs. Beyond 30 epochs, val_accuracy barely improves while train_accuracy keeps increasing, which suggests overfitting.

**The final model has the following layers:**


*   Conv2D with 64 filters
*   MaxPool with pool size (2,2)
*   Dropout (0.25)
*   Conv2D with 32 filters
*   MaxPool with pool size (2,2)
*   Dropout (0.25)
*   Flatten the pooled feature maps into a vector that feeds the dense layer
*   Dense layer with 128 units
*   Softmax output with 10 classes


