# AI ASSIGNMENT - Handwritten Digit Recognition

In [1]:
from keras.models import Sequential
from keras.layers import Dense
from keras.datasets import mnist
from keras.utils import np_utils

import struct
import numpy as np
from matplotlib import pyplot
import matplotlib as mpl
from keras import optimizers


np.random.seed(5)

Using TensorFlow backend.


## Loading Training Dataset

In [2]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [3]:
'''
    Flatten the Numpy Array from 2D (28*28) to 1D (784*1)
'''
X_train = X_train.reshape(X_train.shape[0], 784).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 784).astype('float32')



In [4]:
'''
    Change Intensity Values of Pixels from 0 - 255 to 0 - 1
'''
X_train = X_train / 255
X_test = X_test / 255

In [5]:
'''
    One Hot Encoding . 
    i.e.,
        0 =  0000000001
        1 =  0000000010
        2 =  0000000100
        3 =  0000001000
        .
        .
        9 =  1000000000
'''
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)

## ADAM OPTIMIZER ( with Sigmoid Activation )

In [19]:
model = Sequential()

'''
    This is Input Layer
'''
model.add(Dense(784 , input_dim=784, kernel_initializer='random_uniform',activation='sigmoid'))

'''
    These are hidden layers . 16 Neurons in each Layer
'''
model.add(Dense(16 , activation='sigmoid'))
model.add(Dense(16 , activation='sigmoid'))

'''
    This is output Layer
'''
model.add(Dense(10 , activation='sigmoid'))


In [10]:
'''
    Compile the model
'''
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [11]:
'''
    Fit the Model
'''
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
scores = model.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Sigmoid): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 11s - loss: 1.9062 - acc: 0.5161 - val_loss: 1.5188 - val_acc: 0.7819
Epoch 2/10
 - 9s - loss: 1.2046 - acc: 0.8480 - val_loss: 0.9114 - val_acc: 0.9152
Epoch 3/10
 - 10s - loss: 0.6958 - acc: 0.9279 - val_loss: 0.5159 - val_acc: 0.9429
Epoch 4/10
 - 9s - loss: 0.4022 - acc: 0.9498 - val_loss: 0.3219 - val_acc: 0.9550
Epoch 5/10
 - 9s - loss: 0.2595 - acc: 0.9623 - val_loss: 0.2255 - val_acc: 0.9637
Epoch 6/10
 - 9s - loss: 0.1855 - acc: 0.9712 - val_loss: 0.1759 - val_acc: 0.9693
Epoch 7/10
 - 10s - loss: 0.1418 - acc: 0.9758 - val_loss: 0.1516 - val_acc: 0.9697
Epoch 8/10
 - 10s - loss: 0.1120 - acc: 0.9807 - val_loss: 0.1270 - val_acc: 0.9738
Epoch 9/10
 - 10s - loss: 0.0903 - acc: 0.9839 - val_loss: 0.1147 - val_acc: 0.9742
Epoch 10/10
 - 9s - loss: 0.0726 - acc: 0.9875 - val_loss: 0.0997 - val_acc: 0.9774
Final Baseline Error(Using Sigmoid): 2.26%


## ADAM OPTIMIZER (with  RELU ACTIVATION)

In [12]:
model_relu = Sequential()

model_relu.add(Dense(784 , input_dim=784, kernel_initializer='random_uniform',activation='relu'))

model_relu.add(Dense(16 , activation='relu'))
model_relu.add(Dense(16 , activation='relu'))

model_relu.add(Dense(10 , activation='sigmoid'))

model_relu.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 10s - loss: 0.7531 - acc: 0.6974 - val_loss: 0.1663 - val_acc: 0.9525
Epoch 2/10
 - 11s - loss: 0.1421 - acc: 0.9597 - val_loss: 0.1416 - val_acc: 0.9580
Epoch 3/10
 - 9s - loss: 0.0939 - acc: 0.9722 - val_loss: 0.0987 - val_acc: 0.9705
Epoch 4/10
 - 11s - loss: 0.0670 - acc: 0.9803 - val_loss: 0.0891 - val_acc: 0.9744
Epoch 5/10
 - 11s - loss: 0.0508 - acc: 0.9847 - val_loss: 0.0748 - val_acc: 0.9772
Epoch 6/10
 - 10s - loss: 0.0365 - acc: 0.9897 - val_loss: 0.0846 - val_acc: 0.9751
Epoch 7/10
 - 9s - loss: 0.0288 - acc: 0.9915 - val_loss: 0.0714 - val_acc: 0.9806
Epoch 8/10
 - 10s - loss: 0.0198 - acc: 0.9946 - val_loss: 0.0753 - val_acc: 0.9793
Epoch 9/10
 - 10s - loss: 0.0168 - acc: 0.9952 - val_loss: 0.0712 - val_acc: 0.9813
Epoch 10/10
 - 12s - loss: 0.0144 - acc: 0.9958 - val_loss: 0.0772 - val_acc: 0.9800
Final Baseline Error(Using Relu): 2.00%


## Stochastic Gradient Boost Optimizer ( with Relu activation ) 

In [24]:

'''
    Stochastic Gradient Boost Optimiser
    lr -- > Learning Rate
    momentum --> 0.9
'''
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=False)


In [25]:
model_relu.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 8s - loss: 0.0021 - acc: 0.9993 - val_loss: 0.0903 - val_acc: 0.9829
Epoch 2/10
 - 7s - loss: 9.8644e-04 - acc: 0.9998 - val_loss: 0.0914 - val_acc: 0.9822
Epoch 3/10
 - 7s - loss: 6.0617e-04 - acc: 1.0000 - val_loss: 0.0909 - val_acc: 0.9829
Epoch 4/10
 - 7s - loss: 4.9203e-04 - acc: 1.0000 - val_loss: 0.0923 - val_acc: 0.9829
Epoch 5/10
 - 7s - loss: 4.0829e-04 - acc: 1.0000 - val_loss: 0.0922 - val_acc: 0.9828
Epoch 6/10
 - 7s - loss: 3.6390e-04 - acc: 1.0000 - val_loss: 0.0932 - val_acc: 0.9832
Epoch 7/10
 - 7s - loss: 3.2759e-04 - acc: 1.0000 - val_loss: 0.0938 - val_acc: 0.9830
Epoch 8/10
 - 7s - loss: 2.9351e-04 - acc: 1.0000 - val_loss: 0.0946 - val_acc: 0.9833
Epoch 9/10
 - 7s - loss: 2.6945e-04 - acc: 1.0000 - val_loss: 0.0952 - val_acc: 0.9834
Epoch 10/10
 - 10s - loss: 2.5073e-04 - acc: 1.0000 - val_loss: 0.0955 - val_acc: 0.9831
Final Baseline Error(Using Relu): 1.69%


## ADAGRAD OPTIMIZER ( with Relu )

In [30]:
'''
    Adagrad Optimizer with Learning Rate 0.01
'''
agd = optimizers.Adagrad(lr=0.01, epsilon=0.05, decay=0.0)


In [31]:
model_relu.compile(loss='categorical_crossentropy', optimizer=agd, metrics=['accuracy'])

model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 8s - loss: 2.3650e-04 - acc: 1.0000 - val_loss: 0.0965 - val_acc: 0.9832
Epoch 2/10
 - 8s - loss: 2.1181e-04 - acc: 1.0000 - val_loss: 0.0972 - val_acc: 0.9834
Epoch 3/10
 - 12s - loss: 1.9131e-04 - acc: 1.0000 - val_loss: 0.0974 - val_acc: 0.9832
Epoch 4/10
 - 8s - loss: 1.7537e-04 - acc: 1.0000 - val_loss: 0.0980 - val_acc: 0.9836
Epoch 5/10
 - 8s - loss: 1.6189e-04 - acc: 1.0000 - val_loss: 0.0986 - val_acc: 0.9832
Epoch 6/10
 - 9s - loss: 1.4994e-04 - acc: 1.0000 - val_loss: 0.0988 - val_acc: 0.9836
Epoch 7/10
 - 8s - loss: 1.4138e-04 - acc: 1.0000 - val_loss: 0.0991 - val_acc: 0.9834
Epoch 8/10
 - 8s - loss: 1.3269e-04 - acc: 1.0000 - val_loss: 0.0994 - val_acc: 0.9836
Epoch 9/10
 - 11s - loss: 1.2591e-04 - acc: 1.0000 - val_loss: 0.1000 - val_acc: 0.9832
Epoch 10/10
 - 10s - loss: 1.2002e-04 - acc: 1.0000 - val_loss: 0.1001 - val_acc: 0.9838
Final Baseline Error(Using Relu): 1.62%


# With REGULARIZER

In [None]:
from keras import regularizers

model_reg = Sequential()


model_reg.add(Dense(784 , input_dim=784, kernel_regularizer=regularizers.l2(0.01), activity_regularizer=regularizers.l1(0.01), kernel_initializer='random_uniform',activation='relu'))

model_reg.add(Dense(16 , activation='relu'))
model_reg.add(Dense(16 , activation='relu'))


model_reg.add(Dense(10 , activation='sigmoid'))

agd = optimizers.Adagrad(lr=0.01, epsilon=0.05, decay=0.0)

model_reg.compile(loss='categorical_crossentropy', optimizer=agd, metrics=['accuracy'])

model_reg.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=2)
scores = model_reg.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Sigmoid): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 47s - loss: 5.9546 - acc: 0.1118 - val_loss: 4.1423 - val_acc: 0.1135
Epoch 2/10


# PARAMETER TUNING ( Using Relu and Adagrad - Best Combination )

## Batch Size ( 20 )

In [32]:
agd = optimizers.Adagrad(lr=0.01, epsilon=0.05, decay=0.0)
model_relu.compile(loss='categorical_crossentropy', optimizer=agd, metrics=['accuracy'])

model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=20, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 35s - loss: 1.7896e-04 - acc: 1.0000 - val_loss: 0.1021 - val_acc: 0.9840
Epoch 2/10
 - 40s - loss: 8.3799e-05 - acc: 1.0000 - val_loss: 0.1027 - val_acc: 0.9835
Epoch 3/10
 - 34s - loss: 6.1567e-05 - acc: 1.0000 - val_loss: 0.1040 - val_acc: 0.9839
Epoch 4/10
 - 35s - loss: 5.2612e-05 - acc: 1.0000 - val_loss: 0.1044 - val_acc: 0.9835
Epoch 5/10
 - 35s - loss: 4.5908e-05 - acc: 1.0000 - val_loss: 0.1049 - val_acc: 0.9837
Epoch 6/10
 - 35s - loss: 4.0758e-05 - acc: 1.0000 - val_loss: 0.1055 - val_acc: 0.9840
Epoch 7/10
 - 35s - loss: 3.6620e-05 - acc: 1.0000 - val_loss: 0.1060 - val_acc: 0.9840
Epoch 8/10
 - 35s - loss: 3.3679e-05 - acc: 1.0000 - val_loss: 0.1060 - val_acc: 0.9840
Epoch 9/10
 - 34s - loss: 3.0730e-05 - acc: 1.0000 - val_loss: 0.1063 - val_acc: 0.9841
Epoch 10/10
 - 35s - loss: 2.8592e-05 - acc: 1.0000 - val_loss: 0.1067 - val_acc: 0.9839
Final Baseline Error(Using Relu): 1.61%


### Batch Size ( 500 )

In [33]:
agd = optimizers.Adagrad(lr=0.01, epsilon=0.05, decay=0.0)
model_relu.compile(loss='categorical_crossentropy', optimizer=agd, metrics=['accuracy'])

model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=500, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 6s - loss: 2.6525e-05 - acc: 1.0000 - val_loss: 0.1068 - val_acc: 0.9840
Epoch 2/10
 - 6s - loss: 2.6303e-05 - acc: 1.0000 - val_loss: 0.1069 - val_acc: 0.9840
Epoch 3/10
 - 6s - loss: 2.6136e-05 - acc: 1.0000 - val_loss: 0.1069 - val_acc: 0.9840
Epoch 4/10
 - 6s - loss: 2.5968e-05 - acc: 1.0000 - val_loss: 0.1070 - val_acc: 0.9839
Epoch 5/10
 - 6s - loss: 2.5805e-05 - acc: 1.0000 - val_loss: 0.1071 - val_acc: 0.9840
Epoch 6/10
 - 6s - loss: 2.5646e-05 - acc: 1.0000 - val_loss: 0.1071 - val_acc: 0.9840
Epoch 7/10
 - 6s - loss: 2.5485e-05 - acc: 1.0000 - val_loss: 0.1072 - val_acc: 0.9839
Epoch 8/10
 - 6s - loss: 2.5340e-05 - acc: 1.0000 - val_loss: 0.1072 - val_acc: 0.9838
Epoch 9/10
 - 6s - loss: 2.5184e-05 - acc: 1.0000 - val_loss: 0.1073 - val_acc: 0.9837
Epoch 10/10
 - 6s - loss: 2.5039e-05 - acc: 1.0000 - val_loss: 0.1073 - val_acc: 0.9841
Final Baseline Error(Using Relu): 1.59%


### Batch Size ( 1000 )

In [34]:
agd = optimizers.Adagrad(lr=0.01, epsilon=0.05, decay=0.0)
model_relu.compile(loss='categorical_crossentropy', optimizer=agd, metrics=['accuracy'])

model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=1000, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 5s - loss: 2.4871e-05 - acc: 1.0000 - val_loss: 0.1073 - val_acc: 0.9840
Epoch 2/10
 - 5s - loss: 2.4797e-05 - acc: 1.0000 - val_loss: 0.1074 - val_acc: 0.9838
Epoch 3/10
 - 5s - loss: 2.4718e-05 - acc: 1.0000 - val_loss: 0.1074 - val_acc: 0.9839
Epoch 4/10
 - 5s - loss: 2.4643e-05 - acc: 1.0000 - val_loss: 0.1074 - val_acc: 0.9839
Epoch 5/10
 - 5s - loss: 2.4573e-05 - acc: 1.0000 - val_loss: 0.1074 - val_acc: 0.9839
Epoch 6/10
 - 5s - loss: 2.4503e-05 - acc: 1.0000 - val_loss: 0.1075 - val_acc: 0.9839
Epoch 7/10
 - 5s - loss: 2.4431e-05 - acc: 1.0000 - val_loss: 0.1075 - val_acc: 0.9838
Epoch 8/10
 - 5s - loss: 2.4357e-05 - acc: 1.0000 - val_loss: 0.1075 - val_acc: 0.9839
Epoch 9/10
 - 5s - loss: 2.4287e-05 - acc: 1.0000 - val_loss: 0.1075 - val_acc: 0.9839
Epoch 10/10
 - 5s - loss: 2.4222e-05 - acc: 1.0000 - val_loss: 0.1075 - val_acc: 0.9839
Final Baseline Error(Using Relu): 1.61%


### Batch Size ( 10000 )

In [35]:
agd = optimizers.Adagrad(lr=0.01, epsilon=0.05, decay=0.0)
model_relu.compile(loss='categorical_crossentropy', optimizer=agd, metrics=['accuracy'])

model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=10000, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 7s - loss: 2.4122e-05 - acc: 1.0000 - val_loss: 0.1075 - val_acc: 0.9839
Epoch 2/10
 - 5s - loss: 2.4114e-05 - acc: 1.0000 - val_loss: 0.1075 - val_acc: 0.9839
Epoch 3/10
 - 5s - loss: 2.4107e-05 - acc: 1.0000 - val_loss: 0.1076 - val_acc: 0.9839
Epoch 4/10
 - 5s - loss: 2.4100e-05 - acc: 1.0000 - val_loss: 0.1076 - val_acc: 0.9839
Epoch 5/10
 - 5s - loss: 2.4093e-05 - acc: 1.0000 - val_loss: 0.1076 - val_acc: 0.9839
Epoch 6/10
 - 5s - loss: 2.4087e-05 - acc: 1.0000 - val_loss: 0.1076 - val_acc: 0.9839
Epoch 7/10
 - 5s - loss: 2.4080e-05 - acc: 1.0000 - val_loss: 0.1076 - val_acc: 0.9839
Epoch 8/10
 - 5s - loss: 2.4075e-05 - acc: 1.0000 - val_loss: 0.1076 - val_acc: 0.9839
Epoch 9/10
 - 5s - loss: 2.4067e-05 - acc: 1.0000 - val_loss: 0.1076 - val_acc: 0.9839
Epoch 10/10
 - 5s - loss: 2.4059e-05 - acc: 1.0000 - val_loss: 0.1076 - val_acc: 0.9839
Final Baseline Error(Using Relu): 1.61%


## Learning Rate (0.05)

In [36]:
agd = optimizers.Adagrad(lr=0.05, epsilon=0.05, decay=0.0)
model_relu.compile(loss='categorical_crossentropy', optimizer=agd, metrics=['accuracy'])

model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=500, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 8s - loss: 2.4357e-05 - acc: 1.0000 - val_loss: 0.1077 - val_acc: 0.9837
Epoch 2/10
 - 7s - loss: 2.3515e-05 - acc: 1.0000 - val_loss: 0.1079 - val_acc: 0.9835
Epoch 3/10
 - 6s - loss: 2.3006e-05 - acc: 1.0000 - val_loss: 0.1082 - val_acc: 0.9839
Epoch 4/10
 - 6s - loss: 2.2283e-05 - acc: 1.0000 - val_loss: 0.1084 - val_acc: 0.9839
Epoch 5/10
 - 7s - loss: 2.1715e-05 - acc: 1.0000 - val_loss: 0.1086 - val_acc: 0.9838
Epoch 6/10
 - 6s - loss: 2.1251e-05 - acc: 1.0000 - val_loss: 0.1089 - val_acc: 0.9839
Epoch 7/10
 - 6s - loss: 2.0711e-05 - acc: 1.0000 - val_loss: 0.1091 - val_acc: 0.9839
Epoch 8/10
 - 6s - loss: 2.0171e-05 - acc: 1.0000 - val_loss: 0.1093 - val_acc: 0.9840
Epoch 9/10
 - 6s - loss: 1.9712e-05 - acc: 1.0000 - val_loss: 0.1094 - val_acc: 0.9840
Epoch 10/10
 - 6s - loss: 1.9227e-05 - acc: 1.0000 - val_loss: 0.1097 - val_acc: 0.9839
Final Baseline Error(Using Relu): 1.61%


## Learning Rate (0.1)

In [37]:
agd = optimizers.Adagrad(lr=0.1, epsilon=0.05, decay=0.0)
model_relu.compile(loss='categorical_crossentropy', optimizer=agd, metrics=['accuracy'])

model_relu.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=500, verbose=2)
scores = model_relu.evaluate(X_test, y_test, verbose=0)

print("Final Baseline Error(Using Relu): %.2f%%" % (100-scores[1]*100))

Train on 60000 samples, validate on 10000 samples
Epoch 1/10
 - 7s - loss: 1.9010e-05 - acc: 1.0000 - val_loss: 0.1099 - val_acc: 0.9837
Epoch 2/10
 - 6s - loss: 1.8095e-05 - acc: 1.0000 - val_loss: 0.1101 - val_acc: 0.9837
Epoch 3/10
 - 6s - loss: 1.7348e-05 - acc: 1.0000 - val_loss: 0.1106 - val_acc: 0.9837
Epoch 4/10
 - 6s - loss: 1.6597e-05 - acc: 1.0000 - val_loss: 0.1109 - val_acc: 0.9838
Epoch 5/10
 - 7s - loss: 1.5880e-05 - acc: 1.0000 - val_loss: 0.1112 - val_acc: 0.9840
Epoch 6/10
 - 7s - loss: 1.5418e-05 - acc: 1.0000 - val_loss: 0.1116 - val_acc: 0.9839
Epoch 7/10
 - 6s - loss: 1.4778e-05 - acc: 1.0000 - val_loss: 0.1119 - val_acc: 0.9841
Epoch 8/10
 - 6s - loss: 1.4318e-05 - acc: 1.0000 - val_loss: 0.1121 - val_acc: 0.9842
Epoch 9/10
 - 6s - loss: 1.3778e-05 - acc: 1.0000 - val_loss: 0.1123 - val_acc: 0.9838
Epoch 10/10
 - 6s - loss: 1.3398e-05 - acc: 1.0000 - val_loss: 0.1124 - val_acc: 0.9837
Final Baseline Error(Using Relu): 1.63%
