    ### Import required libraries along with the images dataset
 


In [5]:
import numpy as np
import pandas as pd
import matplotlib as plt
from  keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.utils import np_utils, to_categorical
from keras.callbacks import Callback
from sklearn.model_selection import train_test_split


In [6]:
# Load the MNIST images and labels that ship with Keras.
# NOTE: the second pair (named X_val / y_val here) is only used by the final
# model.evaluate() calls, so it plays the role of the held-out test set; the
# validation data used during training is carved out of X_train by
# fit(validation_split=...).

(X_train, y_train), (X_val, y_val) = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [15]:
# Number of samples in the training images, the training labels and the held-out images
tuple(map(len, (X_train, y_train, X_val)))

(60000, 60000, 10000)

In [57]:
# Flatten every 28x28 image into a single 784-element vector and cast to float32.
# The sample count is read from the array itself instead of being hard-coded
# (60000 / 10000), so the cell keeps working on a subset of the data; the explicit
# 784 still raises if an image does not have exactly 28*28 pixels.
X_train = X_train.reshape(len(X_train), 784).astype('float32')
X_val = X_val.reshape(len(X_val), 784).astype('float32')

In [64]:
# Double-check the flattened training data: length and element type of one sample.
# Both values are returned as a single tuple because a notebook cell only displays
# the value of its last expression; as two separate statements the len() result
# would be computed and silently discarded.
len(X_train[1]), type(X_train[1][0])

784

In [66]:
# Normalize the inputs so every pixel feature lives on the same scale.
# Dividing by 255 is min-max scaling ((x - min) / (max - min)) under the assumption
# that raw intensities span 0..255, which maps the values into [0, 1].
# Standardizing with (x - mean) / std would be an alternative.
# NOTE: re-running this cell divides the already-scaled data by 255 again.
X_train, X_val = (pixels / 255 for pixels in (X_train, X_val))


In [75]:
# One-hot encode the labels: each label becomes a vector of length nclass
nclass = 10
y_train, y_val = to_categorical(y_train, nclass), to_categorical(y_val, nclass)

In [78]:
y_train[3] # one-hot vector with the 1 at index 1, i.e. this sample is labelled as digit 1

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [79]:
# Hyper-parameters used to build and train the models in the cells below
no_epochs =20 # number of epochs passed to fit()
batch_size = 256 # batch_size passed to fit()
verbose = 1 # verbosity flag passed to fit() and evaluate()
nclass = 10 # number of output classes (width of the final Dense layer)
optimizer = SGD() # SGD optimizer with its default settings, used to minimize the loss
no_hidden = 128 # number of units in each hidden Dense layer
validation_split = 0.2 # validation_split passed to fit()

In [82]:
# Baseline network: the 784 input pixels feed the output layer directly,
# with no hidden layers in between.
import tensorflow as tf
np.random.seed(1272)
model = Sequential([  # Sequential API (the alternative is the functional API)
    Dense(nclass, input_shape=(784,)),
    # softmax (a generalization of the sigmoid) turns the outputs into
    # class probabilities between 0 and 1
    Activation(tf.nn.softmax),
])
model.summary()




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 10)                7850      
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________


In [83]:
# Compile the baseline model (cross-entropy loss, accuracy metric) and train it
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=no_epochs,
    verbose=verbose,
    validation_split=validation_split,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [85]:
# Evaluate the baseline on the held-out data (never seen during training)
# and report its loss and accuracy.

score = model.evaluate(X_val, y_val, verbose=verbose)
print(f"test score is: {score[0]}")
print(f"test accuracy : {score[1]}")


test score is: 0.4000422086954117
test accuracy : 0.8952


In [86]:
# Show the raw evaluate() result as a list: [loss, accuracy]
print(score)

[0.4000422086954117, 0.8952]


In [103]:
# Model with two hidden layers of `no_hidden` units each, using the sigmoid
# activation in the hidden layers. The output layer still uses softmax so the
# network produces class probabilities.
model_sigmoid = Sequential()
# First hidden layer: the only layer that needs input_shape (784 pixels per sample)
model_sigmoid.add(Dense(no_hidden, input_shape=(784,)))
model_sigmoid.add(Activation(tf.nn.sigmoid))
# Second hidden layer: its input width is taken from the previous layer, so the
# copy-pasted input_shape=(784,) was dropped (it was ignored anyway: the summary
# reports 16,512 parameters, i.e. a 128-wide input)
model_sigmoid.add(Dense(no_hidden))
model_sigmoid.add(Activation(tf.nn.sigmoid))
# Output layer
model_sigmoid.add(Dense(nclass))
model_sigmoid.add(Activation(tf.nn.softmax))
model_sigmoid.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 128)               100480    
_________________________________________________________________
activation_9 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 128)               16512     
_________________________________________________________________
activation_10 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 10)                1290      
_________________________________________________________________
activation_11 (Activation)   (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [100]:

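# model_sigmoid.summary()  # NOTE: stale cell - the summary below (four Dense layers, dense_7..dense_10) appears to be left over from an earlier experiment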

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_5 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_6 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 100)               12900     
_________________________________________________________________
activation_7 (Activation)    (None, 100)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 10)                1010      
__________

In [104]:
# Compile and train the model that uses sigmoid activations in its hidden layers.
# A fresh SGD() (same default settings) is created here instead of reusing the
# shared `optimizer` instance, so this model never shares optimizer state with a
# model that was compiled earlier.

model_sigmoid.compile(loss='categorical_crossentropy', optimizer=SGD(), metrics=['accuracy'])
history_sigmoid = model_sigmoid.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=no_epochs,
    verbose=verbose,
    validation_split=validation_split,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [105]:
# With sigmoid hidden layers the test accuracy falls roughly 20 percentage points
# below the baseline (0.8952 -> 0.6972).

score_sigmoid = model_sigmoid.evaluate(X_val, y_val, verbose=verbose)
print(f"test score is: {score_sigmoid[0]}")
print(f"test accuracy : {score_sigmoid[1]}")

test score is: 1.3234374908447266
test accuracy : 0.6972


In [106]:
# Same architecture as the sigmoid model, but with ReLU activations in the
# hidden layers, to compare against the baseline.
model_relu = Sequential()
# First hidden layer: the only layer that needs input_shape (784 pixels per sample)
model_relu.add(Dense(no_hidden, input_shape=(784,)))
model_relu.add(Activation(tf.nn.relu))
# Second hidden layer: its input width is taken from the previous layer, so the
# copy-pasted input_shape=(784,) was dropped (it was ignored anyway: the summary
# reports 16,512 parameters, i.e. a 128-wide input)
model_relu.add(Dense(no_hidden))
model_relu.add(Activation(tf.nn.relu))
# Output layer
model_relu.add(Dense(nclass))
model_relu.add(Activation(tf.nn.softmax))
model_relu.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 128)               100480    
_________________________________________________________________
activation_12 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 128)               16512     
_________________________________________________________________
activation_13 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 10)                1290      
_________________________________________________________________
activation_14 (Activation)   (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________________________________________________________________


In [107]:
# Compile and train the model that uses ReLU activations in its hidden layers.
# A fresh SGD() (same default settings) is created here instead of reusing the
# shared `optimizer` instance, so this model never shares optimizer state with a
# model that was compiled earlier.

model_relu.compile(loss='categorical_crossentropy', optimizer=SGD(), metrics=['accuracy'])
history_relu = model_relu.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=no_epochs,
    verbose=verbose,
    validation_split=validation_split,
)

Train on 48000 samples, validate on 12000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [108]:
# With ReLU in the two hidden layers (128 units each) the test accuracy rises
# to about 92.8%, above the baseline.
# Adding dropout layers might increase accuracy further.
score_relu = model_relu.evaluate(X_val, y_val, verbose=verbose)
print(f"test score is: {score_relu[0]}")
print(f"test accuracy : {score_relu[1]}")

test score is: 0.2507254888683558
test accuracy : 0.9279


In [109]:
# Leftover check of the kernel's working directory; note that the saved output
# exposes an absolute local user path.
%pwd

'C:\\Users\\sunayaka'