# Convolution Neural Network for MNIST

Deep Learning models can take quite a bit of time to run, particularly if GPU isn't used. 
In the interest of time, we will sample fewer observations that are 7 and fewer observations that aren't 7. 
We will build a model using that and see how it performs on the test dataset

In [63]:
#Import the required libraries
import numpy as np
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.optimizers import SGD
np.random.seed(1338)

In [64]:
path_to_data = "mnist.pkl.gz"

In [65]:
#Load the training and testing data
(X_train, y_train), (X_test, y_test) = mnist.load_data(path_to_data)


In [66]:
img_rows, img_cols = 28, 28

X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols,1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols,1)

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

X_train /= 255
X_test /= 255

In [67]:
#Seed for reproducibilty
np.random.seed(1338)

#test data

X_test = X_test.copy()
Y = y_test.copy()
#Converting the output to binary classification(Seven=1,Not Seven=0)
Y_test = Y == 7
Y_test = Y_test.astype(int)

#Selecting the examples where the output is 7
X_seven = X_train[y_train == 7].copy()
Y_seven = y_train[y_train == 7].copy()
#Selecting the examples where the output is not 7
X_not_seven = X_train[y_train != 7].copy()
Y_not_seven = y_train[y_train != 7].copy()

#Selecting 6000 random examples from the data that contains only the data where the output is not 7
random_rows = np.random.randint(0,X_seven.shape[0],6000)
X_not_seven = X_not_seven[random_rows]
Y_not_seven = Y_not_seven[random_rows]

In [68]:
#Appending the data with output as 7 and data with output as not 7
X_train = np.append(X_seven,X_not_seven)
#Reshaping the appended data to appropraite form
X_train = X_train.reshape(X_seven.shape[0] + X_not_seven.shape[0], img_rows, img_cols,1)
#Appending the labels and converting the labels to binary classification(seven=1,Not seven=0)
Y_labels = np.append(Y_seven,Y_not_seven)
Y_train = Y_labels == 7 
Y_train = Y_train.astype(int)

In [69]:
print(X_train.shape, Y_labels.shape, Y_test.shape, Y_test.shape)

(12265, 28, 28, 1) (12265,) (10000,) (10000,)


In [70]:
#Converting the classes to its binary categorical form
nb_classes = 2
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

# A simple CNN

In [71]:
#Initializing the values for the convolution neural network
nb_epoch = 2
batch_size = 128
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
nb_pool = 2
# convolution kernel size
nb_conv = 3

sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

In [72]:
input_shape = (img_rows, img_cols, 1)
model = Sequential()

model.add(Conv2D(32, kernel_size=(5, 5), strides=(1, 1),
                        activation='relu',
                        input_shape=input_shape))
model.add(Flatten())
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
    
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.SGD(lr=0.01),
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

#Evaluating the model on the test data    
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])



Train on 12265 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Test score: 0.1588409436941147
Test accuracy: 0.9469


### Adding  dense layers

In [73]:
model = Sequential()

model.add(Conv2D(32, kernel_size=(5, 5), strides=(1, 1),
                        activation='relu',
                        input_shape=input_shape))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))
    
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

#Evaluating the model on the test data    
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])



Train on 12265 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Test score: 0.12240466742515564
Test accuracy: 0.9591


### Adding a dropout mechanism

In [74]:
model = Sequential()

model.add(Conv2D(32, kernel_size=(5, 5), strides=(1, 1),
                        activation='relu',
                        input_shape=input_shape))
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
    
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

#Evaluating the model on the test data    
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])



Train on 12265 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Test score: 0.11149236168563366
Test accuracy: 0.9646


# Exercise

Convert the above in to function. Move the hyperparameters as parameters to the function and see what happens. 


In [61]:
#Function for constructing the convolution neural network
def build_model():
    .
    .
    .
    .
    .
    .
    .
    .
    
    #Evaluating the model on the test data    
    score = model.evaluate(X_test, Y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

In [62]:
#Timing how long it takes to build the model and test it.
%timeit -n1 -r1 build_model()

Train on 11918 samples, validate on 10000 samples
Epoch 1/2
Epoch 2/2
Test score: 0.135310443386
Test accuracy: 0.9506
1 loop, best of 1: 1min 3s per loop
