# Multi Layer Perceptron Model for MNIST

Deep Learning models can take quite a bit of time to run, particularly if GPU isn't used. In the interest of time, we will sample 1000 observations that are 6 and 1000 observations that aren't 6. We will build a model using that and see how it performs on the test dataset

In [2]:
#Import the required libraries
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.utils import np_utils

In [4]:
path_to_data = ""

In [8]:
#Load the training and testing data
(X_train, y_train), (X_test, y_test) = mnist.load_data(path_to_data)
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255

In [9]:
#Seed for reproducibilty
np.random.seed(1338)

#test data

X_test = X_test.copy()
Y = y_test.copy()
#Converting the output to binary classification(Six=1,Not Six=0)
Y_test = Y == 6
Y_test = Y_test.astype(int)

#Selecting the 5918 examples where the output is 6
X_six = X_train[y_train == 6].copy()
Y_six = y_train[y_train == 6].copy()
#Selecting the examples where the output is not 6
X_not_six = X_train[y_train != 6].copy()
Y_not_six = y_train[y_train != 6].copy()

#Selecting 6000 random examples from the data that contains only the data where the output is not 6
random_rows = np.random.randint(0,X_six.shape[0],6000)
X_not_six = X_not_six[random_rows]
Y_not_six = Y_not_six[random_rows]

In [10]:
#Appending the data with output as 6 and data with output as not six
X_train = np.append(X_six,X_not_six)
#Reshaping the appended data to appropraite form
X_train = X_train.reshape(X_six.shape[0] + X_not_six.shape[0], 784)
#Appending the labels and converting the labels to binary classification(Six=1,Not Six=0)
Y_labels = np.append(Y_six,Y_not_six)
Y_train = Y_labels == 6 
Y_train = Y_train.astype(int)

In [12]:
print(X_train.shape, Y_labels.shape, Y_test.shape, Y_test.shape)

(11918, 784) (11918,) (10000,) (10000,)


In [13]:
#Converting the classes to its binary categorical form
nb_classes = 2
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

In [18]:
#Initializing the values for the multi layer perceptron
batch_size = 128
nb_epoch = 2
#Activation Functions
activations = "sigmoid"

# A simple MLP

In [27]:
from keras.optimizers import SGD

In [50]:
np.random.seed(1338)

model = Sequential()

model.add(Dense(512, input_shape=(784,)))
model.add(Activation("sigmoid"))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))
    
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Train on 11918 samples, validate on 10000 samples
Epoch 1/1
Test score: 0.422354313231
Test accuracy: 0.9664


### Exercise:
Increase epochs to 10 and observe the modeling performance

### Let's add one more hidden layers

In [47]:
np.random.seed(1338)

model = Sequential()

model.add(Dense(512, input_shape=(784,)))
model.add(Activation("sigmoid"))

model.add(Dense(512))
model.add(Activation("sigmoid"))

model.add(Dense(nb_classes))
model.add(Activation('softmax'))
    
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])


Train on 11918 samples, validate on 10000 samples
Epoch 1/1
Test score: 0.948558405685
Test accuracy: 0.0958


# Regularization: Dropout

In [48]:
np.random.seed(1338)
model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation("sigmoid"))
model.add(Dropout(0.2))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
    
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Train on 11918 samples, validate on 10000 samples
Epoch 1/1
Test score: 0.0622818493906
Test accuracy: 0.9771


### Exercise
Run the above for 15 epochs

### Adding more dropout layers



In [49]:
np.random.seed(1338)
model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation("sigmoid"))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation("sigmoid"))
model.add(Dropout(0.2))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
    
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

Train on 11918 samples, validate on 10000 samples
Epoch 1/1
Test score: 0.101808118149
Test accuracy: 0.9631


### L2 Regularizer

In [None]:
from keras.regularizers import l2

In [None]:
np.random.seed(1338)
model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation("sigmoid"))

model.add(Dense(512, W_regularizer=l2(0.01)))

model.add(Activation("sigmoid"))


model.add(Dropout(0.2))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
    
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# Selecting activation function

In [29]:
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.optimizers import SGD

In [33]:
#Initializing the values for the multi layer perceptron
batch_size = 128
nb_epoch = 1
#Activation Functions
activations = ['relu','tanh','sigmoid']

sgd = SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False)

In [45]:
#Function for constructing the multi layer perceptron
def build_model(activation):
    
    np.random.seed(1338)
    model = Sequential()
    model.add(Dense(512, input_shape=(784,)))
    model.add(Activation(activation))
    model.add(Dropout(0.2))
    model.add(Dense(512))
    model.add(Activation(activation))
    model.add(Dropout(0.2))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    
    model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,verbose=1,
              validation_data=(X_test, Y_test))
          

    score = model.evaluate(X_test, Y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

In [35]:
for i in activations:
    print('ACTIVATION',i,'\n')
    %timeit -n1 -r1 build_model(i)
    print('\n')

ACTIVATION relu 

Train on 11918 samples, validate on 10000 samples
Epoch 1/1
Test score: 0.25144048841
Test accuracy: 0.9495
1 loop, best of 1: 5.1 s per loop


ACTIVATION tanh 

Train on 11918 samples, validate on 10000 samples
Epoch 1/1
Test score: 0.166866254115
Test accuracy: 0.9529
1 loop, best of 1: 3.27 s per loop


ACTIVATION sigmoid 

Train on 11918 samples, validate on 10000 samples
Epoch 1/1
Test score: 0.716095627594
Test accuracy: 0.1525
1 loop, best of 1: 3.36 s per loop




### Exercises:
1. Change the above to 15 epochs and observe the test accuracy

2. Modify SGD parameters and observe the test accuracy

3. Change the optimizer and observe test accuracy. For learning purpose, let's use 5 epochs

4. The weights initialization method has a significant impact on how the network learns. We will not go much into this in this workshop - but look into how and where the initialization is done.


### Available Loss Functions in `keras`
http://keras.io/objectives/

### Available Optimizers in `keras`
http://keras.io/optimizers/