Build and train an MLP model to classify the MNIST dataset

1- An MLP network accepts 1D data, so we should flatten each 2D image and then print the dimensions of the resulting arrays.

2- Normalize the data by rescaling the pixel values to the range (0,1)

3- Convert label arrays to 1-hot representation (keras.utils.to_categorical)

4- Define Model

Hidden Layer 1: Fully Connected + ReLU Activation (e.g. 512 Neurons)
Hidden Layer 2: Fully Connected + ReLU Activation (e.g. 512 Neurons)
Output Layer: Fully Connected + Softmax Activation
Build and train a CNN+MLP deep learning model with Keras with the following specs for the MNIST dataset:

Conv2D(32, kernel_size=(3, 3), activation='relu')
Conv2D(64, kernel_size=(3, 3), activation='relu')
MaxPooling2D(pool_size=(2, 2))
Dense(128, activation='relu')
Dense(num_classes, activation='softmax')

Also build another model with BatchNormalization and Dropout. Compare the performance of these two CNN + MLP models on the test data.

In [46]:

import matplotlib.pyplot as plt # for plotting the digit image
import numpy as np
import pandas as pd
from keras.callbacks import TensorBoard
from keras.datasets import mnist
from keras.initializers import RandomNormal
from keras.layers import Dense, Input, Conv2D, MaxPooling2D, Flatten
from keras.models import Sequential
from keras.optimizers import SGD
from keras.utils import to_categorical

%matplotlib inline  

Partially following: https://github.com/JSitter/DS-3-Deep-Learning/blob/master/notebooks/MultilayerPerceptrons.ipynb

In [47]:
# Import libraries
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten

# Data
from keras.datasets import mnist

# General
import numpy as np
import pandas as pd

# Preprocessing
from sklearn.model_selection import train_test_split

In [48]:
# Load MNIST through Keras. load_data() returns a (train, test) pair, each of
# which is an (images, labels) tuple.
mnist_train, mnist_test = mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

In [49]:
# Report the array dimensions of both image splits before any reshaping.
train_shape = X_train.shape
test_shape = X_test.shape
print("Shape of X_train: {}".format(train_shape))
print("Shape of X_test: {}".format(test_shape))

Shape of X_train: (60000, 28, 28)
Shape of X_test: (10000, 28, 28)


In [50]:
# The MLP takes 1D inputs: cast to float32 and flatten each 28x28 image into a
# 784-element vector (60000 training rows, 10000 test rows).
X_train = X_train.astype('float32').reshape(60000, 784)
X_test = X_test.astype('float32').reshape(10000, 784)
# Display the flattened training shape as the cell output.
X_train.shape

(60000, 784)

### Normalize the data, in the context of pixel intensity

In [51]:
# Rescale pixel intensities by dividing by 255. The test split must receive the
# same scaling as the training split; otherwise the model would be evaluated on
# inputs 255x larger than anything it was trained on.
# NOTE: both divisions are in place (the arrays are already float32), so
# re-running this cell without re-running the flatten cell rescales them again.
X_train /= 255
X_test /= 255

In [52]:
# One-hot encode the integer digit labels into 10-column class matrices, the
# target format expected by the categorical cross-entropy loss.
y_train_one_hot = to_categorical(y_train, num_classes=10)
y_test_one_hot = to_categorical(y_test, num_classes=10)

In [53]:
# MLP classifier: two 512-unit ReLU hidden layers on the 784-element input,
# followed by a 10-way softmax output. Every layer gets its own
# RandomNormal(mean=0, stddev=0.01) kernel initializer.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,),
          kernel_initializer=RandomNormal(mean=0, stddev=0.01)),
    Dense(512, activation='relu',
          kernel_initializer=RandomNormal(mean=0, stddev=0.01)),
    # Output Layer: Fully Connected + Softmax Activation
    Dense(10, activation='softmax',
          kernel_initializer=RandomNormal(mean=0, stddev=0.01)),
])

In [54]:
# Plain stochastic gradient descent with a 0.01 learning rate.
sgd = SGD(lr=0.01)
# Categorical cross-entropy matches the one-hot targets; accuracy is reported
# alongside the loss.
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [55]:
# Print the layer-by-layer architecture with parameter counts.
model.summary()
# Persist the still-untrained model to disk so it can be reused later.
model.save(filepath='raw_model.h5')

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 512)               401920    
_________________________________________________________________
dense_7 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_8 (Dense)              (None, 10)                5130      
Total params: 669,706
Trainable params: 669,706
Non-trainable params: 0
_________________________________________________________________


In [56]:
# Reload the raw digits so the CNN section starts from the original 28x28 images
# rather than the flattened arrays used for the MLP.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add a trailing single-channel axis (the layout Conv2D is given via
# input_shape=(28, 28, 1)) and divide by 255.0 in the same step.
X_train = X_train.reshape(60000, 28, 28, 1) / 255.0
X_test = X_test.reshape(10000, 28, 28, 1) / 255.0

# One-hot encode the labels into 10 classes.
y_train_one_hot = to_categorical(y_train, num_classes=10)
y_test_one_hot = to_categorical(y_test, num_classes=10)

In [61]:
NUM_CLASSES = 10

# CNN + MLP classifier following the spec stated at the top of the notebook:
#   Conv2D(32, 3x3, relu) -> Conv2D(64, 3x3, relu) -> MaxPooling2D(2x2)
#   -> Flatten -> Dense(128, relu) -> Dense(NUM_CLASSES, softmax)
# The filter counts were previously swapped (64 then 32), which built a different
# network from the one the assignment asks for.
model_complex = Sequential()
model_complex.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model_complex.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model_complex.add(MaxPooling2D(pool_size=(2, 2)))
# Flatten the pooled feature maps into a vector before the dense layers.
model_complex.add(Flatten())
model_complex.add(Dense(128, activation='relu'))
model_complex.add(Dense(NUM_CLASSES, activation='softmax'))

In [65]:
# Compile the CNN with Adam and categorical cross-entropy (targets are one-hot),
# reporting accuracy alongside the loss.
model_complex.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# TensorBoard callback used by the training cell. TensorBoard must be imported
# before this cell runs (it comes from keras.callbacks); on a fresh kernel the
# name was previously undefined here because its import lived in a later cell.
# NOTE(review): the later MLP cell logs to the same './Nathan_Graph' directory,
# so the two runs share one log folder.
tensor_board = TensorBoard(log_dir='./Nathan_Graph')

In [None]:
# Train the CNN for 5 epochs with the TensorBoard callback attached, using the
# test split as validation data.
model_complex.fit(
    x=X_train,
    y=y_train_one_hot,
    epochs=5,
    validation_data=(X_test, y_test_one_hot),
    callbacks=[tensor_board],
)



In [None]:
# Inspect the shape of the array currently bound to X_train.
X_train.shape

In [67]:
from __future__ import print_function

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
from keras.callbacks import TensorBoard

# Training hyper-parameters for the baseline MLP.
batch_size = 128
num_classes = 10
epochs = 5

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten each image to a 784-element vector, cast to float32 and divide by 255
# in a single expression per split.
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Two 512-unit ReLU hidden layers and a softmax output; the Dropout layers are
# kept commented out, as in the original cell.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    # Dropout(0.2),
    Dense(512, activation='relu'),
    # Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

model.compile(optimizer=RMSprop(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

tensor_board = TensorBoard(log_dir='./Nathan_Graph')

# Train with the test split as validation data. The call is left as the last
# expression so the returned History object is shown as the cell output.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
    callbacks=[tensor_board],
)

60000 train samples
10000 test samples
Train on 60000 samples, validate on 10000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x13c6bcf60>

In [60]:
keras.__version__

'2.2.4'