#  Convolutional Neural Networks using Keras-Tensorflow

In [None]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import numpy as np

In [4]:
'''Trains a simple convnet on the MNIST dataset.
Gets to 99.25% test accuracy after 12 epochs
(there is still a lot of margin for parameter tuning).
16 seconds per epoch on a GRID K520 GPU.
'''
num_classes = 10

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [2]:
batch_size = 128
epochs = 6
model = Sequential([
    Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

hist = model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
y_predict = model.predict(x_test, verbose=0)
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


Train on 60000 samples, validate on 10000 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Test loss: 0.027688994653394913
Test accuracy: 0.9902999997138977


In [5]:
y_predict_amax = np.array([np.argmax(y) for y in y_predict])
y_test_amax = np.array([np.argmax(y) for y in y_test])

matrix = confusion_matrix(y_test_amax,y_predict_amax)
print(matrix)

[[ 977    0    1    0    0    0    0    1    1    0]
 [   0 1132    0    2    0    0    1    0    0    0]
 [   0    2 1023    0    0    0    0    5    2    0]
 [   0    0    2 1003    0    2    0    1    2    0]
 [   0    0    1    0  971    0    4    0    2    4]
 [   1    0    0    4    0  886    1    0    0    0]
 [   7    2    0    1    1    3  942    0    2    0]
 [   1    2    4    2    0    0    0 1015    1    3]
 [   4    0    1    0    0    0    0    1  966    2]
 [   2    1    0    2    3    5    0    4    4  988]]


In [6]:
batch_size = 128
epochs = 6
model_bn = Sequential([
    Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])
model_bn.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

hist_bn = model_bn.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
bn_predict = model_bn.predict(x_test, verbose=0)
score_bn = model_bn.evaluate(x_test, y_test, verbose=0)
print('Batch Norm test accuracy:', score[1])

Train on 60000 samples, validate on 10000 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Batch Norm test accuracy: 0.9902999997138977


In [27]:
batch_size = 128
epochs = 6

datagen = ImageDataGenerator(rescale=1.0/255.0)
# prepare an iterators to scale images
train_iterator = datagen.flow(x_train, y_train, batch_size=batch_size)
test_iterator = datagen.flow(x_test, y_test, batch_size=batch_size)
print('Batches train=%d, test=%d' % (len(train_iterator), len(test_iterator)))
# confirm the scaling works
batchX, batchy = train_iterator.next()

model_in = Sequential([
    Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])
# compile model
model_in.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# fit model with generator
model_in.fit_generator(train_iterator, steps_per_epoch=len(train_iterator), epochs=epochs)
# evaluate model
_, acc = model_in.evaluate_generator(test_iterator, steps=len(test_iterator), verbose=0)
in_predict = model_in.predict(x_test, verbose=0)
print("Image Norm Test Accuracy: {:.3f}".format(acc))

Batches train=469, test=79
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Image Norm Test Accuracy: 0.954


In [28]:
batch_size = 128
epochs = 6

datagen = ImageDataGenerator(rescale=1.0/255.0)
# prepare an iterators to scale images
train_iterator = datagen.flow(x_train, y_train, batch_size=batch_size)
test_iterator = datagen.flow(x_test, y_test, batch_size=batch_size)
print('Batches train=%d, test=%d' % (len(train_iterator), len(test_iterator)))
# confirm the scaling works
batchX, batchy = train_iterator.next()

model_ibn = Sequential([
    Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape),
    BatchNormalization(),
    Conv2D(64, (3, 3), activation='relu'),
    BatchNormalization(),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])
model_ibn.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

model_ibn.fit_generator(train_iterator, steps_per_epoch=len(train_iterator), epochs=epochs)
# evaluate model
_, acc = model_ibn.evaluate_generator(test_iterator, steps=len(test_iterator), verbose=0)
ibn_predict = model_ibn.predict(x_test, verbose=0)
print("Image + Batch Norm Test Accuracy: {:.3f}".format(acc))

Batches train=469, test=79
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Image + Batch Norm Test Accuracy: 0.889


In [13]:
batch_size = 128
epochs = 6

model_plus = Sequential([
    Conv2D(32, kernel_size=(3, 3),
                 activation='relu',
                 input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(32, (3, 3), activation='relu'),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])
model_plus.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

hist = model_plus.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))
plus_predict = model_plus.predict(x_test, verbose=0)
score = model_plus.evaluate(x_test, y_test, verbose=0)
print('Additional Conv2D Test accuracy: ', score[1])

Train on 60000 samples, validate on 10000 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Additional Conv2D Test accuracy:  0.991100013256073


In [25]:
batch_size_list = [64,128]
epochs = 6
optimizers_list=["adam","adagrad"]
learning_rates = [.01, .001]
best_predict = None
best_acc = 0
for batch_size in batch_size_list:
    for optimizer_str in optimizers_list:
        for learning_rate in learning_rates:
            model_custom = Sequential([
                Conv2D(32, kernel_size=(3, 3),
                             activation='relu',
                             input_shape=input_shape),
                Conv2D(64, (3, 3), activation='relu'),
                Dropout(0.25),
                Flatten(),
                Dense(128, activation='relu'),
                Dropout(0.5),
                Dense(num_classes, activation='softmax')
            ])
            optimizer = None
            if optimizer_str == "adam":
                optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
            else:
                optimizer = keras.optimizers.Adagrad(learning_rate=learning_rate)

            model_custom.compile(optimizer=optimizer,
                          loss='mse',
                          metrics=["accuracy"])

            hist = model_custom.fit(x_train, y_train, epochs=epochs, batch_size = batch_size)
            score = model_custom.evaluate(x_test, y_test, verbose=0)
            if score[1] > best_acc:
                best_acc = score[1]
                best_predict = model_custom.predict(x_test, verbose=0)
            print("Epochs: {}, Optimizer: {}, Learning Rate: {}, Batch Size: {}".format(str(epochs), optimizer_str, str(learning_rate), str(batch_size)))
            print('Cutomized Model Test Accuracy: ', score[1])
print("Best Accuracy : {}".format(best_acc))

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epochs: 6, Optimizer: adam, Learning Rate: 0.01, Batch Size: 64
Cutomized Model Test Accuracy:  0.10320000350475311
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epochs: 6, Optimizer: adam, Learning Rate: 0.001, Batch Size: 64
Cutomized Model Test Accuracy:  0.9894000291824341
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epochs: 6, Optimizer: adagrad, Learning Rate: 0.01, Batch Size: 64
Cutomized Model Test Accuracy:  0.9887999892234802
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epochs: 6, Optimizer: adagrad, Learning Rate: 0.001, Batch Size: 64
Cutomized Model Test Accuracy:  0.9768000245094299
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epochs: 6, Optimizer: adam, Learning Rate: 0.01, Batch Size: 128
Cutomized Model Test Accuracy:  0.9139000177383423
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6
Epochs: 6, Optimizer: adam, Learning Rate: 0.001, Batch

| Optimizer  | Learning Rate  | Batch Size  | Test Accuracy  |
|---|---|---|---|
| Adam  | .01  | 64  | .1032  |
|  Adam  | .001  | 64  | .9894  |
|  Adagrad  | .01  | 64  | .9888  |
|  Adagrad  | .001  | 64   | .9768  |
| Adam  | .01  | 128  | .9139  |
|  Adam  | .001  | 128  | .9892  |
|  Adagrad  | .01  | 128  | .9880  |
|  Adagrad  | .001  | 128   |  .9736 |


In [26]:
y_predict_amax = np.array([np.argmax(y) for y in best_predict])
y_test_amax = np.array([np.argmax(y) for y in y_test])
matrix = confusion_matrix(y_test_amax,y_predict_amax)
print(matrix)

[[ 977    0    0    0    0    0    1    1    1    0]
 [   0 1130    2    0    0    1    2    0    0    0]
 [   1    3 1022    0    1    0    0    3    2    0]
 [   0    0    6 1000    0    1    0    2    1    0]
 [   1    0    1    0  973    0    1    0    2    4]
 [   2    0    0    3    0  878    6    0    0    3]
 [   3    2    0    1    1    2  948    0    1    0]
 [   1    0    9    0    0    0    0 1012    1    5]
 [   4    0    1    0    0    1    0    1  964    3]
 [   3    0    0    0    4    3    0    2    7  990]]


### Discussion

According to the above table with the customized hyperparameters, the best model was the one with the Adam optimizer with a learning rate of .001 with a batch size of 64. The worst performing one, by a large margin, was the Adam with a .01 learning rate and a batch size of 64.

Based on the results of everything else, batch normalization or image normalization by themselves (or neither) boasted high testing accuracy.On the other hand, strangely, combining batch and image normalization diminished the overall performance of the model.