# Objective

1. Build a CNN model for Mnist dataset 
2. Adjust the parameters for better accuracy, such as number of layers, number of nodes in each layer, optimizer, learning rate, etc

# Prepare Environment

In [1]:
%env kERAS_BACKEND = tensorflow
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

env: kERAS_BACKEND=tensorflow


# Prepare Dataset

In [2]:
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Using TensorFlow backend.


In [3]:
import keras.utils as np_utils

x_train = x_train / 255
x_test = x_test / 25
x_train = x_train.reshape(60000, 28, 28, 1)
x_test = x_test.reshape(10000, 28, 28, 1)
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

# Build CNN

In [4]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam

def evaluate_param(feature_count=[32, 64, 128], conv_size=(3, 3), padding='same', activation='relu', pool_size=(2, 2), loss='mse', optimizer=SGD(lr=0.05)):
    model = Sequential()
    model.add(Conv2D(feature_count[0], conv_size, padding=padding, input_shape=(28, 28, 1)))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=pool_size))

    model.add(Conv2D(feature_count[1], conv_size, padding=padding))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=pool_size))

    model.add(Conv2D(feature_count[2], conv_size, padding=padding))
    model.add(Activation(activation))
    model.add(MaxPooling2D(pool_size=pool_size))

    model.add(Flatten())
    model.add(Dense(200))
    model.add(Activation(activation))

    model.add(Dense(10))
    model.add(Activation('softmax'))

    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    model.summary()
    model.fit(x_train, y_train, batch_size=100, epochs=5)
    
    train_accuracy = model.evaluate(x_train, y_train)[1]
    test_accuracy = model.evaluate(x_test, y_test)[1]
    return (train_accuracy, test_accuracy)

# Tune Parameters
### Leave structure related parameters as default
    - Several options of each parameter are listed below, which lead to lots of permutations. It would take days to run all these permutations.
    - Moreover, exceptions often occur because the current layer does not have enough dimensions to process further dimension reduction.
    - To simply the problem, paramters affecting data dimensions or structures of the neural network are firstly left as default values, including feature_count, conv_size, padding and pool_size.

In [5]:
def run_tests(feature_counts, conv_sizes, paddings, activations, pool_sizes, losses, optimizers):
    accuracy_table = []
    for feature_count in feature_counts:
        for conv_size in conv_sizes:
            for padding in paddings:
                for activation in activations:
                    for pool_size in pool_sizes:
                        for loss in losses:
                            for optimizer in optimizers:
                                accuracy = -1.0
                                try:                                
                                    (train_accuracy, test_accuracy) = evaluate_param(feature_count, conv_size, padding, activation, pool_size, loss, optimizer)
                                except Exception as e:
                                    print('exception occurs!!', e)

                                accuracy_table.append({
                                    'train_accuracy': train_accuracy,
                                    'test_accuracy': test_accuracy,
                                    'feature_count': feature_count,
                                    'conv_size': conv_size,
                                    'padding': padding,
                                    'activation': activation,
                                    'pool_size': pool_size,
                                    'loss': loss,
                                    'optimizer': type(optimizer)
                                })

                                print('[', len(accuracy_table), ']', accuracy_table[-1])
    return accuracy_table

In [6]:
# structure related parameters
feature_counts = [
    #[16, 32, 64],
    #[32, 32, 32],
    [32, 64, 128],
    #[64, 64, 64],
    #[64, 128, 256],
    #[128, 128, 128],
]

conv_sizes = [
    #(2, 2),
    (3, 3),
    #(4, 4),
    #(5, 5),
    #(6, 6),
    #(7, 7),
    #(8, 8),
    #(9, 9),
    #(10, 10)
]

paddings = [
    #'valid',
    'same'
]

pool_sizes = [
    (2, 2),
    #(3, 3),
    #(4, 4),
    #(5, 5),
    #(6, 6),
    #(7, 7),
    #(8, 8),
    #(9, 9),
    #(10, 10)
]

1. Choose activation functions
    - Train models with fixed loss functions and optimizers.
    - To prevent from results biased by specific loss function or optimizer, 3 different loss functions and optimizers are choosen to create 9 tests for each activation function.
    - The choosen loose functions: mean_squared_error, hinge and categorical_crossentropy
    - The choosen optimizers: SGD, RMSprop and Adagrad, all with their default paramters

In [8]:
activations = [
    'softmax',
    'elu',
    'selu',
    'softplus',
    'softsign',
    'relu',
    'tanh',
    'sigmoid',
    'hard_sigmoid',
    'exponential',
    'linear'    
]

losses = [
    'mean_squared_error',
    'hinge',
    'categorical_crossentropy',
]

optimizers = [
    SGD(),
    RMSprop(),
    Adagrad(),
]

accuracy_table1 = run_tests(feature_counts, conv_sizes, paddings, activations, pool_sizes, losses, optimizers)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_10 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_16 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_17 (Activation)   (None, 14, 14, 64)        0         
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 7, 7, 128)         73856     
__________

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 3 ] {'train_accuracy': 0.11236666666666667, 'test_accuracy': 0.1135, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softmax', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_19 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_31 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_32 (Activat

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 6 ] {'train_accuracy': 0.11236666666666667, 'test_accuracy': 0.1135, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softmax', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_28 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_46 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_28 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_29 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_47 (Activation)   (None,

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 9 ] {'train_accuracy': 0.42065, 'test_accuracy': 0.4216, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softmax', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_37 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_61 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_37 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_62 (Activation)  

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 12 ] {'train_accuracy': 0.0993, 'test_accuracy': 0.1032, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'elu', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_46 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_76 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_46 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_47 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_77 (Activation)   (None, 14

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 15 ] {'train_accuracy': 0.11236666666666667, 'test_accuracy': 0.1135, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'elu', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_55 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_91 (Activation)   (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_55 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_56 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_92 (Activation)   (None, 14

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 18 ] {'train_accuracy': 0.9939833333333333, 'test_accuracy': 0.9732, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'elu', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_64 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_106 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_64 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_65 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_107 (Acti

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 21 ] {'train_accuracy': 0.09871666666666666, 'test_accuracy': 0.098, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'selu', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_73 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_121 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_73 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_74 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_122 (Activatio

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 24 ] {'train_accuracy': 0.10218333333333333, 'test_accuracy': 0.101, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'selu', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_82 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_136 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_82 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_83 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_137 (Activation)  (None, 14

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 27 ] {'train_accuracy': 0.9927333333333334, 'test_accuracy': 0.9505, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'selu', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_91 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
activation_151 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_91 (MaxPooling (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_92 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_152 (Act

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 30 ] {'train_accuracy': 0.11236666666666667, 'test_accuracy': 0.1135, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softplus', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_100 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_166 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_100 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_101 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_167 (Acti

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 33 ] {'train_accuracy': 0.09751666666666667, 'test_accuracy': 0.0974, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softplus', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_109 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_181 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_109 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_110 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_182 (Activation)  (Non

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 36 ] {'train_accuracy': 0.09736666666666667, 'test_accuracy': 0.0982, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softplus', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_118 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_196 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_118 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_119 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_197

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 39 ] {'train_accuracy': 0.9948, 'test_accuracy': 0.8508, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softsign', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_127 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_211 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_127 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_128 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_212 (Activation)  (Non

activation_225 (Activation)  (None, 10)                0         
Total params: 325,282
Trainable params: 325,282
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 42 ] {'train_accuracy': 0.9944, 'test_accuracy': 0.9883, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softsign', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_136 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_226 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_136 (MaxPoolin (None, 14, 14, 32)        0         
________________________________________

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 45 ] {'train_accuracy': 0.9943666666666666, 'test_accuracy': 0.876, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softsign', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_145 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_241 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_145 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_146 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_242 (

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 48 ] {'train_accuracy': 0.9964, 'test_accuracy': 0.9927, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'relu', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_154 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_256 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_154 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_155 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_257 (Activation)  (None, 1

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 51 ] {'train_accuracy': 0.7979666666666667, 'test_accuracy': 0.7906, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'relu', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_163 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_271 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_163 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_164 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_272 (Activation)  (None, 14

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 54 ] {'train_accuracy': 0.9947, 'test_accuracy': 0.9919, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'relu', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_172 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_286 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_172 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_173 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_287 (Activation)  (N

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 57 ] {'train_accuracy': 0.8969666666666667, 'test_accuracy': 0.701, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'tanh', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_181 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_301 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_181 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_182 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_302 (Activation

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 60 ] {'train_accuracy': 0.9953, 'test_accuracy': 0.9611, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'tanh', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_190 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_316 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_190 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_191 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_317 (Activation)  (None, 14, 14, 64)   

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 63 ] {'train_accuracy': 0.9956166666666667, 'test_accuracy': 0.7043, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'tanh', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_199 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_331 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_199 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_200 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_332 (Act

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 66 ] {'train_accuracy': 0.11236666666666667, 'test_accuracy': 0.1135, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'sigmoid', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_208 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_346 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_208 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_209 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_347 (Activ

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 69 ] {'train_accuracy': 0.11236666666666667, 'test_accuracy': 0.1135, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'sigmoid', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_217 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_361 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_217 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_218 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_362 (Activation)  (None

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 72 ] {'train_accuracy': 0.9137666666666666, 'test_accuracy': 0.7992, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'sigmoid', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_226 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_376 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_226 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_227 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_377 (

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 75 ] {'train_accuracy': 0.11236666666666667, 'test_accuracy': 0.1135, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'hard_sigmoid', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_235 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_391 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_235 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_236 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_392 (

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 78 ] {'train_accuracy': 0.09915, 'test_accuracy': 0.1009, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'hard_sigmoid', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_244 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_406 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_244 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_245 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_407 (Activation)  (None, 14, 1

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 81 ] {'train_accuracy': 0.10441666666666667, 'test_accuracy': 0.1028, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'hard_sigmoid', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_253 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_421 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_253 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_254 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 84 ] {'train_accuracy': 0.10381666666666667, 'test_accuracy': 0.089, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'exponential', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_262 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_436 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_262 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_263 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_437 (Ac

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 87 ] {'train_accuracy': 0.09913333333333334, 'test_accuracy': 0.098, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'exponential', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_271 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_451 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_271 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_272 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_452 (Activation)  (N

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 90 ] {'train_accuracy': 0.09871666666666666, 'test_accuracy': 0.098, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'exponential', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_280 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_466 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_280 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_281 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_4

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 93 ] {'train_accuracy': 0.09863333333333334, 'test_accuracy': 0.0958, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'linear', 'pool_size': (2, 2), 'loss': 'mean_squared_error', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_289 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_481 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_289 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_290 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_482 (Activa

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 96 ] {'train_accuracy': 0.10218333333333333, 'test_accuracy': 0.101, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'linear', 'pool_size': (2, 2), 'loss': 'hinge', 'optimizer': <class 'keras.optimizers.Adagrad'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_298 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_496 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_298 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_299 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_497 (Activation)  (None, 

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 99 ] {'train_accuracy': 0.9934833333333334, 'test_accuracy': 0.9483, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'linear', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adagrad'>}


2. Choose loss function
    - Train models with fixed activation functions and optimizers
    - To prevent from results biased by specific activation function or optimizer, 3 different activation functions and optimizers are choosen to create 9 tests for each loss function.
    - The choosen activation functions: softplus, relu, and hard_sigmoid
    - The choosen optimizers: Adadelta, Adam and Adamax, all with their default paramters

In [None]:
activations = [
    'softplus',
    'relu',
    'hard_sigmoid',
]

losses = [
    'mean_squared_error',
    'mean_absolute_error',
    'mean_absolute_percentage_error',
    'squared_hinge',
    'hinge',
    'categorical_hinge',
    'logcosh',
    'categorical_crossentropy',
    'sparse_categorical_crossentropy',
    'binary_crossentropy',
    'kullback_leibler_divergence',
    'poisson',
    'cosine_proximity'
]

optimizers = [
    Adadelta(),
    Adam(),
    Adamax(),
]

accuracy_table2 = run_tests(feature_counts, conv_sizes, paddings, activations, pool_sizes, losses, optimizers)

3. Choose optimizer
    - Train models with fixed activation functions and loss functions
    - To prevent from results biased by specific activation function or loss function, 3 different activation functions and loss functions are choosen to create 9 tests for each optimizer.
    - The choosen activation functions: elu, tanh, and exponential
    - The choosen loss functions: squared_hinge, kullback_leibler_divergence and poisson

In [None]:
activations = [
    'elu',
    'tanh',
    'exponential',
]

losses = [
    'squared_hinge',
    'kullback_leibler_divergence',
    'poisson',
]

optimizers = [
    SGD(),
    RMSprop(),
    Adagrad(),
    Adadelta(),
    Adam(),
    Adamax(),
    Nadam(),
]

accuracy_table3 = run_tests(feature_counts, conv_sizes, paddings, activations, pool_sizes, losses, optimizers)

4. Run with best paramters from the previous tests
    - According to the three tests above, even though not all of the permutations are tested, we can still find out some relatively robust and stable paramters to train our model.
    - The best and second best options for each parameter are listed in below
        - Activation functions: relu, softsign
        - Loss functions: categorical_crossentropy, poisson
        - Optimizers: RMSprop, Adamax
    - The best testing accuracy is 0.9928 with relu, categorical_crossentropy and Adamax.

In [10]:
activations = [
    'relu',
    'softsign',
]

losses = [
    'categorical_crossentropy',
    'poisson',
]

optimizers = [
    RMSprop(),
    Adamax(),
]

accuracy_table4 = run_tests(feature_counts, conv_sizes, paddings, activations, pool_sizes, losses, optimizers)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_307 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_511 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_307 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_308 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_512 (Activation)  (None, 14, 14, 64)        0         
_________________________________________________________________
max_pooling2d_308 (MaxPoolin (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_309 (Conv2D)          (None, 7, 7, 128)         73856     
__________

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 3 ] {'train_accuracy': 0.9973, 'test_accuracy': 0.9904, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'relu', 'pool_size': (2, 2), 'loss': 'poisson', 'optimizer': <class 'keras.optimizers.RMSprop'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_316 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_526 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_316 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_317 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_527 (Activation)  (None, 14, 14, 64)  

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[ 6 ] {'train_accuracy': 0.9974666666666666, 'test_accuracy': 0.9867, 'feature_count': [32, 64, 128], 'conv_size': (3, 3), 'padding': 'same', 'activation': 'softsign', 'pool_size': (2, 2), 'loss': 'categorical_crossentropy', 'optimizer': <class 'keras.optimizers.Adamax'>}
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_325 (Conv2D)          (None, 28, 28, 32)        320       
_________________________________________________________________
activation_541 (Activation)  (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_325 (MaxPoolin (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_326 (Conv2D)          (None, 14, 14, 64)        18496     
_________________________________________________________________
activation_542 (A