In [1]:
%matplotlib inline

# Assignment 01 - Exploring CNNs for MNIST

In this assignment, we want you to modify the CNN architecture that we used in the last MNIST exercise, adding new layers and altering their hyperparameters. We have already loaded and preprocessed the data for you, so you can focus on the architecture and training of your network.

----------------------
*** You do not need to alter the blocks `Imports`, `Load training data` and `Preprocessing` ***
## Imports

In [2]:
import numpy as np
from random import shuffle, seed
seed(42)  #keep this seed, in order to compare the results with your classmates

import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (15,15) # Make the figures a bit bigger

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import optimizers

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import ParameterSampler

Using TensorFlow backend.


## Load training data

In [3]:
nb_classes = 10

# the data, shuffled and split between trainVal and test sets
(trainVal_data, trainVal_label), (X_test, y_test) = mnist.load_data()

# We want now to split the trainVal data into train and validation sets
nData = trainVal_data.shape[0]  #find the size of trainVal
nTrain = int(nData * 0.8)  #80% to train, 20% to val

randomIdx = list(range(nData))   #randomly select indexes
shuffle(randomIdx)
trainIdx = randomIdx[:nTrain] 
valIdx = randomIdx[nTrain:]

# Split the data
X_val, y_val = trainVal_data[valIdx], trainVal_label[valIdx]
X_train, y_train = trainVal_data[trainIdx], trainVal_label[trainIdx]

## Obtain class weights and samples per class

In [4]:
unique, counts = np.unique(y_train, return_counts=True)
print("Train ---> ", dict(zip(unique, counts)), "\n")


from sklearn.utils.class_weight import compute_class_weight
class_weights = compute_class_weight('balanced', np.unique(y_train), y_train)
train_class_weights = dict(enumerate(class_weights))

unique, counts = np.unique(y_test, return_counts=True)
test_sample_per_class = counts

Train --->  {0: 4724, 1: 5393, 2: 4723, 3: 4881, 4: 4704, 5: 4313, 6: 4769, 7: 5001, 8: 4730, 9: 4762} 



## Preprocessing

In [5]:
img_rows, img_cols = 28, 28

#The first dimension refers to the number of images
X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_val = X_val.reshape(X_val.shape[0], img_rows, img_cols, 1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)

X_train = X_train.astype('float32')
X_val = X_val.astype('float32')
X_test = X_test.astype('float32')

X_train /= 255
X_val /= 255
X_test /= 255

Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_val = np_utils.to_categorical(y_val, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

------------------------------------------
------------------------------------------
------------------------------------------
------------------------------------------

# Task 1 [0.25 pts] - Add a fully-connected layer
Let's investigate if the network gets better as we add more layers to it. We want you to add another fully-connected layer to the network from last exercise. Your network will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- An operation to flatten the feature maps into an array of size 10x12x12 = 1440
- Dropout operation with probability 0.25, applied to flattened array
- **Fully connected layer with 100 units/neurons and ReLU activation;**
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task1_cnn.png" />

In [6]:
model = Sequential()
model.add(Conv2D(10, kernel_size=(5,5), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.25))
model.add(Dense(100, activation='relu'))
model.add(Dense(10, activation='softmax'))

## Compile and train your model

In [7]:
sgd = optimizers.SGD(lr=0.01) #lr = learning rate
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(X_train, Y_train,
          batch_size=128, epochs=1, verbose=1,
          class_weight = train_class_weights,
          validation_data=(X_val, Y_val))

Train on 48000 samples, validate on 12000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f8faf931b38>

## Evaluate on the test data

In [8]:
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


predicted_classes = model.predict_classes(X_test)
Y_test_classes = np.argmax(Y_test, axis=-1)

accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)
    
    correctPred = np.sum(predicted_classes[idx] == Y_test_classes[idx])
    accPerClass.append( correctPred / float(test_sample_per_class[classIdx]))
    
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 0.45398229122161865
Test accuracy (NOT NORMALIZED): 0.8782
{0: 0.9612244897959183, 1: 0.9744493392070485, 2: 0.8643410852713178, 3: 0.8643564356435643, 4: 0.8869653767820774, 5: 0.7219730941704036, 6: 0.9290187891440501, 7: 0.8628404669260701, 8: 0.8316221765913757, 9: 0.8592666005946482} 

Normalized Acc -->  0.8756057854126473


----------------
----------------
# Task 2 [0.25 pts] - Add another (conv + max pooling) layers
We want you to add another convolutional layer, followed by a max pooling to the network. Your network will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- **Convolutional layer with 20 filters of size 5x5 with ReLU activation;**
- **Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (8x8 to 4x4);**
- An operation to flatten the feature maps into an array of size 20x4x4 = 320
- Dropout operation with probability 0.25, applied to flattened array
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task2_cnn.png" />

In [9]:
model = Sequential()
model.add(Conv2D(10, kernel_size=(5,5), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(20, kernel_size=(5,5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.25))
model.add(Dense(10, activation='softmax'))

## Compile and train your model

In [10]:
sgd = optimizers.SGD(lr=0.01) #lr = learning rate
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(X_train, Y_train,
          batch_size=128, epochs=1, verbose=1,
          class_weight = train_class_weights,
          validation_data=(X_val, Y_val))

Train on 48000 samples, validate on 12000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f8f895a0940>

## Evaluate on the test data

In [11]:
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


predicted_classes = model.predict_classes(X_test)
Y_test_classes = np.argmax(Y_test, axis=-1)

accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)
    
    correctPred = np.sum(predicted_classes[idx] == Y_test_classes[idx])
    accPerClass.append( correctPred / float(test_sample_per_class[classIdx]))
    
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 0.5765314473628997
Test accuracy (NOT NORMALIZED): 0.8684
{0: 0.9724489795918367, 1: 0.9700440528634361, 2: 0.8643410852713178, 3: 0.8198019801980198, 4: 0.8441955193482689, 5: 0.7600896860986547, 6: 0.9102296450939458, 7: 0.8608949416342413, 8: 0.7874743326488707, 9: 0.8711595639246779} 

Normalized Acc -->  0.8660679786673269


----------------
----------------
# Task 3 [0.25 pts] - Add Conv + MaxPool + FC
Now combine tasks 1 and 2; adding the convolutional, max pooling and fc layer to your network. Your CNN will have:
- Convolutional layer with 10 filters of size 5x5 with ReLU activation;
- Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (24x24 to 12x12);
- **Convolutional layer with 20 filters of size 5x5 with ReLU activation;**
- **Max pooling layer with kernel 2x2, that will reduce each spatial dimension by half (8x8 to 4x4);**
- An operation to flatten the feature maps into an array of size 20x4x4 = 320
- Dropout operation with probability 0.25, applied to flattened array
- **Fully connected layer with 100 units/neurons and ReLU activation;**
- Fully connected layer with units/neurons equal to the number of classes in our problem (in this case, 10);
- Softmax activation on the last FC layer.

The architecture will look like this:


<img src="task3_cnn.png" />

In [12]:
model = Sequential()
model.add(Conv2D(10, kernel_size=(5,5), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(20, kernel_size=(5,5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dropout(0.25))
model.add(Dense(100, activation='relu'))
model.add(Dense(10, activation='softmax'))

## Compile and train your model

In [13]:
sgd = optimizers.SGD(lr=0.01) #lr = learning rate
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(X_train, Y_train,
          batch_size=128, epochs=1, verbose=1,
          class_weight = train_class_weights,
          validation_data=(X_val, Y_val))

Train on 48000 samples, validate on 12000 samples
Epoch 1/1


<keras.callbacks.History at 0x7f8f61e8f898>

## Evaluate on the test data

In [14]:
score = model.evaluate(X_test, Y_test, verbose=1)
print('Test loss:', score[0])
print('Test accuracy (NOT NORMALIZED):', score[1])


predicted_classes = model.predict_classes(X_test)
Y_test_classes = np.argmax(Y_test, axis=-1)

accPerClass = []
for classIdx in range(nb_classes):
    idx = (Y_test_classes == classIdx)
    
    correctPred = np.sum(predicted_classes[idx] == Y_test_classes[idx])
    accPerClass.append( correctPred / float(test_sample_per_class[classIdx]))
    
print(dict(zip(range(nb_classes),accPerClass)), "\n")
print("Normalized Acc --> ", np.mean(accPerClass))

Test loss: 0.5278754826545715
Test accuracy (NOT NORMALIZED): 0.861
{0: 0.9653061224489796, 1: 0.9348017621145375, 2: 0.875968992248062, 3: 0.8861386138613861, 4: 0.8615071283095723, 5: 0.7107623318385651, 6: 0.8778705636743215, 7: 0.8093385214007782, 8: 0.795687885010267, 9: 0.8681863230921705} 

Normalized Acc -->  0.858556824399864


----------------
----------------
# Task 4 [0.25 pts] - Implement a new modification
Implement **one** modification to your network and evaluate it. Some possible alterations are:
- Add more convolutional and/or max pooling layers;
- Alter the kernel size and number of filters of the conv layers;
- Try training with different batch sizes and higher number of epochs;
- Try with different activations, besides ReLU and Softmax;
- Try optimizing the CNN with a different loss;
- Try different learning rates;

In [15]:
# It's hard to decide what to change, and I don't really know how to get started
# Let's just the existing network and randomly tweak it's parameters

fib = [1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584]
conv_filters = [x for x in fib if x >= 5 and x <= 25]
kernel_sizes = [(x,x) for x in fib if x >= 2 and x <= 10]
pool_sizes = [(x,x) for x in fib if x >= 1 and x <= 5]
dropout_rate = [0, 0.125, 0.25, 0.5]
dense_units = [x for x in fib if x >= 20 and x < 200]
activations = ['relu', 'tanh', 'sigmoid']

params = {
    'conv1_filters': conv_filters,
    'conv1_kernel': kernel_sizes,
    'conv1_activation': activations,
    'pool1_size': pool_sizes,
    
    'conv2_filters': conv_filters,
    'conv2_kernel': kernel_sizes,
    'conv2_activation': activations,
    'pool2_size': pool_sizes,
    
    'dropout1_rate': dropout_rate,
    
    'dense1_units': dense_units,
    'dense1_activation': activations,
}

def create_keras_model(
        conv1_filters, conv1_kernel, conv1_activation, pool1_size,
        conv2_filters, conv2_kernel, conv2_activation, pool2_size,
        dropout1_rate, dense1_units, dense1_activation
    ):
    model = Sequential()
    model.add(Conv2D(filters=conv1_filters, kernel_size=conv1_kernel, activation=conv1_activation, input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=pool1_size))
    model.add(Conv2D(filters=conv2_filters, kernel_size=conv2_kernel, activation=conv2_activation))
    model.add(MaxPooling2D(pool_size=pool2_size))
    model.add(Flatten())
    model.add(Dropout(rate=dropout1_rate))
    model.add(Dense(units=dense1_units, activation=dense1_activation))
    model.add(Dense(10, activation='softmax'))
    return model
    
def eval_model(model_params, batch_size=128, epochs=1, verbose=0):
    model = create_keras_model(**model_params)
    
    sgd = optimizers.SGD(lr=0.01) #lr = learning rate
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

    model.fit(X_train, Y_train,
              batch_size=batch_size, epochs=epochs, verbose=verbose,
              class_weight = train_class_weights,
              validation_data=(X_val, Y_val))
    
    predicted_classes = model.predict_classes(X_test)
    Y_test_classes = np.argmax(Y_test, axis=-1)

    accPerClass = []
    for classIdx in range(nb_classes):
        idx = (Y_test_classes == classIdx)

        correctPred = np.sum(predicted_classes[idx] == Y_test_classes[idx])
        accPerClass.append( correctPred / float(test_sample_per_class[classIdx]))
    return model, np.mean(accPerClass)


In [16]:
best_model = None
best_model_params = None
best_model_acc = 0

In [17]:
for i, p in enumerate(ParameterSampler(params, n_iter=1000)):
    try:
        model, acc = eval_model(p, epochs=3)
    except KeyboardInterrupt as err: 
        break
    except:
        # Invalid parameter combination
        # Probably the kernel sizes add up to be bigger than the image itself
        print(f'[{i:04d}] Fail')
        continue
    print(f'[{i:04d}] Accuracy={acc:.2f}, Model={p} ')

    if acc > best_model_acc:
        print(f'=== New Best!!! ===')
        best_model = model
        best_model_params = p
        best_model_acc = acc
        
print('===============')
print(f'Best Model Accuracy={best_model_acc:.2f}, param={best_model_params}')

[0000] Accuracy=0.80, Model={'pool2_size': (5, 5), 'pool1_size': (1, 1), 'dropout1_rate': 0.25, 'dense1_units': 21, 'dense1_activation': 'tanh', 'conv2_kernel': (3, 3), 'conv2_filters': 5, 'conv2_activation': 'relu', 'conv1_kernel': (3, 3), 'conv1_filters': 5, 'conv1_activation': 'tanh'} 
=== New Best!!! ===
[0001] Accuracy=0.86, Model={'pool2_size': (1, 1), 'pool1_size': (3, 3), 'dropout1_rate': 0, 'dense1_units': 21, 'dense1_activation': 'relu', 'conv2_kernel': (2, 2), 'conv2_filters': 8, 'conv2_activation': 'tanh', 'conv1_kernel': (3, 3), 'conv1_filters': 21, 'conv1_activation': 'tanh'} 
=== New Best!!! ===
[0002] Fail
[0003] Accuracy=0.85, Model={'pool2_size': (3, 3), 'pool1_size': (1, 1), 'dropout1_rate': 0.125, 'dense1_units': 89, 'dense1_activation': 'tanh', 'conv2_kernel': (5, 5), 'conv2_filters': 5, 'conv2_activation': 'relu', 'conv1_kernel': (8, 8), 'conv1_filters': 21, 'conv1_activation': 'sigmoid'} 
[0004] Accuracy=0.45, Model={'pool2_size': (3, 3), 'pool1_size': (2, 2), 'd

[0033] Accuracy=0.93, Model={'pool2_size': (3, 3), 'pool1_size': (1, 1), 'dropout1_rate': 0.25, 'dense1_units': 55, 'dense1_activation': 'tanh', 'conv2_kernel': (2, 2), 'conv2_filters': 8, 'conv2_activation': 'relu', 'conv1_kernel': (8, 8), 'conv1_filters': 21, 'conv1_activation': 'relu'} 
=== New Best!!! ===
[0034] Accuracy=0.93, Model={'pool2_size': (1, 1), 'pool1_size': (3, 3), 'dropout1_rate': 0.25, 'dense1_units': 144, 'dense1_activation': 'tanh', 'conv2_kernel': (3, 3), 'conv2_filters': 21, 'conv2_activation': 'relu', 'conv1_kernel': (8, 8), 'conv1_filters': 5, 'conv1_activation': 'relu'} 
[0035] Accuracy=0.88, Model={'pool2_size': (2, 2), 'pool1_size': (2, 2), 'dropout1_rate': 0, 'dense1_units': 55, 'dense1_activation': 'tanh', 'conv2_kernel': (8, 8), 'conv2_filters': 5, 'conv2_activation': 'relu', 'conv1_kernel': (2, 2), 'conv1_filters': 13, 'conv1_activation': 'tanh'} 
[0036] Fail
[0037] Accuracy=0.16, Model={'pool2_size': (2, 2), 'pool1_size': (5, 5), 'dropout1_rate': 0.125, 

[0067] Accuracy=0.19, Model={'pool2_size': (1, 1), 'pool1_size': (2, 2), 'dropout1_rate': 0.5, 'dense1_units': 144, 'dense1_activation': 'sigmoid', 'conv2_kernel': (2, 2), 'conv2_filters': 5, 'conv2_activation': 'sigmoid', 'conv1_kernel': (3, 3), 'conv1_filters': 8, 'conv1_activation': 'relu'} 
[0068] Accuracy=0.18, Model={'pool2_size': (3, 3), 'pool1_size': (3, 3), 'dropout1_rate': 0.5, 'dense1_units': 144, 'dense1_activation': 'relu', 'conv2_kernel': (5, 5), 'conv2_filters': 8, 'conv2_activation': 'tanh', 'conv1_kernel': (2, 2), 'conv1_filters': 13, 'conv1_activation': 'sigmoid'} 
[0069] Accuracy=0.84, Model={'pool2_size': (5, 5), 'pool1_size': (2, 2), 'dropout1_rate': 0, 'dense1_units': 34, 'dense1_activation': 'tanh', 'conv2_kernel': (2, 2), 'conv2_filters': 5, 'conv2_activation': 'relu', 'conv1_kernel': (5, 5), 'conv1_filters': 21, 'conv1_activation': 'relu'} 
[0070] Accuracy=0.19, Model={'pool2_size': (2, 2), 'pool1_size': (3, 3), 'dropout1_rate': 0.5, 'dense1_units': 21, 'dense1

[0101] Accuracy=0.41, Model={'pool2_size': (3, 3), 'pool1_size': (3, 3), 'dropout1_rate': 0.5, 'dense1_units': 34, 'dense1_activation': 'relu', 'conv2_kernel': (5, 5), 'conv2_filters': 5, 'conv2_activation': 'relu', 'conv1_kernel': (8, 8), 'conv1_filters': 21, 'conv1_activation': 'tanh'} 
[0102] Fail
[0103] Accuracy=0.86, Model={'pool2_size': (3, 3), 'pool1_size': (1, 1), 'dropout1_rate': 0.5, 'dense1_units': 89, 'dense1_activation': 'tanh', 'conv2_kernel': (3, 3), 'conv2_filters': 21, 'conv2_activation': 'relu', 'conv1_kernel': (3, 3), 'conv1_filters': 21, 'conv1_activation': 'sigmoid'} 
[0104] Accuracy=0.95, Model={'pool2_size': (1, 1), 'pool1_size': (1, 1), 'dropout1_rate': 0, 'dense1_units': 144, 'dense1_activation': 'relu', 'conv2_kernel': (2, 2), 'conv2_filters': 5, 'conv2_activation': 'relu', 'conv1_kernel': (2, 2), 'conv1_filters': 13, 'conv1_activation': 'tanh'} 
[0105] Accuracy=0.34, Model={'pool2_size': (3, 3), 'pool1_size': (5, 5), 'dropout1_rate': 0, 'dense1_units': 34, 'd

[0136] Accuracy=0.77, Model={'pool2_size': (1, 1), 'pool1_size': (3, 3), 'dropout1_rate': 0, 'dense1_units': 55, 'dense1_activation': 'tanh', 'conv2_kernel': (5, 5), 'conv2_filters': 21, 'conv2_activation': 'sigmoid', 'conv1_kernel': (3, 3), 'conv1_filters': 8, 'conv1_activation': 'tanh'} 
[0137] Accuracy=0.19, Model={'pool2_size': (3, 3), 'pool1_size': (2, 2), 'dropout1_rate': 0.5, 'dense1_units': 34, 'dense1_activation': 'tanh', 'conv2_kernel': (3, 3), 'conv2_filters': 8, 'conv2_activation': 'sigmoid', 'conv1_kernel': (5, 5), 'conv1_filters': 5, 'conv1_activation': 'sigmoid'} 
[0138] Accuracy=0.59, Model={'pool2_size': (5, 5), 'pool1_size': (1, 1), 'dropout1_rate': 0.125, 'dense1_units': 21, 'dense1_activation': 'tanh', 'conv2_kernel': (2, 2), 'conv2_filters': 5, 'conv2_activation': 'tanh', 'conv1_kernel': (3, 3), 'conv1_filters': 21, 'conv1_activation': 'sigmoid'} 
[0139] Accuracy=0.89, Model={'pool2_size': (3, 3), 'pool1_size': (2, 2), 'dropout1_rate': 0.125, 'dense1_units': 89, 'd

[0173] Accuracy=0.85, Model={'pool2_size': (1, 1), 'pool1_size': (2, 2), 'dropout1_rate': 0, 'dense1_units': 55, 'dense1_activation': 'sigmoid', 'conv2_kernel': (3, 3), 'conv2_filters': 5, 'conv2_activation': 'tanh', 'conv1_kernel': (8, 8), 'conv1_filters': 8, 'conv1_activation': 'tanh'} 
[0174] Accuracy=0.77, Model={'pool2_size': (1, 1), 'pool1_size': (1, 1), 'dropout1_rate': 0, 'dense1_units': 21, 'dense1_activation': 'tanh', 'conv2_kernel': (8, 8), 'conv2_filters': 8, 'conv2_activation': 'sigmoid', 'conv1_kernel': (2, 2), 'conv1_filters': 5, 'conv1_activation': 'sigmoid'} 
[0175] Accuracy=0.69, Model={'pool2_size': (2, 2), 'pool1_size': (2, 2), 'dropout1_rate': 0.5, 'dense1_units': 34, 'dense1_activation': 'sigmoid', 'conv2_kernel': (3, 3), 'conv2_filters': 8, 'conv2_activation': 'relu', 'conv1_kernel': (3, 3), 'conv1_filters': 21, 'conv1_activation': 'tanh'} 
[0176] Accuracy=0.92, Model={'pool2_size': (1, 1), 'pool1_size': (2, 2), 'dropout1_rate': 0, 'dense1_units': 34, 'dense1_act

[0211] Accuracy=0.07, Model={'pool2_size': (5, 5), 'pool1_size': (3, 3), 'dropout1_rate': 0.125, 'dense1_units': 34, 'dense1_activation': 'sigmoid', 'conv2_kernel': (2, 2), 'conv2_filters': 21, 'conv2_activation': 'sigmoid', 'conv1_kernel': (8, 8), 'conv1_filters': 5, 'conv1_activation': 'sigmoid'} 
[0212] Accuracy=0.55, Model={'pool2_size': (3, 3), 'pool1_size': (2, 2), 'dropout1_rate': 0.25, 'dense1_units': 55, 'dense1_activation': 'sigmoid', 'conv2_kernel': (3, 3), 'conv2_filters': 13, 'conv2_activation': 'tanh', 'conv1_kernel': (5, 5), 'conv1_filters': 5, 'conv1_activation': 'tanh'} 
[0213] Accuracy=0.22, Model={'pool2_size': (5, 5), 'pool1_size': (3, 3), 'dropout1_rate': 0.5, 'dense1_units': 21, 'dense1_activation': 'tanh', 'conv2_kernel': (5, 5), 'conv2_filters': 21, 'conv2_activation': 'sigmoid', 'conv1_kernel': (2, 2), 'conv1_filters': 8, 'conv1_activation': 'tanh'} 
[0214] Accuracy=0.85, Model={'pool2_size': (2, 2), 'pool1_size': (2, 2), 'dropout1_rate': 0.125, 'dense1_units':

[0248] Accuracy=0.80, Model={'pool2_size': (2, 2), 'pool1_size': (5, 5), 'dropout1_rate': 0, 'dense1_units': 144, 'dense1_activation': 'tanh', 'conv2_kernel': (2, 2), 'conv2_filters': 21, 'conv2_activation': 'tanh', 'conv1_kernel': (8, 8), 'conv1_filters': 13, 'conv1_activation': 'relu'} 
[0249] Accuracy=0.69, Model={'pool2_size': (3, 3), 'pool1_size': (2, 2), 'dropout1_rate': 0.125, 'dense1_units': 55, 'dense1_activation': 'tanh', 'conv2_kernel': (2, 2), 'conv2_filters': 8, 'conv2_activation': 'relu', 'conv1_kernel': (8, 8), 'conv1_filters': 5, 'conv1_activation': 'sigmoid'} 
[0250] Accuracy=0.74, Model={'pool2_size': (5, 5), 'pool1_size': (1, 1), 'dropout1_rate': 0, 'dense1_units': 34, 'dense1_activation': 'relu', 'conv2_kernel': (2, 2), 'conv2_filters': 5, 'conv2_activation': 'tanh', 'conv1_kernel': (2, 2), 'conv1_filters': 8, 'conv1_activation': 'tanh'} 
[0251] Accuracy=0.10, Model={'pool2_size': (3, 3), 'pool1_size': (3, 3), 'dropout1_rate': 0, 'dense1_units': 89, 'dense1_activati

[0282] Accuracy=0.44, Model={'pool2_size': (1, 1), 'pool1_size': (3, 3), 'dropout1_rate': 0, 'dense1_units': 21, 'dense1_activation': 'relu', 'conv2_kernel': (5, 5), 'conv2_filters': 5, 'conv2_activation': 'sigmoid', 'conv1_kernel': (8, 8), 'conv1_filters': 8, 'conv1_activation': 'tanh'} 
[0283] Fail
[0284] Fail
[0285] Accuracy=0.86, Model={'pool2_size': (1, 1), 'pool1_size': (2, 2), 'dropout1_rate': 0.25, 'dense1_units': 89, 'dense1_activation': 'sigmoid', 'conv2_kernel': (3, 3), 'conv2_filters': 5, 'conv2_activation': 'tanh', 'conv1_kernel': (8, 8), 'conv1_filters': 8, 'conv1_activation': 'relu'} 
[0286] Accuracy=0.88, Model={'pool2_size': (3, 3), 'pool1_size': (1, 1), 'dropout1_rate': 0.5, 'dense1_units': 55, 'dense1_activation': 'tanh', 'conv2_kernel': (5, 5), 'conv2_filters': 8, 'conv2_activation': 'relu', 'conv1_kernel': (5, 5), 'conv1_filters': 8, 'conv1_activation': 'sigmoid'} 
[0287] Fail
[0288] Accuracy=0.20, Model={'pool2_size': (1, 1), 'pool1_size': (5, 5), 'dropout1_rate':

In [19]:
#Re-Train the best model param on more epochs to get a great result
best_model_2, best_model_acc_2 = eval_model(best_model_params, epochs=10, verbose=1)
print('===============')
print(f'Best Model Accuracy={best_model_acc_2:.2f}, param={best_model_params}')

Train on 48000 samples, validate on 12000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Best Model Accuracy=0.98, param={'pool2_size': (3, 3), 'pool1_size': (1, 1), 'dropout1_rate': 0.25, 'dense1_units': 144, 'dense1_activation': 'tanh', 'conv2_kernel': (8, 8), 'conv2_filters': 21, 'conv2_activation': 'relu', 'conv1_kernel': (2, 2), 'conv1_filters': 5, 'conv1_activation': 'relu'}
