# a quick tour for going end-to-end model building and tuning for MNIST dataset

In [3]:
%matplotlib inline

import utils
from utils import *

# Setup

In [4]:
batch_size = 64

In [5]:
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [6]:
# Remember this step is necessary for MNIST data
X_train = np.expand_dims(X_train, 1) # np.expand_dims(X_train, 3) # for channel last case
X_test = np.expand_dims(X_test, 1) # np.expand_dims(X_test, 3) # for channel last case 

In [7]:
X_train.shape, X_test.shape

((60000, 1, 28, 28), (10000, 1, 28, 28))

In [8]:
y_train[:5]

array([5, 0, 4, 1, 9], dtype=uint8)

#### turn y_train, y_test from classes into labels by onehot encoding

In [9]:
y_train = onehot(y_train)
y_test = onehot(y_test)

In [10]:
y_train[:5]

array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  1.]])

In [11]:
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)

def norm_input(x): return (x - mean_px) / std_px 

# Build and Tune Model (to overfitting)
** tune it to overfitting to make sure that your model is complex enough to work well **

** after making it, then we can tune it to reduce overfitting for the next steps by different methods **

## Linear Model

In [30]:
def get_lin_model():
    model = Sequential([
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Flatten(),
        Dense(10, activation='softmax')
        ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [31]:
lm = get_lin_model()

In [105]:
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)

In [35]:
lm.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=1, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/1

  1/938 [..............................] - ETA: 4:37 - loss: 2.7860 - acc: 0.2500
 16/938 [..............................] - ETA: 20s - loss: 2.1186 - acc: 0.3252 
 31/938 [..............................] - ETA: 11s - loss: 1.6245 - acc: 0.4934
 46/938 [>.............................] - ETA: 8s - loss: 1.3593 - acc: 0.5812 
 61/938 [>.............................] - ETA: 7s - loss: 1.1855 - acc: 0.6352
 76/938 [=>............................] - ETA: 6s - loss: 1.0723 - acc: 0.6715
 91/938 [=>............................] - ETA: 5s - loss: 0.9777 - acc: 0.7026
106/938 [==>...........................] - ETA: 5s - loss: 0.9087 - acc: 0.7248
121/938 [==>...........................] - ETA: 4s - loss: 0.8568 - acc: 0.7401
137/938 [===>..........................] - ETA: 4s - loss: 0.8095 - acc: 0.7540
153/938 [===>..........................] - ETA: 4s - loss: 0.7712 - acc: 0.7660
169/938 [====>.........................] - ETA: 3s - loss: 0.7437 - acc: 0.7746
184/938 [====>.........

<keras.callbacks.History at 0x7fb945aad470>

In [36]:
lm.optimizer.lr = 0.1

In [37]:
lm.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=1, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/1
 - 3s - loss: 0.2987 - acc: 0.9146 - val_loss: 0.2822 - val_acc: 0.9183


<keras.callbacks.History at 0x7fb945aad240>

In [38]:
lm.optimizer.lr = 0.01

In [39]:
lm.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=3, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/4
 - 3s - loss: 0.2850 - acc: 0.9199 - val_loss: 0.2810 - val_acc: 0.9180
Epoch 2/4
 - 3s - loss: 0.2772 - acc: 0.9221 - val_loss: 0.2905 - val_acc: 0.9190
Epoch 3/4
 - 3s - loss: 0.2734 - acc: 0.9237 - val_loss: 0.2795 - val_acc: 0.9233
Epoch 4/4
 - 3s - loss: 0.2692 - acc: 0.9242 - val_loss: 0.2814 - val_acc: 0.9218


<keras.callbacks.History at 0x7fb945aad518>

# VGG style model

In [18]:
def get_model_bn():
    model = Sequential([
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Conv2D(32, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(32, (3,3), activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(64, (3,3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model


In [43]:
model = get_model_bn()

In [44]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=1, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/1
 - 104s - loss: 0.0855 - acc: 0.9743 - val_loss: 0.0410 - val_acc: 0.9870


<keras.callbacks.History at 0x7fb9443d9ac8>

In [45]:
model.optimizer.lr = 0.1

In [47]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=2, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/8
 - 102s - loss: 0.0348 - acc: 0.9889 - val_loss: 0.0321 - val_acc: 0.9890
Epoch 2/8
 - 102s - loss: 0.0246 - acc: 0.9919 - val_loss: 0.0280 - val_acc: 0.9914
Epoch 3/8
 - 102s - loss: 0.0221 - acc: 0.9927 - val_loss: 0.0279 - val_acc: 0.9913
Epoch 4/8
 - 102s - loss: 0.0183 - acc: 0.9940 - val_loss: 0.0290 - val_acc: 0.9911
Epoch 5/8
 - 102s - loss: 0.0166 - acc: 0.9947 - val_loss: 0.0300 - val_acc: 0.9921
Epoch 6/8
 - 102s - loss: 0.0152 - acc: 0.9951 - val_loss: 0.0270 - val_acc: 0.9914
Epoch 7/8
 - 102s - loss: 0.0121 - acc: 0.9959 - val_loss: 0.0269 - val_acc: 0.9910
Epoch 8/8
 - 102s - loss: 0.0112 - acc: 0.9960 - val_loss: 0.0366 - val_acc: 0.9917


<keras.callbacks.History at 0x7fb9443d99e8>

In [48]:
model.optimizer.lr = 0.01

In [52]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=12, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/12
 - 102s - loss: 0.0096 - acc: 0.9970 - val_loss: 0.0375 - val_acc: 0.9918
Epoch 2/12
 - 102s - loss: 0.0084 - acc: 0.9973 - val_loss: 0.0300 - val_acc: 0.9933
Epoch 3/12
 - 102s - loss: 0.0075 - acc: 0.9975 - val_loss: 0.0306 - val_acc: 0.9912
Epoch 4/12
 - 102s - loss: 0.0067 - acc: 0.9979 - val_loss: 0.0376 - val_acc: 0.9910
Epoch 5/12
 - 102s - loss: 0.0079 - acc: 0.9973 - val_loss: 0.0300 - val_acc: 0.9927
Epoch 6/12
 - 102s - loss: 0.0068 - acc: 0.9979 - val_loss: 0.0262 - val_acc: 0.9932
Epoch 7/12
 - 102s - loss: 0.0058 - acc: 0.9980 - val_loss: 0.0327 - val_acc: 0.9924
Epoch 8/12
 - 102s - loss: 0.0059 - acc: 0.9982 - val_loss: 0.0285 - val_acc: 0.9937
Epoch 9/12
 - 102s - loss: 0.0048 - acc: 0.9986 - val_loss: 0.0301 - val_acc: 0.9936
Epoch 10/12
 - 102s - loss: 0.0036 - acc: 0.9988 - val_loss: 0.0294 - val_acc: 0.9935
Epoch 11/12
 - 102s - loss: 0.0055 - acc: 0.9983 - val_loss: 0.0300 - val_acc: 0.9937
Epoch 12/12
 - 102s - loss: 0.0044 - acc: 0.9985 - val_loss: 0.

<keras.callbacks.History at 0x7fb91ab3e358>

## Batch Norm + Data Augmentation

In [123]:
gen_augmentaton = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, height_shift_range=0.08, 
                         shear_range=0.3, zoom_range=0.08)

batches = gen_augmentaton.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, batch_size=batch_size)

In [128]:
model = get_model_bn()

In [129]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=1, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/1
 - 103s - loss: 0.1630 - acc: 0.9497 - val_loss: 0.0313 - val_acc: 0.9899


<keras.callbacks.History at 0x7fb9134ecac8>

In [130]:
model.optimizer.lr = 0.1

In [132]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=3, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/3
 - 102s - loss: 0.0475 - acc: 0.9851 - val_loss: 0.0232 - val_acc: 0.9931
Epoch 2/3
 - 102s - loss: 0.0437 - acc: 0.9865 - val_loss: 0.0294 - val_acc: 0.9926
Epoch 3/3
 - 102s - loss: 0.0410 - acc: 0.9872 - val_loss: 0.0263 - val_acc: 0.9906


<keras.callbacks.History at 0x7fb912144a90>

In [133]:
model.optimizer.lr = 0.01

In [134]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=5, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/5
 - 102s - loss: 0.0374 - acc: 0.9883 - val_loss: 0.0317 - val_acc: 0.9916
Epoch 2/5
 - 102s - loss: 0.0356 - acc: 0.9893 - val_loss: 0.0175 - val_acc: 0.9945
Epoch 3/5
 - 102s - loss: 0.0328 - acc: 0.9897 - val_loss: 0.0172 - val_acc: 0.9940
Epoch 4/5
 - 102s - loss: 0.0329 - acc: 0.9897 - val_loss: 0.0165 - val_acc: 0.9941
Epoch 5/5
 - 102s - loss: 0.0300 - acc: 0.9905 - val_loss: 0.0153 - val_acc: 0.9949


<keras.callbacks.History at 0x7fb91212c4e0>

In [135]:
model.optimizer.lr = 0.001

In [136]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=12, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/12
 - 102s - loss: 0.0296 - acc: 0.9906 - val_loss: 0.0215 - val_acc: 0.9941
Epoch 2/12
 - 102s - loss: 0.0297 - acc: 0.9904 - val_loss: 0.0169 - val_acc: 0.9951
Epoch 3/12
 - 102s - loss: 0.0270 - acc: 0.9915 - val_loss: 0.0186 - val_acc: 0.9945
Epoch 4/12
 - 102s - loss: 0.0241 - acc: 0.9926 - val_loss: 0.0161 - val_acc: 0.9949
Epoch 5/12
 - 102s - loss: 0.0248 - acc: 0.9921 - val_loss: 0.0168 - val_acc: 0.9953
Epoch 6/12
 - 102s - loss: 0.0243 - acc: 0.9922 - val_loss: 0.0157 - val_acc: 0.9947
Epoch 7/12
 - 102s - loss: 0.0250 - acc: 0.9924 - val_loss: 0.0187 - val_acc: 0.9943
Epoch 8/12


KeyboardInterrupt: 

## Batch Norm + Data Augmentation + Dropout

In [12]:
def get_model_bn_dropout():
    model = Sequential([
        Lambda(norm_input, input_shape=(1, 28, 28)),
        Conv2D(32, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(32, (3,3), activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(64, (3,3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [13]:
model = get_model_bn_dropout()

In [147]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=1, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/1

  1/938 [..............................] - ETA: 18:17 - loss: 3.7182 - acc: 0.1406
  2/938 [..............................] - ETA: 9:57 - loss: 3.3286 - acc: 0.2031 
  3/938 [..............................] - ETA: 7:11 - loss: 3.0250 - acc: 0.2396
  4/938 [..............................] - ETA: 5:46 - loss: 2.7212 - acc: 0.3125
  5/938 [..............................] - ETA: 4:57 - loss: 2.4608 - acc: 0.3719
  6/938 [..............................] - ETA: 4:22 - loss: 2.3754 - acc: 0.3854
  7/938 [..............................] - ETA: 3:58 - loss: 2.2578 - acc: 0.4040
  8/938 [..............................] - ETA: 3:40 - loss: 2.1451 - acc: 0.4219
  9/938 [..............................] - ETA: 3:26 - loss: 2.0587 - acc: 0.4479
 10/938 [..............................] - ETA: 3:14 - loss: 1.9308 - acc: 0.4766
 11/938 [..............................] - ETA: 3:05 - loss: 1.8610 - acc: 0.4901
 12/938 [..............................] - ETA: 2:57 - loss: 1.8077 - acc: 0.5065
 13

100/938 [==>...........................] - ETA: 1:34 - loss: 0.6769 - acc: 0.8058
101/938 [==>...........................] - ETA: 1:34 - loss: 0.6732 - acc: 0.8068
102/938 [==>...........................] - ETA: 1:33 - loss: 0.6710 - acc: 0.8068
103/938 [==>...........................] - ETA: 1:33 - loss: 0.6674 - acc: 0.8075
104/938 [==>...........................] - ETA: 1:33 - loss: 0.6658 - acc: 0.8077
105/938 [==>...........................] - ETA: 1:33 - loss: 0.6607 - acc: 0.8091
106/938 [==>...........................] - ETA: 1:33 - loss: 0.6598 - acc: 0.8096
107/938 [==>...........................] - ETA: 1:33 - loss: 0.6562 - acc: 0.8109
108/938 [==>...........................] - ETA: 1:32 - loss: 0.6537 - acc: 0.8115
109/938 [==>...........................] - ETA: 1:32 - loss: 0.6493 - acc: 0.8126
110/938 [==>...........................] - ETA: 1:32 - loss: 0.6456 - acc: 0.8135
111/938 [==>...........................] - ETA: 1:32 - loss: 0.6416 - acc: 0.8146
112/938 [==>....

199/938 [=====>........................] - ETA: 1:19 - loss: 0.4737 - acc: 0.8595
200/938 [=====>........................] - ETA: 1:19 - loss: 0.4732 - acc: 0.8598
201/938 [=====>........................] - ETA: 1:18 - loss: 0.4716 - acc: 0.8602
202/938 [=====>........................] - ETA: 1:18 - loss: 0.4699 - acc: 0.8608
203/938 [=====>........................] - ETA: 1:18 - loss: 0.4685 - acc: 0.8611
204/938 [=====>........................] - ETA: 1:18 - loss: 0.4673 - acc: 0.8615
205/938 [=====>........................] - ETA: 1:18 - loss: 0.4662 - acc: 0.8619
206/938 [=====>........................] - ETA: 1:18 - loss: 0.4653 - acc: 0.8620
207/938 [=====>........................] - ETA: 1:18 - loss: 0.4643 - acc: 0.8622
208/938 [=====>........................] - ETA: 1:18 - loss: 0.4631 - acc: 0.8626
209/938 [=====>........................] - ETA: 1:17 - loss: 0.4621 - acc: 0.8628
210/938 [=====>........................] - ETA: 1:17 - loss: 0.4606 - acc: 0.8632
211/938 [=====>.















<keras.callbacks.History at 0x7fb91275aac8>

In [148]:
model.optimizer.lr = 0.1

In [149]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=4, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/4
 - 102s - loss: 0.0911 - acc: 0.9714 - val_loss: 0.0244 - val_acc: 0.9919
Epoch 2/4
 - 102s - loss: 0.0766 - acc: 0.9767 - val_loss: 0.0208 - val_acc: 0.9932
Epoch 3/4
 - 102s - loss: 0.0660 - acc: 0.9799 - val_loss: 0.0225 - val_acc: 0.9931
Epoch 4/4
 - 102s - loss: 0.0597 - acc: 0.9811 - val_loss: 0.0310 - val_acc: 0.9906


<keras.callbacks.History at 0x7fb91275aa90>

In [150]:
model.optimizer.lr = 0.01

In [151]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=12, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

Epoch 1/12


KeyboardInterrupt: 

In [None]:
model.optimizer.lr = 0.001

In [None]:
model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=18, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))

## Ensembling

In [152]:
def fit_model():
    #model = get_model_bn_dropout()
    model = get_model_bn()
    model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=1, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))
    model.optimizer.lr = 0.1
    model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=4, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))
    model.optimizer.lr = 0.01
    model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=12, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))
    model.optimizer.lr = 0.001
    model.fit_generator(batches, steps_per_epoch=int(math.ceil(batches.n / batches.batch_size)), epochs=18, verbose=2,
                 validation_data=test_batches, validation_steps=int(math.ceil(test_batches.n/test_batches.batch_size)))
    return model

In [153]:
models = [fit_model() for i in range(6)]

Epoch 1/1
 - 104s - loss: 0.1608 - acc: 0.9496 - val_loss: 0.0338 - val_acc: 0.9893
Epoch 1/4
 - 102s - loss: 0.0701 - acc: 0.9782 - val_loss: 0.0223 - val_acc: 0.9927
Epoch 2/4
 - 102s - loss: 0.0581 - acc: 0.9822 - val_loss: 0.0345 - val_acc: 0.9882
Epoch 3/4
 - 103s - loss: 0.0541 - acc: 0.9842 - val_loss: 0.0211 - val_acc: 0.9929
Epoch 4/4
 - 103s - loss: 0.0461 - acc: 0.9854 - val_loss: 0.0237 - val_acc: 0.9923
Epoch 1/12
 - 103s - loss: 0.0439 - acc: 0.9861 - val_loss: 0.0226 - val_acc: 0.9927
Epoch 2/12
 - 102s - loss: 0.0405 - acc: 0.9875 - val_loss: 0.0236 - val_acc: 0.9923
Epoch 3/12
 - 103s - loss: 0.0384 - acc: 0.9879 - val_loss: 0.0206 - val_acc: 0.9934
Epoch 4/12
 - 102s - loss: 0.0355 - acc: 0.9890 - val_loss: 0.0147 - val_acc: 0.9954
Epoch 5/12
 - 103s - loss: 0.0333 - acc: 0.9893 - val_loss: 0.0197 - val_acc: 0.9936
Epoch 6/12
 - 103s - loss: 0.0320 - acc: 0.9899 - val_loss: 0.0166 - val_acc: 0.9949
Epoch 7/12
 - 103s - loss: 0.0322 - acc: 0.9901 - val_loss: 0.0148 - v

Epoch 11/18
 - 102s - loss: 0.0186 - acc: 0.9942 - val_loss: 0.0154 - val_acc: 0.9946
Epoch 12/18
 - 102s - loss: 0.0175 - acc: 0.9942 - val_loss: 0.0144 - val_acc: 0.9952
Epoch 13/18
 - 102s - loss: 0.0185 - acc: 0.9941 - val_loss: 0.0139 - val_acc: 0.9958
Epoch 14/18
 - 102s - loss: 0.0183 - acc: 0.9939 - val_loss: 0.0117 - val_acc: 0.9957
Epoch 15/18
 - 102s - loss: 0.0190 - acc: 0.9939 - val_loss: 0.0137 - val_acc: 0.9957
Epoch 16/18
 - 102s - loss: 0.0158 - acc: 0.9947 - val_loss: 0.0143 - val_acc: 0.9955
Epoch 17/18
 - 102s - loss: 0.0173 - acc: 0.9945 - val_loss: 0.0137 - val_acc: 0.9960
Epoch 18/18
 - 102s - loss: 0.0166 - acc: 0.9948 - val_loss: 0.0139 - val_acc: 0.9954
Epoch 1/1
 - 104s - loss: 0.1625 - acc: 0.9495 - val_loss: 0.0297 - val_acc: 0.9906
Epoch 1/4
 - 103s - loss: 0.0694 - acc: 0.9784 - val_loss: 0.0265 - val_acc: 0.9912
Epoch 2/4
 - 102s - loss: 0.0611 - acc: 0.9805 - val_loss: 0.0230 - val_acc: 0.9921
Epoch 3/4
 - 102s - loss: 0.0527 - acc: 0.9839 - val_loss: 0

Epoch 3/18
 - 103s - loss: 0.0233 - acc: 0.9927 - val_loss: 0.0155 - val_acc: 0.9947
Epoch 4/18
 - 103s - loss: 0.0229 - acc: 0.9928 - val_loss: 0.0160 - val_acc: 0.9958
Epoch 5/18
 - 103s - loss: 0.0231 - acc: 0.9922 - val_loss: 0.0132 - val_acc: 0.9958
Epoch 6/18
 - 102s - loss: 0.0220 - acc: 0.9932 - val_loss: 0.0156 - val_acc: 0.9951
Epoch 7/18
 - 102s - loss: 0.0205 - acc: 0.9937 - val_loss: 0.0135 - val_acc: 0.9961
Epoch 8/18
 - 102s - loss: 0.0184 - acc: 0.9943 - val_loss: 0.0133 - val_acc: 0.9953
Epoch 9/18
 - 103s - loss: 0.0209 - acc: 0.9936 - val_loss: 0.0159 - val_acc: 0.9954
Epoch 10/18
 - 103s - loss: 0.0199 - acc: 0.9938 - val_loss: 0.0161 - val_acc: 0.9953
Epoch 11/18
 - 103s - loss: 0.0203 - acc: 0.9934 - val_loss: 0.0129 - val_acc: 0.9959
Epoch 12/18
 - 102s - loss: 0.0194 - acc: 0.9936 - val_loss: 0.0138 - val_acc: 0.9955
Epoch 13/18
 - 103s - loss: 0.0189 - acc: 0.9941 - val_loss: 0.0178 - val_acc: 0.9957
Epoch 14/18
 - 102s - loss: 0.0187 - acc: 0.9941 - val_loss: 

In [15]:
path = '../data/mnist/'
model_path = path + 'models/'
if not os.path.exists(path): os.mkdir(path)
if not os.path.exists(model_path): os.mkdir(model_path)

In [155]:
for i, mdl in enumerate(models):
    mdl.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')

In [19]:
# Load Weights
"""
models = []
for i in range(6):
    model = get_model_bn()
    model.load_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')
    models.append(model)
models = np.array(models)
"""

In [20]:
evals = np.array([mdl.evaluate(X_test, y_test, batch_size=batch_size*4) for mdl in models])


  256/10000 [..............................] - ETA: 21s
  768/10000 [=>............................] - ETA: 7s 
 1280/10000 [==>...........................] - ETA: 4s
 1792/10000 [====>.........................] - ETA: 3s

  256/10000 [..............................] - ETA: 4s
 1024/10000 [==>...........................] - ETA: 1s
 1792/10000 [====>.........................] - ETA: 1s

  256/10000 [..............................] - ETA: 4s
 1024/10000 [==>...........................] - ETA: 1s
 1792/10000 [====>.........................] - ETA: 1s

  256/10000 [..............................] - ETA: 4s
 1024/10000 [==>...........................] - ETA: 1s
 1792/10000 [====>.........................] - ETA: 1s

  256/10000 [..............................] - ETA: 4s
 1024/10000 [==>...........................] - ETA: 1s
 1792/10000 [====>.........................] - ETA: 1s

  256/10000 [..............................] - ETA: 4s
 1024/10000 [==>...........................] - ETA: 1s
 1

In [21]:
evals.mean(axis=0)

array([ 0.0144,  0.9954])

In [22]:
all_preds = np.stack([mdl.predict(X_test, batch_size=batch_size*4) for mdl in models])

In [23]:
all_preds.shape

(6, 10000, 10)

In [24]:
avg_preds = all_preds.mean(axis=0)

In [38]:
#keras.metrics.categorical_accuracy(y_test, avg_preds).eval()

# https://blog.keras.io/keras-as-a-simplified-interface-to-tensorflow-tutorial.html
from keras.metrics import categorical_accuracy as accuracy

acc_value = accuracy(y_test, avg_preds)
with sess.as_default():
    eval_result = acc_value.eval()
    
eval_result.mean()

0.99680001