# a quick tour for going end-to-end model building and tuning for MNIST dataset
## Adding Pseudo Labeling Test

In [1]:
%matplotlib inline

import utils_ted
from utils_ted import *

Using TensorFlow backend.
  return f(*args, **kwds)


# Setup

In [2]:
batch_size =64

In [29]:
from keras.datasets import mnist

(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [30]:
(X_train.shape, y_train.shape), (X_test.shape, y_test.shape)

(((60000, 28, 28), (60000,)), ((10000, 28, 28), (10000,)))

In [31]:
# Remember this step is necessary for MNIST data
X_train = np.expand_dims(X_train, 1) # np.expand_dims(X_train, 3) # for channel last case
X_test = np.expand_dims(X_test, 1) # np.expand_dims(X_test, 3) # for channel last case
y_train = y_train.reshape((-1, 1))
y_test = y_test.reshape((-1, 1))

In [32]:
(X_train.shape, y_train.shape), (X_test.shape, y_test.shape)

(((60000, 1, 28, 28), (60000, 1)), ((10000, 1, 28, 28), (10000, 1)))

In [33]:
print(y_train[:5])

[[5]
 [0]
 [4]
 [1]
 [9]]


#### turn y_train, y_test from classes into labels by onehot encoding

In [34]:
y_train = onehot(y_train)
y_test = onehot(y_test)

In [35]:
print(y_train[:5])

[[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]]


In [39]:
mean_px = X_train.mean().astype(np.float32)
std_px = X_train.std().astype(np.float32)

def norm_input(x): return (x - mean_px) / std_px  

In [49]:
input_shape = X_train.shape[1:]
print(input_shape)

(1, 28, 28)


# Build and Tune Model (to overfitting)
** tune it to overfitting to make sure that your model is complex enough to work well **

** after making it, then we can tune it to reduce overfitting for the next steps by different methods **

## Linear Model

In [54]:
def get_lin_model():
    model = Sequential([
        BatchNormalization(input_shape=input_shape),
        Flatten(),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [67]:
lm = get_lin_model()

In [68]:
gen = image.ImageDataGenerator()
batches = gen.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, shuffle=False, batch_size=batch_size)

In [69]:
lm.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=2, verbose=2,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/2
 - 5s - loss: 0.4251 - acc: 0.8760 - val_loss: 0.3051 - val_acc: 0.9147
Epoch 2/2
 - 4s - loss: 0.2941 - acc: 0.9168 - val_loss: 0.2884 - val_acc: 0.9183


<keras.callbacks.History at 0x7f86e58300b8>

In [70]:
lm.optimizer.lr = 0.1

In [71]:
lm.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=5, verbose=2,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/5
 - 4s - loss: 0.2795 - acc: 0.9206 - val_loss: 0.2904 - val_acc: 0.9235
Epoch 2/5
 - 4s - loss: 0.2716 - acc: 0.9237 - val_loss: 0.2949 - val_acc: 0.9217
Epoch 3/5
 - 4s - loss: 0.2660 - acc: 0.9253 - val_loss: 0.2904 - val_acc: 0.9235
Epoch 4/5
 - 4s - loss: 0.2617 - acc: 0.9276 - val_loss: 0.2943 - val_acc: 0.9214
Epoch 5/5
 - 4s - loss: 0.2576 - acc: 0.9283 - val_loss: 0.2980 - val_acc: 0.9217


<keras.callbacks.History at 0x7f86ec71ee80>

# VGG style model

In [72]:
def get_model_bn():
    model = Sequential([
        BatchNormalization(input_shape=input_shape),
        Conv2D(32, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(32, (3,3), activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(64, (3,3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [73]:
model = get_model_bn()

In [74]:
model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=2, verbose=2,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/2
 - 27s - loss: 0.0916 - acc: 0.9724 - val_loss: 0.0377 - val_acc: 0.9877
Epoch 2/2
 - 24s - loss: 0.0365 - acc: 0.9883 - val_loss: 0.0296 - val_acc: 0.9911


<keras.callbacks.History at 0x7f86e78b0ac8>

In [75]:
model.optimizer.lr = 0.1

In [77]:
model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=5, verbose=2,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/5
 - 24s - loss: 0.0196 - acc: 0.9941 - val_loss: 0.0285 - val_acc: 0.9918
Epoch 2/5
 - 24s - loss: 0.0172 - acc: 0.9947 - val_loss: 0.0288 - val_acc: 0.9924
Epoch 3/5
 - 24s - loss: 0.0138 - acc: 0.9957 - val_loss: 0.0256 - val_acc: 0.9924
Epoch 4/5
 - 24s - loss: 0.0130 - acc: 0.9955 - val_loss: 0.0265 - val_acc: 0.9924
Epoch 5/5
 - 24s - loss: 0.0097 - acc: 0.9969 - val_loss: 0.0266 - val_acc: 0.9926


<keras.callbacks.History at 0x7f86e194c7b8>

## Batch Norm + Data Augmentation

In [78]:
gen_aug = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, height_shift_range=0.08, 
                         shear_range=0.3, zoom_range=0.08)
gen = image.ImageDataGenerator()
batches = gen_aug.flow(X_train, y_train, batch_size=batch_size)
test_batches = gen.flow(X_test, y_test, shuffle=False, batch_size=batch_size)

In [79]:
model = get_model_bn()

In [80]:
model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=2, verbose=2,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/2
 - 29s - loss: 0.1669 - acc: 0.9480 - val_loss: 0.0269 - val_acc: 0.9911
Epoch 2/2
 - 24s - loss: 0.0710 - acc: 0.9772 - val_loss: 0.0324 - val_acc: 0.9893


<keras.callbacks.History at 0x7f86e0dbc668>

In [81]:
model.optimizer.lr = 0.1

In [82]:
model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=5, verbose=2,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/5
 - 24s - loss: 0.0573 - acc: 0.9819 - val_loss: 0.0273 - val_acc: 0.9913
Epoch 2/5
 - 24s - loss: 0.0510 - acc: 0.9841 - val_loss: 0.0182 - val_acc: 0.9938
Epoch 3/5
 - 24s - loss: 0.0472 - acc: 0.9848 - val_loss: 0.0190 - val_acc: 0.9938
Epoch 4/5
 - 25s - loss: 0.0425 - acc: 0.9865 - val_loss: 0.0186 - val_acc: 0.9946
Epoch 5/5
 - 25s - loss: 0.0403 - acc: 0.9872 - val_loss: 0.0163 - val_acc: 0.9948


<keras.callbacks.History at 0x7f86e183c048>

## Batch Norm + Data Augmentation + Dropout

In [83]:
def get_model_bn_dropout(p):
    model = Sequential([
        BatchNormalization(input_shape=input_shape),
        Conv2D(32, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(32, (3,3), activation='relu'),
        MaxPooling2D(),
        BatchNormalization(axis=1),
        Conv2D(64, (3,3), activation='relu'),
        BatchNormalization(axis=1),
        Conv2D(64, (3,3), activation='relu'),
        MaxPooling2D(),
        Flatten(),
        BatchNormalization(),
        Dense(512, activation='relu'),
        BatchNormalization(),
        Dropout(p),
        Dense(10, activation='softmax')
    ])
    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [84]:
model = get_model_bn_dropout(0.5)

In [85]:
model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=2, verbose=2,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/2
 - 28s - loss: 0.2252 - acc: 0.9327 - val_loss: 0.0298 - val_acc: 0.9901
Epoch 2/2
 - 25s - loss: 0.0960 - acc: 0.9708 - val_loss: 0.0316 - val_acc: 0.9896


<keras.callbacks.History at 0x7f86db4e80f0>

In [86]:
model.optimizer.lr = 0.1

In [87]:
model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=8, verbose=2,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/8
 - 25s - loss: 0.0770 - acc: 0.9772 - val_loss: 0.0235 - val_acc: 0.9922
Epoch 2/8
 - 25s - loss: 0.0660 - acc: 0.9794 - val_loss: 0.0192 - val_acc: 0.9937
Epoch 3/8
 - 25s - loss: 0.0607 - acc: 0.9817 - val_loss: 0.0196 - val_acc: 0.9932
Epoch 4/8
 - 25s - loss: 0.0550 - acc: 0.9824 - val_loss: 0.0214 - val_acc: 0.9924
Epoch 5/8
 - 24s - loss: 0.0537 - acc: 0.9832 - val_loss: 0.0149 - val_acc: 0.9958
Epoch 6/8
 - 25s - loss: 0.0479 - acc: 0.9851 - val_loss: 0.0192 - val_acc: 0.9935
Epoch 7/8
 - 25s - loss: 0.0501 - acc: 0.9851 - val_loss: 0.0158 - val_acc: 0.9952
Epoch 8/8
 - 25s - loss: 0.0463 - acc: 0.9859 - val_loss: 0.0184 - val_acc: 0.9947


<keras.callbacks.History at 0x7f86d9c87f60>

## Ensembling

In [88]:
def fit_model():
    model = get_model_bn_dropout(0.5)
    model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=1, verbose=0,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))
    model.optimizer.lr = 0.1
    model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=4, verbose=0,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))
    model.optimizer.lr = 0.01
    model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=8, verbose=0,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))
    model.optimizer.lr = 0.001
    model.fit_generator(batches, steps_per_epoch=ceil(batches.n/batches.batch_size), epochs=12, verbose=0,
                validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))
    return model

In [89]:
models = [fit_model() for i in range(6)]

In [90]:
path = '../data/mnist/'
model_path = path + 'models/'
if not os.path.exists(model_path): os.makedirs(model_path)

In [91]:
for i, mdl in enumerate(models):
    mdl.save_weights(model_path+'cnn-mnist-aws-prac2'+ str(i)+'.h5')

In [92]:
# Load Weights
models = []
for i in range(6):
    model = get_model_bn_dropout(0.5)
    model.load_weights(model_path+'cnn-mnist-aws-prac2'+ str(i)+'.h5')
    models.append(model)

In [93]:
evals = np.array([mdl.evaluate(X_test, y_test, batch_size=batch_size*4, verbose=2) for mdl in models])


  256/10000 [..............................] - ETA: 35s
  768/10000 [=>............................] - ETA: 11s
 1280/10000 [==>...........................] - ETA: 7s 
 1792/10000 [====>.........................] - ETA: 5s
 2304/10000 [=====>........................] - ETA: 3s

  256/10000 [..............................] - ETA: 31s
 1024/10000 [==>...........................] - ETA: 7s 
 1792/10000 [====>.........................] - ETA: 4s

  256/10000 [..............................] - ETA: 33s
 1024/10000 [==>...........................] - ETA: 8s 
 1792/10000 [====>.........................] - ETA: 4s

  256/10000 [..............................] - ETA: 30s
 1024/10000 [==>...........................] - ETA: 7s 
 1792/10000 [====>.........................] - ETA: 4s

  256/10000 [..............................] - ETA: 32s
 1024/10000 [==>...........................] - ETA: 8s 
 1792/10000 [====>.........................] - ETA: 4s

  256/10000 [..............................] - E

In [94]:
evals.mean(axis=0)

array([ 0.01354901,  0.99576667])

In [96]:
all_preds = np.stack([mdl.predict(X_test, batch_size=batch_size*4, verbose=2) for mdl in models])

In [97]:
avg_preds = all_preds.mean(axis=0)

In [183]:
def eval_accuracy(test_labels, preds):
    #keras.metrics.categorical_accuracy(y_test, avg_preds).eval()
    
    # https://blog.keras.io/keras-as-a-simplified-interface-to-tensorflow-tutorial.html
    with sess.as_default():
        eval_result = accuracy(test_labels, preds).eval()
    return eval_result.mean()

In [184]:
eval_accuracy(y_test, avg_preds)

0.99720001

## Pseudo-labeling

In [174]:
class MixIterator(object):
    
    def __init__(self, iters):
        self.iters = iters
        self.n = sum([itr.n for itr in self.iters])
        self.batch_size = sum([itr.batch_size for itr in self.iters])
    
    def reset(self):
        for itr in self.iters: itr.reset()
    
    def __iter__(self):
        return self
    
    def __next__(self, *args, **kwargs):
        nexts = [next(itr) for itr in self.iters]
        n0 = np.concatenate([n[0] for n in nexts])
        n1 = np.concatenate([n[1] for n in nexts])
        return (n0, n1)

In [175]:
avg_preds.shape, y_test.shape

((10000, 10), (10000, 10))

In [176]:
gen_aug = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, height_shift_range=0.08, 
                         shear_range=0.3, zoom_range=0.08)
gen = image.ImageDataGenerator()
batches = gen_aug.flow(X_train, y_train, batch_size=batch_size)
pseudolabel_batches = gen.flow(X_test, avg_preds, batch_size=batch_size//4)

In [177]:
mix_pseudolabel_batches = MixIterator([batches, pseudolabel_batches])

In [178]:
mix_pseudolabel_batches.n, mix_pseudolabel_batches.batch_size

(70000, 80)

In [179]:
model = get_model_bn_dropout(0.5)

In [180]:
model.fit_generator(mix_pseudolabel_batches, steps_per_epoch=ceil(mix_pseudolabel_batches.n/mix_pseudolabel_batches.batch_size), 
                    epochs=2, verbose=2,
                    validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/2
 - 30s - loss: 0.1930 - acc: 0.9421 - val_loss: 0.0205 - val_acc: 0.9930
Epoch 2/2
 - 25s - loss: 0.0790 - acc: 0.9764 - val_loss: 0.0147 - val_acc: 0.9947


<keras.callbacks.History at 0x7f868c7532b0>

In [181]:
model.optimizer.lr = 0.1

In [182]:
model.fit_generator(mix_pseudolabel_batches, steps_per_epoch=ceil(mix_pseudolabel_batches.n/mix_pseudolabel_batches.batch_size), 
                    epochs=8, verbose=2,
                    validation_data=test_batches, validation_steps=(test_batches.n/test_batches.batch_size))

Epoch 1/8
 - 25s - loss: 0.0637 - acc: 0.9808 - val_loss: 0.0159 - val_acc: 0.9951
Epoch 2/8
 - 25s - loss: 0.0558 - acc: 0.9834 - val_loss: 0.0138 - val_acc: 0.9959
Epoch 3/8
 - 26s - loss: 0.0524 - acc: 0.9844 - val_loss: 0.0169 - val_acc: 0.9952
Epoch 4/8
 - 26s - loss: 0.0510 - acc: 0.9855 - val_loss: 0.0203 - val_acc: 0.9939
Epoch 5/8
 - 26s - loss: 0.0440 - acc: 0.9868 - val_loss: 0.0124 - val_acc: 0.9962
Epoch 6/8
 - 25s - loss: 0.0456 - acc: 0.9872 - val_loss: 0.0147 - val_acc: 0.9957
Epoch 7/8
 - 25s - loss: 0.0405 - acc: 0.9885 - val_loss: 0.0139 - val_acc: 0.9959
Epoch 8/8
 - 25s - loss: 0.0424 - acc: 0.9878 - val_loss: 0.0147 - val_acc: 0.9964


<keras.callbacks.History at 0x7f868b801dd8>

#### Pseudo-labeling: Finally after you are confident, you even can add validation dataset into Pseudo Labeling and hope it would improves