## Data preparation

In [1]:
from mnist import MNIST
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
mndata = MNIST('data')

In [3]:
train_x, train_y = mndata.load_training()
test_x, test_y = mndata.load_testing()

In [4]:
set(train_y)

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

In [5]:
def normalize(data):
    return (np.array(data) - 128) / 128

In [6]:
train_x = normalize(train_x)
test_x = normalize(test_x)

In [7]:
def change_to_2d(data):
    return data.reshape(data.shape[0], 28, 28, 1)

In [8]:
train_x = change_to_2d(train_x)
test_x = change_to_2d(test_x)

In [9]:
def add_padding(data):
    return np.array([np.pad(x, (1,1), 'constant', constant_values = -1) for x in data])

In [10]:
train_x = add_padding(train_x)
test_x = add_padding(test_x)

In [11]:
def one_hot_encode(data):
    a = np.array(data)
    b = np.zeros((a.size, a.max()+1))
    b[np.arange(a.size),a] = 1
    return b

In [12]:
train_y = one_hot_encode(train_y)
test_y = one_hot_encode(test_y)

In [13]:
print(train_x.shape, train_y.shape)

(60000, 30, 30, 3) (60000, 10)


In [14]:
print(test_x.shape, test_y.shape)

(10000, 30, 30, 3) (10000, 10)


## Model

In [15]:
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, Flatten, MaxPooling2D
from tensorflow.nn import relu, elu
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.losses import *
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.contrib.layers import xavier_initializer

In [16]:
initializer = xavier_initializer()

In [17]:
    def initialize_model(activation_f, if_maxpooling, metric, optimizer):
        inputs = Input(shape = train_x.shape[1:])

        conv2d_1 = Conv2D(
            filters = 32,
            kernel_size = (3,3),
            strides = 1,
            activation = activation_f,
            kernel_initializer = initializer,
        )(inputs)
        batch_norm_1 = BatchNormalization()(conv2d_1)
        if not if_maxpooling:
            conv2d_2 = Conv2D(
                filters = 8,
                kernel_size = (3,3),
                strides = 2,
                activation = activation_f,
                kernel_initializer = initializer,
            )(batch_norm_1)
            batch_norm_2 = BatchNormalization()(conv2d_2)
            flatten = Flatten()(batch_norm_2)
        else:
            conv2d_2 = Conv2D(
                filters = 8,
                kernel_size = (3,3),
                strides = 1,
                activation = activation_f,
                kernel_initializer = initializer,
            )(batch_norm_1)
            batch_norm_2 = BatchNormalization()(conv2d_2)
            maxpool_1 = MaxPooling2D(
                pool_size = (3, 3),
                strides = 2
            )(batch_norm_2)
            flatten = Flatten()(maxpool_1)

        fully_connected_1 = Dense(
            256,
        )(flatten)
        batch_norm_3 = BatchNormalization()(fully_connected_1)
        fully_connected_2 = Dense(
            64,
            activation = activation_f
        )(batch_norm_3)
        batch_norm_4 = BatchNormalization()(fully_connected_2)

        outputs = Dense(10)(batch_norm_4)

        model = Model(
            inputs = inputs,
            outputs = outputs
        )

        if optimizer == RMSprop:
            model.compile(
                loss = mean_squared_error, # log_loss
                optimizer = RMSprop(0.001),
                metrics = [metric]
            )
        else:
            model.compile(
                loss = mean_squared_error, # log_loss
                optimizer = Adam(0.001, 0.9, 0.999),
                metrics = [metric]
            )

        return model

In [19]:
metric = 'accuracy'
model = initialize_model(elu, True, metric, RMSprop)

W0814 12:36:07.652720 139697052030720 deprecation.py:506] From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0814 12:36:07.863226 139697052030720 deprecation.py:323] From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [20]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 30, 30, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 32)        896       
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 32)        128       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 26, 26, 8)         2312      
_________________________________________________________________
batch_normalization_1 (Batch (None, 26, 26, 8)         32        
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 8)         0         
_________________________________________________________________
flatten (Flatten)            (None, 1152)              0     

In [21]:
model.predict(test_x[:3])

array([[ 0.47337604,  0.41172054, -0.9077127 ,  0.32523066, -0.50798476,
         0.02040365,  0.34365672,  0.5439178 , -0.03887302,  0.62110186],
       [ 0.3366934 , -0.00316644, -1.0535611 ,  0.9356792 ,  0.06846376,
         0.5025847 ,  0.15332343,  0.40786284,  0.07333498,  0.366526  ],
       [ 1.1479437 ,  0.53493   , -1.0955286 ,  0.3334712 , -0.4620191 ,
         0.00872223,  0.26178268,  0.6620734 , -0.42056897,  0.6678951 ]],
      dtype=float32)

In [22]:
early_stop = EarlyStopping(monitor='val_acc', patience=10)

In [23]:
from itertools import product
models_list = list(product([relu, elu], [True, False], [Adam, RMSprop]))

In [None]:
results = []
for i, model_params in enumerate(models_list):
    model = initialize_model(model_params[0], model_params[1], metric, model_params[2])
    history = model.fit(
        train_x, train_y,
        epochs = 30,
        validation_split = 0.2, 
        verbose = 0,
        callbacks = [early_stop],
    )
    results.append(history)
    pd.DataFrame(history.history).to_csv('history_'+str(i))
    pd.DataFrame([np.where(x == max(x))[0][0] for x in model.predict(test_x)]).to_csv('results_'+str(i))
    model.save('model_'+str(i))

In [None]:
def plot_history(history):
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    plt.figure()
    plt.xlabel('Epoch')
    plt.plot(hist['epoch'], hist['acc'],
           label='Train Error')
    plt.plot(hist['epoch'], hist['val_acc'],
           label = 'Val Error')
    m = max(max([history.history['acc'] + history.history['val_acc']]))
    plt.ylim([0,m * 1.1])
    plt.legend()

In [None]:
plot_history(history)

In [None]:
model.evaluate(test_x, test_y, verbose=0)[1]

In [None]:
real = np.where(test_y == np.max(test_y))[1]

In [None]:
pred = [np.where(x == max(x))[0][0] for x in model.predict(test_x)]

In [None]:
def backup_prev():
    return {
        'history': pd.DataFrame(history.history),
        'eval': model.evaluate(test_x, test_y, verbose=0)[1]
    }

In [None]:
#prev = backup_prev()