## Data preparation

In [1]:
from mnist import MNIST
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
mndata = MNIST('data')

In [3]:
train_x, train_y = mndata.load_training()
test_x, test_y = mndata.load_testing()

In [4]:
set(train_y)

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

In [5]:
def normalize(data):
    return (np.array(data) - 128) / 128

In [6]:
train_x = normalize(train_x)
test_x = normalize(test_x)

In [7]:
def change_to_2d(data):
    return data.reshape(data.shape[0], 28, 28, 1)

In [8]:
train_x = change_to_2d(train_x)
test_x = change_to_2d(test_x)

In [9]:
def add_padding(data):
    return np.array([np.pad(x, (1,1), 'constant', constant_values = -1) for x in data])

In [10]:
train_x = add_padding(train_x)
test_x = add_padding(test_x)

In [11]:
def one_hot_encode(data):
    a = np.array(data)
    b = np.zeros((a.size, a.max()+1))
    b[np.arange(a.size),a] = 1
    return b

In [12]:
train_y = one_hot_encode(train_y)
test_y = one_hot_encode(test_y)

In [13]:
print(train_x.shape, train_y.shape)

(60000, 30, 30, 3) (60000, 10)


In [14]:
print(test_x.shape, test_y.shape)

(10000, 30, 30, 3) (10000, 10)


## Model

In [15]:
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, Flatten
from tensorflow.nn import relu, elu
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.losses import *
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.contrib.layers import xavier_initializer

In [16]:
initializer = xavier_initializer()

In [17]:
inputs = Input(shape = train_x.shape[1:])

conv2d_1 = Conv2D(
    filters = 64,
    kernel_size = (3,3),
    strides = 1,
    activation = relu,
    kernel_initializer = initializer,
    bias_initializer = initializer
)(inputs)
batch_normalization_1 = BatchNormalization()(conv2d_1)
conv2d_2 = Conv2D(
    filters = 32,
    kernel_size = (3,3),
    strides = 2,
    activation = relu,
    kernel_initializer = initializer,
    bias_initializer = initializer
)(batch_normalization_1)
batch_normalization_2 = BatchNormalization()(conv2d_2)
conv2d_3 = Conv2D(
    filters = 16,
    kernel_size = (3,3),
    strides = 2,
    activation = relu,
    kernel_initializer = initializer,
    bias_initializer = initializer
)(batch_normalization_2)
batch_normalization_3 = BatchNormalization()(conv2d_3)

flatten = Flatten()(batch_normalization_3)
fully_connected_1 = Dense(128)(flatten)
batch_normalization_4 = BatchNormalization()(fully_connected_1)
fully_connected_2 = Dense(64)(batch_normalization_4)
batch_normalization_5 = BatchNormalization()(fully_connected_2)

outputs = Dense(10)(batch_normalization_5)

W0809 17:00:12.445380 139966145050432 deprecation.py:506] From /home/jacek/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [18]:
model = Model(
    inputs = inputs,
    outputs = outputs
)

In [19]:
metric = 'accuracy'

In [20]:
model.compile(
    loss = mean_squared_error,
    optimizer = RMSprop(0.001), # Adam(0.001, 0.9, 0.999)
    metrics = [metric]
)

W0809 17:00:12.901677 139966145050432 deprecation.py:323] From /home/jacek/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/losses/losses_impl.py:121: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [21]:
model.predict(test_x[:3])

array([[ 0.6890665 , -1.145865  ,  1.0728432 ,  0.02210943, -0.14071453,
        -0.52005154, -0.8046337 , -0.12531132, -0.00878625, -0.03268802],
       [ 0.64394885, -1.0754912 ,  1.0047871 ,  0.1045613 , -0.11363176,
        -0.42558044, -0.726297  , -0.04619647,  0.08751945, -0.15806879],
       [ 0.68167865, -1.1679993 ,  1.0500405 ,  0.03809807, -0.15459265,
        -0.5143209 , -0.766315  , -0.17588109,  0.05885762, -0.08421794]],
      dtype=float32)

In [22]:
early_stop = EarlyStopping(monitor='val_loss', patience=5)

In [23]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 30, 30, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 64)        1792      
_________________________________________________________________
batch_normalization (BatchNo (None, 28, 28, 64)        256       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 32)        18464     
_________________________________________________________________
batch_normalization_1 (Batch (None, 13, 13, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 6, 6, 16)          4624      
_________________________________________________________________
batch_normalization_2 (Batch (None, 6, 6, 16)          64    

In [None]:
history = model.fit(
    train_x, train_y,
    epochs = 30,
    validation_split = 0.2, 
    verbose = 0,
    callbacks = [early_stop],
)

In [None]:
def plot_history(history):
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean Abs Error [MPG]')
    plt.plot(hist['epoch'], hist['acc'],
           label='Train Error')
    plt.plot(hist['epoch'], hist['val_acc'],
           label = 'Val Error')
    m = max(max([history.history['acc'] + history.history['val_acc']]))
    plt.ylim([0,m * 1.1])
    plt.legend()

In [None]:
plot_history(history)

In [None]:
model.evaluate(test_x, test_y, verbose=0)[1]

In [None]:
real = np.where(test_y == np.max(test_y))[1]

In [None]:
pred = [np.where(x == max(x))[0][0] for x in model.predict(test_x)]