In [1]:
# Basic imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Get data from keras datasets
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz


In [3]:
# Check shape of datasets

# Train
print('X train shape: ', x_train.shape)
print('Y train shape: ', y_train.shape)

# Test
print('X test shape: ', x_test.shape)
print('Y test shape: ', y_test.shape)

X train shape:  (60000, 28, 28)
Y train shape:  (60000,)
X test shape:  (10000, 28, 28)
Y test shape:  (10000,)


In [4]:
# Reshape channels to data

x_train = x_train.reshape(-1,28,28,1)
y_train = y_train.reshape(-1,1)

# Reshape y to categorical
from keras.utils.np_utils import to_categorical
y_train = to_categorical(y_train, num_classes=10)

print('X train shape: ', x_train.shape)
print('Y train shape: ', y_train.shape)

X train shape:  (60000, 28, 28, 1)
Y train shape:  (60000, 10)


In [5]:
# Import layers
from keras import backend as K

# Set to train phase and set that channels are last (=tensorflow backend)
K.set_learning_phase(1)
K.set_image_data_format('channels_last')

# GPU settings
if 'tensorflow' == K.backend():
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.allocator_type = 'BFC'
    config.gpu_options.visible_device_list = "0"
    set_session(tf.Session(config=config))

In [6]:
# Import layers etc
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization

from keras import optimizers

In [7]:
# Define model

model = Sequential()

model.add(Convolution2D(input_shape=(28,28,1), filters=4, kernel_size=5, strides=1))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Convolution2D(filters=8, kernel_size=3, strides=1))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Convolution2D(filters=16, kernel_size=3, strides=1))
model.add(BatchNormalization())
model.add(Activation('relu'))

model.add(Convolution2D(filters=16, kernel_size=3, strides=1))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(1,1)))

model.add(Flatten())
model.add(Dense(6*6*16, kernel_initializer='normal'))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256, kernel_initializer='normal'))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(128, kernel_initializer='normal'))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(10, kernel_initializer='normal'))
model.add(Activation('softmax'))

adam = optimizers.Adam(lr=0.05, decay=0.01)

model.compile(loss = 'categorical_crossentropy', optimizer = adam)

In [8]:
# Fit

from keras.callbacks import EarlyStopping

cb = [EarlyStopping(monitor='val_loss', min_delta=1e-06, patience=10)]

model.fit(x_train, y_train, epochs=300, batch_size=32, validation_split=0.1, callbacks=cb)

Train on 54000 samples, validate on 6000 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/30

Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Ep

Epoch 151/300
Epoch 152/300
Epoch 153/300


<keras.callbacks.History at 0x1e52054cf60>

In [10]:
from keras.models import load_model

# Save model
#model.save('C:/Users/Daniel/MNIST/mnist_cnn_first.h5')

# Load model

model = load_model('C:/Users/Daniel/MNIST/mnist_cnn_first.h5')

In [11]:
# Reshape channels to data

x_test = x_test.reshape(-1,28,28,1)
y_test = y_test.reshape(-1,1)

# Reshape y to categorical
y_test = to_categorical(y_test, num_classes=10)

print('X test shape: ', x_test.shape)
print('Y test shape: ', y_test.shape)

X test shape:  (10000, 28, 28, 1)
Y test shape:  (10000, 10)


In [13]:
# Out-sample predictions NN Model

y_test_hat = model.predict(x_test).astype('float64')

In [111]:
# binary predictions
y_hat_bin = (y_test_hat == y_test_hat.max(axis=1)[:,None]).astype(int)

In [175]:
# Accuracy metrics
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

conf_mat = []
for i in range(0,y_test.shape[1]):
    conf_mat.append(confusion_matrix(y_test[:,i],y_hat_bin[:,i]))
conf_mat = np.array(conf_mat)
    
print(accuracy_score(y_test, y_hat_bin))
print(f1_score(y_test, y_hat_bin, average='weighted'))
print(conf_mat)

0.9304
0.93093873835
[[[8991   29]
  [  32  948]]

 [[8844   21]
  [  32 1103]]

 [[8865  103]
  [  71  961]]

 [[8916   74]
  [  87  923]]

 [[8978   40]
  [  40  942]]

 [[9027   81]
  [ 108  784]]

 [[9022   20]
  [  44  914]]

 [[8941   31]
  [  86  942]]

 [[8810  216]
  [ 106  868]]

 [[8910   81]
  [  90  919]]]
