In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


In [2]:
# Handle to the MNIST dataset module bundled with tf.keras.
mnist = tf.keras.datasets.mnist

# load_data() hands back two (images, labels) pairs: the training
# partition first, then the test partition.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Report how many examples each partition contains.
print("The MNIST dataset has a training size of %d examples" % X_train.shape[0])
print("The MNIST dataset has a test size of %d examples" % X_test.shape[0])

The MNIST dataset has a training size of 60000 examples
The MNIST dataset has a test size of 10000 examples


In [3]:
# Cast both image arrays to float32 and divide by 255.
# NOTE: X_train / X_test are rebound here, so running this cell again
# without first re-running the loading cell divides by 255 a second time.
X_train, X_test = (
    images.astype('float32') / 255 for images in (X_train, X_test)
)

# Quick sanity report on the array shape and the partition sizes.
print('X_train shaoe:', X_train.shape)
print(len(X_train), 'train smaples')
print(len(X_test), 'test smaples')


X_train shaoe: (60000, 28, 28)
60000 train smaples
10000 test smaples


In [4]:
# One-hot encode the digit labels.
#
# The original cell imported `np_utils` from the standalone `keras` package.
# That mixes two Keras distributions in one notebook and fails on recent
# Keras releases, where `keras.utils.np_utils` can no longer be imported.
# The public `to_categorical` exposed by the already-imported `tf` module is
# used instead, so no extra import is required.
#
# NOTE: y_train / y_test are rebound below, so this cell is meant to be run
# once per data load.

# number of target classes (digits 0-9)
num_classes = 10

# print first ten (integer-valued) training labels
print('Integer-valued labels:')
print(y_train[:10])

# convert class vectors to binary (one-hot) class matrices
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

# print first ten (one-hot) training labels
print('One-hot labels:')
print(y_train[:10])

Integer-valued labels:
[5 0 4 1 9 2 1 3 1 4]
One-hot labels:
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [5]:
# Per-image geometry: 28 x 28 pixels with a single channel.
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

# Reshape each partition to (n_examples, rows, cols, 1) so every image
# carries an explicit trailing channel axis.
X_train = X_train.reshape((X_train.shape[0],) + input_shape)
X_test = X_test.reshape((X_test.shape[0],) + input_shape)

print('input_shape: ', input_shape)
print('x_train shape:', X_train.shape)

input_shape:  (28, 28, 1)
x_train shape: (60000, 28, 28, 1)


In [6]:
## Model 1
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv2D,
    MaxPooling2D,
    Flatten,
    Dense,
    Dropout,
    GlobalAveragePooling2D,
)

# Baseline CNN: two conv + pool stages feeding a small dense classifier.
# Layers are listed in the order they are stacked.
model = Sequential([
    # CONV_1: 32 kernels of 3x3 with ReLU; 'same' padding keeps 28x28
    Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu',
           input_shape=(28, 28, 1)),
    # POOL_1: 2x2 max pooling halves the spatial size (28x28 -> 14x14)
    MaxPooling2D(pool_size=(2, 2)),

    # CONV_2: depth increased to 64 kernels
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    # POOL_2: second downsampling step (14x14 -> 7x7)
    MaxPooling2D(pool_size=(2, 2)),

    # collapse the feature maps into one vector per image
    Flatten(),

    # FC_1: fully connected hidden layer with 64 units
    Dense(64, activation='relu'),

    # FC_2: softmax over the 10 digit classes
    Dense(10, activation='softmax'),
])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 3136)              0         
_________________________________________________________________
dense (Dense)                (None, 64)                200768    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                6

In [7]:
# Configure training for Model 1: RMSprop optimizer, categorical
# cross-entropy loss (the labels were one-hot encoded earlier), and
# accuracy reported as the metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback: with save_best_only=True the file is rewritten only
# when an epoch improves on the best value seen so far (the training log
# reports this against val_loss).
checkpointer = ModelCheckpoint(
    filepath='model.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# Train Model 1 for 12 epochs with mini-batches of 32, one log line per epoch.
# NOTE(review): the test split is passed as validation data, so the
# checkpoint that is kept is selected on test data; consider validating on
# a held-out slice of the training set instead.
# NOTE: `hist` is rebound by every later training cell, so it only ever
# holds the most recent run's history object.
hist = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=12,
    shuffle=True,
    verbose=2,
    callbacks=[checkpointer],
    validation_data=(X_test, y_test),
)

Epoch 1/12
1875/1875 - 13s - loss: 0.1374 - accuracy: 0.9577 - val_loss: 0.0588 - val_accuracy: 0.9814

Epoch 00001: val_loss improved from inf to 0.05878, saving model to model.weights.best.hdf5
Epoch 2/12
1875/1875 - 9s - loss: 0.0437 - accuracy: 0.9865 - val_loss: 0.0480 - val_accuracy: 0.9843

Epoch 00002: val_loss improved from 0.05878 to 0.04797, saving model to model.weights.best.hdf5
Epoch 3/12
1875/1875 - 9s - loss: 0.0316 - accuracy: 0.9905 - val_loss: 0.0307 - val_accuracy: 0.9888

Epoch 00003: val_loss improved from 0.04797 to 0.03071, saving model to model.weights.best.hdf5
Epoch 4/12
1875/1875 - 8s - loss: 0.0249 - accuracy: 0.9930 - val_loss: 0.0308 - val_accuracy: 0.9900

Epoch 00004: val_loss did not improve from 0.03071
Epoch 5/12
1875/1875 - 9s - loss: 0.0203 - accuracy: 0.9942 - val_loss: 0.0258 - val_accuracy: 0.9914

Epoch 00005: val_loss improved from 0.03071 to 0.02584, saving model to model.weights.best.hdf5
Epoch 6/12
1875/1875 - 9s - loss: 0.0168 - accuracy: 

In [9]:
## Model 2
# NOTE: this second architecture is bound to the name `model_1`, and the
# training cell that follows compiles and checkpoints it under that name.
# The name is kept so those references stay valid.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D

# build the model object
model_1 = Sequential()

# CONV block 1: 32 then 64 kernels of 3x3 with ReLU.
# Only the first layer declares the input shape; the original also passed
# input_shape to the second Conv2D, where it has no effect because that
# layer takes its input from the layer before it.
model_1.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)))
# no padding argument here, so the spatial size shrinks from 28x28 to 26x26
model_1.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
# POOL_1: 2x2 max pooling (26x26 -> 13x13)
model_1.add(MaxPooling2D(pool_size=(2, 2)))

# CONV block 2: 64 then 128 kernels, 'same' padding keeps 13x13
model_1.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model_1.add(Conv2D(128, (3, 3), padding='same', activation='relu'))
# POOL_2: more downsampling (13x13 -> 6x6)
model_1.add(MaxPooling2D(pool_size=(2, 2)))

# flatten the feature maps into one vector per image for the dense layers
model_1.add(Flatten())

# FC_1: fully connected hidden layer with 64 units
model_1.add(Dense(64, activation='relu'))

# FC_2: softmax output giving probabilities for the 10 classes
model_1.add(Dense(10, activation='softmax'))

model_1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 26, 26, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 13, 13, 64)        36928     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 13, 13, 128)       73856     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 4608)             

In [10]:
# Configure training for `model_1` (the "Model 2" architecture): RMSprop
# optimizer, categorical cross-entropy loss, accuracy metric.
model_1.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Rewrite the checkpoint file only when an epoch beats the best value so far.
# ModelCheckpoint is imported in the Model 1 training cell.
checkpointer = ModelCheckpoint(
    filepath='model_1.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# Train for 10 epochs with mini-batches of 64.
# NOTE(review): the test split doubles as validation data, so checkpoint
# selection is done on test data.
# NOTE: `hist` is rebound here, replacing the previous run's history object.
hist = model_1.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    shuffle=True,
    verbose=1,
    callbacks=[checkpointer],
    validation_data=(X_test, y_test),
)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.03303, saving model to model_1.weights.best.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 0.03303 to 0.03094, saving model to model_1.weights.best.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 0.03094 to 0.03008, saving model to model_1.weights.best.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 0.03008 to 0.02732, saving model to model_1.weights.best.hdf5
Epoch 5/10

Epoch 00005: val_loss improved from 0.02732 to 0.02358, saving model to model_1.weights.best.hdf5
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.02358
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.02358
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.02358
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.02358
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.02358


In [11]:
## Model 3
# Sequential and the layer classes are imported in the Model 1 / Model 2
# cells, so they are not imported again here.

# build the model object
model_3 = Sequential()

# CONV block 1: three 3x3 ReLU convolutions with 32, 64 and 128 kernels.
# 'same' padding keeps the 28x28 spatial size throughout the block.
# Only the first layer declares the input shape; the original repeated
# input_shape on the 2nd and 3rd Conv2D, where it has no effect because
# those layers take their input from the layer before them.
model_3.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)))
model_3.add(Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'))
model_3.add(Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'))
# POOL_1: 2x2 max pooling (28x28 -> 14x14)
model_3.add(MaxPooling2D(pool_size=(2, 2)))

# CONV block 2: depth drops back to 32, then grows to 64 and 128
model_3.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model_3.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
# no padding argument on the last convolution: 14x14 shrinks to 12x12
model_3.add(Conv2D(128, (3, 3), activation='relu'))
# POOL_2: more downsampling
model_3.add(MaxPooling2D(pool_size=(2, 2)))

# flatten the feature maps into one vector per image for the dense layers
model_3.add(Flatten())

# FC_1: fully connected hidden layer with 64 units
model_3.add(Dense(64, activation='relu'))

# FC_2: softmax output giving probabilities for the 10 classes
model_3.add(Dense(10, activation='softmax'))

model_3.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 28, 28, 128)       73856     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 14, 14, 128)       0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 14, 14, 32)        36896     
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 12, 12, 128)      

In [12]:
# Configure training for Model 3: RMSprop optimizer, categorical
# cross-entropy loss, accuracy metric.
model_3.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Rewrite the checkpoint file only when an epoch beats the best value so far.
# ModelCheckpoint is imported in the Model 1 training cell.
checkpointer = ModelCheckpoint(
    filepath='model_3.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# Train for 10 epochs with mini-batches of 64.
# NOTE(review): the test split doubles as validation data, so checkpoint
# selection is done on test data.
# NOTE: `hist` is rebound here, replacing the previous run's history object.
hist = model_3.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    shuffle=True,
    verbose=1,
    callbacks=[checkpointer],
    validation_data=(X_test, y_test),
)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.03356, saving model to model_3.weights.best.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 0.03356 to 0.02935, saving model to model_3.weights.best.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 0.02935 to 0.02702, saving model to model_3.weights.best.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 0.02702 to 0.02210, saving model to model_3.weights.best.hdf5
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.02210
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.02210
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.02210
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.02210
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.02210
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.02210


In [13]:
## Model 4
# Sequential and the layer classes are imported in the Model 1 / Model 2
# cells, so they are not imported again here.

# build the model object
model_4 = Sequential()

# CONV block 1: three 3x3 ReLU convolutions with 32, 64 and 128 kernels.
# 'same' padding keeps the 28x28 spatial size throughout the block.
# Only the first layer declares the input shape; the original repeated
# input_shape on the 2nd and 3rd Conv2D, where it has no effect because
# those layers take their input from the layer before them.
model_4.add(Conv2D(32, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)))
model_4.add(Conv2D(64, kernel_size=(3, 3), padding='same', activation='relu'))
model_4.add(Conv2D(128, kernel_size=(3, 3), padding='same', activation='relu'))
# POOL_1: 2x2 max pooling (28x28 -> 14x14)
model_4.add(MaxPooling2D(pool_size=(2, 2)))

# CONV block 2: depth drops back to 32, then grows to 64 and 128.
# Unlike Model 3, every convolution here uses 'same' padding, so the
# spatial size stays 14x14 until the pooling layer.
model_4.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
model_4.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model_4.add(Conv2D(128, (3, 3), padding='same', activation='relu'))
# POOL_2: more downsampling
model_4.add(MaxPooling2D(pool_size=(2, 2)))

# flatten the feature maps into one vector per image for the dense layers
model_4.add(Flatten())

# FC_1: fully connected hidden layer with 64 units
model_4.add(Dense(64, activation='relu'))

# FC_2: softmax output giving probabilities for the 10 classes
model_4.add(Dense(10, activation='softmax'))

model_4.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 28, 28, 64)        18496     
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 28, 28, 128)       73856     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 14, 14, 128)       0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 14, 14, 32)        36896     
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 14, 14, 64)        18496     
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 14, 14, 128)      

In [14]:
# Configure training for Model 4: RMSprop optimizer, categorical
# cross-entropy loss, accuracy metric.
model_4.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Rewrite the checkpoint file only when an epoch beats the best value so far.
# ModelCheckpoint is imported in the Model 1 training cell.
checkpointer = ModelCheckpoint(
    filepath='model_4.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# Train for 10 epochs with mini-batches of 64.
# NOTE(review): the test split doubles as validation data, so checkpoint
# selection is done on test data.
# NOTE: `hist` is rebound here, replacing the previous run's history object.
hist = model_4.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    shuffle=True,
    verbose=1,
    callbacks=[checkpointer],
    validation_data=(X_test, y_test),
)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.07183, saving model to model_4.weights.best.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 0.07183 to 0.02916, saving model to model_4.weights.best.hdf5
Epoch 3/10

Epoch 00003: val_loss did not improve from 0.02916
Epoch 4/10

Epoch 00004: val_loss improved from 0.02916 to 0.02164, saving model to model_4.weights.best.hdf5
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.02164
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.02164
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.02164
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.02164
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.02164
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.02164


In [15]:
## Model 5
# Sequential and the layer classes are imported in the Model 1 / Model 2
# cells, so they are not imported again here.

# One unpadded convolution, then a three-convolution block, with an ELU
# dense layer before the softmax output. Layers are listed in stacking order.
model_5 = Sequential([
    # CONV_1: 32 kernels of 3x3 with ReLU; no padding, so 28x28 -> 26x26
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),

    # POOL_1: 2x2 max pooling (26x26 -> 13x13)
    MaxPooling2D(pool_size=(2, 2)),

    # CONV block 2: 32, 64, then 128 kernels; 'same' padding keeps 13x13
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    # POOL_2: second downsampling step (13x13 -> 6x6)
    MaxPooling2D(pool_size=(2, 2)),

    # collapse the feature maps into one vector per image
    Flatten(),

    # FC_1: 64-unit hidden layer; this model uses ELU here instead of ReLU
    Dense(64, activation='elu'),

    # FC_2: softmax over the 10 digit classes
    Dense(10, activation='softmax'),
])

model_5.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_18 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 13, 13, 32)        9248      
_________________________________________________________________
conv2d_20 (Conv2D)           (None, 13, 13, 64)        18496     
_________________________________________________________________
conv2d_21 (Conv2D)           (None, 13, 13, 128)       73856     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 6, 6, 128)         0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 4608)             

In [16]:
# Configure training for Model 5: RMSprop optimizer, categorical
# cross-entropy loss, accuracy metric.
model_5.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Rewrite the checkpoint file only when an epoch beats the best value so far.
# ModelCheckpoint is imported in the Model 1 training cell.
checkpointer = ModelCheckpoint(
    filepath='model_5.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# Train for 10 epochs with mini-batches of 64.
# NOTE(review): the test split doubles as validation data, so checkpoint
# selection is done on test data.
# NOTE: `hist` is rebound here, replacing the previous run's history object.
hist = model_5.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=10,
    shuffle=True,
    verbose=1,
    callbacks=[checkpointer],
    validation_data=(X_test, y_test),
)

Epoch 1/10

Epoch 00001: val_loss improved from inf to 0.03125, saving model to model_5.weights.best.hdf5
Epoch 2/10

Epoch 00002: val_loss did not improve from 0.03125
Epoch 3/10

Epoch 00003: val_loss improved from 0.03125 to 0.02909, saving model to model_5.weights.best.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 0.02909 to 0.02343, saving model to model_5.weights.best.hdf5
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.02343
Epoch 6/10

Epoch 00006: val_loss did not improve from 0.02343
Epoch 7/10

Epoch 00007: val_loss did not improve from 0.02343
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.02343
Epoch 9/10

Epoch 00009: val_loss did not improve from 0.02343
Epoch 10/10

Epoch 00010: val_loss did not improve from 0.02343
