In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Fetch MNIST through the Keras dataset helper; load_data() yields a
# (train, test) pair, each of which is an (images, labels) pair.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Report how many examples landed in each split.
n_train = len(X_train)
n_test = len(X_test)
print("The MNIST dataset has a training size of %d examples" % n_train)
print("The MNIST dataset has a test size of %d examples" % n_test)

The MNIST dataset has a training size of 60000 examples
The MNIST dataset has a test size of 10000 examples


In [3]:
# Rescale pixel values by 1/255 and store them as float32.
# The integer-dtype guard makes this cell safe to re-run: once the arrays are
# float32 they are left alone instead of being divided by 255 a second time.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train.astype('float32') / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype('float32') / 255

# Sanity-check the array shape and the number of examples per split.
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


X_train shaoe: (60000, 28, 28)
60000 train smaples
10000 test smaples


In [4]:
# One-hot encode the digit labels for use with categorical cross-entropy.
# to_categorical is taken from tf.keras (already imported as `tf`) rather than
# the standalone `keras.utils.np_utils` module, which is not available in
# recent Keras releases and mixes two different Keras packages in one notebook.
num_classes = 10

# Labels start out as a 1-D integer vector; after encoding they are 2-D.
# Guarding on ndim keeps the cell idempotent: re-running it will not try to
# one-hot encode labels that are already one-hot.
if y_train.ndim == 1:
    # print first ten (integer-valued) training labels
    print('Integer-valued labels:')
    print(y_train[:10])

    # convert class vectors to binary class matrices
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)

# print first ten (one-hot) training labels
print('One-hot labels:')
print(y_train[:10])

Integer-valued labels:
[5 0 4 1 9 2 1 3 1 4]
One-hot labels:
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


In [5]:
# Per-image shape expected by the convolutional layers: height, width, and a
# single (grayscale) channel.
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

# Give every image an explicit trailing channel axis while keeping the number
# of examples in each split unchanged.
X_train = X_train.reshape((X_train.shape[0],) + input_shape)
X_test = X_test.reshape((X_test.shape[0],) + input_shape)

print('input_shape: ', input_shape)
print('x_train shape:', X_train.shape)

input_shape:  (28, 28, 1)
x_train shape: (60000, 28, 28, 1)


In [10]:
## Model 1
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D,BatchNormalization,Activation

# Fully convolutional classifier: a stack of small 3x3 convolutions (default
# 'valid' padding, so each one trims 2 pixels per spatial dimension), a single
# 2x2 max-pool, and a final convolution that collapses the remaining feature
# map into one score per class. There are no Dense layers.
# Relies on `input_shape` and `num_classes` defined in the earlier cells.
model = Sequential()

# CONV_1: 8 kernels of 3x3 with ReLU; 28x28 -> 26x26
model.add(Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(BatchNormalization())

# CONV_2: 26x26 -> 24x24
model.add(Conv2D(8, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.1))

# CONV_3: 24x24 -> 22x22
model.add(Conv2D(8, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.1))

# CONV_4 followed by POOL_1: 22x22 -> 20x20, then 2x2 max-pooling -> 10x10
model.add(Conv2D(8, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# CONV_5: 10x10 -> 8x8
model.add(Conv2D(8, (3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.1))

# CONV_6 / CONV_7: widen to 16 channels; 8x8 -> 6x6 -> 4x4
model.add(Conv2D(16, (3, 3), activation='relu'))
model.add(Conv2D(16, (3, 3), activation='relu'))
# CONV_8: 1x1 convolution mixes channels without changing the 4x4 spatial size
model.add(Conv2D(16, (1, 1), activation='relu'))

# Output convolution: a 4x4 kernel over the 4x4 map yields 1x1 x num_classes.
# The kernel size is passed by keyword on purpose: the earlier positional form
# Conv2D(10, 4, 4) is read by tf.keras as kernel_size=4, strides=4, which only
# produced the same 1x1 output because the feature map is exactly 4x4 here.
# No activation: the raw scores go to the softmax below.
model.add(Conv2D(num_classes, kernel_size=(4, 4)))

# Flatten the 1x1 x num_classes map into a vector of num_classes scores.
model.add(Flatten())

# Softmax turns the per-class scores into output probabilities.
model.add(Activation('softmax'))

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_16 (Conv2D)           (None, 26, 26, 8)         80        
_________________________________________________________________
batch_normalization_8 (Batch (None, 26, 26, 8)         32        
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 24, 24, 8)         584       
_________________________________________________________________
batch_normalization_9 (Batch (None, 24, 24, 8)         32        
_________________________________________________________________
dropout_6 (Dropout)          (None, 24, 24, 8)         0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 22, 22, 8)         584       
_________________________________________________________________
batch_normalization_10 (Batc (None, 22, 22, 8)        

In [11]:
# Attach the training configuration to the model: Adam as the optimizer,
# categorical cross-entropy as the loss (labels are one-hot vectors), and
# accuracy as the reported metric.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [12]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Checkpoint callback: with save_best_only=True the file is only overwritten
# when the monitored quantity improves (the training log reports val_loss).
checkpointer = ModelCheckpoint(
    filepath='model.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# Fit for 20 epochs in shuffled mini-batches of 32 and keep the returned
# History object in `hist`.
# NOTE(review): the test split is passed as validation data, so the "best"
# checkpoint is selected on the test set. Consider a separate validation split.
hist = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=32,
    shuffle=True,
    callbacks=[checkpointer],
    verbose=1,
)

Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.07768, saving model to model.weights.best.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 0.07768 to 0.06029, saving model to model.weights.best.hdf5
Epoch 3/20

Epoch 00003: val_loss improved from 0.06029 to 0.05345, saving model to model.weights.best.hdf5
Epoch 4/20

Epoch 00004: val_loss improved from 0.05345 to 0.04364, saving model to model.weights.best.hdf5
Epoch 5/20

Epoch 00005: val_loss improved from 0.04364 to 0.03381, saving model to model.weights.best.hdf5
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.03381
Epoch 7/20

Epoch 00007: val_loss did not improve from 0.03381
Epoch 8/20

Epoch 00008: val_loss did not improve from 0.03381
Epoch 9/20

Epoch 00009: val_loss did not improve from 0.03381
Epoch 10/20

Epoch 00010: val_loss did not improve from 0.03381
Epoch 11/20

Epoch 00011: val_loss improved from 0.03381 to 0.03322, saving model to model.weights.best.hdf5
Epoch 12/20

Epoch 00012: val_loss impr