In [58]:
# Importing the required libraries

# For Data Manipulation
import numpy as np
import pandas as pd

# For Loading the data and train_test_split
from data_utils import load_data, load_labels
from sklearn.model_selection import train_test_split

# CNN Model
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import TensorBoard, ReduceLROnPlateau, CSVLogger
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, BatchNormalization

# print("Tensorflow version = ", tf.__version__)

Tensorflow version =  2.2.0-rc2


In [0]:
# Loading the MNIST archives from the local "data" folder.
# Evaluation order is unchanged: images first, then labels, training before testing.

# Training images and their labels
train_data, train_labels = (
    load_data(r"data/train-images-idx3-ubyte.gz"),
    load_labels(r"data/train-labels-idx1-ubyte.gz"),
)

# Testing images and their labels
test_data, test_labels = (
    load_data(r"data/t10k-images-idx3-ubyte.gz"),
    load_labels(r"data/t10k-labels-idx1-ubyte.gz"),
)

In [28]:
# Printing the shapes of the data and labels (one line per array, same captions as before)
for caption, array in (
    ("Train Data Shape = ", train_data),
    ("Train Labels Shape = ", train_labels),
    ("Test Data Shape = ", test_data),
    ("Test Labels Shape = ", test_labels),
):
    print(caption, array.shape)

Train Data Shape =  (60000, 28, 28, 1)
Train Labels Shape =  (60000,)
Test Data Shape =  (10000, 28, 28, 1)
Test Labels Shape =  (10000,)


In [0]:
# For Label Encoding
# One-hot encode with a fixed class count so the train and test label matrices always
# have the same 10 columns in the same order. Calling pd.get_dummies on each array
# separately derives the columns from whatever values happen to be present, and recent
# pandas versions return booleans instead of numbers.
# Assumes the raw labels are integer digits 0-9 — TODO confirm against load_labels.
# NOTE: this cell overwrites the raw labels; re-run the loading cell before re-running it.
NUM_CLASSES = 10

train_labels = tf.keras.utils.to_categorical(train_labels, num_classes = NUM_CLASSES)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes = NUM_CLASSES)

In [30]:
# Splitting the training set into training and validation data.
# random_state pins the shuffle so the split (and everything trained on it) is
# reproducible after a kernel restart. stratify keeps the class proportions equal in
# both parts; the labels are one-hot at this point, so the class index is recovered
# with argmax.
x_train, x_val, y_train, y_val = train_test_split(
    train_data,
    train_labels,
    test_size = 0.15,
    random_state = 42,
    stratify = train_labels.argmax(axis = 1),
)

# Printing the shapes of data and labels
print("X_Train Data Shape = ", x_train.shape)
print("Y_Train Labels Shape = ", y_train.shape)  # was mislabelled "X_Train Labels"

print("X_Val Data Shape = ", x_val.shape)
print("Y_Val Labels Shape = ", y_val.shape)

X_Train Data Shape =  (51000, 28, 28, 1)
X_Train Labels Shape =  (51000, 10)
X_Val Data Shape =  (9000, 28, 28, 1)
Y_Val Labels Shape =  (9000, 10)


In [0]:
# CNN Model
# Three Conv-Conv-MaxPool-BatchNorm-Dropout stages, one extra Conv + Dropout, then a
# fully connected head that ends in a 10-way softmax. Layers are listed in the exact
# order in which they are stacked.
model = Sequential([
    # Stage 1: 32 -> 64 filters on the 28x28x1 input, then 2x2 pooling
    Conv2D(32, 3, padding = "same", activation = "relu", input_shape = (28, 28, 1)),
    Conv2D(64, 3, padding = "same", activation = "relu"),
    MaxPooling2D(pool_size = (2, 2), strides = (2, 2)),
    BatchNormalization(),
    Dropout(0.4),

    # Stage 2: 128 -> 256 filters, then 2x2 pooling
    Conv2D(128, 3, padding = "same", activation = "relu"),
    Conv2D(256, 3, padding = "same", activation = "relu"),
    MaxPooling2D(pool_size = (2, 2), strides = (2, 2)),
    BatchNormalization(),
    Dropout(0.4),

    # Stage 3: 128 -> 64 filters, then 2x2 pooling
    Conv2D(128, 3, padding = "same", activation = "relu"),
    Conv2D(64, 3, padding = "same", activation = "relu"),
    MaxPooling2D(pool_size = (2, 2), strides = (2, 2)),
    BatchNormalization(),
    Dropout(0.4),

    # Final convolution before flattening
    Conv2D(32, 3, padding = "same", activation = "relu"),
    Dropout(0.4),

    Flatten(),

    # Fully connected head: 128 -> 64 -> 32 units
    Dense(128, activation = "relu"),
    Dense(64, activation = "relu"),
    Dense(32, activation = "relu"),

    # One output unit per class
    Dense(10, activation = "softmax"),
])

In [0]:
# Optimizer
# Use the canonical `learning_rate` argument; `lr` is only a deprecated alias that
# newer Keras releases no longer accept.
optimizer = Adam(learning_rate = 0.001)

In [0]:
# Compiling the Model
# Categorical cross-entropy matches the one-hot labels and the softmax output layer.
model.compile(
    loss = "categorical_crossentropy",
    optimizer = optimizer,
    metrics = ['accuracy'],
)

In [51]:
# Model Summary
# Prints the layer-by-layer table (layer type, output shape, parameter count).
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 28, 28, 32)        320       
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
batch_normalization_9 (Batch (None, 14, 14, 64)        256       
_________________________________________________________________
dropout_12 (Dropout)         (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 14, 14, 128)       73856     
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 14, 14, 256)      

In [0]:
# Callbacks

# TensorBoard event files go to "logs"; histogram_freq = 1 records histograms every epoch.
tensorboard = TensorBoard(log_dir = "logs", histogram_freq = 1)

# CSVLogger opens its file when training starts and does not create missing folders,
# so make sure "training/" exists (this call is a no-op if it already does).
tf.io.gfile.makedirs("training")
csvlogger = CSVLogger("training/training_log.csv")

# Multiply the learning rate by 0.1 after 3 epochs without val_loss improvement, but
# never go below 1e-4. With a starting rate of 1e-3 that floor is hit on the first cut.
reduce_lr = ReduceLROnPlateau(monitor = "val_loss", factor = 0.1, patience = 3, verbose = 1, min_lr = 1e-4)

In [0]:
# Image Data Generator
# Random rotation, shifts, brightness change, shear and zoom applied on the fly to each
# training batch.
# NOTE(review): brightness_range = [0.1, 0.15] looks like it scales every augmented
# image to 10-15 % of its brightness (a factor of 1.0 leaves it unchanged), and the
# validation/test images are not darkened. Confirm this is intended; something like
# [0.85, 1.15] may have been meant. Value deliberately left as-is so results match.
datagen = ImageDataGenerator(
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    brightness_range = [0.1, 0.15],
    shear_range = 0.2,
    zoom_range = 0.15,
)

# No datagen.fit(x_train) call: fit() only computes statistics for featurewise_center,
# featurewise_std_normalization and zca_whitening, none of which are enabled here, so
# it would just copy the whole training set for nothing.

In [0]:
# Hyperparameters
# BATCH_SIZE: number of augmented images per batch produced by datagen.flow(...).
# EPOCHS: number of passes requested from model.fit(...).
BATCH_SIZE = 32
EPOCHS = 15

In [55]:
# Fitting the model
# Augmented training batches are generated on the fly; validation uses the untouched
# hold-out split.
train_batches = datagen.flow(x_train, y_train, batch_size = BATCH_SIZE)

# steps_per_epoch has to be a whole number of batches. len(x_train) / BATCH_SIZE is a
# float (51000 / 32 = 1593.75); the iterator's own length is the batch count rounded up,
# so every sample is seen once per epoch.
history = model.fit(
    train_batches,
    steps_per_epoch = len(train_batches),
    epochs = EPOCHS,
    validation_data = (x_val, y_val),
    callbacks = [tensorboard, csvlogger, reduce_lr],
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0001.


In [None]:
# Saving the entire model
# Written to "model/m.model"; the testing section reloads it from this same path.
model.save("model/m.model")

In [0]:
# Saving a second copy of the model as an HDF5 file
# NOTE: model.save() stores the whole model, not only the weights. If a weights-only
# file is what is wanted, model.save_weights(...) is the call to use instead.
model.save("model/CNN_MNIST.h5")

In [None]:
# Loading TensorBoard
%tensorboard --logdir logs

### Testing the Trained CNN Model

In [0]:
# Loading Model
# Reloads the model saved at 'model/m.model', so evaluation runs on the persisted
# artifact and not on the in-memory `model` object.
loaded_model = load_model('model/m.model')

In [0]:
# Predicting on testing data
# The network ends in a 10-unit softmax layer, so each row of `predictions` holds the
# 10 class scores for one test image.
predictions = loaded_model.predict(test_data)

In [0]:
# Counting the number of wrong predictions
# Vectorised replacement for the per-sample Python loop: take the arg-max class of every
# prediction row and of every one-hot label row, then count the mismatches.
predicted_classes = np.argmax(predictions, axis = 1)
actual_classes = np.argmax(test_labels, axis = 1)

# int(...) keeps `count` a plain Python integer, as it was with the loop.
count = int(np.sum(predicted_classes != actual_classes))

In [83]:
# Test Accuracy
# Share of test images classified correctly, expressed as a percentage.
num_test_samples = len(test_data)
test_accuracy = 100 * (num_test_samples - count) / num_test_samples
print("Accuracy on testing data = {:3.2f} %".format(test_accuracy))

Accuracy on testing data = 98.96 %
