# MNIST Handwritten Digit Recognizer

Digit recognition using multiple CNNs, summing the class probabilities predicted by all CNNs to produce a more accurate answer.
Also generates an additional 25 million images to achieve data augmentation, using resizing, rotation, zoom, etc.

In [2]:
# LOAD LIBRARIES
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras import layers
from keras import Model

Using TensorFlow backend.


# Load Kaggle's 42,000 training images

In [3]:
# LOAD THE DATA
# Both CSV files are read from the current working directory into DataFrames.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

In [4]:
# PREPARE DATA FOR NEURAL NETWORK
# Every column except "label" holds pixels; reshape each row into a
# 28x28 single-channel image. Pixel values are left unscaled here.
X_train = train.drop("label", axis=1).values.reshape(-1, 28, 28, 1)
X_test = test.values.reshape(-1, 28, 28, 1)
# One-hot encode the digit labels into 10 classes.
Y_train = to_categorical(train["label"], num_classes=10)

# Generate 25 million more images!! (Data Augmentation)
by randomly rotating, scaling, and shifting Kaggle's 42,000 images.

In [5]:
# CREATE MORE IMAGES VIA DATA AUGMENTATION
# Training generator: multiplies pixel values by 1/255 and applies random
# rotation, zoom and width/height shifts to each image it yields.
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.10,
)
# Validation generator: rescaling only, no random transforms.
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Build n-number of Convolutional Neural Networks!

In [6]:
# BUILD CONVOLUTIONAL NEURAL NETWORKS

def initiate_model(nets=1):
    """Build and compile `nets` CNN classifiers with identical architecture.

    Each network accepts 28x28x1 inputs, stacks three convolutional stages
    with batch normalization and dropout, and ends in a 10-way softmax layer.
    It is compiled with the Adam optimizer, categorical cross-entropy loss
    and an accuracy metric.

    Args:
        nets: Number of independent networks to create.

    Returns:
        A list containing the `nets` compiled Sequential models.
    """
    ensemble = []
    for _ in range(nets):
        cnn = Sequential()

        stack = (
            # Stage 1: 3x3 convolution on the raw image.
            Conv2D(32, kernel_size=3, activation='relu', input_shape=(28, 28, 1)),
            BatchNormalization(),
            # Stage 2: strided 5x5 convolution, then pooling.
            Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu'),
            BatchNormalization(),
            Dropout(0.4),
            MaxPooling2D((2, 2)),
            # Stage 3: 3x3 convolution, then pooling.
            Conv2D(128, kernel_size=3, activation='relu'),
            BatchNormalization(),
            Dropout(0.4),
            MaxPooling2D((2, 2)),
            # Classifier head.
            Flatten(),
            Dropout(0.4),
            Dense(10, activation='softmax'),
        )
        for layer in stack:
            cnn.add(layer)

        # COMPILE WITH ADAM OPTIMIZER AND CROSS ENTROPY COST
        cnn.compile(
            optimizer="adam",
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        ensemble.append(cnn)

    return ensemble

# Model Summary

In [7]:
# Build a single throwaway network only to print its layer-by-layer summary.
initiate_model(1)[0].summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 26, 26, 32)        128       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 13, 13, 64)        51264     
_________________________________________________________________
batch_normalization_2 (Batch (None, 13, 13, 64)        256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 13, 13, 64)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 4, 4, 128)        

# Training CNNs

In [10]:
# DECREASE LEARNING RATE EACH EPOCH
# Schedule: 1e-3 at epoch 0, multiplied by 0.95 for every subsequent epoch.
annealer = LearningRateScheduler(lambda epoch: 1e-3 * 0.95 ** epoch)

# TRAIN NETWORKS
epochs = 1
nets = 1
batch_size = 64
model_list = initiate_model(nets)
history_list = [0] * nets

for j, net in enumerate(model_list):
    # Every network gets its own random 90/10 train/validation split.
    X_train2, X_val2, Y_train2, Y_val2 = train_test_split(
        X_train, Y_train, test_size=0.1
    )
    # `datagen` rescales the training batches by 1/255, so the validation
    # pixels are scaled the same way by hand.
    X_val2 = X_val2 / 255
    augmented_batches = datagen.flow(X_train2, Y_train2, batch_size=batch_size)
    history_list[j] = net.fit_generator(
        augmented_batches,
        steps_per_epoch=X_train2.shape[0] // batch_size,
        epochs=epochs,
        validation_data=(X_val2, Y_val2),
        callbacks=[annealer],
        verbose=2,
    )
    # NOTE(review): the per-network report below is disabled. The training log
    # names the metrics 'accuracy'/'val_accuracy', so the history keys
    # presumably match those names rather than 'acc'/'val_acc' -- confirm
    # before re-enabling.
    # print("CNN {0:d}:, Train accuracy={1:.5f}, Validation accuracy={2:.5f}".format(j+1, max(history_list[j].history['accuracy']), max(history_list[j].history['val_accuracy'])))
print('Training Done')

Epoch 1/1
 - 153s - loss: 0.6487 - accuracy: 0.8221 - val_loss: 0.1376 - val_accuracy: 0.9614
Training Done


# Ensemble the predictions of all `nets` CNNs and submit

In [11]:
# ENSEMBLE PREDICTIONS AND SUBMIT
# The networks were trained on pixels rescaled by 1/255 (the training
# generator's `rescale`) and validated on pixels divided by 255, so the test
# pixels must be scaled the same way before prediction. Feeding raw 0-255
# values would put the inputs far outside the range seen during training.
X_test_scaled = X_test / 255.0
num_test = X_test_scaled.shape[0]

# Sum the per-class probabilities of every trained network, then take the
# class with the highest total as the ensemble's prediction.
results = np.zeros((num_test, 10))
for net in model_list:
    results = results + net.predict(X_test_scaled)
results = np.argmax(results, axis=1)
results = pd.Series(results, name="Label")

# ImageId is 1-based and derived from the actual number of test rows, so the
# id column always lines up with the predictions instead of relying on a
# hard-coded 28,000.
image_ids = pd.Series(range(1, num_test + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)
submission.to_csv("Multiple_CNN.csv", index=False)

# Credits
The code here was inspired by the following outstanding Kaggle kernels (in addition to the publications above).

* [Yassine Ghouzam][1] - [Introduction to CNN Keras - 0.997 (top 6%)][2]
* [Peter Grenholm][5] - [Welcome to deep learning (CNN 99%)][6]
* [Ding Li][3] - [Digits Recognition With CNN Keras][4]
* [Aditya Soni][7] - [MNIST with Keras for Beginners(.99457)][8]

[1]:https://www.kaggle.com/yassineghouzam
[2]:https://www.kaggle.com/yassineghouzam/introduction-to-cnn-keras-0-997-top-6
[3]:https://www.kaggle.com/dingli
[4]:https://www.kaggle.com/dingli/digits-recognition-with-cnn-keras
[5]:https://www.kaggle.com/toregil
[6]:https://www.kaggle.com/toregil/welcome-to-deep-learning-cnn-99/
[8]:https://www.kaggle.com/adityaecdrid/mnist-with-keras-for-beginners-99457/
[7]:https://www.kaggle.com/adityaecdrid