# Introduction 
#### Digit recognition on the MNIST dataset is a kind of "hello world" exercise for practising Convolutional Neural Networks (CNNs). In this notebook, I will try to find the optimum parameters for a CNN model.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Read and Identify Dataset

In [None]:
# Load the competition CSV files into DataFrames and report their dimensions.
train_csv = "/kaggle/input/digit-recognizer/train.csv"
test_csv = "/kaggle/input/digit-recognizer/test.csv"
train_data = pd.read_csv(train_csv)
test_data = pd.read_csv(test_csv)
print(f"Shape of train data : {train_data.shape}\nShape of test data : {test_data.shape}")

In [None]:
# Preview the first rows of the training table (it contains the "label" column used below).
train_data.head()

In [None]:
# Preview the first rows of the test table.
test_data.head()

#### The training data is labeled; the test data is not.

In [None]:
# Separate the pixel feature columns from the target column.
x_train = train_data.drop(columns=["label"])
y_train = train_data["label"]

In [None]:
# Display the label Series taken from the "label" column.
y_train

In [None]:
# Display the (rows, columns) shape of the feature table after dropping "label".
x_train.shape

In [None]:
# Bar chart of how many training samples exist for each digit class.
plt.figure(figsize=(8, 6))
# Pass the labels through the `x` keyword: in current seaborn releases the
# first positional parameter of countplot is `data`, not `x`.
sns.countplot(x=y_train)
plt.xlabel("Digit's Classes")
plt.ylabel("Frequency")
plt.title("Frequency of Digits")
plt.show()

#### Data seems to be balanced.

### Plotting some samples

In [None]:
# Show the first four training digits together in one 2x2 grid.
for i in range(4):
    plt.subplot(2, 2, i + 1)  # subplot indices are 1-based: one panel per sample
    img = x_train.iloc[i].to_numpy().reshape((28, 28))  # flat pixel row -> 28x28 image
    plt.imshow(img, cmap='gray')
    plt.axis("off")
plt.show()  # render once, after all four panels have been drawn

## Normalization - Reshape - Encoding

* Pixel intensities take integer values in the range 0-255. For a CNN it is better to rescale them to the range 0-1, which helps training converge faster and reduces the effect of illumination differences.
* Train and test images are 28 x 28 pixels. We reshape all data to 28x28x1 3D matrices, because Keras needs an extra dimension at the end that corresponds to the channels. Our images are grayscale, so they use only one channel.
* Encode labels to one hot vectors : (One hot encoding)
    * 2 => [0,0,1,0,0,0,0,0,0,0]
    * 4 => [0,0,0,0,1,0,0,0,0,0]

In [None]:
# Normalization
x_train = x_train / 255.0
test_data = test_data / 255.0
print(f"Shape of x_train : {x_train.shape}\nShape of test_data : {test_data.shape}")

In [None]:
# Reshape
x_train = x_train.values.reshape(-1,28,28,1)
test_data = test_data.values.reshape(-1,28,28,1)
print(f"Shape of x_train : {x_train.shape}\nShape of test_data : {test_data.shape}")

In [None]:
# Encoding
from keras.utils.np_utils import to_categorical #converts to one hot encoding
y_train = to_categorical(y_train, num_classes=10)

## Split

#### The provided test data is unlabeled and is used only for the final submission, so we hold out 10% of the training data as a validation set to evaluate the model.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the labeled samples for validation; the fixed random_state
# keeps the split reproducible.
X_train, X_val, Y_train, Y_val = train_test_split(
    x_train, y_train, test_size=0.1, random_state=2
)
for caption, split in (
    ("X_train shape : ", X_train),
    ("X_val shape : ", X_val),
    ("Y_train shape: ", Y_train),
    ("Y_val shape : ", Y_val),
):
    print(caption, split.shape)

# CNN

In [None]:
from sklearn.metrics import confusion_matrix
import itertools
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, Activation
from keras.optimizers import RMSprop,Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau

In [None]:
#CNN
model=Sequential()

model.add(Conv2D(filters=64, kernel_size = (3,3), activation="relu", input_shape=(28,28,1)))
model.add(Conv2D(filters=64, kernel_size = (3,3), activation="relu"))

model.add(MaxPool2D(pool_size=(2,2)))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size = (3,3), activation="relu"))
model.add(Conv2D(filters=128, kernel_size = (3,3), activation="relu"))

model.add(MaxPool2D(pool_size=(2,2)))
model.add(BatchNormalization())    
model.add(Conv2D(filters=256, kernel_size = (3,3), activation="relu"))
    
model.add(MaxPool2D(pool_size=(2,2)))

#ANN
model.add(Flatten())
model.add(BatchNormalization())
model.add(Dense(512,activation="relu"))
    
model.add(Dense(10,activation="softmax"))
    
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Adam optimizer with a learning rate of 0.002. Use the `learning_rate`
# keyword: `lr` is a deprecated alias that newer Keras releases reject.
optimizer = Adam(learning_rate=0.002, beta_1=0.9, beta_2=0.999)

In [None]:
# Re-compile the model so that it trains with the custom Adam optimizer.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

## Data Augmentation

In [None]:
# Augmentation settings: only small geometric perturbations are enabled;
# every normalization, whitening and flipping option is switched off.
augmentation_options = dict(
    featurewise_center=False,             # do not zero the dataset-wide input mean
    samplewise_center=False,              # do not zero each sample's own mean
    featurewise_std_normalization=False,  # do not divide inputs by the dataset std
    samplewise_std_normalization=False,   # do not divide each input by its own std
    zca_whitening=False,                  # no ZCA whitening
    rotation_range=5,                     # random rotation, range of 5 degrees
    zoom_range=0.1,                       # random zoom of up to 10%
    width_shift_range=0.1,                # random horizontal shift of up to 10%
    height_shift_range=0.1,               # random vertical shift of up to 10%
    horizontal_flip=False,                # no horizontal flips
    vertical_flip=False,                  # no vertical flips
)
datagen = ImageDataGenerator(**augmentation_options)

# NOTE(review): fit() presumably only computes statistics for the
# featurewise/ZCA options, all of which are disabled here - confirm before removing.
datagen.fit(X_train)

In [None]:
# Train on augmented batches produced by the generator. `Model.fit` accepts
# the generator directly; `fit_generator` is deprecated and absent from newer
# Keras releases.
BATCH_SIZE = 128
history = model.fit(
    datagen.flow(X_train, Y_train, batch_size=BATCH_SIZE),
    epochs=25,
    validation_data=(X_val, Y_val),
    # One epoch = as many full batches as fit in the training split.
    steps_per_epoch=X_train.shape[0] // BATCH_SIZE,
)

## Evaluation

In [None]:
# Score the trained model on the held-out validation split.
val_scores = model.evaluate(X_val, Y_val, verbose=0)
final_loss, final_accuracy = val_scores
print("Final loss: {0:.4f}, final accuracy: {1:.4f}".format(final_loss, final_accuracy))

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Learning curves per epoch: training in blue, validation in red. Each figure
# draws both curves, so label them and show a legend.
plt.plot(history.history['loss'], color='b', label="Training")
plt.plot(history.history['val_loss'], color='r', label="Validation")
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
plt.title("Training and Validation Loss")
plt.legend()
plt.show()

plt.plot(history.history['accuracy'], color='b', label="Training")
plt.plot(history.history['val_accuracy'], color='r', label="Validation")
plt.xlabel("Number of Epochs")
plt.ylabel("Accuracy")
plt.title("Training and Validation Accuracy")
plt.legend()
plt.show()

In [None]:
# Prediction from validation 
Y_pred = model.predict(X_val)
Y_pred_class = np.argmax(Y_pred, axis=1) # prediction classes to one hot vectors
Y_true = np.argmax(Y_val, axis=1)

confusion_matrix = confusion_matrix(Y_true, Y_pred_class)
f, ax = plt.subplots(figsize=(8,8))
sns.heatmap(confusion_matrix, annot=True, linewidths=0.01, cmap="Greens", linecolor="gray", fmt=".1f", ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Per-class report: row i of the matrix holds the counts for true class i,
# and its diagonal entry is the number of correct predictions.
for class_id, row in enumerate(confusion_matrix):
    total = sum(row)
    correct = row[class_id]
    print("Class : ", str(class_id))
    print("Number of Wrong Predictions : ", str(total - correct), "out of " + str(total))
    print("Percentage of True Predictions : {:.2f}%".format(correct / (total / 100)))
    print("***********************************************************")

## Submission

In [None]:
# Predict a digit for every test image and write the submission file.
# `Sequential.predict_classes` is not available in recent Keras/TensorFlow
# releases; the argmax over the softmax outputs gives the same class indices.
predictions = np.argmax(model.predict(test_data, verbose=0), axis=1)

# ImageId is numbered from 1, in the row order of the test file.
submissions = pd.DataFrame({"ImageId": list(range(1, len(predictions) + 1)),
                            "Label": predictions})
submissions.to_csv("FEG.csv", index=False, header=True)