### -- DIGIT RECOGNIZER DATASET WITH CNN USING KERAS --
1. [IMPORT THE DATA AND A QUICK LOOK](#1)
     * [Getting the Test Data](#2)
     * [Creating x_train and y_train](#3)
     * [A quick visualization](#4)
     * [Looking for sample pictures](#5)
     
     
1. [PREPARING THE DATA FOR THE MODEL](#6)
     * [Setting the shapes](#7)
     * [Train-Test-Split](#8)
1. [IMPORTING LIBRARIES AND BUILDING THE MODEL](#9)
     * [Defining the Optimizer](#10)
     * [Compiling](#11)
     * [Epochs and Batch Size](#12)
     * [Data Augmentation](#13)
1. [FITTING THE MODEL](#14)
1. [VISUALIZATION](#15)

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns 

import warnings 
warnings.filterwarnings("ignore")



# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

<a id = "1"></a>
# IMPORT THE DATA AND A QUICK LOOK

In [None]:
# Load the training CSV, report its (rows, columns) shape, and preview the first rows.
train = pd.read_csv(filepath_or_buffer="/kaggle/input/digit-recognizer/train.csv")
print(train.shape)
train.head(n=5)

<a id = "2"></a>
### Let's get our test data

In [None]:
# Load the test CSV, report its (rows, columns) shape, and preview the first rows.
test = pd.read_csv(filepath_or_buffer="/kaggle/input/digit-recognizer/test.csv")
print(test.shape)
test.head(n=5)

<a id = "3"></a>
### Let's Create x_train and y_train

In [None]:
# Separate the training frame into the feature columns (everything except
# "label") and the target column ("label").
x_train = train.drop(columns=["label"])
y_train = train["label"]

In [None]:
# Inspect the shape of the label Series taken from train["label"].
y_train.shape

<a id = "4"></a>
### A Quick Visualization

In [None]:
# Bar chart of how many training samples exist for each digit class.
plt.figure(figsize=(15, 7))
# Pass the labels through `x=` explicitly: recent seaborn releases treat the
# first positional argument as `data`, so a bare Series is no longer
# interpreted as the variable to count.
sns.countplot(x=y_train, palette="icefire")
plt.title("Number of digit classes")
# Also show the exact per-class counts as the cell output.
y_train.value_counts()

<a id = "5"></a>
### Let's look at what we have...

In [None]:
# Draw training row 0 as a 28x28 grayscale image. The title is the value in
# the first column of `train` for that row (presumably the label - verify
# against the CSV column order).
img = x_train.iloc[0].values.reshape(28, 28)
plt.imshow(img, cmap='gray')
plt.axis("off")
plt.title(train.iloc[0, 0])
plt.show()

In [None]:
# Draw training row 51 as a 28x28 grayscale image. The title is the value in
# the first column of `train` for that row (presumably the label - verify
# against the CSV column order).
img = x_train.iloc[51].values.reshape(28, 28)
plt.imshow(img, cmap='gray')
plt.axis("off")
plt.title(train.iloc[51, 0])
plt.show()

In [None]:
# Divide every pixel value by 255 in both frames, then print each frame's shape.
# NOTE(review): assumes the raw pixel values are 0-255 so the result lies in [0, 1] - confirm.
x_train = x_train / 255.0
test = test / 255.0
for frame in (x_train, test):
    print(frame.shape)

In [None]:
# Inspect the label shape again; the scaling step only reassigned x_train and test.
y_train.shape

<a id = "6"></a>
# Let's Make Our Data Suitable for the Model

<a id = "7"></a>
### We should set the shapes for implementation

In [None]:
# Reshape each flat pixel row into a 28x28 array with one trailing channel
# axis; -1 lets NumPy infer the number of samples.
image_shape = (-1, 28, 28, 1)
x_train = x_train.to_numpy().reshape(image_shape)
test = test.to_numpy().reshape(image_shape)
print("x_train: ", x_train.shape)
print("test: ", test.shape)

In [None]:
# Import from the public `keras.utils` namespace: the private
# `keras.utils.np_utils` module is not available in newer Keras releases.
from keras.utils import to_categorical
# One-hot encode the digit labels into 10 columns, one per class.
y_train = to_categorical(y_train, num_classes=10)

In [None]:
# Inspect the label shape after to_categorical (called with num_classes = 10).
y_train.shape

In [None]:
# Display the encoded label array itself.
y_train

<a id = "8"></a>
### Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples as a validation set; the fixed random_state
# makes the split reproducible.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.1,
    random_state=2,
)

# Report the shape of each resulting array.
for label, arr in (
    ("x_train", x_train),
    ("x_val", x_val),
    ("y_train", y_train),
    ("y_val", y_val),
):
    print(f"{label} shape: ", arr.shape)

<a id = "9"></a>
# Import Necessary Libraries and Create the Model

In [None]:
from sklearn.metrics import confusion_matrix
import itertools
# Public import path; `keras.utils.np_utils` is not available in newer Keras releases.
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv2D,MaxPool2D,BatchNormalization
from keras.optimizers import RMSprop,Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau


def _add_conv_block(net, filters, kernel_size, **conv_kwargs):
    """Append one Conv2D -> MaxPool2D -> Dropout -> BatchNormalization stage to `net`.

    `conv_kwargs` is forwarded to Conv2D; it is used to pass `input_shape`
    for the first stage.
    """
    net.add(Conv2D(filters=filters, kernel_size=kernel_size, padding="same",
                   activation="relu", **conv_kwargs))
    # 2x2 pooling; the stride defaults to the pool size.
    net.add(MaxPool2D(pool_size=(2, 2)))
    net.add(Dropout(0.25))
    # The original spelled out every BatchNormalization argument, but all of
    # them were the layer's default values, so the bare constructor is equivalent.
    net.add(BatchNormalization())


model = Sequential()

# Convolutional feature extractor: a 5x5 stage on the 28x28x1 input followed
# by three 3x3 stages with a doubling filter count.
_add_conv_block(model, 32, (5, 5), input_shape=(28, 28, 1))
for n_filters in (64, 128, 256):
    _add_conv_block(model, n_filters, (3, 3))

# Fully connected classifier head.
model.add(Flatten())

for n_units in (256, 120, 120, 120):
    model.add(Dense(n_units, activation="relu"))  # hidden layer
    model.add(BatchNormalization())

model.add(Dense(100, activation="relu"))
model.add(Dropout(0.5))
model.add(BatchNormalization())

model.add(Dense(10, activation="softmax"))  # output layer: one unit per digit class

<a id = "10"></a>
### We will define the optimizer

In [None]:
# Adam with a base learning rate of 0.001. The learning rate is a fixed
# hyperparameter; Adam scales each parameter's update using running estimates
# of the gradient's first and second moments, with decay rates beta_1 and beta_2.
# `learning_rate` replaces the deprecated `lr` keyword.
optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

<a id = "11"></a>
### Compile Model
* Categorical cross-entropy loss for multi-class classification

In [None]:
# Configure training: categorical cross-entropy loss, the optimizer defined
# earlier in the notebook, and accuracy as the reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

<a id = "12"></a>
## Epochs and Batch Size
* Say you have a dataset of 10 examples (or samples). You have a batch size of 2, and you've specified you want the algorithm to run for 3 epochs. Therefore, in each epoch, you have 5 batches (10/2 = 5). Each batch gets passed through the algorithm, therefore you have 5 iterations per epoch.
* reference: https://stackoverflow.com/questions/4752626/epoch-vs-iteration-when-training-neural-networks

In [None]:
# Training schedule.
batch_size = 250  # samples per gradient update
epochs = 60  # full passes over the training data

<a id = "13"></a>
### Data Augmentation
 * To reduce overfitting, we generate new training images by applying small random transformations (rotation, zoom, shift) to the existing ones

In [None]:
# Random augmentation applied to training batches.
# Units matter here: `rotation_range` is in degrees, while the shift ranges
# are fractions of the image size when below 1. The previous values
# (0.9 width shift, 0.5 height shift, 0.5 zoom) could move most of a digit out
# of the 28x28 frame, and a 0.9 degree rotation had almost no effect.
datagen = ImageDataGenerator(
    rotation_range=10,        # rotate by up to +/-10 degrees
    zoom_range=0.1,           # zoom in/out by up to 10%
    width_shift_range=0.1,    # shift horizontally by up to 10% of the width
    height_shift_range=0.1,   # shift vertically by up to 10% of the height
)
# `datagen.fit(...)` is intentionally not called: it only computes statistics
# for feature-wise centering/normalization or ZCA whitening, none of which are enabled.

In [None]:
# Inspect the shape of the training image array that will be fed to the generator.
x_train.shape

<a id = "14"></a>
# Fit the model

In [None]:
# Train on augmented batches drawn from the generator and evaluate on the
# un-augmented validation split after every epoch.
# `Model.fit` accepts generators directly; `fit_generator` is deprecated and
# absent from newer Keras releases.
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
history = model.fit(
    train_batches,
    epochs=epochs,
    validation_data=(x_val, y_val),
    # Number of full batches that fit in one pass over the training data.
    steps_per_epoch=x_train.shape[0] // batch_size,
)

## Achieved 98.64% Accuracy!

<a id = "15"></a>
## Let's see what the loss looks like

In [None]:
# Plot the loss and accuracy curves for training and validation
# Plot the per-epoch training and validation loss recorded by `fit`.
# Showing both curves makes a widening gap (overfitting) visible.
plt.plot(history.history["loss"], color="b", label="training loss")
plt.plot(history.history["val_loss"], color="g", label="validation loss")
# The curves come from the training and validation splits, not the Kaggle test set.
plt.title("Training and Validation Loss")
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()


## Let's visualize the confusion matrix using a seaborn heatmap

In [None]:
import seaborn as sns
# Predict class probabilities for the validation images
y_pred = model.predict(x_val)
# Convert the predicted probability vectors to class indices
y_pred_classes = np.argmax(y_pred, axis=1)
# Convert the one-hot validation labels to class indices
y_true = np.argmax(y_val, axis=1)
# Compute the confusion matrix (rows: true class, columns: predicted class)
confusion_mtx = confusion_matrix(y_true, y_pred_classes)
# Plot the confusion matrix as an annotated heatmap
f, ax = plt.subplots(figsize=(8, 8))
# The cells are integer counts, so annotate with "d" rather than a float format.
sns.heatmap(confusion_mtx, annot=True, linewidths=0.01, cmap="Greens",
            linecolor="gray", fmt="d", ax=ax)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()