## MNIST Competition

<img src="https://miro.medium.com/max/1168/1*2lSjt9YKJn9sxK7DSeGDyw.jpeg">

## Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
from sklearn.model_selection import train_test_split 
import tensorflow as tf
from tensorflow import keras
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [None]:
# Keep library FutureWarning chatter out of the cell outputs.
warnings.simplefilter('ignore', FutureWarning)
# Show floating-point values in pandas output with three decimal places.
pd.set_option('display.float_format', '{:.3f}'.format)

## Load data

In [None]:
# Read the labelled training set and the unlabelled competition test set.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/digit-recognizer/train.csv',
        '../input/digit-recognizer/test.csv',
    )
)

In [None]:
# Target: the 'label' column; features: every remaining column.
y = train['label']
X = train.drop(columns='label')

In [None]:
# Hold out 20% of the labelled data for validation and train on the other 80%.
# (test_size is the fraction assigned to the validation side of the split; the
# previous value of 0.8 left only 20% of the data for training.)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

## Data normalization

In [None]:
# Convert each frame to a NumPy array and divide every value by 255.0.
X_train, X_val, X_test = (
    pixel_frame.to_numpy() / 255.0
    for pixel_frame in (X_train, X_val, test)
)

## Data visualization 

In [None]:
# First 50 rows of the training array; used as the sample for the image grid below.
small_data = X_train[:50]

In [None]:
def plots(m, n, images=None):
    """Draw images in an ``m`` x ``n`` grid of subplots.

    Parameters
    ----------
    m : int
        Number of subplot rows.
    n : int
        Number of subplot columns.
    images : sequence of arrays, optional
        Images to draw; each one is reshaped to 28x28 before display.
        Defaults to the notebook-level ``small_data`` sample.

    At most ``m * n`` images are drawn, so a grid smaller than the sample no
    longer makes ``plt.subplot`` raise for an out-of-range position.
    """
    if images is None:
        images = small_data
    n_shown = min(len(images), m * n)
    plt.figure(figsize=(10,6))
    for plot_index in range(n_shown):
        # subplot positions are 1-based
        plt.subplot(m, n, plot_index + 1)
        plt.imshow(images[plot_index].reshape(28,28), cmap='binary')
        plt.axis('off')
    plt.show()

In [None]:
# 10 rows x 5 columns: one subplot per image in the 50-image sample.
plots(m=10, n=5)

In [None]:
# Labels of the first 50 training rows, arranged as a 10x5 grid.
sample_labels = y_train.iloc[:50].to_numpy()
print(sample_labels.reshape(10, 5))

In [None]:
# Class balance of the full labelled set: one bar per digit.
plt.figure(figsize=(12,6))
# Pass the Series by keyword; a positional argument is deprecated in seaborn
# 0.11 and is interpreted as `data` in later releases.
sns.countplot(x=train['label'])
plt.title('Histogram of numbers 0-9')
plt.xlabel('Label',fontsize = 12)
# The y-axis holds the counts (this was previously a second xlabel call that
# overwrote 'Label' and left the y-axis unlabelled).
plt.ylabel('Count',fontsize = 12)
plt.show()

## CNN

In [None]:
# Reshape to (samples, 28, 28, 1), matching the model's input_shape of (28, 28, 1).
X_train = np.reshape(X_train, (-1, 28, 28, 1))

In [None]:
# Reshape to (samples, 28, 28, 1), matching the model's input_shape of (28, 28, 1).
X_val = np.reshape(X_val, (-1, 28, 28, 1))

In [None]:
# Reshape to (samples, 28, 28, 1), matching the model's input_shape of (28, 28, 1).
X_test = np.reshape(X_test, (-1, 28, 28, 1))

In [None]:
# Stop when the validation loss has not improved for 3 consecutive epochs and
# roll back to the best weights seen. Monitoring the training loss instead
# would not react to overfitting.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

In [None]:
# Build the network layer by layer; the order below is the forward-pass order.
cnn_model = keras.models.Sequential()

# Convolutional part: 5x5 conv (64 filters) -> pool -> two 3x3 convs
# (128 filters each) -> pool -> 3x3 conv (256 filters). All convs use
# 'same' padding and ReLU.
cnn_model.add(keras.layers.Conv2D(64, 5, activation='relu', padding='same', input_shape=(28, 28, 1)))
cnn_model.add(keras.layers.MaxPooling2D(2))
cnn_model.add(keras.layers.Conv2D(128, 3, activation='relu', padding='same'))
cnn_model.add(keras.layers.Conv2D(128, 3, activation='relu', padding='same'))
cnn_model.add(keras.layers.MaxPooling2D(2))
cnn_model.add(keras.layers.Conv2D(256, 3, activation='relu', padding='same'))

# Dense part: flatten, then 128 -> 64 -> 32 -> 32 ReLU units with 25% dropout
# after the 128, 64 and second 32-unit layers.
cnn_model.add(keras.layers.Flatten())
cnn_model.add(keras.layers.Dense(128, activation='relu'))
cnn_model.add(keras.layers.Dropout(0.25))
cnn_model.add(keras.layers.Dense(64, activation='relu'))
cnn_model.add(keras.layers.Dropout(0.25))
cnn_model.add(keras.layers.Dense(32, activation='relu'))
cnn_model.add(keras.layers.Dense(32, activation='relu'))
cnn_model.add(keras.layers.Dropout(0.25))

# Output: 10-way softmax, one unit per digit class.
cnn_model.add(keras.layers.Dense(10, activation='softmax'))

In [None]:
# Print the layer-by-layer summary of the model defined above.
cnn_model.summary()

- The network starts with a Conv2D layer that applies 64 filters of size (5,5) to the input image. Zero padding is used in this layer (and in every convolutional layer), so it produces 64 feature maps of the same size as the input.
- After that, the 64 outputs are reduced in size using a MaxPooling2D (2,2) with a stride of 2.
- The next two layers use (3,3) filters; they take the 64 feature maps as input and create 128 outputs, which are again reduced in size by a MaxPooling2D layer.
- Next, the last convolutional layer is applied with (3,3) filters, which creates 256 outputs.
- Finally, there are several fully connected (Dense) layers interleaved with Dropout (to avoid overfitting), followed by a last layer with a softmax function whose outputs represent the probability that the image belongs to each class.

In [None]:
# Sparse categorical cross-entropy takes the integer labels directly
# (no one-hot encoding); accuracy is tracked alongside the loss.
cnn_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='nadam',
    metrics=['accuracy'],
)

In [None]:
# Train for at most 50 epochs in mini-batches of 64, evaluating on the
# validation split after each epoch; the callback may end training sooner.
cnn_history = cnn_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=64,
    callbacks=[callback],
)

## Loss and accuracy plots

In [None]:
# Training vs. validation accuracy per epoch.
accuracy_history = pd.DataFrame(cnn_history.history)[['val_accuracy','accuracy']]
# Keras records epochs starting at 0; re-index from 1 so the axis reads
# "epoch 1 .. N" and every epoch, including the first, gets a tick.
accuracy_history.index = range(1, len(accuracy_history) + 1)
accuracy_history.plot(figsize=(12,6))
plt.title('Learning history')
plt.xticks(list(accuracy_history.index))
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Training vs. validation loss per epoch.
loss_history = pd.DataFrame(cnn_history.history)[['val_loss','loss']]
# Keras records epochs starting at 0; re-index from 1 so the axis reads
# "epoch 1 .. N" and every epoch, including the first, gets a tick.
loss_history.index = range(1, len(loss_history) + 1)
loss_history.plot(figsize=(12,6))
plt.title('Learning history')
plt.xticks(list(loss_history.index))
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()

In [None]:
# Sequential.predict_classes was removed in TensorFlow 2.6. For a softmax
# output the predicted class is the index of the largest probability.
val_pred = np.argmax(cnn_model.predict(X_val), axis=-1)

### Confusion matrix

In [None]:
# confusion_matrix(y_true, y_pred) returns C where C[i, j] counts samples whose
# true class is i and predicted class is j: rows are actual, columns predicted.
conf_mat = confusion_matrix(y_val, val_pred)
plt.figure(figsize = (12,6))
sns.heatmap(conf_mat,annot = True,  fmt='g', cmap='viridis')
plt.title('Confusion matrix on validation data', fontsize = 12)
# The heatmap draws rows along the y-axis and columns along the x-axis, so the
# y-axis is the actual label and the x-axis the predicted one.
plt.ylabel('Actual', fontsize = 12)
plt.xlabel('Predicted', fontsize = 12)
plt.show()

## Error plots

In [None]:
# Boolean mask over the validation set: True where the prediction differs from the label.
errors = y_val.ne(val_pred)

In [None]:
def plot_errors(n,m):
    """Show misclassified validation images in an ``n`` x ``m`` grid.

    Parameters
    ----------
    n : int
        Number of subplot rows.
    m : int
        Number of subplot columns.

    Reads the notebook-level ``X_val``, ``y_val``, ``val_pred`` and ``errors``.
    Draws ``min(n * m, number_of_errors)`` images, so the grid size decides how
    many are shown and the function no longer raises when there are fewer
    misclassifications than grid cells.
    """
    # Apply the mask once rather than on every loop iteration.
    mask = np.asarray(errors)
    wrong_images = X_val[mask]
    wrong_preds = val_pred[mask]
    true_labels = y_val.values[mask]

    n_shown = min(n * m, len(wrong_images))
    plt.figure(figsize=(15,10))
    for i in range(n_shown):
        # subplot positions are 1-based
        plt.subplot(n, m, i + 1)
        plt.imshow(wrong_images[i].reshape(28,28), cmap = 'binary')
        plt.title('Predicted value: {}, True value: {}'.format(wrong_preds[i], true_labels[i]))
        plt.axis('off')
    plt.show()

In [None]:
# 5 rows x 3 columns of misclassified validation digits.
plot_errors(n=5, m=3)

In [None]:
# Sequential.predict_classes was removed in TensorFlow 2.6. For a softmax
# output the predicted class is the index of the largest probability.
test_pred = np.argmax(cnn_model.predict(X_test), axis=-1)

In [None]:
# Submission frame: 1-based ImageId alongside the predicted digit. The id range
# is derived from the number of predictions instead of a hard-coded 28000.
results = pd.DataFrame({
    'ImageId': range(1, len(test_pred) + 1),
    'Label': test_pred,
})

In [None]:
# Write the submission file without the DataFrame index column.
results.to_csv(path_or_buf='Submission.csv', index=False)