In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input mount.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv2D,Flatten,MaxPooling2D,Dropout,BatchNormalization,Activation
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.model_selection import train_test_split

# Loading and Preprocessing 

In [None]:
# Locations of the labelled training CSV and the unlabelled test CSV.
filepath_train, filepath_test = (
    "/kaggle/input/digit-recognizer/train.csv",
    "/kaggle/input/digit-recognizer/test.csv",
)

In [None]:
df = pd.read_csv(filepath_train)
df.head()

In [None]:
df.isnull().sum().sort_values(ascending = False) 

## No null values :)

In [None]:
pd.read_csv(filepath_test).head()

In [None]:
def import_data(filepath, label_col='label'):
    """Load a labelled CSV and split it into feature and label arrays.

    The file is parsed only once; the label column is then separated from
    the remaining (feature) columns.

    Parameters
    ----------
    filepath : str or path-like
        CSV file to read with ``pd.read_csv``.
    label_col : str, default 'label'
        Name of the column holding the target values.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        ``(X, y)`` where ``X`` holds every column except ``label_col`` and
        ``y`` holds the values of ``label_col``.

    Raises
    ------
    KeyError
        If ``label_col`` is not a column of the file.
    """
    frame = pd.read_csv(filepath)  # single read instead of one per output

    if label_col not in frame.columns:
        raise KeyError(f"column {label_col!r} not found in {filepath}")

    X = frame.drop(label_col, axis=1)
    y = frame.loc[:, label_col]

    return (np.array(X), np.array(y))

X_train,y_train = import_data(filepath_train)
# X_test,y_test = import_data(filepath_test) # since there is no label column in test.csv 

In [None]:
# test.csv has no label column, so the whole frame is used as features.
X_test = np.array(pd.read_csv(filepath_test))

In [None]:
X_test.shape

In [None]:
X_train.shape

In [None]:
y_train.shape

In [None]:
np.unique(y_train)

## Data Distribution

In [None]:
plt.figure(figsize = (20,15))

# Pass the labels as `x=`: recent seaborn versions treat the first positional
# argument as `data`, so a bare array would not be counted per class.
sns.countplot(x=y_train,linewidth = 3,edgecolor=sns.color_palette("Set2"))
# Style the tick labels after plotting so the size applies to the ticks
# that countplot actually draws.
plt.xticks(size=15)
plt.title('Distribution of labels in the train dataset', fontdict={'color' : 'Black' , 'fontsize' : 30})

plt.show()

In [None]:
print("Before Reshaping : ")
print("Shape of X_train :" ,X_train.shape)
print("Shape of y_train :" ,y_train.shape)
print("Shape of X_test :" ,X_test.shape)

## Reshaping Images

In [None]:
# Give every flattened row an explicit 28x28 single-channel image shape,
# matching the input_shape=(28, 28, 1) declared on the first Conv2D layer.
image_shape = (28, 28, 1)
X_train = X_train.reshape((len(X_train),) + image_shape)
X_test = X_test.reshape((len(X_test),) + image_shape)

# One-hot encode the integer labels for the categorical_crossentropy loss.
y_train = tf.keras.utils.to_categorical(y_train)

In [None]:
print("After Reshaping : ")
print("Shape of X_train :" ,X_train.shape)
print("Shape of y_train :" ,y_train.shape)
print("Shape of X_test :" ,X_test.shape)

In [None]:
# Divide by 255 (presumably the maximum 8-bit pixel intensity — confirm
# against the dataset) so both sets share the same scaling.
X_train, X_test = X_train / 255, X_test / 255

## Visualize some data 

In [None]:
L = 5  # grid rows
W = 5  # grid columns
fig, axes = plt.subplots(L, W, figsize = (15,15))
axes = axes.ravel()

# y_train is one-hot encoded by the time this cell runs; recover the integer
# class per sample. Fall back to the raw vector if it is still 1-D.
sample_labels = y_train.argmax(axis=1) if y_train.ndim == 2 else y_train

for i in range(0, L * W):
    # Drop the trailing channel axis so imshow receives a plain 2-D image.
    axes[i].imshow(X_train[i].squeeze(),cmap='gray')
    # Title with the sample's actual label, not its position in the grid.
    axes[i].set_title("Digit = "+str(sample_labels[i]))
    axes[i].axis('off')
plt.subplots_adjust(wspace=0.5)

# Constructing a CNN model 

In [None]:
model = Sequential()

# Convolutional feature extractor, listed in the order the layers are stacked.
conv_stack = [
    # Block 1: 'same' padding keeps the 28x28 spatial size.
    Conv2D(16, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),

    # Block 2
    Conv2D(32, (3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.2),

    # Block 3
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),

    # Block 4
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
]
for conv_layer in conv_stack:
    model.add(conv_layer)

In [None]:
# Dense classifier head appended to the convolutional stack; the final
# 10-unit softmax layer outputs one probability per class.
classifier_head = (
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
)
for head_layer in classifier_head:
    model.add(head_layer)

model.summary()

In [None]:
 # Number of complete 128-sample batches in the training set (integer division).
 # NOTE(review): model.fit in this notebook uses batch_size = 300, not 128 —
 # confirm which value is intended.
 X_train.shape[0]//128

In [None]:
# Adam optimizer with cross-entropy over the one-hot labels; accuracy is
# tracked so it can be plotted afterwards.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train on the full training set (no validation split is held out here).
history = model.fit(X_train, y_train, epochs=30, batch_size=300)

In [None]:
# Plot the per-epoch training loss and accuracy recorded by model.fit.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(2, 2, 1)

for metric_key, legend_text in (('loss', 'Loss'), ('accuracy', 'accuracy')):
    ax.plot(history.history[metric_key], label=legend_text)

ax.legend()
ax.grid()
ax.set_title('Training Acc and Loss evolution')

In [None]:
predictions = model.predict(X_test)

In [None]:
predictions[0]

In [None]:
# Pick, for each test image, the class index with the highest predicted score.
results = np.argmax(a=predictions, axis=1)

In [None]:
results[0]

In [None]:
# Wrap the predicted classes in a Series named "Label" for the submission file.
results = pd.Series(data=results, name="Label")
results.head(n=10)

In [None]:
# ImageId is the 1-based row number of each prediction. Derive it from the
# number of predictions instead of hard-coding the test-set size, so the
# cell stays correct if the test file changes.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)
submission.to_csv("submission.csv", index=False)

## Thank you !