In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        full_path = os.path.join(folder, fname)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# MNIST Digits - Classification Using a CNN

In this notebook, we'll explore the popular MNIST dataset and build a convolutional neural network (CNN) to classify handwritten digits. <a href='http://yann.lecun.com/exdb/mnist/'>Here is a detailed description of the dataset.</a>

We'll divide the analysis into the following parts:
- Data understanding and cleaning
- Data preparation for model building
- Building a CNN model - architecture, training with early stopping, and prediction

# Loading Libraries

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load Dataset

In [None]:
# Read the competition CSVs into DataFrames.
# The training file carries a 'label' column plus the pixel columns; the test file is
# reshaped later without removing any column, so it is expected to hold pixels only.
train_csv = "/kaggle/input/digit-recognizer/train.csv"
test_csv = "/kaggle/input/digit-recognizer/test.csv"

df_train = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

In [None]:
# Preview the first rows of the training frame (head() defaults to five rows).
df_train.head()

In [None]:
# Print a structural summary of the training frame (index range, column dtypes, memory usage).
df_train.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df_train.describe()

In [None]:
# Bar chart of how many training rows carry each digit label, to check class balance.
# Author's observation: the dataset is almost balanced.
sns.countplot(data=df_train, x="label")
plt.show()

# Splitting into predictors and target

In [None]:
# Separate the target column from the pixel predictors WITHOUT mutating df_train.
# The previous `df_train.pop('label')` removed the column in place, so running this
# cell a second time raised KeyError and earlier cells that read df_train['label']
# (e.g. the count plot) stopped working. Selecting and dropping keeps the cell re-runnable.
y_train = df_train["label"]
X_train = df_train.drop(columns=["label"])

# Reshaping and scaling the predictors

In [None]:
# Turn each flat row of pixel values into a 28x28 single-channel image;
# the leading -1 lets NumPy infer the number of images.
# NOTE: X_train is rebound from a DataFrame to an ndarray here, so this cell cannot be
# re-run on its own afterwards - re-run from the predictor/target split cell instead.
image_shape = (-1, 28, 28, 1)

X_train = X_train.to_numpy().reshape(image_shape)
X_test = df_test.to_numpy().reshape(image_shape)

In [None]:
# Divide by 255 so that (assuming 8-bit pixel intensities) the inputs fall in [0, 1].
# NOTE: both names are overwritten with their scaled versions, so executing this cell
# twice divides by 255 twice - re-run from the reshape cell if it needs repeating.
X_train, X_test = X_train / 255, X_test / 255

In [None]:
# Display two training samples as a visual sanity check of the reshaped, scaled images.
for sample_idx in (5, 8):
    # Drop the trailing channel axis explicitly: imshow documents (M, N), (M, N, 3)
    # and (M, N, 4) arrays as its image inputs, not (M, N, 1).
    plt.imshow(X_train[sample_idx].squeeze(axis=-1), cmap='gray')
    # Show the ground-truth digit so the figure can be read on its own.
    plt.title(f"Training sample {sample_idx} (label: {y_train.iloc[sample_idx]})")
    plt.show()

In [None]:
# Sanity check: the number of images (first axis of X_train) should equal the number of labels.
X_train.shape, y_train.shape

# Model Building

In [None]:
from tensorflow.keras import callbacks
from tensorflow.keras.layers import Conv2D,MaxPooling2D,Flatten,Dense,Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

In [None]:
# Seed Python, NumPy and the Keras backend before any layer is created so that weight
# initialisation (and the later shuffling / dropout masks) are repeatable on
# Restart & Run All. GPU kernels may still introduce small run-to-run differences.
set_random_seed(42)

# Small CNN for 28x28 single-channel digit images:
#   - three convolution + 2x2 max-pooling stages (5x5/32, 5x5/64, 3x3/64 filters),
#     the last two followed by 25% dropout;
#   - a 64-unit dense layer;
#   - a 10-unit softmax output, one unit per class.
model = Sequential([
    Conv2D(32, (5, 5), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (5, 5), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

In [None]:
# Stop training once the monitored quantity has gone 5 consecutive epochs without improving.
# NOTE: 'loss' is the training loss, not a validation metric.
callback = callbacks.EarlyStopping(
    monitor='loss',
    patience=5,
)

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy (takes the integer
# class labels directly, no one-hot encoding), and accuracy reported alongside the loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for at most 50 epochs in mini-batches of 64, with the early-stopping callback.
# validation_split=0.1 holds out the last 10% of the training rows (taken before
# shuffling) so every epoch also reports val_loss / val_accuracy; previously the model
# was never evaluated on data it had not been trained on.
# The labels are passed as a NumPy array because validation_split is documented for
# tensors / NumPy arrays. The returned History is kept for later inspection.
history = model.fit(
    X_train,
    np.asarray(y_train),
    epochs=50,
    batch_size=64,
    validation_split=0.1,
    callbacks=[callback],
)

# Model Predictions

In [None]:
# Run the trained network on the test images; with the 10-unit softmax output layer
# this yields one row of 10 class scores per image.
y_test = model.predict(X_test)

In [None]:
# Collapse each row of class scores to the index of its largest entry, i.e. the predicted digit.
# NOTE: y_test is overwritten (2-D scores -> 1-D labels), so re-running this cell on its
# own fails because a 1-D array has no axis 1; re-run the predict cell first.
y_test = y_test.argmax(axis=1)

In [None]:
# ImageId is the 1-based position of each test row. df_test is read with the default
# 0..n-1 index, so this equals "index + 1" without a manual append loop.
index_list = list(range(1, len(df_test) + 1))

In [None]:
# Two-column submission table: the 1-based image id and the predicted digit for that image.
submission_df = pd.DataFrame({"ImageId": index_list}).assign(Label=y_test)

In [None]:
# Write the submission file to the current working directory, omitting the DataFrame index.
submission_df.to_csv(path_or_buf="submission_cnn.csv", index=False)