# Digit Recognizer

**Competition Submission**

* **Author : Sabarishwaran G**
* **Contact : sabarish261101@gmail.com**

# Importing required packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.utils import shuffle
from keras.models import Sequential
from tensorflow import keras
from keras.layers import Dense, Dropout, Flatten, BatchNormalization
from keras.callbacks import EarlyStopping

# Loading data

In [None]:
# Read the two competition CSV files into DataFrames.
# Later cells treat column 0 of `train` as the digit label and the remaining
# columns as pixel values; every column of `test` is treated as a pixel value.
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

# (rows, columns) of each frame; the row counts are reused by later cells.
train_shape = train.shape
test_shape = test.shape

print(f'Shape of Test Data : {test_shape}')
print(f'Shape of Train Data : {train_shape}')

# Target shape for a single image: height, width, channels.
img_size = (28, 28, 1)

# Data visualization

## Sample image from train data

In [None]:
# Pick one training row at random (the upper bound of randint is exclusive,
# so n is always a valid row position).
n = np.random.randint(0, train_shape[0])

# Columns 1.. hold the pixels of that row; reshape the flat row into an image.
sample = np.array(train.iloc[n, 1:]).reshape(img_size)

plt.imshow(sample)
# Column 0 of the same row is shown as the digit label in the title.
plt.title(f'Digit : {train.iloc[n, 0]}')

## Unique classes & their count

In [None]:
# Map each distinct value of the `label` column to how often it occurs.
label_count = dict(train['label'].value_counts())
print(f' Labels and their count :\n {label_count}')

# Split the mapping into parallel lists for plotting
# (iterating a dict yields its keys, in the same order as .values()).
x = list(label_count)
y = list(label_count.values())

# Bar chart of label frequency.
fig = plt.figure(figsize=(10, 5))
plt.bar(x, y, width=0.5)
plt.title('Labels vs Count')
plt.xlabel('Labels')
plt.ylabel('Count')
plt.show()

# Preprocessing

In [None]:
# Build the training image tensor in one vectorised step.
# Column 0 of `train` is the label, so only columns 1.. are pixels. Every row
# is reshaped to `img_size`, giving an array of shape (n_rows, *img_size).
# This replaces a Python-level loop that called `iloc` once per row, which is
# very slow on large frames and yields a mis-shaped array for an empty frame.
# NOTE(review): pixel values are passed on unscaled; if scaling is added here
# it must be applied identically to the test images.
train_images = train.iloc[:, 1:].to_numpy().reshape((-1,) + tuple(img_size))

In [None]:
# Build the test image tensor in one vectorised step.
# All columns of `test` are used as pixels. Every row is reshaped to
# `img_size`, giving an array of shape (n_rows, *img_size).
# This replaces a Python-level loop that called `iloc` once per row, which is
# very slow on large frames and yields a mis-shaped array for an empty frame.
test_images = test.to_numpy().reshape((-1,) + tuple(img_size))

In [None]:
# Column 0 of `train` holds the labels; copy it into a NumPy array and give it
# an explicit column shape of (number_of_training_rows, 1).
train_labels = np.array(train.iloc[:, 0]).reshape((train_shape[0], 1))

In [None]:
# Shuffle images and labels together so each image keeps its own label.
# The fixed random_state makes the resulting order reproducible between runs.
train_images, train_labels = shuffle(
    train_images,
    train_labels,
    random_state=10,
)

In [None]:
# Hold out the first 8400 (already shuffled) samples for validation AND remove
# them from the training arrays. Previously the validation slice was still part
# of `train_images`, so the model was validated on data it had trained on. That
# leakage makes val_loss, early stopping and restore_best_weights meaningless.
# NOTE(review): 8400 is presumably 20% of the Kaggle training set - confirm.
# NOTE: run this cell once per shuffle; re-running it on its own would carve a
# further 8400 samples out of the already reduced training arrays.
val_images, val_labels = train_images[:8400], train_labels[:8400]
train_images, train_labels = train_images[8400:], train_labels[8400:]

# CNN Model

In [None]:
# Convolutional classifier for 28x28 single-channel images, assembled layer by
# layer. Each convolution is followed by batch normalisation; max-pooling
# follows the first, second and fourth convolution.
Model = keras.models.Sequential()

# Convolution stage 1 (the only convolution without padding="same").
Model.add(keras.layers.Conv2D(filters=96, kernel_size=(3, 3), activation='relu',
                              input_shape=(28, 28, 1)))
Model.add(keras.layers.BatchNormalization())
Model.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

# Convolution stage 2.
Model.add(keras.layers.Conv2D(filters=256, kernel_size=(3, 3), activation='relu',
                              padding="same"))
Model.add(keras.layers.BatchNormalization())
Model.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

# Convolution stage 3: two convolutions back to back, pooled once at the end.
Model.add(keras.layers.Conv2D(filters=384, kernel_size=(3, 3), activation='relu',
                              padding="same"))
Model.add(keras.layers.BatchNormalization())
Model.add(keras.layers.Conv2D(filters=256, kernel_size=(3, 3), activation='relu',
                              padding="same"))
Model.add(keras.layers.BatchNormalization())
Model.add(keras.layers.MaxPool2D(pool_size=(2, 2)))

# Classifier head: flatten, two dense layers each followed by 50% dropout,
# then a 10-way softmax output.
Model.add(keras.layers.Flatten())
Model.add(keras.layers.Dense(4096, activation='relu'))
Model.add(keras.layers.Dropout(0.5))
Model.add(keras.layers.Dense(256, activation='relu'))
Model.add(keras.layers.Dropout(0.5))
Model.add(keras.layers.Dense(10, activation='softmax'))

# Early stopping: no `monitor` is passed, so the callback's default metric is
# used (val_loss per the Keras documentation). Training stops after 20 epochs
# without an improvement of at least 0.001, and the best weights are restored.
early_stopping = EarlyStopping(
    min_delta=0.001,
    patience=20,
    restore_best_weights=True,
    verbose=0,
)

# Compile: the sparse categorical cross-entropy loss is paired with the
# integer labels in `train_labels`.
Model.compile(
    optimizer="adam",
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train: up to 50 epochs in batches of 256, evaluating on the validation
# arrays after every epoch. `hist` keeps the object returned by fit().
hist = Model.fit(
    train_images,
    train_labels,
    batch_size=256,
    epochs=50,
    callbacks=[early_stopping],
    validation_data=(val_images, val_labels),
)

# Print the layer-by-layer overview of the trained model.
Model.summary()

In [None]:
# Run the model on every test image, then reduce each row of 10 softmax
# outputs to the index of its largest value, i.e. the predicted class.
predictions = Model.predict(test_images).argmax(axis=1)

# Predicted image

In [None]:
# Pick one test row at random (the upper bound of randint is exclusive,
# so n is always a valid row position).
n = np.random.randint(0, test_shape[0])

# Every column of the row is a pixel; reshape the flat row into an image.
sample = np.array(test.iloc[n, :]).reshape(img_size)

plt.imshow(sample)
# The title shows the class the model predicted for this same row.
plt.title(f'Digit : {predictions[n]}')

# Result

In [None]:
# Build the submission table directly from the prediction array instead of
# appending one [id, label] pair per iteration of a range(len(...)) loop.
# ImageId is 1-based: the first test row gets ImageId 1.
results = pd.DataFrame({
    'ImageId': np.arange(1, len(predictions) + 1),
    'Label': predictions,
}).set_index('ImageId')

# ImageId is the index, so it is written as the first CSV column.
results.to_csv('/kaggle/working/Digit_Recognizer_result.csv')

# Preview the first rows. This is kept as the last expression of the cell so
# the notebook actually displays it (mid-cell, its value was discarded).
results.head()