# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
%matplotlib inline
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
import seaborn as sns

# Read the dataset

In [None]:
# Load the Digit Recognizer training CSV into a DataFrame.
train_data = pd.read_csv(filepath_or_buffer='../input/digit-recognizer/train.csv')

In [None]:
# Preview the first rows of the training frame.
train_data.head()

In [None]:
# Print a summary of the training frame (index range, column dtypes, memory use).
train_data.info()

In [None]:
# Load the Digit Recognizer test CSV into a DataFrame.
test_data = pd.read_csv(filepath_or_buffer='../input/digit-recognizer/test.csv')

In [None]:
# Preview the first rows of the test frame.
test_data.head()

# Visualizing the data

In [None]:
# Take the first training row: its label, and every non-label column as a flat array.
label = train_data.iloc[0]['label']
image = np.array(train_data.drop(columns='label').iloc[0])

In [None]:
# Show the label value of the first training row.
label

In [None]:
# Show the flat pixel vector of the first training row (the whole array is echoed).
image

In [None]:
# Shape of the flat pixel vector for one sample, before it is reshaped for plotting.
print(image.shape)

## Visualize a digit

In [None]:
# Lay the flat pixel vector out as a 28x28 single-channel picture and draw it
# in grayscale with the axes hidden.
plt.imshow(np.reshape(image, (28, 28, 1)), cmap='gray')
plt.axis('off')

## Visualize Random Images

A function to visualize random images in the training set

In [None]:
def visualise_random_image():
    """Draw one randomly chosen training digit on the current matplotlib axes.

    Reads the global ``train_data`` frame: a random row is selected, its
    non-label columns are reshaped to 28x28 and shown in grayscale, and the
    row's ``label`` value is used as the plot title. The axes are hidden.

    Returns:
        None. The function only draws on the current axes.
    """
    # Bound the draw by the actual row count instead of a hard-coded 42000,
    # so every row is reachable and no index can fall outside the frame if
    # the data is subsampled or extended.
    index = np.random.randint(0, len(train_data))
    # Slice out the single row first: dropping 'label' from the whole frame
    # on every call would copy the entire dataset just to read one row.
    sample = train_data.iloc[[index]]
    image = np.array(sample.drop('label', axis=1).iloc[0])
    label = sample['label'].iloc[0]
    plt.imshow(image.reshape(28,28,1), cmap='gray')
    plt.title(label)
    plt.axis('off')

In [None]:
# Fill a 5x10 grid with randomly chosen training digits.
plt.figure(figsize=(12, 8))
for slot in range(1, 51):
    # Subplot positions are 1-based; each call makes that cell the current axes.
    ax = plt.subplot(5, 10, slot)
    visualise_random_image()

## Count the samples per label

In [None]:
# Number of training rows for each distinct label value.
train_data['label'].value_counts()

In [None]:
# Bar chart of how many training rows carry each label.
plt.figure(figsize=(8, 6))
sns.countplot(data=train_data, x='label')

## Percentage of classes in dataset

The data appears well balanced: every class makes up roughly 10% of the samples.

In [None]:
# Share of the training rows held by each label, expressed in percent.
train_data['label'].value_counts().div(len(train_data)).mul(100)

## Train-test split

In [None]:
# Separate the target column from the feature columns.
y = train_data['label']
X = train_data.drop(columns='label')

In [None]:
# Fetch the Keras MNIST splits and merge them into one extra pool of samples.
# NOTE(review): the competition test images may themselves originate from
# MNIST; if so, training on the full MNIST set leaks test data — confirm this
# is intended before trusting any score.
(x_train1, y_train1), (x_test1, y_test1) = tf.keras.datasets.mnist.load_data()

# Join the two image splits, then the two label splits, along the sample axis.
train1 = np.concatenate((x_train1, x_test1))
y_train1 = np.concatenate((y_train1, y_test1))

# Flatten every image into a 784-long row; Y_train1 is a second name for the labels.
X_train1 = train1.reshape(-1, 784)
Y_train1 = y_train1

In [None]:
# Append the flattened MNIST rows (and their labels) after the CSV training rows.
X_train = np.concatenate([X.values, X_train1], axis=0)
y_train = np.concatenate([y, y_train1], axis=0)

In [None]:
# Turn each 784-long row into a 28x28 image with a single channel.
X_train = np.reshape(X_train, (-1, 28, 28, 1))

In [None]:
# Hold out 10% of the combined samples; the fixed seed keeps the split repeatable.
# Note: the inputs are overwritten, so re-running this cell splits the
# already-reduced training part again.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    random_state=101,
)

## model1

In [None]:
# Shell out to nvidia-smi to report the GPU status of the current runtime.
!nvidia-smi

## Normalize the data

In [None]:
# Divide both splits by 255. The values are overwritten in place, so
# re-running this cell would scale them a second time.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
# Enforce the (samples, 28, 28, 1) layout on both splits.
# NOTE(review): X_train was already given this layout before the split, so
# this step looks redundant — confirm before removing.
X_train, X_test = (np.reshape(part, (-1, 28, 28, 1)) for part in (X_train, X_test))

# Final model with whole data

In [None]:
# Give the CSV training pixels the (samples, 28, 28, 1) layout used by the model.
#
# Deliberately NOT scaled here: a later cell appends the raw MNIST pixels to X
# and then divides the combined array by 255. Dividing at this point as well
# would scale the CSV images twice (by 1/255**2) while the MNIST images and
# the test set are scaled once, leaving part of the training data on a
# different scale from the data the model predicts on.
X = X.values.reshape(-1,28,28,1)

In [None]:
# Shape of the reshaped CSV image array.
X.shape

In [None]:
# Shape of the flattened MNIST array built earlier.
X_train1.shape

In [None]:
# Append the MNIST images (reshaped to 28x28x1) and their labels to the full set.
# Note: X and y are overwritten, so re-running this cell appends MNIST again.
X = np.concatenate([X, np.reshape(X_train1, (-1, 28, 28, 1))], axis=0)
y = np.concatenate([y, y_train1], axis=0)

In [None]:
# Shape of the image array after MNIST has been appended.
X.shape

In [None]:
# Shape of the label array after MNIST has been appended; its length should match X.
y.shape

In [None]:
# Divide the combined image array by 255. X is overwritten, so re-running
# this cell would scale it again.
X = X / 255.0

In [None]:
# Convolutional classifier for 28x28 single-channel digit images.
# Two convolutional stages (5x5 kernels, then 3x3 kernels), each made of two
# Conv2D + BatchNormalization pairs followed by 2x2 max-pooling and dropout,
# feed a 256-unit dense layer and a 10-way softmax output.
model7 = tf.keras.Sequential([
    # Only the first layer declares the input shape; Sequential infers the
    # shapes of all later layers, so the `input_shape` arguments that used to
    # sit on the inner Conv2D layers had no effect and have been removed.
    tf.keras.layers.Conv2D(64, (5,5), input_shape=(28,28,1), activation='relu', padding='same'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, (5,5), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(2,2),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.MaxPool2D(2,2),
    tf.keras.layers.Dropout(0.25),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dropout(0.25),
    # One probability per digit class.
    tf.keras.layers.Dense(10, activation="softmax")
])

# Sparse categorical cross-entropy takes integer class labels directly, which
# matches the integer `y` array; the softmax output already yields
# probabilities, so the loss keeps its default from_logits=False.
model7.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                 optimizer=tf.keras.optimizers.Adam(),
                 metrics=["accuracy"])

# Fit the model on the full data set (no validation split is held out here).
history = model7.fit(X,y,epochs=50)

## Predicting on the test data

In [None]:
# Scale the test pixels by 1/255 and give them the (samples, 28, 28, 1) layout.
# test_data changes from a DataFrame to an ndarray here, so this cell cannot
# be re-run without reloading the CSV first.
test_data = (test_data / 255.0).values.reshape(-1, 28, 28, 1)

In [None]:
# Predict class scores for every test image and keep the index of the
# highest-scoring class for each one.
predictions = np.argmax(model7.predict(test_data), axis=1)
predictions

In [None]:
# Load the submission template that the predictions will be written into.
sample_submission = pd.read_csv(filepath_or_buffer='../input/digit-recognizer/sample_submission.csv')

In [None]:
# Inspect the submission frame.
sample_submission

In [None]:
# Store the predicted classes in the 'Label' column; the assignment is
# positional, so it relies on the predictions being in the same row order
# as the submission frame.
sample_submission['Label'] = predictions

In [None]:
# Inspect the submission frame.
sample_submission

In [None]:
# Distinct values present in the 'Label' column (sanity check on the predicted classes).
sample_submission['Label'].unique()

In [None]:
# Number of submission rows for each value in the 'Label' column.
sample_submission['Label'].value_counts()

In [None]:
# Write the submission file, leaving the DataFrame index out of the CSV.
sample_submission.to_csv(path_or_buf="submission.csv", index=False)