# Keras Net Digit Classifier with 97% accuracy (Beginner Friendly)
<img src='https://i1.wp.com/www.marktechpost.com/wp-content/uploads/2019/10/1_QAAGYDHreoRm4vEArNzTTQ.png?resize=372%2C238&ssl=1' alt="Handwritten digit recognition" style='width: 900px;'>

## Importing required dependencies

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os # used by kaggle's kernel to setup enviroment
# Walk the Kaggle input directory and print the full path of every file in it.
for folder, _, entries in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, entry) for entry in entries)
    for full_path in full_paths:
        print(full_path)

from PIL import Image # Used for creating images from csv file
import matplotlib.pyplot as plt # Used for displaying the images
import tensorflow.keras as keras # Importing Keras library
from sklearn.model_selection import train_test_split # Used for creating train and validation set

## Displaying data and images

In [None]:
df = pd.read_csv('../input/digit-recognizer/train.csv') # Loads the train csv file into pandas dataframe

In [None]:
df.head()

In [None]:
df['label'].value_counts()# Count of no of pictures for each digit

In [None]:
max(df['pixel0']) # A number of pixels have no information for any image, ex pixel0

In [None]:
# # Creating a list of images which are 2d arrays of size 28x28 from the csv file
# images = [] # Stores all the images
# img = [] # Stores one image at a time
# row = []
# for i in range(0,df.shape[0]):
#     for j in range(0,785):
#         if(j%28 == 0 and j!=0):
#             img.append(row)
#             row = []
#             if j!=784:
#                 row.append(df['pixel'+str(j)][i])
#         else:
#             row.append(df['pixel'+str(j)][i])
#     images.append(img)
#     img = []

In [None]:
images_new = np.array(df.drop(columns = ['label']),dtype= np.float32)/255

In [None]:
images_new = images_new.reshape(-1,28,28,1)

In [None]:
labels = df['label']
labels= keras.utils.to_categorical(labels,num_classes = 10)

In [None]:
# Show four sample digits (rows 6, 22, 105 and 200) in a 2x2 grayscale grid.
plt.figure(figsize=(12, 7))
for slot, sample_index in enumerate((6, 22, 105, 200), start=1):
    plt.subplot(2, 2, slot)
    plt.imshow(images_new[sample_index], cmap='gray')


In [None]:
# Show four more sample digits (rows 100, 101, 135 and 290) in a 2x2 grayscale grid.
plt.figure(figsize=(12, 7))
for slot, sample_index in enumerate((100, 101, 135, 290), start=1):
    plt.subplot(2, 2, slot)
    plt.imshow(images_new[sample_index], cmap='gray')

In [None]:
# Pixel values at the edges are always zero, as the images have a black background

In [None]:
## Preparing data for model training

In [None]:
# Feature frame: every column of df except the label.
data = df.drop(columns=['label'])
# Positional indices of the columns whose largest value over all rows is 0,
# i.e. pixels that are never non-zero in any image.
redundant = [
    position
    for position, column in enumerate(data.columns)
    if max(data[column]) == 0
]

In [None]:
print(redundant)

In [None]:
len(redundant)

In [None]:
# Build the column names ('pixel<N>') of the redundant pixels, i.e. the
# pixel columns that never hold a non-zero value.
drop = ['pixel' + str(index) for index in redundant]

In [None]:
len(drop)

In [None]:
# Dropping the redundant pixel columns (the names collected in `drop`)
data_train = data.drop(columns = drop)

In [None]:
data_train.head()

In [None]:
print(data_train['pixel500'].unique())
print(len(data_train['pixel500'].unique()))

In [None]:
# Normalizing The data set by dividing each value by 255
data_normalized = data_train/255

In [None]:
data_normalized['pixel500'].unique()

In [None]:
len(data_normalized.columns)

In [None]:
data_normalized['labels']= df['label']

In [None]:
# Creating a train and a validation set
train,val = train_test_split(data_normalized,test_size = 0.2)

In [None]:
train = pd.DataFrame(train)
val = pd.DataFrame(val)

In [None]:
train.columns = data_normalized.columns
val.columns = data_normalized.columns
train.head()

In [None]:
train_labels = train['labels']

In [None]:
val_label = val['labels']
val = val.drop(columns = ['labels'])

In [None]:
train.drop(columns = ['labels'],inplace = True)
train.shape

In [None]:
# train = train.to_numpy()
# val = val.to_numpy()

In [None]:
# train

## Training a basic neural network

In [None]:
# Fully connected network: one hidden layer of 128 ReLU units followed by a
# 10-way softmax output.
keras.backend.clear_session()
# Take the input width from the training frame instead of hard-coding 708, so
# the model still builds if a different set of pixel columns gets dropped.
n_features = train.shape[1]
model = keras.models.Sequential([
    keras.layers.Dense(128, activation='relu', input_dim=n_features),
    keras.layers.Dense(10, activation='softmax')
])
# EarlyStopping ends training after 10 epochs without improvement, which keeps
# the model from over-training on the training data.
# ModelCheckpoint with save_best_only=True rewrites 'model.h5' only when the
# monitored quantity improves. No `monitor` is passed to either callback, so
# both use the Keras default (validation loss, not validation accuracy).
early = keras.callbacks.EarlyStopping(patience=10)
model_check = keras.callbacks.ModelCheckpoint('model.h5', save_best_only=True)
# train_labels is the raw 'labels' column (not one-hot), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(
    train,
    train_labels,
    epochs=600,
    validation_data=(val, val_label),
    callbacks=[model_check, early],
)

In [None]:
sample = pd.read_csv('../input/digit-recognizer/sample_submission.csv')

In [None]:
sample.head()

In [None]:
model = keras.models.load_model('model.h5')

In [None]:
test = pd.read_csv('../input/digit-recognizer/test.csv')
# Apply the same preprocessing the training features received: remove the
# all-zero pixel columns and scale the remaining pixels by 1/255. Without the
# scaling the model would be fed inputs 255 times larger than it was trained on.
test = test.drop(columns=drop) / 255
predictions = model.predict(test)

In [None]:
# Pick the most probable class for every test row. A fixed "> 0.5" threshold
# would emit no label at all for rows where no class reaches 0.5, leaving
# `pred` shorter than the test set and shifting every later label.
pred = np.argmax(predictions, axis=1).tolist()

In [None]:
# Reload the untouched test file. Note that `drop` is rebound here: it now
# holds *all* original column names of the test file, so that those columns
# can be removed later and only the columns added afterwards remain.
test = pd.read_csv('../input/digit-recognizer/test.csv')
drop = test.columns

In [None]:
test['Label'] = pred

In [None]:
test = test.drop(columns = drop)


In [None]:
# 1-based image ids, one per row of `test`. The count is taken from the frame
# rather than hard-coded, so it always matches the number of predictions.
ImageId = list(range(1, len(test) + 1))

In [None]:
test['ImageId'] = ImageId

In [None]:
test.head()

In [None]:
test.to_csv('Submit_2.csv',index = False)

In [None]:
images_new.shape

In [None]:
# Move the sample axis to the end: (N, 28, 28, 1) -> (28, 28, 1, N).
# reshape() would keep the memory order and merely reinterpret it, so
# images_train[:, :, :, k] would not be image k; moveaxis keeps each image intact.
images_train = np.moveaxis(images_new, 0, -1)

In [None]:
images_train.shape

In [None]:
images_train[:,:,:,2].shape

In [None]:
plt.imshow(images_train[:,:,:,2])

In [None]:
# Per-image dimensions, read from images_new (reshaped earlier to (-1, 28, 28, 1)).
nRows,nCols,nDims = images_new.shape[1:]
# Reshape to (num_images, nRows, nCols, nDims), which is the layout images_new already has.
train_data = images_new.reshape(images_new.shape[0], nRows, nCols, nDims)
# test_data = .reshape(test_images.shape[0], nRows, nCols, nDims)
# Shape of a single sample; passed as input_shape to the first CNN layer.
input_shape = (nRows, nCols, nDims)

# Cast to float32 (images_new was itself created with dtype float32).
train_data = train_data.astype('float32')
# test_data = test_data.astype('float32')

In [None]:
# No further scaling here: train_data is derived from images_new, which was
# already divided by 255 when it was created. Dividing again would squeeze the
# training inputs into [0, 1/255] while the test images fed to the CNN are
# divided by 255 only once, so training and prediction inputs would disagree.

In [None]:
train_data.shape

In [None]:
labels.shape

## Building a Convolutional Neural Net

In [None]:
# Start from a clean Keras state before defining the CNN.
keras.backend.clear_session()
# Three Conv2D + MaxPooling2D stages (32, 16 and 8 filters), then a flattened
# 16-unit ReLU layer and a 10-way softmax output.
cnn_layers = [
    keras.layers.Conv2D(
        32,
        (3, 3),
        batch_size=32,
        activation='relu',
        padding='same',
        input_shape=input_shape,
    ),
    keras.layers.MaxPooling2D(2, 2),
    keras.layers.Conv2D(16, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(2, 2),
    keras.layers.Conv2D(8, (3, 3), activation='relu'),
    keras.layers.MaxPooling2D(2, 2),
    keras.layers.Flatten(),
    keras.layers.Dense(16, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
]
model = keras.models.Sequential(cnn_layers)
# Early stopping ends training once 20 epochs pass without improvement, to
# keep the model from over-training on the training data.
early = keras.callbacks.EarlyStopping(patience=20)


In [None]:
# Save the model to 'model.h5' whenever the checkpoint's monitored quantity improves.
checkpoint_path = 'model.h5'
model_check = keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True)
# Labels are one-hot encoded, hence the (non-sparse) categorical loss.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Hold out 20% of the training data for validation during fitting.
history = model.fit(
    train_data,
    labels,
    epochs=600,
    validation_split=0.2,
    callbacks=[model_check, early],
)

In [None]:
test = pd.read_csv('../input/digit-recognizer/test.csv')

In [None]:
# Convert the raw test pixels to float32, scale them by 1/255, and shape them
# into (num_images, 28, 28, 1) arrays for the CNN.
test_pixels = np.array(test, dtype=np.float32)
test_imag = (test_pixels / 255).reshape(-1, 28, 28, 1)

In [None]:
predictions_cnn = model.predict(test_imag)

In [None]:
predictions_cnn

In [None]:
# Label of each test image = index of the largest class probability in its
# prediction row (first index wins on ties, as in the original manual scan).
pred = np.argmax(predictions_cnn, axis=1).tolist()

In [None]:
sample['Label']= pred

In [None]:
sample.head()

In [None]:
sample.to_csv('Submit_cnn2.csv',index = False)