In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the digit-recognizer training set; later cells treat the first column
# as `label` and the remaining columns as pixel values.
train=pd.read_csv('../input/digit-recognizer/train.csv')

In [None]:
# Preview the raw training frame.
train

In [None]:
# Each image is 28 pixels in height and 28 pixels in width, for a total of 784 pixels in total. Each pixel has a single pixel-value associated with it,
# indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255, inclusive.

import seaborn as sns
from matplotlib import pyplot as plt

# Show the first 9 training images in a 3x3 grid.
plt.figure(figsize=(8, 8))
for idx in range(9):
    # Skip the label in column 0 and fold the remaining 784 values into 28x28.
    pixels = train.iloc[idx, 1:].values.reshape((28, 28))
    image = np.asarray(pixels / 255)  # scale intensities to [0, 1]
    ax = plt.subplot(3, 3, idx + 1)
    ax.grid(False)
    plt.imshow(image, cmap='gray')
plt.show()
    



In [None]:
# Observation: the dataset contains the same digit (e.g. 0 or 1) drawn in
# visibly different shapes and sizes.

# One colour per digit label, keyed by the label value 0-9.
digit_shades = ['#b5e48c', '#99d98c', '#76c893', '#52b69a', '#34a0a4',
                '#168aad', '#1a759f', '#1e6091', '#184e77', '#013a63']
hue_color = dict(enumerate(digit_shades))

# Bar chart of how many training rows each digit label has.
plt.style.use("fivethirtyeight")
ax = sns.countplot(data=train, x='label', palette=hue_color)
plt.gcf().set_size_inches(12, 6)
plt.show()

In [None]:
# Find the pixel columns that are 0 in every training image. Such a column is
# constant, so it gives no information about the labels and can be dropped.
# NOTE: despite its name, `nonZeroPixel` holds the positions of the pixels that
# are NEVER non-zero; the name is kept because later cells read it.
data = train.drop(columns='label')

# One vectorised column-wise max replaces calling the Python builtin max() on
# every column with a manual counter. The result is a plain list of 0-based
# column positions within `data`, exactly what the loop used to build.
nonZeroPixel = np.flatnonzero((data.max(axis=0) == 0).to_numpy()).tolist()

In [None]:
# Positions of the pixel columns that are 0 in every training image.
print(nonZeroPixel)

# There are 76 pixels that only ever hold the value 0 (black).

In [None]:
# Number of all-zero pixel columns found above.
print(len(nonZeroPixel))

In [None]:
# Turn each all-zero pixel position into its column name ('pixel<position>').
drop = ['pixel' + str(position) for position in nonZeroPixel]

# Peek at the first few names, then show how many columns will be dropped.
print(drop[:15])
len(drop)

In [None]:
# Remove the all-zero pixel columns; `label` and the remaining pixels are kept.
train_data = train.drop(drop, axis=1)
# Number of columns left after the drop.
train_data.shape[1]

In [None]:
# Preview the training frame after the all-zero pixel columns were removed.
train_data

In [None]:
# Distinct raw intensities taken by one sample pixel column before scaling.
pixel770_values = train_data['pixel770'].unique()
print(pixel770_values)


In [None]:
# How many distinct raw intensities that sample pixel column takes.
print(train_data['pixel770'].unique().size)

In [None]:
# Scale every column by 1/255 so pixel intensities (0-255) fall in [0, 1].
# NOTE: `train_data` still contains `label`, so the labels are scaled here as
# well; a later cell overwrites that column with the original labels.
normalized_data = train_data.div(255)


In [None]:
# Same sample pixel column after scaling, followed by the total column count.
print(normalized_data['pixel770'].unique())
print(normalized_data.shape[1])

In [None]:
# The division by 255 also scaled the labels; put the original integer labels
# back. This reads the raw `train` frame, so it must run before the split cell
# rebinds `train`.
normalized_data = normalized_data.assign(label=train['label'])
normalized_data.head()

# Model Building

In [None]:
from matplotlib import pyplot as plt
import tensorflow.keras as keras
from sklearn.model_selection import train_test_split

In [None]:
# Features: every column after the first one (the first column is `label`).
X = normalized_data.iloc[:, 1:]
# Target: the first column, selected with a list so it stays a one-column DataFrame.
Y = normalized_data.iloc[:, [0]]


In [None]:
# Hold out 20% of the rows for validation. A fixed random_state makes the split
# (and therefore the reported validation metrics) repeatable across runs.
# NOTE: this rebinds `train` from the raw DataFrame to the training features, so
# the cells above that read the raw `train` cannot be re-run after this one
# without reloading the CSV first.
train, val, train_label, val_label = train_test_split(X, Y, test_size=0.2, random_state=42)
train.shape

# Train basic neural network

In [None]:
# Create and train a Sequential neural network with a single hidden layer of 128 neurons
from tensorflow.keras.models import Sequential

keras.backend.clear_session()
model = Sequential([
    # HIDDEN LAYER with 128 neurons. The input width is read from the training
    # features instead of being hard-coded, so it stays correct if the set of
    # dropped pixel columns changes.
    keras.layers.Dense(128, activation='relu', input_dim=train.shape[1]),
    # OUTPUT LAYER: one softmax probability for each of the 10 class labels
    keras.layers.Dense(10, activation='softmax')
])
# EarlyStopping stops training once the monitored quantity (val_loss by default)
# has not improved for `patience` epochs. With patience=10 and only 15 epochs it
# can trigger only when the best epoch is among the first few.
early = keras.callbacks.EarlyStopping(patience=10)
# ModelCheckpoint with save_best_only=True keeps in 'model.h5' the model from the
# epoch with the best monitored value. No `monitor` is passed, so that value is
# the default val_loss (lowest validation loss), not validation accuracy.
model_check = keras.callbacks.ModelCheckpoint('model.h5', save_best_only=True)
# Labels are integer class ids, hence the sparse variant of the cross-entropy loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(train, train_label, epochs=15, validation_data=(val, val_label), callbacks=[model_check, early])


In [None]:
# Load the sample submission file shipped with the dataset to see the expected output format.
sample=pd.read_csv('../input/digit-recognizer/sample_submission.csv')

In [None]:
# Preview the sample submission.
sample

In [None]:
# Replace the in-memory model with the checkpoint that ModelCheckpoint saved to
# 'model.h5' during training.
model=keras.models.load_model('model.h5')

In [None]:
# Load the test images and apply exactly the same preprocessing as the training
# features: drop the all-zero pixel columns, then scale intensities to [0, 1].
# The model was trained on inputs divided by 255, so feeding it raw 0-255 values
# would give it data on a different scale from anything it saw during training.
test = pd.read_csv('../input/digit-recognizer/test.csv')
test = test.drop(columns=drop) / 255
test.head()

In [None]:
# Run the reloaded network on the test features, then show the shape of the
# output (one row per test image, one column per output unit).
predications=model.predict(test)
predications.shape

In [None]:
# Inspect the raw network outputs.
predications

Observation: 1. The neural network returns a 2D array with one row per test image and 10 columns, one for each class label (0-9).

             2. Each value is the softmax probability that the image belongs to that class label; a value close to 0 means the predicted digit almost certainly does not belong to it.

             3. A value close to 1 means the predicted digit almost certainly belongs to that class label. The 10 values in a row sum to 1.

In [None]:
# For each test image, pick the class whose output column holds the largest value.
pred = predications.argmax(axis=1)

In [None]:
# Inspect the predicted class label for each test image.
pred

Observation:  1. The array above holds the predicted class label for each test image.

In [None]:
# Build the submission table: ImageId is the 1-based row number of each test
# image, and Label is the predicted digit.
image_ids = test.index + 1
submission = pd.DataFrame({"ImageId": image_ids, "Label": pred})
submission

In [None]:
# Write the submission to the working directory without the DataFrame index column.
submission.to_csv('submission.csv',index=False)