# Using a CNN for the MNIST dataset

**libraries used**

* numpy
* pandas
* tensorflow

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
inputPaths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in inputPaths:
    print(path)

# Any results you write to the current directory are saved as output.

## define hyper parameters

- learningRate : learning rate for the Adam optimizer
- shuffleSize  : shuffle buffer size for the dataset; a value greater than or equal to the dataset size is preferred
- batchSize    : batch size for the training data
- numSteps     : total number of training steps to run; one step means one batch of data
- displayStep  : number of steps between metric displays; one step means one batch of data

In [None]:
# hyper parameters
learningRate = 0.001  # step size passed to the Adam optimizer
# Shuffle buffer size. The Keras MNIST training split holds 60000 images, so
# the buffer must be at least that large to shuffle the whole set uniformly
# (the previous value of 42000 covered only part of it).
shuffleSize = 60000
batchSize = 256       # number of samples per training batch
numSteps = 1500       # total number of training steps (one step = one batch)
displayStep = 20      # print metrics every this many steps

# dataset

- import dataset from keras library
- convert xTrain and xTest into float32
- normalize data by dividing by 255
- reshape dataset such that each input tensor is of shape 28 * 28 * 1

In [None]:
# Load the train/test split that ships with Keras.
(xTrain, yTrain), (xTest, yTest) = mnist.load_data()
# Cast the pixels to float32 and rescale them from [0, 255] to [0, 1].
xTrain = xTrain.astype(np.float32) / 255.
xTest = xTest.astype(np.float32) / 255.
# Give every image an explicit single channel: (N, 28, 28, 1).
xTrain = xTrain.reshape(-1, 28, 28, 1)
xTest = xTest.reshape(-1, 28, 28, 1)

## manage your dataset

- create a dataset using imported data from mnist
- shuffle(shuffleSize) shuffles the dataset
- batch(batchSize) divides the dataset into batches each containing "batchSize" inputs
- repeat(-1) repeats the dataset indefinitely

In [None]:
# Build the training input pipeline from the in-memory arrays.
trainData = tf.data.Dataset.from_tensor_slices((xTrain,yTrain))
# Shuffle BEFORE repeating: each pass over the data is then shuffled on its
# own and every sample is seen once per epoch. Repeating first would let the
# shuffle buffer mix samples from neighbouring epochs together.
trainData = trainData.shuffle(shuffleSize).repeat(-1).batch(batchSize)

## architecture

- import the required layers from `tensorflow.keras.layers`
- Conv2D is convolutional layer
- MaxPool2D is maxpooling layer
- Dropout layer is used only during training
- Flatten layer flattens the previous layer
- Dense is a classic neural network layer

In [None]:
class cnn(Model):
    """Convolutional digit classifier with ten output classes.

    Layer stack, in the order applied by ``call``:
    two 5x5 convolutions (32 filters) -> 2x2 max-pool -> dropout 0.25 ->
    two 3x3 convolutions (64 filters) -> 2x2 max-pool -> dropout 0.25 ->
    flatten -> dense 256 (ReLU) -> dropout 0.5 -> dense 10.
    """

    def __init__(self):
        super().__init__()
        # First convolutional stage. No `input_shape` is given here: in a
        # subclassed model the input shape comes from the tensor passed to
        # `call`, and newer Keras versions warn when a layer receives it.
        self.c1 = Conv2D(filters=32, kernel_size=(5, 5), padding='same', activation='relu')
        self.c2 = Conv2D(filters=32, kernel_size=(5, 5), padding='same', activation='relu')
        self.mp1 = MaxPool2D(pool_size=(2, 2))
        self.dout1 = Dropout(0.25)
        # Second convolutional stage.
        self.c3 = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')
        self.c4 = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')
        self.mp2 = MaxPool2D(pool_size=(2, 2), strides=(2, 2))
        self.dout2 = Dropout(0.25)
        # Classifier head.
        self.flatten = Flatten()
        self.d1 = Dense(256, activation='relu')
        self.dout3 = Dropout(0.5)
        self.d2 = Dense(10)  # one raw score (logit) per class, no activation

    def call(self, x, is_training=False):
        """Run the forward pass.

        Args:
            x: batch of images; the notebook feeds tensors shaped
                (batch, 28, 28, 1).
            is_training: plain Python bool. When True the dropout layers are
                active and the raw logits are returned; when False dropout is
                disabled and softmax probabilities are returned.

        Returns:
            A (batch, 10) tensor of logits (training) or class probabilities
            (inference).
        """
        x = self.c1(x)
        x = self.c2(x)
        x = self.mp1(x)
        x = self.dout1(x, training=is_training)
        x = self.c3(x)
        x = self.c4(x)
        x = self.mp2(x)
        x = self.dout2(x, training=is_training)
        x = self.flatten(x)
        x = self.d1(x)
        x = self.dout3(x, training=is_training)
        x = self.d2(x)
        if is_training:
            # The training loss applies softmax itself, so hand back logits.
            return x
        return tf.nn.softmax(x)

## create a cnn object

In [None]:
# Single model instance used by the training, evaluation and prediction cells.
myCNN = cnn()

In [None]:
def lossOp(pred,true):
    """Return the mean sparse softmax cross-entropy over a batch.

    Args:
        pred: raw, un-normalised class scores (logits), one row per sample.
        true: integer class labels; cast to int64 before use.

    Returns:
        Scalar tensor holding the batch-averaged loss.
    """
    labels = tf.cast(true, dtype=tf.int64)
    perSample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=pred)
    return tf.reduce_mean(perSample)

def accuracy(pred,true):
    """Return the fraction of samples whose arg-max prediction matches the label.

    Args:
        pred: per-class scores (logits or probabilities), one row per sample;
            the predicted class is the arg-max along axis 1.
        true: integer class labels, one per sample.

    Returns:
        The accuracy as a NumPy scalar.
    """
    predictedLabels = tf.math.argmax(pred, 1)
    # A fresh metric per call so that no state carries over between calls.
    metric = tf.keras.metrics.Accuracy()
    # update_state takes (y_true, y_pred); pass by keyword so the labels and
    # predictions cannot be swapped.
    metric.update_state(y_true=true, y_pred=predictedLabels)
    return metric.result().numpy()

# Adam optimizer shared by every training step; step size comes from learningRate.
optimizer = tf.optimizers.Adam(learningRate)

def optimize(x,y):
    """Apply one optimizer update to myCNN using the batch (x, y).

    Args:
        x: batch of input images.
        y: integer class labels for the batch.
    """
    with tf.GradientTape() as tape:
        # Training-mode forward pass: dropout active, output is raw logits.
        logits = myCNN(x, is_training=True)
        batchLoss = lossOp(logits, y)
    # Read the variable list only after the forward pass has run, so that
    # weights created on the model's first call are included.
    weights = myCNN.trainable_variables
    grads = tape.gradient(batchLoss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    


In [None]:
# Train for numSteps batches, printing metrics every displayStep steps.
for step,(batchX,batchY) in enumerate(trainData.take(numSteps),1):
    optimize(batchX,batchY)
    if step % displayStep == 0:
        # Inference-mode forward pass: dropout off, output is softmax
        # probabilities (not logits).
        pred = myCNN(batchX)
        # lossOp expects logits and would apply softmax a second time, so the
        # reported loss is computed with the probability-based cross-entropy.
        loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(
                tf.cast(batchY, tf.int64), pred))
        acc = accuracy(pred,batchY)
        print("Step: %i, loss: %f, accu: %f"%(step,loss,acc))

In [None]:
# Evaluate on the held-out test split (dropout disabled).
pred = myCNN(xTest,is_training = False)
accu = accuracy(pred,yTest)
# In inference mode the model returns softmax probabilities, so use the
# probability-based cross-entropy; lossOp expects logits and would apply
# softmax a second time.
loss = tf.reduce_mean(
    tf.keras.losses.sparse_categorical_crossentropy(
        tf.cast(yTest, tf.int64), pred))
print("Cross validation loss: %f, accuracy: %f"%(loss,accu))

In [None]:
# Predict labels for the competition test CSV and write the submission file.
testDF = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')
# NOTE: this rebinds xTest, replacing the Keras test images loaded earlier.
# Scale the pixel values to [0, 1] and reshape each row to (28, 28, 1).
xTest = testDF.to_numpy(dtype=np.float32) / 255.
xTest = np.reshape(xTest,(-1,28,28,1))
pred = myCNN(xTest,is_training = False)
# Most probable class per image.
pred = tf.argmax(pred,1).numpy()
# 1-based image ids sized from the predictions instead of a hard-coded 28000,
# so the id column always matches the number of rows in the CSV.
Id = np.arange(1, len(pred) + 1)
out = pd.DataFrame({'ImageId' : Id, 'Label' : pred})
out.to_csv('outputFile.csv', index=False)