# MNIST Data Set

Image Shape: (28, 28, 1) <br>
Number of nodes in input layer: 784 = 28 * 28 <br>
Number of nodes in output layer: 10 <br>
https://www.tensorflow.org/datasets/catalog/mnist

## Importing required packages

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

# Import Keras through the public `tensorflow.keras` namespace.
# `tensorflow.python.*` is a private implementation path, and the optimizer
# used later in this notebook comes from the public `tf.keras` API, so the
# model classes are taken from the same public API to keep them compatible.
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense

## Loading data with 3 different options

In [2]:
# Option 1 - default load: every example is a dictionary with the keys `image` and `label`
data0 = tfds.load('mnist')
data0

{'test': <DatasetV1Adapter shapes: {image: (28, 28, 1), label: ()}, types: {image: tf.uint8, label: tf.int64}>,
 'train': <DatasetV1Adapter shapes: {image: (28, 28, 1), label: ()}, types: {image: tf.uint8, label: tf.int64}>}

In [3]:
# Option 2 - supervised load: every example is an (image, label) 2-tuple
data1 = tfds.load('mnist', as_supervised=True)
data1

{'test': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
 'train': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>}

`as_supervised = False` (the default) loads the data set in the form of a dictionary, with keys `image` and `label`. <br>
`as_supervised = True` loads the data set in the form of a 2-tuple structure, i.e. **(image, label)**

In [4]:
# Option 3 - supervised load plus metadata: the result is a (datasets, DatasetInfo) pair
data2 = tfds.load('mnist', as_supervised=True, with_info=True)
data2

({'test': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
  'train': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>},
 tfds.core.DatasetInfo(
     name='mnist',
     version=3.0.0,
     description='The MNIST database of handwritten digits.',
     homepage='http://yann.lecun.com/exdb/mnist/',
     features=FeaturesDict({
         'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
         'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
     }),
     total_num_examples=70000,
     splits={
         'test': 10000,
         'train': 60000,
     },
     supervised_keys=('image', 'label'),
     citation="""@article{lecun2010mnist,
       title={MNIST handwritten digit database},
       author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
       journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
       volume={2},
       year={2010}
     }""",
     redistribution_info=,
 ))

In [5]:
# Print the "test" split of each load variant to compare their element structure
for test_split in (data0['test'], data1['test'], data2[0]['test']):
    print(test_split)

<DatasetV1Adapter shapes: {image: (28, 28, 1), label: ()}, types: {image: tf.uint8, label: tf.int64}>
<DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>
<DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>


In [6]:
# `data2` is a 2-tuple: the dictionary of splits followed by the DatasetInfo object
data, info = data2

In [7]:
print(data) # Dictionary holding the "test" and "train" datasets

{'test': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>, 'train': <DatasetV1Adapter shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>}


In [8]:
print(info) # Metadata of the data set: features, split sizes, citation, ...

tfds.core.DatasetInfo(
    name='mnist',
    version=3.0.0,
    description='The MNIST database of handwritten digits.',
    homepage='http://yann.lecun.com/exdb/mnist/',
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=70000,
    splits={
        'test': 10000,
        'train': 60000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann. lecun. com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
    redistribution_info=,
)



## Data Preprocessing

In [9]:
# Keep the two splits in separate variables for the preprocessing steps below
train = data['train']
test = data['test']

In [10]:
info.splits['train'] # Metadata of the "train" split, looked up in the "splits" mapping of "info"

<tfds.core.SplitInfo num_examples=60000>

In [11]:
info.splits['train'].num_examples # Number of samples present in the "train" split

60000

#### Storing the number of samples in training, validation and test data

In [12]:
# 90% of the "train" split is reserved for training.
# Multiplying by 0.9 yields a float, so the product is cast to an int64 tensor.
num_train = tf.cast(0.9 * info.splits['train'].num_examples, tf.int64)

In [13]:
# 10% of the "train" split is reserved for validation.
# Multiplying by 0.1 yields a float, so the product is cast to an int64 tensor.
num_validation = tf.cast(0.1 * info.splits['train'].num_examples, tf.int64)

In [14]:
num_test = info.splits['test'].num_examples # Size of the whole "test" split; used as-is, without any cast

#### Scaling all images

In [15]:
def scaling_func(image, label):
    """Scale one image to the range 0 to 1 and pass its label through unchanged.

    The MNIST images are stored as uint8 values between 0 and 255, so the
    image is first converted to float32 and then divided by 255.

    Args:
        image: Image tensor of one sample.
        label: Label of the same sample.

    Returns:
        The 2-tuple (scaled image, label).
    """
    scaled_image = tf.cast(image, tf.float32) / 255. # Float divisor keeps the result a float
    return scaled_image, label

In [16]:
# Apply the scaling function to every sample of both splits.
# The "train" split still contains the samples of the future training AND validation sets.
scaled_train_validation, scaled_test = (
    split.map(scaling_func) for split in (train, test)
)

#### Shuffling all images

In [17]:
# Size of the shuffle buffer, i.e. the number of samples which will be shuffled at a time.
# NOTE(review): this is smaller than the 60000-sample "train" split; per the tf.data
# documentation a buffer smaller than the data set gives only an approximate shuffle - confirm this is intended.
buffer_size = 6000

In [18]:
# Shuffling training and validation data, based on given buffer size.
# The shuffled data set is split with `.take()` / `.skip()` in the following cells.
# By default `shuffle` produces a new order on every pass over the data, which would
# make that split different on each pass and let validation samples leak into training.
# `reshuffle_each_iteration = False` keeps one fixed order, so the two parts stay disjoint.
shuffled_train_validation = scaled_train_validation.shuffle(buffer_size, reshuffle_each_iteration = False)
shuffled_test = scaled_test.shuffle(buffer_size) # Shuffling test data, based on given buffer size

#### Creating training, validation and test data

In [19]:
validation = shuffled_train_validation.take(num_validation) # The first "num_validation" shuffled samples form the validation data

In [20]:
training = shuffled_train_validation.skip(num_validation) # Everything after the first "num_validation" shuffled samples forms the training data

`.take(x)` will take the first "x" images for validation. <br>
`.skip(x)` will skip the first "x" images, and take all the remaining data for training.

In [21]:
testing = shuffled_test.take(num_test) # "num_test" is the size of the whole "test" split, so every test sample is kept

#### Batching

In [22]:
batch_size = 100 # Number of training samples present in one batch

In [23]:
# Training data needs to be batched for optimal usage of computing power.
# The name "training" is reassigned here, so running this cell a second time would batch the batches again.
training = training.batch(batch_size)

In [24]:
validation = validation.batch(num_validation) 
# Validation data does not need mini-batches.
# But, the model expects the validation data set in batch form.
# Hence, the batch size equals the number of validation samples, giving a single batch.
# The name "validation" is reassigned here, so this cell should be run only once per kernel session.

In [25]:
testing = testing.batch(num_test) # Single batch holding all test samples (batch size = size of the "test" split)

#### Extracting the inputs and targets of the single validation batch

In [26]:
# `iter` creates an iterator over the batches of the "validation" data set
validation_batches = iter(validation)
# `next` fetches the first batch - an (images, labels) 2-tuple - which is unpacked into the two variables
validation_inputs, validation_targets = next(validation_batches)

## Model Creation

In [27]:
input_layer_size = 784 # Number of nodes in input layer (28 * 28 pixels)
output_layer_size = 10 # Number of nodes in output layer (one per digit class)
hidden_layer_size = 100 # Number of nodes in each hidden layer of the model

In [32]:
# Fully connected network: flattened image -> 2 hidden ReLU layers -> softmax output layer
model = Sequential([
    Flatten(input_shape = (28, 28, 1)),                # Turns each (28, 28, 1) image into a flat vector
    Dense(hidden_layer_size, activation = 'relu'),     # Hidden layer 1
    Dense(hidden_layer_size, activation = 'relu'),     # Hidden layer 2
    Dense(output_layer_size, activation = 'softmax'),  # One output node per class
])

In [33]:
model.summary() # Prints the layers of the model with their output shapes and number of parameters

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 100)               78500     
_________________________________________________________________
dense_4 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                1010      
Total params: 89,610
Trainable params: 89,610
Non-trainable params: 0
_________________________________________________________________


## Selecting loss function and optimization algorithm

In [38]:
# Setting different hyperparameters for SGD optimizer to be used in the model
sgd = tf.keras.optimizers.SGD(learning_rate = 0.1, momentum = 0.9, nesterov = True)

# The model has to be compiled before it can be trained or evaluated.
# - The labels are plain integer class ids, hence the "sparse" categorical cross-entropy loss.
# - Tracking accuracy makes `model.evaluate` return the pair [loss, accuracy].
model.compile(optimizer = sgd,
              loss = 'sparse_categorical_crossentropy',
              metrics = ['accuracy']
             )

## Training the model

In [40]:
num_epochs = 5 # Number of full passes over the training data

In [41]:
# Train on the batched training data and evaluate on the validation tensors after every epoch.
# `validation_steps` is deliberately not passed: the Keras documentation describes it as
# relevant only when the validation data is a `tf.data` data set, whereas here the
# validation data is a pair of in-memory tensors that should be used in full.
model.fit(training,
          epochs = num_epochs,
          validation_data = (validation_inputs, validation_targets),
          verbose = 1
         )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f9670167990>

## Testing the model

In [43]:
model.evaluate(testing) # Evaluates the model on the single test batch; the result is displayed as the cell output



[0.06898871064186096, 0.978]

In [45]:
test_loss, test_accuracy = model.evaluate(testing) # Runs the evaluation again and unpacks the [loss, accuracy] result



In [47]:
# Report the test metrics; the accuracy is a fraction, so it is multiplied by 100 to show a percentage
print("Test Loss: ", test_loss)
print("Test Accuracy: ", 100* test_accuracy, "%")

Test Loss:  0.06898871064186096
Test Accuracy:  97.79999852180481 %
