In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
mnist_data, mnist_info = tfds.load(name='mnist', with_info = True, as_supervised = True)
# as supervised => loads data in a 2 tuple structure
# with info => provides tuple containing version, features, no of samples etc

In [3]:
mnist_info

tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_path='C:\\Users\\sreedev\\tensorflow_datasets\\mnist\\3.0.1',
    file_format=tfrecord,
    download_size=11.06 MiB,
    dataset_size=21.00 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'test': <SplitInfo num_examples=10000, num_shards=1>,
        'train': <SplitInfo num_examples=60000, num_shards=1>,
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={2},
      year={2010}
   

In [4]:
mnist_train, mnist_test = mnist_data['train'], mnist_data['test']

#setting validation data number
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples 
num_validation_samples = tf.cast(num_validation_samples, tf.int64)
#num_examples = no of test images that are fed
#tf.cast => to make sure the dataset is of integer number rather than float.

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples, tf.int64)

In [5]:
def scale(image,label):
    image = tf.cast(image, tf.float32)
    image /=255. # /=255 => c /= a is equivalent to c = c / a. here image = image/255, to make all the values between 0 to 1.
    # .is provided at the end of 255 to make the value as a float
    return image, label

In [6]:
scaled_train_and_validation_data = mnist_train.map(scale) # .map() applies a custom transformation to a given dataset.
test_data = mnist_test.map(scale)

In [7]:
#Shuffling data = keeping the same inforamtion in a different order, inorder to avoid patterns in dataset

buffer_size = 10000
shuffled_scaled_train_and_validation_data = scaled_train_and_validation_data.shuffle(buffer_size)

validation_data = shuffled_scaled_train_and_validation_data.take(num_validation_samples)
#take(), inorder to take a specific number of data from a dataset
train_data = shuffled_scaled_train_and_validation_data.skip(num_validation_samples)
#skip(), skips the data that is given in the parenthesis and takes the rest


In [8]:
# we use minibatch gradient descent, so we need to divide the data into small batches
batch_size = 100

train_data = train_data.batch(batch_size) # To take the dataset as batches of defines numbers 'batch_size'
validation_data = validation_data.batch(num_validation_samples) # To take the whole dataset rather than batches
test_data = test_data.batch(num_test_samples)

validation_inputs, validation_targets = next(iter(validation_data)) 
#iter() = to make the dataset iterable but will not load any data
#next() = loads the next batch of iterable data, here we have only one batch.

### Model
#### Outlining the model

In [14]:
input_size = 784
output_size = 10
hidden_layer_size = 100

model =tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape=(28,28,1)), #Flatten() transforms a tensor into a vector.
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'), #first hidden layer
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'), #second hidden layer
                            tf.keras.layers.Dense(output_size, activation = 'softmax') # to convert outputs into probabilities
                            ])

### Optimizer and Loss function

In [15]:
model.compile(optimizer = 'adam',loss='sparse_categorical_crossentropy', metrics = ['accuracy'])

In [16]:
num_epochs = 5

model.fit(train_data,epochs = num_epochs, validation_data = (validation_inputs, validation_targets), verbose=2)
# 1. at the begining of each epoch, the training loss will be set to 0
# 2. The algorithm will iterate over the preset number of batches
# 3. The weights and biases will be updated as many times as the batches
# 4. get value of loss function
# 5. Gets trainig accuracy
# 6. Algorithm will forward propagate the whole validation set.

Epoch 1/5
540/540 - 2s - loss: 0.3456 - accuracy: 0.9007 - val_loss: 0.1915 - val_accuracy: 0.9447 - 2s/epoch - 4ms/step
Epoch 2/5
540/540 - 2s - loss: 0.1393 - accuracy: 0.9588 - val_loss: 0.1114 - val_accuracy: 0.9675 - 2s/epoch - 3ms/step
Epoch 3/5
540/540 - 2s - loss: 0.0982 - accuracy: 0.9697 - val_loss: 0.1060 - val_accuracy: 0.9668 - 2s/epoch - 4ms/step
Epoch 4/5
540/540 - 2s - loss: 0.0772 - accuracy: 0.9770 - val_loss: 0.0840 - val_accuracy: 0.9742 - 2s/epoch - 3ms/step
Epoch 5/5
540/540 - 2s - loss: 0.0628 - accuracy: 0.9807 - val_loss: 0.0649 - val_accuracy: 0.9807 - 2s/epoch - 3ms/step


<keras.callbacks.History at 0x1429eab2370>

### Testing the model

In [17]:
test_loss, test_accuracy = model.evaluate(test_data)



In [23]:
print('test loss = ', round(test_loss*100,3), '%')

test loss =  8.538 %


In [22]:
print('Test Accuracy = ', round(test_accuracy*100,3), '%')

Test Accuracy =  97.22 %
