In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
# Fetch MNIST as (image, label) pairs together with the dataset metadata.
data, data_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)



In [3]:
# Handles to the two splits returned by the loader.
train = data['train']
test = data['test']

# Reserve 10% of the training examples for validation; the count is cast to
# an int64 tensor so it can be passed to take()/skip()/batch() later on.
num_validation_data = tf.cast(0.1 * data_info.splits['train'].num_examples, tf.int64)

# Number of test examples, also as an int64 tensor.
test_sample = tf.cast(data_info.splits['test'].num_examples, tf.int64)

def scale(image, label):
    """Rescale an image by 1/255 and pass its label through untouched.

    Args:
        image: Image tensor to rescale (presumably 0-255 pixel intensities
            as delivered by the MNIST loader - confirm against the dataset).
        label: Target associated with the image; returned unchanged.

    Returns:
        A ``(image, label)`` tuple where ``image`` is a float32 tensor equal
        to the input divided by 255.
    """
    # float32 is the dtype Keras layers compute in by default; casting to
    # float64 only doubles the memory of every batch before the model casts
    # the inputs back down again.
    image = tf.cast(image, tf.float32)
    image /= 255.
    return image, label

# Apply the same scaling to the training and test splits.
scaled_train_data = train.map(scale)
scaled_test_data = test.map(scale)

# Shuffle once before carving out the validation set. The order must stay
# fixed across iterations (reshuffle_each_iteration=False): take() and skip()
# are re-evaluated every time the dataset is iterated, so with the default
# per-iteration reshuffle each epoch would see a different partition and
# examples held in the validation set would leak into training.
buffer_size = 10000

shuffled_train_data = scaled_train_data.shuffle(
    buffer_size, reshuffle_each_iteration=False
)

validation_data = shuffled_train_data.take(num_validation_data)
train_data = shuffled_train_data.skip(num_validation_data)

# Re-shuffle only the training examples at the start of every epoch, so the
# optimiser still sees a varying order without touching the validation set.
train_data = train_data.shuffle(buffer_size)

# Batch size 1      -> stochastic gradient descent (SGD)
# Batch size n      -> (full-batch) gradient descent, n being the set size
# 1 < batch size < n -> mini-batch gradient descent
BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)
validation_data = validation_data.batch(num_validation_data)
test_data = scaled_test_data.batch(test_sample)

# Only the training data is split into mini-batches; the validation and test
# sets are each propagated forward as one single batch.
validation_inputs, validation_targets = next(iter(validation_data))

#### Model

Outline of the model

In [4]:
# Layer dimensions.
input_size = 784        # every digit image is 28 * 28 pixels
output_size = 10        # one class per digit, 0 to 9
hidden_layer_size = 50  # hand-picked width, shared by both hidden layers

# Fully connected classifier: flatten the image, pass it through two ReLU
# hidden layers, and finish with a softmax over the digit classes.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),   # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),   # 2nd hidden layer
    tf.keras.layers.Dense(output_size, activation='softmax'),      # output layer
])

Choosing optimizer and loss function

In [5]:
"""Types of loss func:
1] binary_crossentropy - used when the target data is binary
2] categorical_crossentropy - it needs the target data to be one hot encoded
3] sparse_categorical_crossentropy - it itself applies one hot encoding to the target data
"""
# Adam optimiser with sparse categorical cross-entropy as the loss;
# accuracy is tracked as an additional metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

#### Training

In [6]:
num_epochs = 10

# Train on the mini-batched training set and validate after every epoch on
# the pre-extracted validation tensors, logging one line per epoch.
model.fit(
    train_data,
    epochs=num_epochs,
    validation_data=(validation_inputs, validation_targets),
    validation_steps=1,
    verbose=2,
)

Epoch 1/10
540/540 - 3s - loss: 0.4153 - accuracy: 0.8827 - val_loss: 0.2255 - val_accuracy: 0.9397
Epoch 2/10
540/540 - 3s - loss: 0.1816 - accuracy: 0.9481 - val_loss: 0.1835 - val_accuracy: 0.9488
Epoch 3/10
540/540 - 2s - loss: 0.1421 - accuracy: 0.9588 - val_loss: 0.1503 - val_accuracy: 0.9555
Epoch 4/10
540/540 - 2s - loss: 0.1164 - accuracy: 0.9656 - val_loss: 0.1362 - val_accuracy: 0.9585
Epoch 5/10
540/540 - 2s - loss: 0.0972 - accuracy: 0.9707 - val_loss: 0.1152 - val_accuracy: 0.9663
Epoch 6/10
540/540 - 3s - loss: 0.0852 - accuracy: 0.9744 - val_loss: 0.0985 - val_accuracy: 0.9717
Epoch 7/10
540/540 - 3s - loss: 0.0724 - accuracy: 0.9783 - val_loss: 0.0889 - val_accuracy: 0.9713
Epoch 8/10
540/540 - 3s - loss: 0.0664 - accuracy: 0.9800 - val_loss: 0.0846 - val_accuracy: 0.9747
Epoch 9/10
540/540 - 2s - loss: 0.0600 - accuracy: 0.9820 - val_loss: 0.0768 - val_accuracy: 0.9758
Epoch 10/10
540/540 - 3s - loss: 0.0544 - accuracy: 0.9831 - val_loss: 0.0642 - val_accuracy: 0.9778

<tensorflow.python.keras.callbacks.History at 0x17e28700208>

#### Testing

In [7]:
# Score the trained model on the test set, which is batched as a single batch.
loss, accuracy = model.evaluate(x=test_data)

      1/Unknown - 1s 512ms/step - loss: 0.0969 - accuracy: 0.9713

In [8]:
# Report the test metrics; accuracy is converted from a fraction to a percentage.
print(
    "Test loss = {0:.2f} , Test accuracy = {1:.2f}%".format(loss, accuracy * 100)
)

Test loss = 0.10 , Test accuracy = 97.13%
