In [1]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds

In [2]:
# as_supervised=True makes every element a 2-tuple (input, target);
# with_info=True additionally returns the dataset metadata object.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

In [28]:
# Display the dataset metadata (features, split sizes, supervised keys, citation).
mnist_info

tfds.core.DatasetInfo(
    name='mnist',
    version=3.0.1,
    description='The MNIST database of handwritten digits.',
    homepage='http://yann.lecun.com/exdb/mnist/',
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    total_num_examples=70000,
    splits={
        'test': 10000,
        'train': 60000,
    },
    supervised_keys=('image', 'label'),
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
    redistribution_info=,
)

### Preprocess the data: first we split it

In [8]:
# Pull out the two splits that come with the dataset.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Hold out 10% of the training split for validation. The product is a float,
# so it is cast to an int64 tensor before being used as an element count.
num_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples,
    tf.int64,
)

# Number of test examples, also as an int64 tensor.
num_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

def scale(image, label):
    """Rescale an image's pixel values to floats in [0, 1].

    The image is cast to float32 and divided by 255 (pixel values lie in
    0-255); the label is returned unchanged.
    """
    return tf.cast(image, tf.float32) / 255., label

# Apply the [0, 1] scaling to every element of both splits.
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

# Number of elements held in the shuffle buffer at once. A buffer of 1 means
# no shuffling at all; a larger buffer mixes the data more thoroughly.
# Shuffling keeps consecutive batches from following whatever order the data
# is stored in, so each gradient step sees a more varied sample.
BUFFER_SIZE = 1000
SHUFFLE_SEED = 42  # fixed so that the train/validation split is reproducible
BATCH_SIZE = 100

# Shuffle ONCE, in a fixed order, before carving out the validation set.
# By default shuffle() produces a new order on every iteration, so skip()
# would drop a different set of elements each epoch and samples that were
# extracted as validation data could later be served as training data.
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(
    BUFFER_SIZE,
    seed=SHUFFLE_SEED,
    reshuffle_each_iteration=False,
)
validation_data = shuffled_train_and_validation_data.take(num_validation_samples)  # first N elements
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)  # everything after them

# Only the training portion is reshuffled every epoch, then grouped into
# mini-batches of BATCH_SIZE consecutive elements.
train_data = train_data.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# Validation and test are only forward-propagated, so each is one single batch.
validation_data = validation_data.batch(num_validation_samples)
test_data = test_data.batch(num_test_samples)

# Materialise the single validation batch as (inputs, targets) tensors.
validation_inputs, validation_targets = next(iter(validation_data))

## The Model

In [21]:
input_size = 784  # 28 * 28 * 1 values per flattened image (was mistakenly 782)
output_size = 10  # one output unit per digit class
hidden_layer_size = 200

model = tf.keras.Sequential([
    # Flatten each (28, 28, 1) image into a vector of input_size values.
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # hidden layer 1
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),  # hidden layer 2
    # Softmax turns the outputs into a probability distribution over the classes.
    tf.keras.layers.Dense(output_size, activation='softmax'),
])

### choosing the optimizer and the loss function

In [22]:
# The targets are integer class labels (not one-hot vectors), hence the
# "sparse" variant of categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

## Training the model: here we fit the model we built and see whether it is actually well made.

What happens inside an epoch?
1. At the beginning of each epoch, the training loss is reset to zero.
2. The algorithm iterates over a preset number of batches, all taken from train_data.
3. The weights and biases are updated as many times as there are batches.
4. We get a value of the loss function, indicating how the training is going.
5. We also see the training accuracy.
6. At the end of the epoch, the algorithm forward-propagates the whole validation set.

When the maximum number of epochs is reached, training is over.

In [24]:
NUM_EPOCHS = 10

# verbose=2 prints one summary line per epoch; the validation tensors are
# evaluated at the end of every epoch.
model.fit(
    train_data,
    epochs=NUM_EPOCHS,
    validation_data=(validation_inputs, validation_targets),
    verbose=2,
)

Epoch 1/10
540/540 - 3s - loss: 0.0258 - accuracy: 0.9920 - val_loss: 0.0905 - val_accuracy: 0.9748
Epoch 2/10
540/540 - 3s - loss: 0.0228 - accuracy: 0.9926 - val_loss: 0.0771 - val_accuracy: 0.9793
Epoch 3/10
540/540 - 3s - loss: 0.0195 - accuracy: 0.9933 - val_loss: 0.1120 - val_accuracy: 0.9697
Epoch 4/10
540/540 - 3s - loss: 0.0156 - accuracy: 0.9950 - val_loss: 0.0763 - val_accuracy: 0.9802
Epoch 5/10
540/540 - 3s - loss: 0.0145 - accuracy: 0.9950 - val_loss: 0.1002 - val_accuracy: 0.9772
Epoch 6/10
540/540 - 3s - loss: 0.0129 - accuracy: 0.9959 - val_loss: 0.0857 - val_accuracy: 0.9797
Epoch 7/10
540/540 - 3s - loss: 0.0135 - accuracy: 0.9956 - val_loss: 0.0875 - val_accuracy: 0.9798
Epoch 8/10
540/540 - 3s - loss: 0.0131 - accuracy: 0.9954 - val_loss: 0.0820 - val_accuracy: 0.9813
Epoch 9/10
540/540 - 3s - loss: 0.0079 - accuracy: 0.9974 - val_loss: 0.0830 - val_accuracy: 0.9825
Epoch 10/10
540/540 - 3s - loss: 0.0093 - accuracy: 0.9970 - val_loss: 0.1019 - val_accuracy: 0.9795

<tensorflow.python.keras.callbacks.History at 0x213643e6148>

### Test the model

In [25]:
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_metrics = model.evaluate(test_data)
test_loss, test_accuracy = test_metrics



In [27]:
# Accuracy is a fraction in [0, 1]; express it as a percentage for the report.
test_accuracy_percent = test_accuracy * 100.
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy_percent))

Test loss: 0.11. Test accuracy: 97.99%
