### Import the libraries

In [5]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

### Load the dataset

In [10]:
# as_supervised=True makes every example an (input, target) 2-tuple.
# with_info=True additionally returns a metadata object describing the dataset
# (e.g. its version, features and the number of samples per split).
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    with_info=True,
    as_supervised=True,
)

### Preprocess the Data

In [None]:
# Only the 'train' and 'test' splits are read here; a validation set is not
# taken from the loaded data directly, so it is carved out of 'train' later.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# Reserve 10 percent of the training examples for validation. The product is a
# float, so it is cast to an int64 tensor to obtain a whole sample count.
mnist_validation_samples = tf.cast(
    0.1 * mnist_info.splits['train'].num_examples, tf.int64
)

# The size of the test split is needed later as well, in the same int64 form.
mnist_test_samples = tf.cast(mnist_info.splits['test'].num_examples, tf.int64)

# To ensure our result is numerically stable, we scale the data set to have inputs between 0 and 1

def scale(image, label):
    """Convert an image to float32 and divide it by 255; keep the label as is.

    For pixel intensities in the 0-255 range this places every input value
    between 0 and 1.

    Args:
        image: Image tensor to rescale.
        label: Target belonging to the image; returned untouched.

    Returns:
        An ``(image, label)`` tuple with the rescaled float32 image.
    """
    as_float = tf.cast(image, tf.float32)  # Cast first so the division is done in floating point
    return as_float / 255.0, label

# Apply the scale function to the combined train/validation data and to the test data, so that the test inputs
# have the same magnitude as the training and validation inputs
scaled_train_validation = mnist_train.map(scale)
scaled_test_data = mnist_test.map(scale)

# We shuffle the dataset

BUFFER_SIZE = 10000 # Number of samples held in the shuffle buffer at a time. Shuffling through a bounded buffer
# keeps memory usage under control instead of loading the whole dataset at once

# This shuffle happens BEFORE the split, so its order must be identical every time the dataset is iterated.
# By default shuffle() reshuffles on every iteration, which would make take() and skip() below select different
# samples on each pass and let validation samples leak into the training data. reshuffle_each_iteration=False
# freezes the order so that the two subsets stay disjoint
shuffled_scaled_train_validation = scaled_train_validation.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

# We can now split the dataset into training and validation dataset

validation_data = shuffled_scaled_train_validation.take(mnist_validation_samples) # The first 10 percent of the
# shuffled data becomes the validation data
training_data = shuffled_scaled_train_validation.skip(mnist_validation_samples) # Skip as many samples as we have in
# our validation data; everything that remains is the training data

# Only the training subset is reshuffled on every epoch. This restores per-epoch shuffling for training
# without touching the (now fixed) train/validation boundary
training_data = training_data.shuffle(BUFFER_SIZE)

# We apply a mini-batch gradient descent with a batch size of 100
# The validation data doesn't need mini-batches as we don't update the weights and biases on it (no backward
# propagation). Its inputs are only pushed through the net (forward propagation), hence we do not create
# mini-batches for our validation data

BATCH_SIZE = 100
training_data = training_data.batch(BATCH_SIZE)


# Since our model expects our validation and test data in batch form too, we also batch the data but all the data goes in
# a single batch

validation_data = validation_data.batch(mnist_validation_samples)
test_data = scaled_test_data.batch(mnist_test_samples) # Test data also goes in a single batch

# The validation data consists of one single batch, so taking the first element of an iterator over it yields
# all validation inputs and targets at once
validation_inputs, validation_targets = next(iter(validation_data))