In [2]:
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds

# Data 

The dataset is called MNIST and refers to handwritten digit recognition. You can find more about it on Yann LeCun's website (Director of AI Research, Facebook). He is one of the pioneers of what we've been talking about and of more complex approaches that are widely used today, such as covolutional neural networks (CNNs). 

The dataset provides 70,000 images (28x28 pixels) of handwritten digits (1 digit per image). 

The goal is to write an algorithm that detects which digit is written. Since there are only 10 digits (0, 1, 2, 3, 4, 5, 6, 7, 8, 9), this is a classification problem with 10 classes. 

In [3]:
# with_info=True will also provide us with a tuple containing information about the version, features, number of samples
# we will use this information a bit below and we will store it in mnist_info
# as_supervised=True will load the dataset in a 2-tuple structure (input, target) 
# alternatively, as_supervised=False, would return a dictionary

mnist_dataset,mnist_info = tfds.load(name = 'mnist',with_info = True, as_supervised = True)

Downloading and preparing dataset mnist/3.0.0 (download: 11.06 MiB, generated: Unknown size, total: 11.06 MiB) to C:\Users\PSK\tensorflow_datasets\mnist\3.0.0...


local data directory. If you'd instead prefer to read directly from our public
GCS bucket (recommended if you're running on GCP), you can instead set
data_dir=gs://tfds-data/datasets.



HBox(children=(IntProgress(value=0, description='Dl Completed...', max=4, style=ProgressStyle(description_widt…



Dataset mnist downloaded and prepared to C:\Users\PSK\tensorflow_datasets\mnist\3.0.0. Subsequent calls will reuse this data.


# Data Pre-Processing

In [4]:
#Split the data into train and test
mnist_train,mnist_test = mnist_dataset['train'],mnist_dataset['test']

#we need to get the validation set from the train set 
#we will be taking out 10% of the train dataset as validation set
num_vaidation_samples = 0.1 * mnist_info.splits['train'].num_examples
#we are casting it into numerical
num_vaidation_samples = tf.cast(num_vaidation_samples, tf.int64)

num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_test_samples,tf.int64)

In [5]:
#we would like to scale our data in some way to make the result more numerically stable
#in this case we will simply prefer to have inputs between 0 and 1
def scale(image,label):
    image = tf.cast(image,tf.float32)
    # we are transforming the different shades of black from 0-255 into 0 and 1
    image = image/255.
    return image,label

In [9]:
# the method .map() allows us to apply a custom transformation to a given dataset
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

In [10]:
BUFFER_SIZE = 10000

shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)
validation_data = shuffled_train_and_validation_data.take(num_vaidation_samples)
#we skipped the validation data
train_data = shuffled_train_and_validation_data.skip(num_vaidation_samples)

# MiniBatch Gradient Descent

Batch Size = 1 means Stochaistics Gradient Descent

Batch Size = numbe of samples is normal Gradient Descent

1 < Batch Size < Number of Sample = Mini Batch Gradient Descent


In [11]:
BATCH_SIZE = 100

train_data = train_data.batch(BATCH_SIZE)

#we will not be batching the test and validation dataset because in batching we calculate the average loss
#we will be requiring accurate loos in the test and validation 
#one more reason is we will not be backpropagating in the test and validation data

#here the total validation data is one batch 
validation_data = validation_data.batch(num_vaidation_samples)
test_data = test_data.batch(num_test_samples)

# takes next batch (it is the only batch)
# because as_supervized=True, we've got a 2-tuple structure
validation_inputs, validation_targets = next(iter(validation_data))

In [24]:
input_size = 784
output_size = 10
hidden_layer = 100

In [25]:
model = tf.keras.Sequential([
                            tf.keras.layers.Flatten(input_shape =(28,28,1)),
                            tf.keras.layers.Dense(hidden_layer,activation = 'relu'),
                            tf.keras.layers.Dense(hidden_layer,activation = 'relu'),
                            #output layer
                            tf.keras.layers.Dense(output_size,activation = 'softmax')
                                ])

# Optimizer and Loss Function

In [26]:
model.compile(optimizer = 'adam',loss = 'sparse_categorical_crossentropy',metrics = ['accuracy'])

# Training 

In [28]:
EPOCHS = 5

model.fit(train_data,
          epochs = EPOCHS,
          validation_data = (validation_inputs,validation_targets),
          validation_steps= 10,
          verbose = 2)

Epoch 1/5
540/540 - 6s - loss: 0.0464 - accuracy: 0.9864 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 5s - loss: 0.0409 - accuracy: 0.9868 - val_loss: 0.0509 - val_accuracy: 0.9847
Epoch 3/5
540/540 - 4s - loss: 0.0327 - accuracy: 0.9898 - val_loss: 0.0484 - val_accuracy: 0.9852
Epoch 4/5
540/540 - 4s - loss: 0.0287 - accuracy: 0.9909 - val_loss: 0.0392 - val_accuracy: 0.9888
Epoch 5/5
540/540 - 5s - loss: 0.0240 - accuracy: 0.9922 - val_loss: 0.0361 - val_accuracy: 0.9903


<tensorflow.python.keras.callbacks.History at 0x15d4c8ab048>

# Test 

In [29]:
test_loss, test_accuracy = model.evaluate(test_data)



In [31]:
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

Test loss: 0.08. Test accuracy: 97.71%
