## MNIST Dataset practice

### Import relevant packages

In [1]:
import numpy as np
import tensorflow as tf 
import tensorflow_datasets as tfds

### Data

In [2]:
#datasets will be stored in C:\Users\*USERNAME*\tensorflow_datasets\...
mnist_dataset, mnist_info= tfds.load(name='mnist',with_info=True, as_supervised=True)
#as_supervised=True, loads the data in a 2-tuple structure [input,target]
#as_supervised=False,returns a dictionary
#with_info=True, provides a tuple containing info about the version, features, #samples of the dataset
#split data into train and test
mnist_train, mnist_test= mnist_dataset['train'], mnist_dataset['test']
#validation is manually split into 10% of train set
num_validation_samples=0.1*mnist_info.splits['train'].num_examples
#tf.cast(x,dtype)- cast converts variable into given data type
num_validation_samples=tf.cast(num_validation_samples,tf.int64)
num_test_samples=mnist_info.splits['test'].num_examples
num_test_samples=tf.cast(num_test_samples,tf.int64)

#scale the image grayscale level [0,255] to between [0,1], simply transforming the values
#define a function called: scale, that will take an MNIST image and its label
def scale(image,label):
    image=tf.cast(image,tf.float32)
    image /=255. #'.' represents we want result in float
    return image, label
#scales the whole train dataset and stores it in the new variable
#dataset.map(*function*)- applies a custom trasnformation to a given dataset. Takes as input as function
#that determines the transformation
scaled_train_and_validation_data=mnist_train.map(scale)

# we scale and batch the test data,so it has the same magnitude as the train and validation
# there would be a single batch, equal to the size of the test data
#no need to shuffle test data as its not used for training purpose, no worry about overfitting
test_data = mnist_test.map(scale)

#splitting no of samples to shuffle at a time
#BUFFER_SIZE= 1, no shuffling
#BUFFER_SIZE>=num_of_samples,shuffling is uniform
BUFFER_SIZE=10000 

shuffled_train_and_validation_data=scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

#extracting the train and validation
#create a batch with a batch size equal to the total number of validation samples
validation_data= shuffled_train_and_validation_data.take(num_validation_samples)

#the train_data is everything else, so we skip as many samples as there are in the validation dataset
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

#using mini-batch gradient descent
#BATCH_SIZE=1, Stochastic gradient descent (SGD)
#BATCH_SIZE=# of samples, (single batch) Gradient Descent
#1<BATCH_SIZE<# of samples= mini-batch GD
BATCH_SIZE=150

#dataset.batch(batch_size) - a method that combines the consecutive elemens of a dataset into batches
#how many samples the tensor should take in each batch
train_data= train_data.batch(BATCH_SIZE)

#when batching we find avg loss but in case of validation and testing we want exact values
#batch the validation data
validation_data = validation_data.batch(num_validation_samples)

# batch the test data
test_data = test_data.batch(num_test_samples)


# takes next batch (it is the only batch)
# because as_supervized=True, we've got a 2-tuple structure
#iter() makes the validation_data iterable(iterate one element at a time), but wont load any data
#next() loads the next batch,i.e loads the next element of iterable object
validation_inputs, validation_targets = next(iter(validation_data))

### Model

### Outline the model

In [3]:
input_size = 784
output_size = 10
# Use same hidden layer size for both hidden layers. Not a necessity.
hidden_layer_size = 50
    
#laying down the model- stacking the layers
model = tf.keras.Sequential([
    
    # the first layer (the input layer) each observation is 28x28x1 pixels, therefore it is a tensor of rank 3
    #'Flatten' that simply takes our 28x28x1 tensor and orders it into a (None,) 
    # or (28x28x1,) = (784,) vector
    # this allows us to actually create a feed forward neural network
    # tf.keras.layers.Flatten(original shape)-trasnforms(flattens) tensor into a vector
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)), # input layer
    
    # tf.keras.layers.Dense(output size) is implementing: output = activation(dot(input, weight) + bias)
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 1st hidden layer
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'), # 2nd hidden layer
    
    # the final layer is activation is with softmax, gives a probability of output
    tf.keras.layers.Dense(output_size, activation='softmax') # output layer
])

In [4]:
# hidden_layer_size = 5000
# batch_size = 150
# NUM_EPOCHS = 10

### Optimizer and loss function

In [5]:
#model.compile(optimizer,loss,metrics)- configures the model for training
#Use this crossentropy loss function when there are two or more label classes. 
#categorical_crossentropy- expects that you've one-hot encoded the targets. Here we didnt so we use 
#sparse_categorical_crossentropy- which applies one hot encoding
#we can also includ metrics we want to obtain, here it's accuracy

#custom_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
#model.compile(optimizer=custom_optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Training

In [6]:
# determine the maximum number of epochs
NUM_EPOCHS = 10
VALIDATION_STEPS = num_validation_samples // BATCH_SIZE
# we fit the model, specifying the
# training data
# the total number of epochs
# and the validation data we just created ourselves in the format: (inputs,targets)
model.fit(train_data, epochs=NUM_EPOCHS, validation_data=(validation_inputs, validation_targets), validation_steps=VALIDATION_STEPS, verbose=2)

Epoch 1/10
360/360 - 4s - loss: 0.4713 - accuracy: 0.8670 - val_loss: 0.2160 - val_accuracy: 0.9387
Epoch 2/10
360/360 - 4s - loss: 0.2038 - accuracy: 0.9414 - val_loss: 0.1614 - val_accuracy: 0.9532
Epoch 3/10
360/360 - 4s - loss: 0.1563 - accuracy: 0.9548 - val_loss: 0.1317 - val_accuracy: 0.9618
Epoch 4/10
360/360 - 5s - loss: 0.1273 - accuracy: 0.9630 - val_loss: 0.1129 - val_accuracy: 0.9675
Epoch 5/10
360/360 - 8s - loss: 0.1082 - accuracy: 0.9681 - val_loss: 0.0965 - val_accuracy: 0.9722
Epoch 6/10
360/360 - 8s - loss: 0.0923 - accuracy: 0.9731 - val_loss: 0.0892 - val_accuracy: 0.9742
Epoch 7/10
360/360 - 7s - loss: 0.0839 - accuracy: 0.9752 - val_loss: 0.0796 - val_accuracy: 0.9765
Epoch 8/10
360/360 - 8s - loss: 0.0745 - accuracy: 0.9778 - val_loss: 0.0764 - val_accuracy: 0.9790
Epoch 9/10
360/360 - 8s - loss: 0.0677 - accuracy: 0.9800 - val_loss: 0.0674 - val_accuracy: 0.9792
Epoch 10/10
360/360 - 8s - loss: 0.0624 - accuracy: 0.9810 - val_loss: 0.0655 - val_accuracy: 0.9810

<tensorflow.python.keras.callbacks.History at 0x1d55216ac88>

Validation loss can be used to keep an eye on overfitting
#### Validation accuracy shows the true accuracy of the model, since training accuracy shows avg accuracy across batches. While validation accuracy is that of the whole validation set

### Testing

In [10]:
#Only test, after adjusting the model. if we adjust after testing we overfit the test dataset
test_loss, test_accuracy = model.evaluate(test_data)

      1/Unknown - 1s 743ms/step - loss: 0.0961 - accuracy: 0.9706

In [12]:
print('Test loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))

Test loss: 0.10. Test accuracy: 97.06%
