# Learning NN with MNIST dataset

In [48]:
#import dependencies

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds # for importing the dataset

In [51]:
tf.version

<module 'tensorflow_core._api.v2.version' from '/opt/anaconda3/envs/py3-TF2.0/lib/python3.7/site-packages/tensorflow_core/_api/v2/version/__init__.py'>

In [3]:
#load and preprocess the data

mnist_dataset, mnist_info = tfds.load(name ='mnist', with_info ='true', as_supervised='true')
# the method tfds.load() actually download the specify name='mnist' dataset into the local directory for the first time.
# for later runs, the method draw the data locally.
# the argument with_info = 'True' make the method return the info of the data as well. In this case I stored in the different variable called mnist_info
# the argument as_supervised = 'True' make the data came with input-target format for easier uses with supervised learning.

2022-02-01 10:35:22.338906: I tensorflow/core/platform/cpu_feature_guard.cc:145] This TensorFlow binary is optimized with Intel(R) MKL-DNN to use the following CPU instructions in performance critical operations:  SSE4.1 SSE4.2
To enable them in non-MKL-DNN operations, rebuild TensorFlow with the appropriate compiler flags.
2022-02-01 10:35:22.342494: I tensorflow/core/common_runtime/process_util.cc:115] Creating new thread pool with default inter op setting: 8. Tune using inter_op_parallelism_threads for best performance.


In [4]:
#just checking
mnist_dataset

{'test': <PrefetchDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>,
 'train': <PrefetchDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>}

In [5]:
#just checking
mnist_info

tfds.core.DatasetInfo(
    name='mnist',
    full_name='mnist/3.0.1',
    description="""
    The MNIST database of handwritten digits.
    """,
    homepage='http://yann.lecun.com/exdb/mnist/',
    data_path='/Users/noonpritsana/tensorflow_datasets/mnist/3.0.1',
    download_size=11.06 MiB,
    dataset_size=21.00 MiB,
    features=FeaturesDict({
        'image': Image(shape=(28, 28, 1), dtype=tf.uint8),
        'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=10),
    }),
    supervised_keys=('image', 'label'),
    disable_shuffling=False,
    splits={
        'test': <SplitInfo num_examples=10000, num_shards=1>,
        'train': <SplitInfo num_examples=60000, num_shards=1>,
    },
    citation="""@article{lecun2010mnist,
      title={MNIST handwritten digit database},
      author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
      journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
      volume={2},
      year={2010}
    }""",
)

In [6]:
# from the info, we learned that the data are (28,28,1) image tensor call 'image' and label
# also the 70000 observations has been splited into 60k train and 10k test observations
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

# still we need a validation dataset for testing of the overfitting. We will draw that from the train dataset
# let's slice 10% from the training set to the validation test using the 'num_examples' we have got from the dataset info
num_validation_sample = 0.1* mnist_info.splits['train'].num_examples

# Just in case that the 10% of the num_examples is a float (we need to split the observation with a gap between two observation)
# Thus, cast a num_validation_sample into an int64 class
num_validation_sample = tf.cast(num_validation_sample,tf.int64)

# also defining the variable which contain the number of the test data 
# this time cast it to the tf.int64 at the same time
num_test_sample = tf.cast(mnist_info.splits['test'].num_examples,tf.int64)



In [7]:
#check the amount of observations
num_validation_sample

<tf.Tensor: id=278, shape=(), dtype=int64, numpy=6000>

In [8]:
#check the amount of observations
num_test_sample

<tf.Tensor: id=280, shape=(), dtype=int64, numpy=10000>

### Lets scale the data for better numarical stability

The photo is actually the grayscale of a hand written digits in 28*28 pixcel. The grayscale is represented by a number 0 to 255 with 0 represent a total white and 255 a total black.

In [34]:
# scale the observation by dividing with 255 make a variable range from 0 to 1 instead of 0 to 255

# we know that the dataset came with 2 part: the actual image and a label. 
# Thus to pass the dataset into the scale function, we need to pass both arguments.
# Also the returns must have the same name as the arguments for furthur compatability 

def scale(image, label):
    #to divided by 255, we need to make sure that the numerator is a float. Cast it.
    image = tf.cast(image,tf.float64)
    image = image/255
    return image, label

# By using the tf.map() method we can apply a certain function to the dataset. 
# Here we use .map() to apply scale() we have just defined
#Here we scaled the entire train data. The valid will be extracted later.
scaled_train_and_validation_data = mnist_train.map(scale) 

#do the same for the test
test_data = mnist_test.map(scale)

### Shuffle the data before splits into train and valid

In [35]:
# set the amount of observations to be shuffle at once. 
# Without the buffer, if the dataset is too big, ram won't be enought to make a a shuffle.
buffer = 10000 
shuffled_train_and_test = scaled_train_and_validation_data.shuffle(buffer)

### Actually spliting the data

using tf.take() and tf.skip() to extract the bulk of data needed.

In [36]:
validation_data = shuffled_train_and_test.take(num_validation_sample)
train_data = shuffled_train_and_test.skip(num_validation_sample)

### Batching Data

Batching is very beneficial in training since the process can be itereate from batches to batches. We would get the feedback from every iterations instead of only once form the entire dataset.

** Noted that the batched iteration return the average loss and accuracy **

In [37]:
batch_size = 100

train_data = train_data.batch(batch_size)

# since the train data is in the batched form, the model expected the validation and test data tobe in batch form as well.
# Therefore, we have to make 1 batch for both of the dataset by setting the batch_size with the number of its' observations.
test_data = test_data.batch(num_test_sample)
validation_data = validation_data.batch(num_validation_sample)

validation_inputs, validation_targets = next(iter(validation_data))

now the train /test / validation data are ready to be fed into the model 

### Construct the model

In [38]:
#define hyperparameters

input_size = 28*28 #since the size of the drawing is 28*28 pixcel. We will flatten() it to become 28*28=784
output_size = 10 #since we need 10 categiries from 0 to 9
hidden_layer_size = 50 #arbitrary number

model = tf.keras.Sequential([
                            # flatten() tensor with mnist_info.features.shape['image'] (in this case (28,28,1)) into a vector.
                            tf.keras.layers.Flatten(input_shape=mnist_info.features.shape['image']),
                            # 1st hidden layer
                            # relu return the actual possitive amount or else return 0
                            tf.keras.layers.Dense(hidden_layer_size,activation='relu'), 
                            # 2nd hidden layer  
                            tf.keras.layers.Dense(hidden_layer_size,activation='tanh'), 
                            # output layer
                            tf.keras.layers.Dense(output_size, activation='softmax')
                            #softmax return the probability of each output for the total of 1                            
                            ])


### Picking the optimizer and loss function

let's use **adam** for the optimizer since it's offer both stochastic and momentum property which leads to both speed and acceptable accuracy.

let's use **sparse_categorical_crossentropy** for the loss function since we didn't one-hot encoded the data.

Also, let's the metrics be **accuracy**

We can state all three hyperparameters with the method .compile()

In [39]:
custom_optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=custom_optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

### Train the model

In [40]:
train_data

<BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float64, tf.int64)>

In [41]:
#set the hyperparameter
n_epochs = 10

#fit the model
model.fit(x=train_data, epochs=n_epochs, validation_data=(validation_inputs,validation_targets),validation_steps=1, verbose=2)

Epoch 1/10
540/540 - 7s - loss: 0.4057 - accuracy: 0.8904 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00


2022-02-01 11:10:30.120848: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 2/10
540/540 - 7s - loss: 0.1792 - accuracy: 0.9472 - val_loss: 0.1495 - val_accuracy: 0.9567


2022-02-01 11:10:37.222741: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 3/10
540/540 - 7s - loss: 0.1304 - accuracy: 0.9619 - val_loss: 0.1149 - val_accuracy: 0.9670


2022-02-01 11:10:43.916070: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 4/10
540/540 - 7s - loss: 0.1050 - accuracy: 0.9693 - val_loss: 0.1038 - val_accuracy: 0.9692


2022-02-01 11:10:50.628494: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 5/10
540/540 - 7s - loss: 0.0861 - accuracy: 0.9748 - val_loss: 0.0926 - val_accuracy: 0.9718


2022-02-01 11:10:57.495729: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 6/10
540/540 - 7s - loss: 0.0754 - accuracy: 0.9776 - val_loss: 0.0825 - val_accuracy: 0.9763


2022-02-01 11:11:04.366229: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 7/10
540/540 - 7s - loss: 0.0636 - accuracy: 0.9814 - val_loss: 0.0666 - val_accuracy: 0.9793


2022-02-01 11:11:10.898194: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 8/10
540/540 - 7s - loss: 0.0563 - accuracy: 0.9837 - val_loss: 0.0635 - val_accuracy: 0.9815


2022-02-01 11:11:17.473250: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 9/10
540/540 - 6s - loss: 0.0482 - accuracy: 0.9857 - val_loss: 0.0559 - val_accuracy: 0.9847


2022-02-01 11:11:23.918273: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


Epoch 10/10
540/540 - 6s - loss: 0.0418 - accuracy: 0.9879 - val_loss: 0.0536 - val_accuracy: 0.9845


2022-02-01 11:11:30.366675: W tensorflow/core/common_runtime/base_collective_executor.cc:216] BaseCollectiveExecutor::StartAbort Out of range: End of sequence
	 [[{{node IteratorGetNext}}]]


<tensorflow.python.keras.callbacks.History at 0x7fa993f66ed0>

### Tuning the hyperparameters

This is the exercise from the lesson.

In [17]:
# changing the hidden layers to 200: increasing the width of the model. -> Longer training time + better accuracy
# addding another layer of the hidden layer: increasing the depth of themodel. -> Longer training time
# adding both width and depth size = 200 and layer = 5. -> longer training time + better accuracy
# try Sigmoid activation function instead of RELU: -> longer training time + worse accuracy
# try ReLu for the first hidden and tanh activation function for the second ->  fast and accurate 
# adjust the batch size to 10000 from 100 -> very fast and accurate 
# adjust the batch size to 1 (this is the Simple gradient descent) -> this takes so much time
# adjust the learning rate to 0.0001% this takes longer time since the epoch goes up but for the first five epochs the accuracy is better already
# adjust the learning rate to 0.02% -> fast and accurate



### Actually testing the modle

The accuracy and loss function from those earlier epochs are form the "average loss and accuracy" from the test batches and from validation data.(we are overfitting the validation data) We are not getting any real testing accuracy and loss yet. Let's do it now.

In [52]:
test_loss, test_accuracy = model.evaluate(test_data)
print('test_loss = {} : test_accuracy = {}'.format(test_loss,test_accuracy))

InternalError: Cache should only be read after it has been completed. [Op:MakeIterator]