## tensor flow MNIST

In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

## data

In [2]:
#loading the data
#as supervised=true will load the dataset into 2 tuple structure (input,target)
#with_info=provides us with info on the version,info,features,no of samples etc and we store it in mnist_info
mnist_data,mnist_info=tfds.load(name='mnist',with_info=True,as_supervised=True)

In [3]:
## extracting the train and the test data
mnist_train,mnist_test=mnist_data['train'],mnist_data['test']
# in the tensor flow data set module the train dataset is much bigger than the test so validation data is taken from the train
#lets take 10% of it to serve as validation
num_validation_samples=0.1*mnist_info.splits['train'].num_examples
#we dont know weather it will be an integer, to solve this issue we cast it,casting will cast the variable to the data type
num_validation_samples=tf.cast(num_validation_samples,tf.int64)

num_test_samples=mnist_info.splits['test'].num_examples
num_test_samples=tf.cast(num_test_samples,tf.int64)

#scaling the data to make it more nuemerically stable
#we convert the image into float so that when we divide by 255 we get answers in float and not integers
def scale(image,label):
    image=tf.cast(image,tf.float32)
    image/=255.
    return(image,label)
# the below fuction will scale the entire train data and store it in the new variable created
scaled_train_and_validation_data=mnist_train.map(scale)
test_data=mnist_test.map(scale)

### suffling - keeping the info same but in different order

In [4]:
## buffer size is used when we are dealing with enormous datasets that we cant suffle at once
# if buffer size=1 no shuffling
#if buffer size>=num of samples uniform shuffling
#buffer size<num of samples we are optimizing the computational power of the comp
buffer_size=10000

shuffle_train_and_validation_data=scaled_train_and_validation_data.shuffle(buffer_size)
# our validation data is already stored in num_validation_samples we can use a method called take to extract them from the shuffle
validation_data=shuffle_train_and_validation_data.take(num_validation_samples )
#similary we can extract the train data by taking everything else except the validation data from the shuffle
train_data=shuffle_train_and_validation_data.skip(num_validation_samples)

BATCH_SIZE=100
#method called batch is present which is used to combine the consecutive elements into batches
train_data=train_data.batch(BATCH_SIZE)
#while batching we find the average loss but in testing and validation we take all data at once
validation_data=validation_data.batch(num_validation_samples)
test_data=test_data.batch(num_test_samples)

validation_inputs,validation_targets=next(iter(validation_data))
#next loads the next batch
#iter craetes an object to be iterated one elemnet at a time

## MODEL

### OUTLINE THE MODEL

In [5]:
# there are 28*28=784 input, 10 outputs(10 digits), we are assuming 2 hidden layers
input_size=784
output_size=10
hidden_layer_size=100
# our data is such that each observation is 28*28*1
#flatten transforms the tensor into a matrix

model=tf.keras.Sequential([
                           tf.keras.layers.Flatten(input_shape=(28,28,1)),
    #since there are 2 hidden layers
                           tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
                           tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    #activation function of the output layer must transform the values into probability distribution
                           tf.keras.layers.Dense(output_size,activation='softmax')
                        ])

## choose the optimizer and the loss function

In [6]:
#configures the model for training
#adam=adaptive moment estimation
# metrics tells the one we have to calculate 
model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

## traning

In [7]:
num_epochs=5
#number of iterations
model.fit(train_data,epochs=num_epochs,validation_data=(validation_inputs,validation_targets),validation_steps=10,verbose=2)
#accuracy shows in what percentage of the cases the outputs were equal to the targets
#val_accuracy is the true accuracy of the model
#cause the traning accuracy is the average accuracy of the model

Epoch 1/5
540/540 - 7s - loss: 0.3361 - accuracy: 0.9036 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 6s - loss: 0.1374 - accuracy: 0.9592 - val_loss: 0.1198 - val_accuracy: 0.9630
Epoch 3/5
540/540 - 6s - loss: 0.0983 - accuracy: 0.9700 - val_loss: 0.1029 - val_accuracy: 0.9705
Epoch 4/5
540/540 - 7s - loss: 0.0730 - accuracy: 0.9775 - val_loss: 0.0690 - val_accuracy: 0.9835
Epoch 5/5
540/540 - 8s - loss: 0.0588 - accuracy: 0.9822 - val_loss: 0.0750 - val_accuracy: 0.9782


<tensorflow.python.keras.callbacks.History at 0x13848d39188>

## test the model

In [8]:
# by fine tuning the above we are overfitting the above dataset
test_loss,test_accuracy=model.evaluate(test_data)



In [9]:
print(test_loss,test_accuracy)
#getting a value close to the validation accuracy shows that they are not overfitted
#test accuracy is the accuracy we would expect to observe if we deploy the model in the real world

0.09700703620910645 0.9704
