In [17]:
#70000 handwritten digits --> classify 0 - 9
#easy to build up to CNN

<tf.Tensor: id=292, shape=(), dtype=int64, numpy=6000>

In [2]:
import numpy as np
import tensorflow as tf

import tensorflow_datasets as tfds
from ipywidgets import FloatProgress

## Data

In [3]:
mnist_dataset, mnist_info = tfds.load(name="mnist", with_info = True, as_supervised = True)
#load to 2-tuple structures: [input, target]

In [4]:
mnist_train, mnist_test = mnist_dataset['train'], mnist_dataset['test']
#we have to split train to train/test on our own --> take 10% as validation
num_validation_samples = 0.1 * mnist_info.splits['train'].num_examples
num_validation_samples = tf.cast(num_validation_samples, tf.int64) #convert a varible to integer
#we got them in the mnist info


num_test_samples = mnist_info.splits['test'].num_examples
num_test_samples = tf.cast(num_validation_samples, tf.int64)

In [5]:
#scale intput (0, 1)
def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255. #chia 255 vi range input (0, 255)
    return image, label
scaled_train_and_validation_data = mnist_train.map(scale)
test_data = mnist_test.map(scale)

In [6]:
#Now we shuffle the data - as random as possible
BUFFER_SIZE = 10000 #case of enourmous dataset - shuffle 10000 per time; BUFFER_SIZE = 1 --> nothing happen, BUFFER_SIZE > num_samples --> shuffle at once
shuffled_train_and_validation_data = scaled_train_and_validation_data.shuffle(BUFFER_SIZE)

validation_data = shuffled_train_and_validation_data.take(num_validation_samples)
train_data = shuffled_train_and_validation_data.skip(num_validation_samples)

BATCH_SIZE = 100
train_data = train_data.batch(BATCH_SIZE) #combine consecutive elements of dataset into batches
#In SGD, it uses only a single sample, i.e., a batch size of one, to perform each iteration


validation_data = validation_data.batch(num_validation_samples)
#take the whole validation data at once -> increase accuracy. We dont want estimation, we want accurate8     
#batch size = num_validation_samples --> use the whole dataset for each iteration

test_data = test_data.batch(num_test_samples) 

#sgd: we split data into batches. Update the weight after every batch instead of every epoch (8: 12)

validation_inputs, validation_targets = next(iter(validation_data)) 
#iter make the dataset iterable, but not load data
#next load the next batch (only 1 batch -> load input + target)


# Model

## Outline the model

In [7]:
#784 inputs
#10 output nodes
#width, depth -> hyperparameter
input_size = 784
output_size = 10
hidden_layer_size = 100

#our data from tfds is 28x28x1. We dont know CNN -> flatten into vector
#tf.keras.layers.Flatten(Original shape) - flatten tensor into vector
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape = (28, 28, 1)),
    tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'), #1st hidden layer dot product of input and weight + bias
    tf.keras.layers.Dense(hidden_layer_size, activation = 'relu'),
    tf.keras.layers.Dense(output_size, activation = 'softmax')
])

## Choose the optimizer and the loss function

In [8]:
#model.compile(optimizer, loss) configures the model for training
#best choice: Adam 
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics = ['accuracy'])
# accuracy is the fraction of predictions our model got right

## Training

In [9]:
NUM_EPOCHS = 5

model.fit(train_data, epochs = NUM_EPOCHS, validation_data = validation_data, verbose = 2)

Epoch 1/5
540/540 - 12s - loss: 0.3345 - accuracy: 0.9046 - val_loss: 0.0000e+00 - val_accuracy: 0.0000e+00
Epoch 2/5
540/540 - 10s - loss: 0.1438 - accuracy: 0.9569 - val_loss: 0.1217 - val_accuracy: 0.9637
Epoch 3/5
540/540 - 9s - loss: 0.1001 - accuracy: 0.9691 - val_loss: 0.0958 - val_accuracy: 0.9710
Epoch 4/5
540/540 - 10s - loss: 0.0757 - accuracy: 0.9767 - val_loss: 0.0644 - val_accuracy: 0.9798
Epoch 5/5
540/540 - 10s - loss: 0.0622 - accuracy: 0.9806 - val_loss: 0.0610 - val_accuracy: 0.9817


<tensorflow.python.keras.callbacks.History at 0x24743eb9648>

### 540 is the # of batches, loss: value of loss function, accuracy: % of cases outputs; val_loss and val_accuracy: values of validation_data (val_accuracy: true accuracy of whole set, while accuracy is just average accuracy)

## Test the model

In [10]:
#validation is not test. We consider validation accuracy as benchmark of how good the model is. The test data set is reality
#the one our model has never see.
test_loss, test_accuracy = model.evaluate(test_data)



In [11]:
test_loss

0.072178415954113

In [12]:
test_accuracy

0.9762

In [13]:
#after testing the model, we are not allowed to change the model. The model already knows the data.
#if we get 50-60% -> overfit model -> fail misserably in the rife. This is what we expect in real life