In [1]:
# The MNIST dataset is already heavily preprocessed; only scaling and
# splitting are required.

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf  # for creation & training of nn
import tensorflow_datasets as tfds   # to obtain mnist dataset
import datetime

Downloading & Preprocessing Data (this step is independent of TensorBoard)

In [3]:
# Hyperparameters used by later cells.
BUFFER_SIZE = 70_000  # buffer size passed to Dataset.shuffle below
BATCH_SIZE = 128  # batch size applied to the training data
NUM_EPOCHS = 20  # value passed as `epochs` to model.fit

In [4]:
# Fetch MNIST as (image, label) pairs together with the dataset metadata.
mnist_dataset, mnist_info = tfds.load(
    name='mnist',
    as_supervised=True,
    with_info=True,
)

In [5]:
# Pull the two predefined splits out of the loaded dataset dictionary.
mnist_train = mnist_dataset['train']
mnist_test = mnist_dataset['test']

In [6]:
# In ML we prefer inputs that are standardised in some way. For images, a common technique is to scale the pixel values, which for greyscale images originally lie between 0 and 255, to the range 0 to 1.
# Dividing every pixel in the dataset by 255 puts all values between 0 and 1.

In [7]:
# Display the training split to inspect its element shapes and dtypes.
mnist_train

<PrefetchDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>

In [8]:
# Display the test split to inspect its element shapes and dtypes.
mnist_test

<PrefetchDataset shapes: ((28, 28, 1), ()), types: (tf.uint8, tf.int64)>

In [9]:
def scale(image, label):
    """Convert an image to float32 and divide its values by 255.

    The label is passed through untouched so the function can be used
    directly with ``Dataset.map`` on (image, label) pairs.

    Returns:
        A ``(scaled_image, label)`` tuple.
    """
    as_float = tf.cast(image, tf.float32)
    return as_float / 255., label

In [10]:
# Apply the same pixel scaling to both splits; the training split is
# divided into training and validation parts further below.
test_data = mnist_test.map(scale)
train_and_validation_data = mnist_train.map(scale)

In [11]:
# A validation set is crucial for preventing overfitting via early stopping.
# Split the training set manually to create a validation set (10% of the
# training set).

In [12]:
# Number of examples in the training split, as reported by the dataset metadata.
mnist_info.splits['train'].num_examples

60000

In [13]:
# Number of examples in the test split, as reported by the dataset metadata.
mnist_info.splits['test'].num_examples

10000

In [14]:
# Reserve 10% of the training examples for validation (a float at this point).
num_validation_samples = mnist_info.splits['train'].num_examples * 0.1

In [15]:
# Inspect the computed size; it is still a float here and is cast to an integer next.
num_validation_samples

6000.0

In [16]:
# Convert the float count into an int64 tensor so it can be used as an element count.
num_validation_samples = tf.cast(num_validation_samples, dtype=tf.int64)

In [17]:
# The validation set needs to contain data with the same distribution as the training data, so the data is shuffled before it is split.

In [18]:
# Shuffle once so the validation split taken afterwards follows the same
# distribution as the training data.
# reshuffle_each_iteration=False freezes that order. By default the dataset
# is reshuffled every time it is iterated (i.e. every epoch), so the later
# take()/skip() split would pick different samples each epoch and validation
# samples would leak into training, making val_loss (and early stopping)
# unreliable.
# NOTE: the training partition consequently keeps a fixed order across
# epochs; add a separate shuffle after the split if per-epoch reshuffling
# of the training data is wanted.
train_and_validation_data = train_and_validation_data.shuffle(
    BUFFER_SIZE, reshuffle_each_iteration=False
)

In [19]:
# The first num_validation_samples elements of the shuffled data become the
# validation set; everything after them is used for training.
validation_data = train_and_validation_data.take(num_validation_samples)
train_data = train_and_validation_data.skip(num_validation_samples)

In [20]:
# Test data: look up how many examples the test split holds.
test_split_info = mnist_info.splits['test']
num_test_samples = test_split_info.num_examples

In [21]:
# Convert the count into an int64 tensor, matching num_validation_samples.
num_test_samples = tf.cast(num_test_samples, dtype=tf.int64)

In [22]:
# Confirm the value is now an int64 tensor.
num_test_samples

<tf.Tensor: shape=(), dtype=int64, numpy=10000>

In [23]:
# Batch the dataset for optimal performance of the network.
# Batch sizes are generally recommended to be a power of 2 (32, 64, 128, etc.).

# Validation and test sets do not strictly need to be batched, since we do not
# backpropagate on them; however, the model expects batched input in order to get the proper dimensions.

In [24]:
# Validation and test data are each served as one single batch that holds
# the whole split; only the training data uses mini-batches of BATCH_SIZE.
test_data = test_data.batch(num_test_samples)
validation_data = validation_data.batch(num_validation_samples)

train_data = train_data.batch(BATCH_SIZE)

Build, Train and Test Network

In [25]:
# 1. Define the architecture of the network
# 2. Compile the model
# 3. Fit the model on the training data

In [26]:
# Sequential chains the layers in the order they appear in the list:
# two convolution + max-pooling stages, a Flatten step that turns the
# feature maps into a vector, and a final Dense layer that outputs
# 10 raw scores (no activation is applied here).
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(
        filters=50,
        kernel_size=5,
        activation='relu',
        input_shape=(28, 28, 1),
    ),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(filters=50, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=10),
])

In [27]:
# The leading None in every output shape is the batch dimension: its size
# is not fixed by the model, because the data arrives in batches.
model.summary(line_length=60)

Model: "sequential"
____________________________________________________________
 Layer (type)              Output Shape            Param #  
 conv2d (Conv2D)           (None, 24, 24, 50)      1300     
                                                            
 max_pooling2d (MaxPooling  (None, 12, 12, 50)     0        
 2D)                                                        
                                                            
 conv2d_1 (Conv2D)         (None, 10, 10, 50)      22550    
                                                            
 max_pooling2d_1 (MaxPooli  (None, 5, 5, 50)       0        
 ng2D)                                                      
                                                            
 flatten (Flatten)         (None, 1250)            0        
                                                            
 dense (Dense)             (None, 10)              12510    
                                                            
Tota

In [28]:
# The model above has 36,360 parameters, all of which are trainable.
# Trainable parameters are the weights of our network, i.e. the values the model is trying to learn. In our model these are the numbers in the convolution kernels and in the final dense layer (weights and biases).
# Parameters that should not be changed during the learning process are non-trainable.

In [29]:
# Include the softmax activation in the loss function itself instead of in the final dense layer.

In [30]:
# from_logits=True tells the loss that the model's outputs are logits, i.e.
# unnormalised values on which softmax has not yet been applied to produce a
# probability distribution.
#
# With from_logits=False the loss would instead expect probabilities, which
# usually means a softmax activation in the last layer.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True,
)

In [31]:
# Configure training: Adam optimizer, the loss function defined earlier,
# and accuracy reported alongside the loss.
model.compile(
    loss=loss_fn,
    optimizer='adam',
    metrics=['accuracy'],
)

In [32]:
# Early stopping is the tool for guarding against overfitting.
# In TensorFlow, early stopping is implemented as a callback.
# Callbacks are objects whose hooks Keras invokes at defined points during training (for example at the end of each epoch).

In [33]:
# Stop training once the validation loss has gone 2 epochs without
# improving, and restore the weights from the best epoch seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    min_delta=0,
    mode='auto',
    restore_best_weights=True,
    verbose=0,
)

In [34]:
# Directory that receives the TensorBoard logs written by this notebook.
# Different models / hyperparameter settings are logged to different folders,
# one per run; the local date & time in the folder name tells runs apart.
# Forward slashes are used on purpose: a hard-coded "\\" separator is only a
# path separator on Windows; elsewhere it yields one folder literally named
# "logs\fit\<timestamp>", which the `%tensorboard --logdir "logs/fit"` cell
# cannot find.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Callback that writes this run's TensorBoard logs into log_dir;
# histogram_freq=1 records histograms every epoch.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    histogram_freq=1,
    log_dir=log_dir,
)

In [35]:

model.fit(
    train_data,
    validation_data=validation_data,
    epochs=NUM_EPOCHS,
    # TensorBoard logging first; early stopping is kept as the last list entry
    callbacks=[tensorboard_callback, early_stopping],
    verbose=2,  # one summary line per epoch
)

Epoch 1/20
422/422 - 18s - loss: 0.2732 - accuracy: 0.9212 - val_loss: 0.1013 - val_accuracy: 0.9692 - 18s/epoch - 43ms/step
Epoch 2/20
422/422 - 18s - loss: 0.0719 - accuracy: 0.9785 - val_loss: 0.0559 - val_accuracy: 0.9828 - 18s/epoch - 42ms/step
Epoch 3/20
422/422 - 17s - loss: 0.0526 - accuracy: 0.9844 - val_loss: 0.0468 - val_accuracy: 0.9858 - 17s/epoch - 41ms/step
Epoch 4/20
422/422 - 18s - loss: 0.0437 - accuracy: 0.9864 - val_loss: 0.0364 - val_accuracy: 0.9872 - 18s/epoch - 42ms/step
Epoch 5/20
422/422 - 18s - loss: 0.0362 - accuracy: 0.9888 - val_loss: 0.0289 - val_accuracy: 0.9925 - 18s/epoch - 43ms/step
Epoch 6/20
422/422 - 18s - loss: 0.0312 - accuracy: 0.9901 - val_loss: 0.0284 - val_accuracy: 0.9912 - 18s/epoch - 43ms/step
Epoch 7/20
422/422 - 18s - loss: 0.0275 - accuracy: 0.9917 - val_loss: 0.0178 - val_accuracy: 0.9948 - 18s/epoch - 42ms/step
Epoch 8/20
422/422 - 18s - loss: 0.0241 - accuracy: 0.9924 - val_loss: 0.0248 - val_accuracy: 0.9932 - 18s/epoch - 42ms/step


<keras.callbacks.History at 0x27ab6519df0>

Visualizations in Tensorboard

In [36]:
# Load the TensorBoard notebook extension so the %tensorboard magic becomes available
%load_ext tensorboard

In [37]:
%tensorboard --logdir "logs/fit"

Launching TensorBoard...