<a href="https://colab.research.google.com/github/rahiakela/machine-learning-research-and-practice/blob/main/hands-on-machine-learning-with-scikit-learn-keras-and-tensorflow/12-custom-models-and-training-with-tensorflow/07_custom_training_loop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Custom training loop

In this notebook, we will train a model using a custom training loop to tackle the Fashion MNIST dataset.


## Setup

In [1]:
import sys
import sklearn
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras

from tqdm.notebook import trange

from collections import OrderedDict
import numpy as np
import os
import time

# Seed NumPy's and TensorFlow's global random generators so that this
# notebook's output is stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# Plotting setup: draw figures inline in the notebook and set the default
# font sizes for axis labels and for the x/y tick labels
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

## Dataset

Let's start by loading and preparing the Fashion MNIST dataset. 

In [2]:
# Fetch the Fashion MNIST images and labels (training and test splits)
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Convert both image arrays to float32 and divide them by 255
x_train_full, x_test = (
  images.astype(np.float32) / 255. for images in (x_train_full, x_test)
)

# Hold out the first 5,000 training examples for validation; train on the rest
x_valid, y_valid = x_train_full[:5000], y_train_full[:5000]
x_train, y_train = x_train_full[5000:], y_train_full[5000:]

In [3]:
def random_batch(X, y, batch_size=32):
  """Return a random mini-batch of matching rows from X and y.

  Row positions are drawn with np.random.randint, i.e. uniformly and with
  replacement, so the same row may appear more than once in a batch.

  Args:
    X: indexable collection of inputs (must support len() and array indexing).
    y: targets, indexed with the same positions as X.
    batch_size: number of rows to draw.

  Returns:
    A tuple (X_batch, y_batch) selected with the same random positions.
  """
  n_samples = len(X)
  picks = np.random.randint(n_samples, size=batch_size)
  features, targets = X[picks], y[picks]
  return features, targets

## Step 1

_Exercise: Display the epoch, iteration, mean training loss, and mean accuracy over each epoch (updated at each iteration), as well as the validation loss and accuracy at the end of each epoch._

In [4]:
# Re-seed TensorFlow and NumPy right before building and training the Step 1 model
tf.random.set_seed(42)
np.random.seed(42)

In [5]:
# Classifier assembled one layer at a time: flatten the 28x28 input, one
# 100-unit ReLU hidden layer, then a 10-unit softmax output layer
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))

In [6]:
# Loss function, optimizer and running metrics used by the training loop
loss_fn = keras.losses.sparse_categorical_crossentropy
optimizer = keras.optimizers.Nadam(learning_rate=0.01)
metrics = [keras.metrics.SparseCategoricalAccuracy()]
mean_loss = keras.metrics.Mean()

# Schedule: mini-batch size, number of epochs, and mini-batches per epoch
batch_size = 32
n_epochs = 5
n_steps = len(x_train) // batch_size

In [7]:
# Custom training loop: an outer progress bar over the epochs and, for each
# epoch, an inner progress bar over its mini-batches. The inner bar's postfix
# shows the running mean loss and metrics, plus validation results at the end.
with trange(1, n_epochs + 1, desc="All epochs") as epochs:
  for epoch in epochs:
    with trange(1, n_steps + 1, desc="Epoch {}/{}".format(epoch, n_epochs)) as steps:
      # Created before the step loop so the validation entries below can be
      # recorded even when n_steps is 0 (the step loop then never runs)
      status = OrderedDict()
      for step in steps:
        # Pass batch_size explicitly so the batches drawn here agree with the
        # batch size that n_steps was computed from
        x_batch, y_batch = random_batch(x_train, y_train, batch_size)
        with tf.GradientTape() as tape:
          # training=True: layers that behave differently during training
          # (e.g. dropout, batch normalization) must run in training mode
          y_pred = model(x_batch, training=True)
          main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
          # Add any extra losses registered on the model (model.losses)
          loss = tf.add_n([main_loss] + model.losses)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # Re-apply weight constraints, if any variable declares one
        for variable in model.variables:
          if variable.constraint is not None:
            variable.assign(variable.constraint(variable))

        # Update the running mean loss and metrics and show them on the bar
        mean_loss(loss)
        status["loss"] = mean_loss.result().numpy()
        for metric in metrics:
          metric(y_batch, y_pred)
          status[metric.name] = metric.result().numpy()
        steps.set_postfix(status)

      # End of epoch: evaluate on the whole validation set in one forward pass
      y_pred = model(x_valid)
      status["val_loss"] = np.mean(loss_fn(y_valid, y_pred))
      status["val_accuracy"] = np.mean(keras.metrics.sparse_categorical_accuracy(tf.constant(y_valid, dtype=np.float32), y_pred))
      steps.set_postfix(status)

    # Start the next epoch's running averages from scratch
    for metric in [mean_loss] + metrics:
      metric.reset_states()

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/1718 [00:00<?, ?it/s]

## Step 2

_Exercise: Try using a different optimizer with a different learning rate for the upper layers and the lower layers._

In [8]:
# Re-seed TensorFlow and NumPy right before building and training the Step 2 model
tf.random.set_seed(42)
np.random.seed(42)

In [9]:
# Lower stack: flatten the 28x28 input and apply the 100-unit ReLU hidden layer
lower_layers = keras.models.Sequential()
lower_layers.add(keras.layers.Flatten(input_shape=[28, 28]))
lower_layers.add(keras.layers.Dense(100, activation="relu"))

# Upper stack: the 10-unit softmax output layer
upper_layers = keras.models.Sequential()
upper_layers.add(keras.layers.Dense(10, activation="softmax"))

# Full model: the two stacks chained, kept as separate objects so that each
# can be referenced on its own later
model = keras.models.Sequential()
model.add(lower_layers)
model.add(upper_layers)

In [10]:
# Optimizer for the upper (output) layers: Nadam with learning rate 1e-3
upper_optimizer = keras.optimizers.Nadam(learning_rate=1e-3)
# Optimizer for the lower layers: plain SGD with a smaller learning rate of 1e-4
lower_optimizer = keras.optimizers.SGD(learning_rate=1e-4)

In [11]:
# Training schedule: number of epochs, mini-batch size, mini-batches per epoch
n_epochs = 5
batch_size = 32
n_steps = len(x_train) // batch_size

# No single `optimizer` is created here: this step updates the model with
# lower_optimizer and upper_optimizer, so a third optimizer would be dead code
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [14]:
# Custom training loop with one optimizer per group of layers: an outer
# progress bar over the epochs and an inner one over each epoch's mini-batches.
with trange(1, n_epochs + 1, desc="All epochs") as epochs:
  for epoch in epochs:
    with trange(1, n_steps + 1, desc="Epoch {}/{}".format(epoch, n_epochs)) as steps:
      # Created before the step loop so the validation entries below can be
      # recorded even when n_steps is 0 (the step loop then never runs)
      status = OrderedDict()
      for step in steps:
        # Pass batch_size explicitly so the batches drawn here agree with the
        # batch size that n_steps was computed from
        x_batch, y_batch = random_batch(x_train, y_train, batch_size)
        with tf.GradientTape() as tape:
          # training=True: layers that behave differently during training
          # (e.g. dropout, batch normalization) must run in training mode
          y_pred = model(x_batch, training=True)
          main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
          # Add any extra losses registered on the model (model.losses)
          loss = tf.add_n([main_loss] + model.losses)

        # One backward pass for both groups: tape.gradient accepts a nested
        # structure of sources and returns gradients in the same structure,
        # so neither a persistent tape nor a second gradient call is needed
        variable_groups = [lower_layers.trainable_variables,
                           upper_layers.trainable_variables]
        gradient_groups = tape.gradient(loss, variable_groups)

        # Each group of layers is updated by its own optimizer. The loop
        # variable is deliberately not called `optimizer`, to avoid rebinding
        # a global of that name
        group_optimizers = (lower_optimizer, upper_optimizer)
        for layer_optimizer, gradients, variables in zip(
            group_optimizers, gradient_groups, variable_groups):
          layer_optimizer.apply_gradients(zip(gradients, variables))

        # Re-apply weight constraints, if any variable declares one
        for variable in model.variables:
          if variable.constraint is not None:
            variable.assign(variable.constraint(variable))

        # Update the running mean loss and metrics and show them on the bar
        mean_loss(loss)
        status["loss"] = mean_loss.result().numpy()
        for metric in metrics:
          metric(y_batch, y_pred)
          status[metric.name] = metric.result().numpy()
        steps.set_postfix(status)

      # End of epoch: evaluate on the whole validation set in one forward pass
      y_pred = model(x_valid)
      status["val_loss"] = np.mean(loss_fn(y_valid, y_pred))
      status["val_accuracy"] = np.mean(keras.metrics.sparse_categorical_accuracy(tf.constant(y_valid, dtype=np.float32), y_pred))
      steps.set_postfix(status)

    # Start the next epoch's running averages from scratch
    for metric in [mean_loss] + metrics:
      metric.reset_states()

All epochs:   0%|          | 0/5 [00:00<?, ?it/s]

Epoch 1/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 2/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 3/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 4/5:   0%|          | 0/1718 [00:00<?, ?it/s]

Epoch 5/5:   0%|          | 0/1718 [00:00<?, ?it/s]