Train a model using a custom training loop to tackle the Fashion MNIST dataset (see Chapter 10 ).

Display the epoch, iteration, mean training loss, and mean accuracy over each epoch (updated at each iteration), as well as the validation loss and accuracy at the end of each epoch.

Try using a different optimizer with a different learning rate for the upper layers and the lower layers.

In [2]:
import tensorflow as tf
import tensorflow.keras as keras

from sklearn.model_selection import train_test_split

In [3]:
(X_train, y_train), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [4]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=5000)

In [5]:
X_train.shape

(55000, 28, 28)

In [6]:
X_val.shape

(5000, 28, 28)

In [7]:
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

In [8]:
def build_model(input_shape=None, output_shape=10, n_hidden=3, n_neurons=200, learning_rate=1e-3, compile=True):
    if not input_shape:
      input_shape = [28, 28]
    model = keras.models.Sequential([keras.layers.Flatten(input_shape=input_shape)])
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))
    model.add(keras.layers.Dense(output_shape, activation="softmax"))
                  
    if compile:
        model.compile(
            loss="sparse_categorical_crossentropy",
            optimizer=keras.optimizers.SGD(learning_rate=learning_rate),
            metrics=["accuracy"],
        )
    
    return model

In [23]:
# vs = build_model().trainable_variables
# len(vs)

l = (4 // 2) & (~0 << 1)
# l = l - l % 2
l

2

In [28]:
def print_status_bar(epoch, iteration, total, metrics):
  str_metrics = " - ".join(f"{name} {metric:.4f}" for (name, metric) in sorted(metrics.items(), key=lambda x: x[0]))
  end = "" if iteration < total else "\n"
  print(f"\r{iteration}/{total} {str_metrics}")

def train_model(model, train_dataset, X_val, y_val, batch_size=32, n_epochs=10):
  dataset = train_dataset.shuffle(len(train_dataset)).batch(batch_size).enumerate().prefetch(1)
  n_batches = dataset.cardinality().numpy()
  # accuracy = keras.metrics.Accuracy()
  # mean_loss = keras.metrics.Mean()
  # accuracy = keras.metrics.Accuracy()
  sample_weights = tf.ones(batch_size)

  rmsprop_optimizer = keras.optimizers.RMSprop(learning_rate=1e-3)
  nadam_optimizer = keras.optimizers.Nadam(learning_rate=1e-4)

  split_idx = (len(model.layers) // 2) & (~0 << 1)

  for epoch in range(1, n_epochs + 1):
    print(f"epoch: {epoch}/{n_epochs}")
    for i, (X_batch, y_batch) in dataset:
      with tf.GradientTape() as tape:
        y_pred = model(X_batch)
        loss = model.compute_loss(x=X_batch, y=y_batch, y_pred=y_pred)
      gradients = tape.gradient(loss, model.trainable_variables)
      rmsprop_optimizer.apply_gradients(zip(gradients[:split_idx], model.trainable_variables[:split_idx]))
      nadam_optimizer.apply_gradients(zip(gradients[split_idx:], model.trainable_variables[split_idx:]))
      metrics = model.compute_metrics(x=X_batch, y=y_batch, y_pred=y_pred, sample_weight=sample_weights)
      if i % 200 == 0:
        print_status_bar(epoch, i, n_batches, metrics)


    metrics = model.compute_metrics(x=X_batch, y=y_batch, y_pred=y_pred, sample_weight=sample_weights)
    val_metrics = model.evaluate(x=X_val, y=y_val)
    metrics.update(dict(zip([f"validation_{name}" for name in model.metrics_names], val_metrics)))
    print_status_bar(epoch, n_batches, n_batches, metrics)
    model.reset_metrics()

In [29]:
model = build_model()
train_model(model, train_dataset, X_val, y_val)


epoch: 1/10
0/1719 accuracy 0.0000 - loss 108.9092
200/1719 accuracy 0.6286 - loss 13.7216
400/1719 accuracy 0.6698 - loss 9.5615
600/1719 accuracy 0.6900 - loss 7.7180
800/1719 accuracy 0.7046 - loss 6.5351
1000/1719 accuracy 0.7124 - loss 5.7164
1200/1719 accuracy 0.7181 - loss 5.1406
1400/1719 accuracy 0.7245 - loss 4.6746
1600/1719 accuracy 0.7287 - loss 4.3164
1719/1719 accuracy 0.7314 - loss 4.1301 - validation_accuracy 0.7612 - validation_loss 1.6760
epoch: 2/10
0/1719 accuracy 0.7812 - loss 1.5566
200/1719 accuracy 0.7725 - loss 1.5901
400/1719 accuracy 0.7710 - loss 1.5830
600/1719 accuracy 0.7675 - loss 1.5783
800/1719 accuracy 0.7692 - loss 1.5349
1000/1719 accuracy 0.7744 - loss 1.4680
1200/1719 accuracy 0.7772 - loss 1.4296
1400/1719 accuracy 0.7784 - loss 1.3918
1600/1719 accuracy 0.7802 - loss 1.3624
1719/1719 accuracy 0.7808 - loss 1.3468 - validation_accuracy 0.8100 - validation_loss 1.0398
epoch: 3/10
0/1719 accuracy 0.7500 - loss 1.4704
200/1719 accuracy 0.7996 - los