In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

import time
from tqdm import trange
from collections import OrderedDict

In [None]:
# Load Fashion MNIST, convert both image arrays to float32 and divide by 255,
# then hold out the first 5,000 training instances as a validation set.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full, X_test = (
    images.astype(np.float32) / 255. for images in (X_train_full, X_test)
)
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

In [None]:
# Report the shape of every data split, one "<label> <shape>" line per array.
print("\n".join(
    "{} {}".format(split_label, split_array.shape)
    for split_label, split_array in (
        ("X_train:", X_train),
        ("y_train:", y_train),
        ("X_test:", X_test),
        ("y_test:", y_test),
        ("X_valid:", X_valid),
        ("y_valid", y_valid),
    )
))

X_train: (55000, 28, 28)
y_train: (55000,)
X_test: (10000, 28, 28)
y_test: (10000,)
X_valid: (5000, 28, 28)
y_valid (5000,)


# 自定义训练循环

### a.
Exercise: Display the epoch, iteration, mean training loss, and mean accuracy over each epoch (updated at each iteration), as well as the validation loss and accuracy at the end of each epoch.

In [None]:
# Clear Keras' global state and fix the NumPy / TensorFlow seeds before
# building the model.
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

# Flatten 28x28 inputs -> 100-unit ReLU hidden layer -> 10-unit softmax output.
model = keras.Sequential(
    layers=[
        keras.layers.Flatten(input_shape=[28, 28]),
        keras.layers.Dense(units=100, activation="relu"),
        keras.layers.Dense(units=10, activation="softmax"),
    ]
)

In [None]:
# Training schedule.
n_epochs, batch_size = 5, 32
n_steps = len(X_train) // batch_size  # number of whole mini-batches (iterations) per epoch

# Multi-class classification: Nadam optimizer, sparse categorical
# cross-entropy loss, a running mean of the loss, and accuracy as the metric.
optimizer = keras.optimizers.Nadam(learning_rate=1e-2)
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [None]:
# Display the number of whole mini-batches per epoch (len(X_train) // batch_size).
n_steps

1718

np.random.randint(low, high=None, size=None, dtype=int)：返回随机整型数组，取值范围为 [low, high)；若未指定 high，则取值范围为 [0, low)。

In [None]:
def random_batch(X, y, batch_size=32):
    """Draw a random mini-batch of instances from a dataset.

    Row indices are drawn with ``np.random.randint`` from ``[0, len(X))``, so
    the same row may be selected more than once within a batch.

    Args:
        X: Indexable array of inputs; rows are selected by an index array.
        y: Indexable array of targets aligned with ``X``.
        batch_size: Number of instances to draw.

    Returns:
        A tuple ``(X_batch, y_batch)`` holding the selected rows of ``X`` and
        the matching entries of ``y``.
    """
    sampled_rows = np.random.randint(0, len(X), batch_size)
    batch_inputs = X[sampled_rows]
    batch_targets = y[sampled_rows]
    return batch_inputs, batch_targets

{:.4f}：将浮点数格式化为保留小数点后四位；使用回车符 \r 和 end="" 可确保状态栏始终打印在同一行上。

In [None]:
def print_status_bar(iteration, total, loss, metrics):
    """Print a one-line training status bar that overwrites itself.

    The line shows ``iteration/total`` followed by the name and current value
    of the loss metric and of every additional metric, formatted with four
    decimals. A leading carriage return moves the cursor back to the start of
    the line, so successive calls redraw the same line.

    Args:
        iteration: Number of instances (or steps) processed so far.
        total: Value of ``iteration`` at which the bar is complete.
        loss: Metric-like object exposing ``name`` and ``result()``.
        metrics: Iterable of further metric-like objects, or ``None``.
    """
    fields = " - ".join(
        "{}: {:.4f}".format(m.name, float(m.result()))
        for m in [loss, *(metrics or [])]
    )
    # Stay on the same line until the final iteration, then finish the line.
    end = "" if iteration < total else "\n"
    print("\r{}/{} - ".format(iteration, total) + fields, end=end)

apply_gradients(grads_and_vars, name=None, skip_gradients_aggregation=False, **kwargs)，grads_and_vars：List of (gradient, variable) pairs.

In [None]:
for epoch in range(1, n_epochs + 1):
    # One epoch: n_steps randomly sampled mini-batches, then one validation pass.
    print("Epoch {}/{}".format(epoch, n_epochs))

    for step in range(1, n_steps + 1):
        # Sample a mini-batch of the configured size from the training set.
        X_batch, y_batch = random_batch(X_train, y_train, batch_size=batch_size)

        with tf.GradientTape() as tape:
            # Forward pass in training mode, so layers that behave differently
            # during training (e.g. Dropout, BatchNormalization) work correctly.
            y_pred = model(X_batch, training=True)
            # Main loss: mean of the per-instance losses over the batch.
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            # Add any extra losses registered on the model (e.g. regularizers).
            loss = tf.add_n([main_loss] + model.losses)

        # Gradient of the loss with respect to every trainable variable,
        # paired with its variable and applied by the optimizer.
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # Weight constraints, when present, are applied after the update.
        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))

        # Update the running mean loss and the metrics, then redraw the bar.
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)

    # End of epoch: evaluate on the validation set.
    y_pred = model(X_valid)
    val_loss = np.mean(loss_fn(y_valid, y_pred))
    val_accuracy = np.mean(keras.metrics.sparse_categorical_accuracy(
        tf.constant(y_valid, dtype=np.float32), y_pred))
    print(" - val_loss: {:.4f}".format(val_loss))
    print(" - val_accuracy: {:.4f}".format(val_accuracy))

    # Reset the running loss and the metrics before the next epoch.
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch 1/5
54976/55000 -  mean: 0.3707 - sparse_categorical_accuracy: 0.8692  - val_loss: 0.4218
 - val_accuracy: 0.8610
Epoch 2/5
54976/55000 -  mean: 0.3620 - sparse_categorical_accuracy: 0.8690  - val_loss: 0.3991
 - val_accuracy: 0.8618
Epoch 3/5
54976/55000 -  mean: 0.3639 - sparse_categorical_accuracy: 0.8705  - val_loss: 0.4041
 - val_accuracy: 0.8664
Epoch 4/5
54976/55000 -  mean: 0.3602 - sparse_categorical_accuracy: 0.8722  - val_loss: 0.4198
 - val_accuracy: 0.8632
Epoch 5/5
54976/55000 -  mean: 0.3490 - sparse_categorical_accuracy: 0.8747  - val_loss: 0.4274
 - val_accuracy: 0.8648


### b.
Exercise: Try using a different optimizer with a different learning rate for the upper layers and the lower layers.

In [None]:
# Clear Keras' global state and re-seed NumPy and TensorFlow for part (b).
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

In [None]:
# Build the network as two sub-models (lower and upper layers) chained into
# one model, with a separate optimizer object created for each half.
lower_layers = keras.Sequential(
    layers=[
        keras.layers.Flatten(input_shape=[28, 28]),
        keras.layers.Dense(units=100, activation="relu"),
    ]
)
upper_layers = keras.Sequential(
    layers=[
        keras.layers.Dense(units=10, activation="softmax"),
    ]
)
model = keras.Sequential(layers=[lower_layers, upper_layers])

# Plain SGD with a small learning rate for the lower layers, Nadam with a
# larger one for the upper layers.
lower_optimizer = keras.optimizers.SGD(learning_rate=0.0001)
upper_optimizer = keras.optimizers.Nadam(learning_rate=0.001)

In [None]:
# Training schedule for part (b).
n_epochs, batch_size = 5, 32
n_steps = len(X_train) // batch_size  # number of whole mini-batches per epoch

# Loss function, running mean of the loss, and accuracy metric.
loss_fn = keras.losses.sparse_categorical_crossentropy
mean_loss = keras.metrics.Mean()
metrics = [keras.metrics.SparseCategoricalAccuracy()]

In [None]:
for epoch in range(1, n_epochs + 1):
    # One epoch: n_steps randomly sampled mini-batches, then one validation pass.
    print("Epoch {}/{}".format(epoch, n_epochs))

    for step in range(1, n_steps + 1):
        # Sample a mini-batch of the configured size from the training set.
        X_batch, y_batch = random_batch(X_train, y_train, batch_size=batch_size)

        with tf.GradientTape() as tape:
            # Forward pass in training mode, so layers that behave differently
            # during training (e.g. Dropout, BatchNormalization) work correctly.
            y_pred = model(X_batch, training=True)
            # Main loss: mean of the per-instance losses over the batch.
            main_loss = tf.reduce_mean(loss_fn(y_batch, y_pred))
            # Add any extra losses registered on the model (e.g. regularizers).
            loss = tf.add_n([main_loss] + model.losses)

        # One backward pass yields the gradients for both halves of the model;
        # the result has the same nested structure as the sources passed in.
        lower_gradients, upper_gradients = tape.gradient(
            loss,
            [lower_layers.trainable_variables, upper_layers.trainable_variables],
        )
        # Update each half with its own optimizer (and learning rate).
        lower_optimizer.apply_gradients(
            zip(lower_gradients, lower_layers.trainable_variables))
        upper_optimizer.apply_gradients(
            zip(upper_gradients, upper_layers.trainable_variables))

        # Weight constraints, when present, are applied after the update.
        for variable in model.variables:
            if variable.constraint is not None:
                variable.assign(variable.constraint(variable))

        # Update the running mean loss and the metrics, then redraw the bar.
        mean_loss(loss)
        for metric in metrics:
            metric(y_batch, y_pred)
        print_status_bar(step * batch_size, len(y_train), mean_loss, metrics)

    # End of epoch: evaluate on the validation set.
    y_pred = model(X_valid)
    val_loss = np.mean(loss_fn(y_valid, y_pred))
    val_accuracy = np.mean(keras.metrics.sparse_categorical_accuracy(
        tf.constant(y_valid, dtype=np.float32), y_pred))
    print(" - val_loss: {:.4f}".format(val_loss))
    print(" - val_accuracy: {:.4f}".format(val_accuracy))

    # Reset the running loss and the metrics before the next epoch.
    for metric in [mean_loss] + metrics:
        metric.reset_states()

Epoch 1/5
54976/55000 -  mean: 0.4524 - sparse_categorical_accuracy: 0.8430  - val_loss: 0.4866
 - val_accuracy: 0.8440
Epoch 2/5
54976/55000 -  mean: 0.4509 - sparse_categorical_accuracy: 0.8452  - val_loss: 0.4603
 - val_accuracy: 0.8494
Epoch 3/5
54976/55000 -  mean: 0.4315 - sparse_categorical_accuracy: 0.8491  - val_loss: 0.4676
 - val_accuracy: 0.8524
Epoch 4/5
54976/55000 -  mean: 0.4297 - sparse_categorical_accuracy: 0.8504  - val_loss: 0.4870
 - val_accuracy: 0.8506
Epoch 5/5
54976/55000 -  mean: 0.4185 - sparse_categorical_accuracy: 0.8550  - val_loss: 0.5129
 - val_accuracy: 0.8458
