In [None]:
# 2020-10-26 created by Akson

In [None]:
# Code11.1
# 加载所需要的数据集

from functools import partial

import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Rescale the raw pixel values by 1/255 (presumably mapping 0..255 onto 0..1).
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# The first 5000 samples become the validation set; the rest is used for training.
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

# Standardize every split with per-pixel statistics computed on the training
# split only, so no information from validation/test data leaks into scaling.
pixel_means = X_train.mean(axis=0, keepdims=True)
pixel_stds = X_train.std(axis=0, keepdims=True)
X_train_scaled, X_valid_scaled, X_test_scaled = (
    (subset - pixel_means) / pixel_stds
    for subset in (X_train, X_valid, X_test)
)

In [None]:
# Code11.2
# Building a model with batch normalization (here BN follows each activated Dense layer)

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(10, activation="softmax")
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line after the table.
model.summary()

# Inspect the variables of the first BN layer and whether each one is trainable.
for var in model.layers[1].variables:
    print(var.name, var.trainable)
    

In [None]:
# Code11.3
# Batch normalization placed between each hidden layer and its activation

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
    # use_bias=False: the BN layer that follows provides its own learned
    # offset, so a bias term in the Dense layer would be redundant.
    keras.layers.Dense(300, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    # Same Dense -> BN -> activation order as the first hidden block.
    keras.layers.Dense(100, kernel_initializer="he_normal", use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("elu"),
    keras.layers.Dense(10, activation="softmax")
])

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=keras.optimizers.SGD(learning_rate=1e-3),
              metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=10,
                    validation_data=(X_valid, y_valid))

In [None]:
# Code11.4
# 查看BatchNormalization的源代码

# print(keras.layers.__file__)

In [None]:
# Code11.5
# 梯度裁剪的设置方法

# 在设置最优化算法时加这个参数即可
# optimizer = keras.optimizers.SGD(clipvalue = 1.0) # 只对处于阈值之外的值进行缩放，会改变方向
# optimizer = keras.optimizers.SGD(clipnorm = 1.0) # 保留方向，但有可能消除较小值的影响
# model.compile(loss = 'mse', optimizer = optimizer)

In [None]:
# Code11.6
# 拆分数据集

def split_dataset(X, y):
    """Split a labelled dataset into two sub-tasks, A and B.

    Task A keeps every sample whose label is neither 5 nor 6 (sandals or
    shirts); labels above 6 are shifted down by 2 so that the remaining
    class indices are contiguous (7, 8, 9 become 5, 6, 7).
    Task B keeps only the samples labelled 5 or 6 and turns the label into
    a float32 binary target: 1.0 for class 6 (shirt), 0.0 otherwise.

    Args:
        X: array of samples, indexable by a boolean mask along its first axis.
        y: NumPy array of integer class labels aligned with X.

    Returns:
        ((X_A, y_A), (X_B, y_B)). The input arrays are not modified.
    """
    in_task_b = np.logical_or(y == 5, y == 6)
    in_task_a = np.logical_not(in_task_b)

    # Boolean indexing returns a copy, so relabelling below leaves `y` untouched.
    labels_a = y[in_task_a]
    above_gap = labels_a > 6
    labels_a[above_gap] = labels_a[above_gap] - 2

    labels_b = (y[in_task_b] == 6).astype(np.float32)
    return (X[in_task_a], labels_a), (X[in_task_b], labels_b)

# Split every subset into task A (the remaining 8 classes) and task B (binary: shirt or not).
(X_train_A, y_train_A), (X_train_B, y_train_B) = split_dataset(X_train, y_train)
(X_valid_A, y_valid_A), (X_valid_B, y_valid_B) = split_dataset(X_valid, y_valid)
(X_test_A, y_test_A), (X_test_B, y_test_B) = split_dataset(X_test, y_test)

# Keep only the first 200 training samples for task B.
X_train_B, y_train_B = X_train_B[:200], y_train_B[:200]


In [None]:
# Code11.7

# Train a model on task A: a flatten layer, five SELU hidden layers and an
# 8-way softmax output.
model_A = keras.models.Sequential()
model_A.add(keras.layers.Flatten(input_shape=[28, 28]))
for n_hidden in (300, 100, 50, 50, 50):
    model_A.add(keras.layers.Dense(n_hidden, activation="selu"))
model_A.add(keras.layers.Dense(8, activation="softmax"))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model_A.compile(loss="sparse_categorical_crossentropy",
                optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                metrics=["accuracy"])

history = model_A.fit(X_train_A, y_train_A, epochs=20,
                      validation_data=(X_valid_A, y_valid_A))

# Save the trained model so that it can be reloaded and reused later.
model_A.save("my_model_A.h5")

In [None]:
# Code11.8
# Reusing the layers of an already trained model

model_A = keras.models.load_model("my_model_A.h5")
# Work on a copy so that training model B does not modify model A.
# clone_model() copies the architecture ...
model_A_clone = keras.models.clone_model(model_A)
# ... and the weights are then copied over explicitly.
model_A_clone.set_weights(model_A.get_weights())

# Reuse every layer of the copy except its output layer (usually the lower layers are reused)
model_B_on_A = keras.models.Sequential(model_A_clone.layers[:-1])
# and add a new single-unit sigmoid output for the binary task B.
model_B_on_A.add(keras.layers.Dense(1, activation="sigmoid"))

# Freeze the reused layers first so that the untrained output layer does not
# wreck their weights.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = False

# The model is compiled after changing `trainable`, before training.
model_B_on_A.compile(loss="binary_crossentropy", optimizer="sgd", metrics=["accuracy"])
# Train for a few epochs with the reused layers frozen.
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=4,
                           validation_data=(X_valid_B, y_valid_B))

# Unfreeze the reused layers.
for layer in model_B_on_A.layers[:-1]:
    layer.trainable = True

# Train again, this time with a smaller learning rate.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = keras.optimizers.SGD(learning_rate=1e-4)
model_B_on_A.compile(loss="binary_crossentropy", optimizer=optimizer, metrics=["accuracy"])
# Continue training with all layers trainable.
history = model_B_on_A.fit(X_train_B, y_train_B, epochs=16,
                           validation_data=(X_valid_B, y_valid_B))

print(model_B_on_A.evaluate(X_test_B, y_test_B))

In [None]:
# Code11.9
# 动量优化

# 只要改变这个参数就好
# optimizer = keras.optimizers.SGD(lr = 0.001, momentum = 0.9)

In [None]:
# Code11.10
# Nesterov加速梯度

# 只要改变这个参数就好
# optimizer = keras.optimizers.SGD(lr = 0.001, momentum = 0.9, nesterov = True)

In [None]:
# Code11.11
# RMSProp

# 只要改变这个参数就好
# optimizer = keras.optimizers.RMSprop(lr = 0.001, rho = 0.9)

In [None]:
# Code11.12
# Adam

# 只要改变这个参数就好
# optimizer = keras.optimizers.Adam(lr = 0.001, beta_1 = 0.9, beta_2 = 0.999)

In [None]:
# Code11.13
# 学习率调度

# 幂调度
# optimizer = keras.optimizers.SGD(lr = 0.01, decay = 1e-4)

# 指数调度，要注意使用epoch时如果有中断训练的情况可能会变复杂
def exponential_decay(lr0, s):
    """Build an epoch-based exponential learning-rate schedule.

    Args:
        lr0: learning rate returned for epoch 0.
        s: number of epochs over which the rate is divided by 10.

    Returns:
        A function mapping an epoch index to ``lr0 * 0.1 ** (epoch / s)``.
    """
    def schedule(epoch):
        exponent = epoch / s
        return lr0 * 0.1 ** exponent

    return schedule

# exponential_decay_fn = exponential_decay(lr0 = 0.01, s = 20)

# lr_scheduler = keras.callbacks.LearningRateScheduler(exponential_decay_fn)
# history = model.fit(X_train_scaled, y_train, epochs=n_epochs, validation_data=(X_valid_scaled, y_valid), callbacks=[lr_scheduler])

# 将学习率作为参数
def exponential_decay_fn(epoch, lr):
    """Return the next learning rate given the current one.

    The current rate is multiplied by ``0.1 ** (1 / 20)``, so applying the
    function once per epoch divides the rate by 10 every 20 epochs.

    Args:
        epoch: epoch index; accepted but not used in the computation.
        lr: current learning rate.
    """
    per_epoch_factor = 0.1 ** (1 / 20)
    return lr * per_epoch_factor

# 分段恒定调度
def piecewise_constant_fn(epoch):
    """Piecewise-constant learning-rate schedule.

    Returns 0.01 for epochs below 5, 0.005 for epochs below 15,
    and 0.001 from epoch 15 onwards.
    """
    # (exclusive upper epoch bound, learning rate) pairs in ascending order.
    for upper_bound, rate in ((5, 0.01), (15, 0.005)):
        if epoch < upper_bound:
            return rate
    return 0.001

# Performance scheduling: change the learning rate when performance has not
# improved for several consecutive epochs (here: halve it, with patience 5).
lr_scheduler = keras.callbacks.ReduceLROnPlateau(
    factor=0.5,
    patience=5,
)

# 可以使用调度方法来定义学习率，然后将学习率再传给优化器
# s = 20 * len(X_train)
# learning_rate = keras.optimizers.schedules.ExponentialDecay(0.01, s, 0.1)
# optimizer = keras.optimizers.SGD(learning_rate)


In [None]:
# Code11.14
# l1 / l2 regularization

# A single Dense layer with an l2 penalty (factor 0.01) on its kernel weights.
layer = keras.layers.Dense(100, activation="elu",
                           kernel_initializer="he_normal",
                           kernel_regularizer=keras.regularizers.l2(0.01))

# To avoid repeating the same arguments for every layer, either build the
# layers in a loop or pre-bind the shared arguments with functools.partial
# (imported with the other imports at the top of the notebook).
RegularizedDense = partial(keras.layers.Dense,
                           activation="elu",
                           kernel_initializer="he_normal",
                           kernel_regularizer=keras.regularizers.l2(0.01))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    RegularizedDense(300),
    RegularizedDense(100),
    # The output layer overrides the pre-bound activation and initializer.
    RegularizedDense(10, activation="softmax", kernel_initializer="glorot_uniform")
])


In [None]:
# Code11.15
# Dropout

# A dropout layer (rate 0.2) is placed in front of every Dense layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(10, activation="softmax"))

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="nadam",
    metrics=["accuracy"],
)

# Train on the standardized inputs for a small number of epochs.
n_epochs = 5
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=n_epochs,
    validation_data=(X_valid_scaled, y_valid),
)


In [None]:
# Code11.16
# MC Dropout

