In [1]:
import sys
import sklearn
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os

# One seed shared by NumPy and TensorFlow so the random draws below are
# repeatable; change it in a single place.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [2]:
# Load Fashion MNIST and divide the pixel values by 255.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0

# Hold out the first 5,000 training images as the validation set.
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

# Z-score standardisation: give every input pixel zero mean and unit standard
# deviation. This speeds up gradient descent and may improve accuracy.
# The statistics come from the training split only and are reused for the
# validation and test splits.
pixel_means = X_train.mean(axis=0, keepdims=True)  # per-pixel mean
pixel_stds = X_train.std(axis=0, keepdims=True)  # per-pixel standard deviation
# A pixel that never varies in the training split has a standard deviation of
# 0; dividing by it would produce NaN/inf. Divide by 1 instead for such pixels.
pixel_stds = np.where(pixel_stds == 0, 1.0, pixel_stds)
X_train_scaled = (X_train - pixel_means) / pixel_stds
X_valid_scaled = (X_valid - pixel_means) / pixel_stds
X_test_scaled = (X_test - pixel_means) / pixel_stds

### ℓ1 regularization with a factor of 0.1：得到稀疏模型

In [3]:
# Dense layer whose kernel carries an l1 penalty with factor 0.1.
layer = keras.layers.Dense(
    units=100,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l1(0.1),
)

### ℓ2 regularization with a factor of 0.01：约束权重大小（ℓ2不会得到稀疏模型，稀疏模型由ℓ1正则化得到）

In [4]:
# Dense layer whose kernel carries an l2 penalty with factor 0.01.
layer = keras.layers.Dense(
    units=100,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l2(0.01),
)


### ℓ1 and ℓ2 regularization with factors of 0.1 and 0.01 respectively

In [5]:
# Dense layer with both penalties: l1 factor 0.1 and l2 factor 0.01.
# A regularizer factory such as l2() returns a regularizer that is called at
# every training step to compute the regularization loss.
layer = keras.layers.Dense(
    units=100,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l1_l2(l1=0.1, l2=0.01),
)

In [6]:
"""常规方法构建模型每一层，重复部分多，易出错"""
# Every layer is spelled out in full: the regularizer and initializer
# arguments are repeated for each Dense layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(
    units=300,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l2(0.01),
))
model.add(keras.layers.Dense(
    units=100,
    activation="elu",
    kernel_initializer="he_normal",
    kernel_regularizer=keras.regularizers.l2(0.01),
))
model.add(keras.layers.Dense(
    units=10,
    activation="softmax",
    kernel_regularizer=keras.regularizers.l2(0.01),
))

#### functools.partial（）：为带有一些默认参数值的任何可调用对象创建小的包装函数

In [7]:
from functools import partial

# Dense-layer factory pre-filled with the arguments shared by the hidden
# layers: ELU activation, He-normal initialization and an l2(0.01) penalty.
RegularizedDense = partial(keras.layers.Dense,
                           activation="elu",
                           kernel_initializer="he_normal",
                           kernel_regularizer=keras.regularizers.l2(0.01))

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    RegularizedDense(300),
    RegularizedDense(100),
    # The output layer overrides the initializer as well as the activation.
    # Otherwise it would inherit "he_normal" from the partial, whereas the
    # explicitly written model leaves the softmax layer on the Dense default.
    RegularizedDense(10, activation="softmax",
                     kernel_initializer="glorot_uniform")
])

### Dropout：使用keras.layers.Dropout（）创建Dropout层

In [8]:
# Classifier with a Dropout layer (rate 0.2) in front of every Dense layer.
# During training Dropout randomly sets some inputs to 0 and divides the
# remaining ones by the keep probability (1 - p); after training the inputs
# are passed to the next layer unchanged.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dropout(rate=0.2))

model.add(keras.layers.Dense(300, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dropout(rate=0.2))

model.add(keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
model.add(keras.layers.Dropout(rate=0.2))

model.add(keras.layers.Dense(10, activation="softmax"))

### Alpha Dropout：适用于基于SELU的自归一化网络的正则化，会保留其输入的均值和标准差（常规Dropout会破坏自归一化）

In [9]:
# Requirements for a self-normalizing network: every hidden layer uses the
# SELU activation with LeCun-normal initialization, and the architecture is
# sequential. AlphaDropout (rate 0.2) is placed before every Dense layer.
model = keras.models.Sequential(layers=[
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(
        units=300,
        activation="selu",
        kernel_initializer="lecun_normal",
    ),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(
        units=100,
        activation="selu",
        kernel_initializer="lecun_normal",
    ),
    keras.layers.AlphaDropout(rate=0.2),
    keras.layers.Dense(units=10, activation="softmax"),
])

In [10]:
# Train the current model for n_epochs epochs on the standardized training
# set, evaluating on the standardized validation set after each epoch.
n_epochs = 2
model.compile(
    optimizer="nadam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=n_epochs,
    validation_data=(X_valid_scaled, y_valid),
)

Epoch 1/2
Epoch 2/2


### MC Dropout（蒙特卡罗）

In [11]:
# Monte Carlo Dropout: run the test set through the model 100 times and stack
# the results. training=True keeps the dropout layers active on every pass.
# The number of passes (100) is tunable: more passes give more accurate
# predictions and uncertainty estimates.
y_probas = np.stack([
    model(X_test_scaled, training=True)
    for _ in range(100)
])

# Aggregate over the passes (axis 0): mean prediction and its spread.
y_proba = np.mean(y_probas, axis=0)
y_std = np.std(y_probas, axis=0)

In [18]:
model.predict(X_test_scaled[:1])
# Returns a matrix with one row per instance and one column per class.
# Only the first test instance is passed here, so a single row comes back.
# NOTE(review): per the original note the test set has 10,000 instances and 10 classes, so predicting on all of it would give shape [10000, 10] - confirm.

array([[3.0125043e-21, 4.5713839e-16, 7.1757368e-24, 2.1428976e-18,
        1.0159120e-18, 2.4534505e-02, 1.7383883e-23, 8.0024507e-03,
        1.9826592e-13, 9.6746308e-01]], dtype=float32)

In [13]:
# Same prediction for the first test instance, rounded to two decimals.
model.predict(X_test_scaled[:1]).round(2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.02, 0.  , 0.01, 0.  , 0.97]],
      dtype=float32)

In [14]:
# Inspect the MC Dropout predictions for the first test instance.
# y_probas has shape [100, 10000, 10]; show the first 10 of the 100
# stochastic predictions for that instance, rounded to two decimals.
y_probas[:10, :1].round(2)

array([[[0.  , 0.  , 0.  , 0.  , 0.  , 0.13, 0.  , 0.24, 0.  , 0.63]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.06, 0.  , 0.11, 0.  , 0.83]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.01, 0.  , 0.98]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.44, 0.  , 0.03, 0.  , 0.52]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.2 , 0.  , 0.29, 0.01, 0.5 ]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.11, 0.  , 0.1 , 0.  , 0.79]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.03, 0.  , 0.05, 0.  , 0.92]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.13, 0.  , 0.38, 0.  , 0.49]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.09, 0.  , 0.06, 0.  , 0.85]],

       [[0.  , 0.  , 0.  , 0.  , 0.  , 0.07, 0.  , 0.33, 0.  , 0.6 ]]],
      dtype=float32)

In [15]:
# Standard deviation over the first dimension (the 100 MC passes), which
# leaves one row per test instance; show the row of the first instance.
y_std = np.std(y_probas, axis=0)
y_std[:1].round(2)

array([[0.  , 0.  , 0.  , 0.  , 0.  , 0.19, 0.  , 0.12, 0.01, 0.21]],
      dtype=float32)

In [22]:
# Accuracy of the standard (non-MC) dropout predictions on the test set:
# the fraction of instances whose highest-probability class matches the label.
y_pred_ = model.predict(X_test_scaled)
y_pred = y_pred_.argmax(axis=1)
accuracy = np.mean(y_pred == y_test)
accuracy

0.8425

In [23]:
# Accuracy of the MC Dropout predictions: classify with the class
# probabilities averaged over the stochastic passes (y_proba).
y_pred = y_proba.argmax(axis=1)
accuracy = np.mean(y_pred == y_test)
accuracy

0.8434

#### 若模型包含在训练过程中行为特殊的其他层（如BN层），则不能使用上述强制训练模式的做法，而应使用下面的MCDropout类替换Dropout层

In [24]:
class MCDropout(keras.layers.Dropout):
    """Dropout layer that is always active, for Monte Carlo Dropout.

    Drop-in replacement for ``keras.layers.Dropout``: ``call`` always forwards
    ``training=True`` to the parent implementation.
    """

    def call(self, inputs):
        return super().call(inputs, training=True)


class MCAlphaDropout(keras.layers.AlphaDropout):
    """AlphaDropout layer that is always active, for Monte Carlo Dropout.

    Drop-in replacement for ``keras.layers.AlphaDropout``: ``call`` always
    forwards ``training=True`` to the parent implementation.
    """

    def call(self, inputs):
        return super().call(inputs, training=True)


def _to_mc_layer(layer):
    """Return the always-active counterpart of a dropout layer.

    AlphaDropout becomes MCAlphaDropout and Dropout becomes MCDropout, each
    created with the same rate. Any other layer is returned unchanged.
    """
    if isinstance(layer, keras.layers.AlphaDropout):
        return MCAlphaDropout(layer.rate)
    if isinstance(layer, keras.layers.Dropout):
        return MCDropout(layer.rate)
    return layer


# Build the MC model from the trained model: dropout layers are swapped for
# their always-active versions, and every other layer object is reused as is,
# so the new model shares the trained weights rather than copying them.
mc_model = keras.models.Sequential([
    _to_mc_layer(layer) for layer in model.layers
])

### 最大范数正则化（Max norm）

In [25]:
# Max-norm regularization: give each hidden layer a kernel_constraint built
# with max_norm() and a suitable maximum value.
# After every training iteration fit() calls the object returned by
# max_norm() with the layer's weights and replaces them with the rescaled
# weights it returns.
layer = keras.layers.Dense(
    units=100,
    activation="selu",
    kernel_initializer="lecun_normal",
    kernel_constraint=keras.constraints.max_norm(max_value=1.0),
)

# Dense-layer factory that bundles the arguments repeated for every hidden
# layer: SELU, LeCun-normal initialization and a max-norm constraint of 1.
MaxNormDense = partial(
    keras.layers.Dense,
    activation="selu",
    kernel_initializer="lecun_normal",
    kernel_constraint=keras.constraints.max_norm(max_value=1.0),
)

# Classifier whose two hidden layers are max-norm constrained; the softmax
# output layer is a plain Dense layer.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(MaxNormDense(300))
model.add(MaxNormDense(100))
model.add(keras.layers.Dense(10, activation="softmax"))