In [1]:
import tensorflow as tf
# Seed TensorFlow's global RNG so weight initialisation and dropout masks are
# repeatable when the notebook is re-run from a fresh kernel.
tf.random.set_seed(42)

fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist
# Hold out the last 5,000 training samples as the validation set.
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]
# Divide by 255 so pixel intensities lie in [0, 1].
X_train, X_valid, X_test = X_train / 255., X_valid / 255., X_test / 255.
# TODO: build a model from the custom MCDropout class and check whether the
# weights of this trained dropout model can be copied straight into it (same
# architecture, with MCDropout replacing Dropout). After copying, evaluate the
# predictions using a chosen number of Monte Carlo samples.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(rate=0.2),
    tf.keras.layers.Dense(10, activation="softmax")
])
optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.01, momentum=0.9)
model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

# `model` and `history` are module-level on purpose: the MC-dropout cell below
# reads the trained weights from `model`.
history = model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

Epoch 1/10
   1/1719 [..............................] - ETA: 5:00 - loss: 2.4681 - accuracy: 0.1562

2025-09-08 18:39:02.747322: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [2]:
class MCDropout(tf.keras.layers.Dropout):
    """Dropout layer that remains active regardless of the ``training`` flag.

    The parent ``Dropout.call`` is always invoked with ``training=True``, so
    units are dropped at prediction time as well. This is what makes repeated
    forward passes differ, which Monte Carlo Dropout relies on.
    """

    def call(self, inputs, training=False):
        # The caller's `training` argument is intentionally ignored.
        force_training = True
        return super().call(inputs, training=force_training)

import numpy as np

# Same layer layout as the trained `model`, with every Dropout swapped for
# MCDropout so that dropout stays on during prediction.
MCDrop_model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    MCDropout(0.2),
    tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
    MCDropout(0.2),
    tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal"),
    MCDropout(0.2),
    tf.keras.layers.Dense(10, activation="softmax")
])
# Reuse the trained parameters instead of training again.
MCDrop_model.set_weights(model.get_weights())

# Number of stochastic forward passes averaged per test sample.
N_MC_SAMPLES = 100
mc_passes = [MCDrop_model(X_test) for _ in range(N_MC_SAMPLES)]
y_probas = np.stack(mc_passes)            # one slice per forward pass
y_proba = np.mean(y_probas, axis=0)       # average over the passes
y_pred = y_proba.argmax(axis=1)           # most probable class per sample
print(f"accuracy: {np.mean(y_pred == y_test)}")

accuracy: 0.8606


In [3]:
# TODO (in-class exercise): compare Dropout vs AlphaDropout on a SELU network.

def build_model(use_alphaDropout=False, seed=42):
    """Train a 2x100 SELU classifier with either Dropout or AlphaDropout.

    Both variants share the same architecture (SELU activations with
    ``lecun_normal`` initialisation), the same standardised inputs, the same
    optimizer and the same seed. The dropout layer class is the only thing
    that differs, so the two training histories are directly comparable.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_valid`` and
    ``y_valid`` arrays.

    Args:
        use_alphaDropout: truthy selects ``AlphaDropout``; falsy (the default)
            selects the regular ``Dropout`` baseline.
        seed: value passed to ``tf.random.set_seed`` before the model is built.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    # Truthiness instead of `is False`, so 0 / None also select the baseline.
    if use_alphaDropout:
        dropout_cls = tf.keras.layers.AlphaDropout
    else:
        dropout_cls = tf.keras.layers.Dropout

    tf.random.set_seed(seed)
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        dropout_cls(rate=0.2),
        tf.keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
        dropout_cls(rate=0.2),
        tf.keras.layers.Dense(100, activation="selu", kernel_initializer="lecun_normal"),
        dropout_cls(rate=0.2),
        tf.keras.layers.Dense(10, activation="softmax"),
    ])

    # Standardise each pixel using statistics from the training split only.
    pixel_means = X_train.mean(axis=0, keepdims=True)
    pixel_stds = X_train.std(axis=0, keepdims=True)
    # Guard against constant pixels, which would otherwise divide by zero.
    pixel_stds[pixel_stds == 0] = 1.0
    X_train_scaled = (X_train - pixel_means) / pixel_stds
    X_valid_scaled = (X_valid - pixel_means) / pixel_stds

    optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    history = model.fit(X_train_scaled, y_train, epochs=10,
                        validation_data=(X_valid_scaled, y_valid))
    return history

In [4]:
# Baseline run: the regular-Dropout variant.
build_model(use_alphaDropout=False)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x16a5b74f0>

In [5]:
# Comparison run: the AlphaDropout variant.
build_model(use_alphaDropout=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x16c83c940>

In [6]:
def build_model(seed=42):
    """Return an uncompiled 28x28 -> 10-class classifier with three hidden layers.

    The network is Flatten, then three Dense(100, relu, he_normal) layers,
    then a Dense(10, softmax) output. ``tf.random.set_seed(seed)`` is called
    before any layer is created.

    NOTE(review): this definition reuses the name of the Dropout/AlphaDropout
    ``build_model`` defined earlier in the notebook and shadows it from here on.
    """
    tf.random.set_seed(seed)

    stack = [tf.keras.layers.Flatten(input_shape=[28, 28])]
    # Three identical hidden layers, created in order after the Flatten layer.
    stack += [
        tf.keras.layers.Dense(100, activation="relu", kernel_initializer="he_normal")
        for _ in range(3)
    ]
    stack.append(tf.keras.layers.Dense(10, activation="softmax"))
    return tf.keras.Sequential(stack)

def build_and_train_model(optimizer):
    """Build a fresh model with ``build_model()`` and train it for 5 epochs.

    Args:
        optimizer: optimizer passed to ``compile``.

    Returns:
        The ``History`` object from ``fit``. Training uses the notebook-level
        ``X_train``/``y_train`` and validates on ``X_valid``/``y_valid``.
    """
    net = build_model()
    net.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    training_history = net.fit(
        X_train,
        y_train,
        validation_data=(X_valid, y_valid),
        epochs=5,
    )
    return training_history

In [7]:
# Exercise: train a neural network that uses max-norm regularization.
from functools import partial

# Dense-layer factory with everything except the unit count frozen.
# Max-norm is a weight *constraint*, so it must be passed as
# `kernel_constraint`; `kernel_regularizer` expects a penalty function instead.
MaxNormDense = partial(
    tf.keras.layers.Dense,
    activation='relu',
    kernel_initializer='he_normal',
    kernel_constraint=tf.keras.constraints.max_norm(1.)
)

# Two hidden layers of 100 units each, built with MaxNormDense. The softmax
# output is a plain Dense layer, like the other models in this notebook.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28,28)),
    MaxNormDense(100),
    MaxNormDense(100),
    tf.keras.layers.Dense(10, activation='softmax')
])
# SGD with (Nesterov) momentum.
optimizer = tf.keras.optimizers.legacy.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)

model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
# Train the max-norm model itself. `build_and_train_model` would build and
# train a separate, unconstrained network instead.
history = model.fit(X_train, y_train, epochs=5, validation_data=(X_valid, y_valid))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
