In [8]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization, Input
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
import numpy as np
import os

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# 加载MNIST数据集
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# 数据预处理
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

x_train = x_train.reshape(-1, 28, 28, 1)  # 保持原始图像形状并添加通道维度
x_test = x_test.reshape(-1, 28, 28, 1)

y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# 数据增强
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    shear_range=0.1,
    fill_mode="nearest",
)
datagen.fit(x_train)

# 构建BP神经网络模型
model = Sequential(
    [
        Input(shape=(28, 28, 1)),  # 使用Input层指定输入形状
        Flatten(),  # 展平图像

        Dense(512, activation="relu"),  # 输入层 -> 隐藏层 1
        BatchNormalization(),  # 批量归一化
        Dropout(0.4),

        Dense(256, activation="relu"),  # 隐藏层 2
        BatchNormalization(),
        Dropout(0.4),
        
        Dense(128, activation="relu"),  # 隐藏层 3
        BatchNormalization(),
        Dropout(0.4),
        
        Dense(64, activation="relu"),  # 隐藏层 4
        BatchNormalization(),
        Dropout(0.4),
        
        Dense(10, activation="softmax"),  # 输出层
    ]
)

# 编译模型
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),  # 降低学习率
    loss="categorical_crossentropy",  # 多分类交叉熵损失
    metrics=["accuracy"],
)

# 回调函数
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss", factor=0.5, patience=8, min_lr=1e-6, verbose=1
)
early_stopping = EarlyStopping(
    monitor="val_loss", patience=20, restore_best_weights=True, verbose=1
)

# 训练模型
model.fit(
    datagen.flow(x_train, y_train, batch_size=32),
    epochs=200,
    validation_data=(x_test, y_test),
    callbacks=[reduce_lr, early_stopping],
)

# 测试模型
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")

# 保存模型
model.save("mnist_model_bp_v1.keras")

Epoch 1/200
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 34ms/step - accuracy: 0.4063 - loss: 1.8082 - val_accuracy: 0.7779 - val_loss: 0.8089 - learning_rate: 0.0010
Epoch 2/200
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 35ms/step - accuracy: 0.4528 - loss: 1.6070 - val_accuracy: 0.7388 - val_loss: 0.9167 - learning_rate: 0.0010
Epoch 3/200
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 34ms/step - accuracy: 0.4442 - loss: 1.6247 - val_accuracy: 0.7357 - val_loss: 0.9401 - learning_rate: 0.0010
Epoch 4/200
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 34ms/step - accuracy: 0.4371 - loss: 1.6414 - val_accuracy: 0.7302 - val_loss: 0.9846 - learning_rate: 0.0010
Epoch 5/200
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 34ms/step - accuracy: 0.4363 - loss: 1.6484 - val_accuracy: 0.7056 - val_loss: 1.0222 - learning_rate: 0.0010
Epoch 6/200
[1m1875/1875[0m [32m━━━━━━━━━━