In [4]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Load the MNIST dataset as (images, labels) pairs for the train and test splits.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Preprocessing: flatten every image into a row of 28 * 28 values, convert to
# float32 and divide by 255 (presumably mapping 8-bit pixels into [0, 1]).
# The number of rows is read from the array itself rather than hard-coded, so
# a subset or a differently sized split still reshapes correctly.
train_images = train_images.reshape((train_images.shape[0], 28 * 28))
train_images = train_images.astype('float32') / 255

test_images = test_images.reshape((test_images.shape[0], 28 * 28))
test_images = test_images.astype('float32') / 255

# One-hot encode the labels. The width is pinned to the 10 digit classes so
# both label matrices always have the same number of columns, even when a
# split happens not to contain the highest class.
train_labels = to_categorical(train_labels, num_classes=10)
test_labels = to_categorical(test_labels, num_classes=10)

# Network hyperparameters: layer widths, SGD step size and number of passes
# over the training set.
input_size, hidden_size, output_size = 28 * 28, 2, 10
learning_rate, epochs = 0.1, 2

# Parameter initialisation: biases start at zero, weights are standard-normal
# draws scaled down by 0.01.
b1 = np.zeros(shape=(1, hidden_size))
b2 = np.zeros(shape=(1, output_size))
# W1 must be drawn before W2: under a fixed seed, swapping the two calls would
# change the values both matrices receive.
W1 = 0.01 * np.random.randn(input_size, hidden_size)
W2 = 0.01 * np.random.randn(hidden_size, output_size)

# Activation function and its derivative
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The result is built from ``exp(-|x|)``, which always lies in ``(0, 1]``,
    so inputs of large magnitude cannot overflow ``exp`` the way a direct
    ``np.exp(-x)`` does for very negative ``x``.

    Args:
        x: A number or array-like of numbers.

    Returns:
        The sigmoid of ``x``: a NumPy scalar for scalar input, otherwise an
        array with the same shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is 1 / (1 + e^-x); for x < 0 it is the algebraically
    # equal form e^x / (1 + e^x). Both denominators are at least 1.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # arrays of higher rank unchanged.
    return result[()]

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid's slope expressed via its output.

    ``x`` is expected to be a value already produced by ``sigmoid`` (an
    activation), not the pre-activation input: when ``x = sigmoid(z)``, the
    product ``x * (1 - x)`` equals the derivative of the sigmoid at ``z``.

    Args:
        x: A number or NumPy array of sigmoid outputs.

    Returns:
        ``x * (1 - x)``, with the same shape as ``x``.
    """
    complement = 1 - x
    return x * complement

# Train the network: one gradient step per training example, `epochs` passes
# over the training set.
for epoch in range(epochs):
    for sample, target in zip(train_images, train_labels):
        # Forward pass on a single example, shaped as a one-row batch.
        x = sample.reshape(1, input_size)
        y = target.reshape(1, output_size)

        # Hidden layer
        z1 = np.dot(x, W1) + b1
        a1 = sigmoid(z1)

        # Output layer
        z2 = np.dot(a1, W2) + b2
        a2 = sigmoid(z2)

        # Error signal: target minus prediction.
        error = y - a2

        # Backward pass. sigmoid_derivative receives the activations (a2, a1),
        # not the pre-activations. d1 is computed before W2 is updated below,
        # so it uses the same weights as the forward pass.
        d2 = error * sigmoid_derivative(a2)
        d1 = np.dot(d2, W2.T) * sigmoid_derivative(a1)

        # Parameter update. Because `error` is y - a2, the deltas already
        # point in the direction that reduces the squared error, hence `+=`.
        # The arrays are modified in place.
        W2 += learning_rate * np.dot(a1.T, d2)
        b2 += learning_rate * np.sum(d2, axis=0, keepdims=True)
        W1 += learning_rate * np.dot(x.T, d1)
        b1 += learning_rate * np.sum(d1, axis=0, keepdims=True)

    # After every epoch, report the mean squared error over the whole
    # training set using one batched forward pass.
    z1 = np.dot(train_images, W1) + b1
    a1 = sigmoid(z1)
    z2 = np.dot(a1, W2) + b2
    a2 = sigmoid(z2)
    loss = np.mean(np.square(train_labels - a2))
    print(f'Epoch {epoch}, Loss: {loss}')

# Evaluate the network on the test set.
# A single batched forward pass replaces a per-example Python loop: each row
# of a2 holds the output activations for one test image.
z1 = np.dot(test_images, W1) + b1
a1 = sigmoid(z1)
z2 = np.dot(a1, W2) + b2
a2 = sigmoid(z2)

# The predicted class is the index of the largest output; the true class is
# the index of the 1 in the one-hot label row.
predicted_labels = np.argmax(a2, axis=1)
true_labels = np.argmax(test_labels, axis=1)
correct_predictions = int((predicted_labels == true_labels).sum())

accuracy = correct_predictions / len(test_images)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch 0, Loss: 0.07212776373274456
Epoch 1, Loss: 0.0709464942445466
Test Accuracy: 36.20%
