## 准备数据

In [7]:
import os

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) log verbosity. It is
# set here, before `import tensorflow`, so that it is already in the
# environment when the library loads; setting it after the import is too late
# to silence the messages emitted during import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # or any of {'0', '1', '2'}

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets

def mnist_dataset():
    """Load MNIST through Keras and normalise the images.

    Returns:
        ((x, y), (x_test, y_test)): the two splits returned by
        ``datasets.mnist.load_data()``, with both image arrays divided by
        255.0. The label arrays are passed through unchanged.
    """
    train_split, test_split = datasets.mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split

    # Normalise: divide the raw pixel values by 255.0.
    train_images = train_images / 255.0
    test_images = test_images / 255.0

    return (train_images, train_labels), (test_images, test_labels)

In [8]:
# Quick demo: zip pairs up the elements of two sequences position by position.
print([(number, letter) for number, letter in zip([1, 2, 3, 4], ['a', 'b', 'c', 'd'])])

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')]


## 建立模型，改为 PyTorch 版本

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim

class myModel(nn.Module):
    """Two-layer fully connected classifier: Linear -> tanh -> Linear.

    Args:
        in_features: Number of values per flattened sample (default 28 * 28).
        hidden_features: Width of the hidden layer (default 128).
        num_classes: Number of logits produced per sample (default 10).
    """

    def __init__(self, in_features=28 * 28, hidden_features=128, num_classes=10):
        super().__init__()
        # Model parameters: input -> hidden and hidden -> output projections.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Run the forward pass and return unnormalised logits.

        Args:
            x: Tensor whose elements can be regrouped into rows of
                ``fc1.in_features`` values (e.g. ``(batch, 28, 28)`` with the
                default sizes).

        Returns:
            Tensor of shape ``(rows, num_classes)`` holding the raw logits;
            no softmax is applied.
        """
        # Flatten every sample into one row. reshape (unlike view) also
        # accepts non-contiguous inputs such as transposed or sliced tensors,
        # copying only when it has to.
        x = x.reshape(-1, self.fc1.in_features)
        h1 = torch.tanh(self.fc1(x))    # first layer + tanh activation
        logits = self.fc2(h1)           # second layer produces the logits
        return logits

        
# Seed PyTorch's global RNG before the layers are created so that the random
# weight initialisation (and therefore the training curve printed below) is
# repeatable when the notebook is restarted and re-run.
torch.manual_seed(0)
model = myModel()

# Adam over all model parameters with a learning rate of 0.001.
optimizer = optim.Adam(model.parameters(), lr=0.001)

## 计算 loss

In [10]:
# @tf.function
# def compute_loss(logits, labels):
#     return tf.reduce_mean(
#         tf.nn.sparse_softmax_cross_entropy_with_logits(
#             logits=logits, labels=labels))

# @tf.function
# def compute_accuracy(logits, labels):
#     predictions = tf.argmax(logits, axis=1)
#     return tf.reduce_mean(tf.cast(tf.equal(predictions, labels), tf.float32))

# @tf.function
# def train_one_step(model, optimizer, x, y):
#     with tf.GradientTape() as tape:
#         logits = model(x)
#         loss = compute_loss(logits, y)

#     # compute gradient
#     trainable_vars = [model.W1, model.W2, model.b1, model.b2]
#     grads = tape.gradient(loss, trainable_vars)
#     for g, v in zip(grads, trainable_vars):
#         v.assign_sub(0.01*g)

#     accuracy = compute_accuracy(logits, y)

#     # loss and accuracy are scalar tensors
#     return loss, accuracy

# @tf.function
# def test(model, x, y):
#     logits = model(x)
#     loss = compute_loss(logits, y)
#     accuracy = compute_accuracy(logits, y)
#     return loss, accuracy

In [11]:
# Cross-entropy loss
def compute_loss(logits, labels):
    """Return the cross-entropy loss of ``logits`` against ``labels``.

    A fresh ``nn.CrossEntropyLoss`` with its default settings is built on
    every call and applied to the two arguments.
    """
    return nn.CrossEntropyLoss()(logits, labels)

# Classification accuracy
def compute_accuracy(logits, labels):
    """Return the fraction of rows whose arg-max logit equals the label.

    The predicted class is the index of the largest value along dim 1; the
    result is a scalar float tensor (mean of the per-row hit indicators).
    """
    hits = logits.argmax(dim=1) == labels
    return hits.float().mean()

# One optimisation step
def train_one_step(model, optimizer, x, y):
    """Run one forward/backward pass on ``(x, y)`` and update the model.

    Args:
        model: Module to train; it is switched to training mode.
        optimizer: Optimizer whose gradients are cleared and which then
            applies the update.
        x: Inputs passed to ``model``.
        y: Labels passed to ``compute_loss`` and ``compute_accuracy``.

    Returns:
        ``(loss, accuracy)`` as plain Python numbers (via ``.item()``). Both
        are computed from the logits produced before the parameter update.
    """
    model.train()
    optimizer.zero_grad()  # drop gradients left over from the previous step

    batch_logits = model(x)
    batch_loss = compute_loss(batch_logits, y)

    batch_loss.backward()  # back-propagate to populate the gradients
    optimizer.step()       # apply the parameter update

    batch_accuracy = compute_accuracy(batch_logits, y)
    return batch_loss.item(), batch_accuracy.item()

# Evaluation
def test(model, x, y):
    """Evaluate ``model`` on ``(x, y)`` without tracking gradients.

    The model is switched to evaluation mode, then the loss and accuracy are
    computed inside ``torch.no_grad()``.

    Returns:
        ``(loss, accuracy)`` as plain Python numbers (via ``.item()``).
    """
    model.eval()
    with torch.no_grad():
        eval_logits = model(x)
        eval_loss = compute_loss(eval_logits, y)
        eval_accuracy = compute_accuracy(eval_logits, y)
        return eval_loss.item(), eval_accuracy.item()

## 实际训练

In [12]:
# train_data, test_data = mnist_dataset()
# for epoch in range(50):
#     loss, accuracy = train_one_step(model, optimizer, 
#                                     tf.constant(train_data[0], dtype=tf.float32), 
#                                     tf.constant(train_data[1], dtype=tf.int64))
#     print('epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())
# loss, accuracy = test(model, 
#                       tf.constant(test_data[0], dtype=tf.float32), 
#                       tf.constant(test_data[1], dtype=tf.int64))

# print('test loss', loss.numpy(), '; accuracy', accuracy.numpy())

In [13]:
# Training loop
train_data, test_data = mnist_dataset()  # load the MNIST splits

# Convert the training split to tensors once, outside the loop: the data does
# not change between iterations, so rebuilding the tensors on every pass was
# loop-invariant work (a full copy of the training set per iteration).
train_x = torch.tensor(train_data[0], dtype=torch.float32)
train_y = torch.tensor(train_data[1], dtype=torch.int64)

# Every iteration feeds the whole training split to train_one_step, i.e. one
# full-batch update per "epoch".
for epoch in range(50):
    loss, accuracy = train_one_step(model, optimizer, train_x, train_y)
    print(f'Epoch {epoch}: Loss {loss:.4f}; Accuracy {accuracy:.4f}')

# Evaluation on the test split
test_x = torch.tensor(test_data[0], dtype=torch.float32)
test_y = torch.tensor(test_data[1], dtype=torch.int64)
loss, accuracy = test(model, test_x, test_y)

print(f'Test Loss: {loss:.4f}; Accuracy: {accuracy:.4f}')


Epoch 0: Loss 2.3163; Accuracy 0.0684
Epoch 1: Loss 2.2107; Accuracy 0.2708
Epoch 2: Loss 2.1118; Accuracy 0.5028
Epoch 3: Loss 2.0179; Accuracy 0.6046
Epoch 4: Loss 1.9282; Accuracy 0.6455
Epoch 5: Loss 1.8418; Accuracy 0.6713
Epoch 6: Loss 1.7584; Accuracy 0.6917
Epoch 7: Loss 1.6781; Accuracy 0.7091
Epoch 8: Loss 1.6010; Accuracy 0.7243
Epoch 9: Loss 1.5273; Accuracy 0.7377
Epoch 10: Loss 1.4571; Accuracy 0.7495
Epoch 11: Loss 1.3906; Accuracy 0.7607
Epoch 12: Loss 1.3279; Accuracy 0.7702
Epoch 13: Loss 1.2688; Accuracy 0.7783
Epoch 14: Loss 1.2134; Accuracy 0.7850
Epoch 15: Loss 1.1614; Accuracy 0.7915
Epoch 16: Loss 1.1127; Accuracy 0.7970
Epoch 17: Loss 1.0670; Accuracy 0.8017
Epoch 18: Loss 1.0243; Accuracy 0.8059
Epoch 19: Loss 0.9843; Accuracy 0.8102
Epoch 20: Loss 0.9469; Accuracy 0.8145
Epoch 21: Loss 0.9119; Accuracy 0.8187
Epoch 22: Loss 0.8792; Accuracy 0.8223
Epoch 23: Loss 0.8486; Accuracy 0.8260
Epoch 24: Loss 0.8200; Accuracy 0.8302
Epoch 25: Loss 0.7932; Accuracy 0.8