## 准备数据

In [16]:
import os

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's C++ log filtering ('2' hides INFO
# and WARNING messages). It must be set BEFORE TensorFlow is imported;
# assigning it afterwards cannot silence the messages emitted while the
# library loads.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, datasets

def mnist_dataset():
    """Load MNIST and rescale the image arrays by dividing them by 255.0.

    Returns:
        A pair ``((x, y), (x_test, y_test))``: the training split followed by
        the test split, as returned by ``datasets.mnist.load_data()``, with
        only the image arrays rescaled. The label arrays are passed through
        unchanged.
    """
    train_split, test_split = datasets.mnist.load_data()
    train_images, train_labels = train_split
    test_images, test_labels = test_split

    # Dividing by 255.0 normalizes the pixel values.
    # NOTE(review): presumably the raw pixels are 0-255, giving a [0, 1]
    # range — confirm against the dataset documentation.
    pixel_scale = 255.0
    return (
        (train_images / pixel_scale, train_labels),
        (test_images / pixel_scale, test_labels),
    )


In [17]:
# Demo of zip(): pairs items position by position, as used later to match
# gradients with their variables.
print(list(zip(range(1, 5), 'abcd')))

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd')]


## 建立模型

In [18]:
class myModel(keras.Model):
    """Two-layer fully connected classifier that returns unnormalized logits.

    The forward pass is: Flatten -> Dense(128, relu) -> Dense(10).
    """

    def __init__(self):
        """Declare the layers that hold the model's parameters."""
        super().__init__()
        # Flattens each input sample into a 1-D vector.
        self.flatten = layers.Flatten()
        # Hidden fully connected layer: 128 units with ReLU activation.
        self.dense1 = layers.Dense(128, activation='relu')
        # Output fully connected layer: 10 units, no activation (raw logits).
        self.dense2 = layers.Dense(10)

    def call(self, x):
        """Compute the forward pass and return unnormalized logits.

        Keras subclasses implement ``call`` rather than ``__call__``:
        ``keras.Model.__call__`` wraps ``call`` with the framework's own
        bookkeeping (building, tracking, training-flag handling). Overriding
        ``__call__`` directly bypasses all of that. ``model(x)`` keeps
        working exactly as before.

        Args:
            x: Input batch; it is flattened per sample before the dense layers.

        Returns:
            The output of the final 10-unit dense layer (logits, no softmax).
        """
        x = self.flatten(x)
        x = self.dense1(x)
        return self.dense2(x)
    
        
# Adam optimizer with its default hyperparameters; it updates the model's
# variables in train_one_step.
optimizer = optimizers.Adam()

# The network instance shared by the training and evaluation cells below.
model = myModel()

## 计算 loss 与准确率，并定义训练、测试步骤

In [19]:
@tf.function
def compute_loss(logits, labels):
    """Return the mean sparse softmax cross-entropy between logits and labels.

    Args:
        logits: Unnormalized class scores produced by the model.
        labels: Class labels passed to
            ``tf.nn.sparse_softmax_cross_entropy_with_logits``.

    Returns:
        A scalar tensor: the cross-entropy averaged over all elements.
    """
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(per_example_loss)

@tf.function
def compute_accuracy(logits, labels):
    """Return the fraction of samples whose arg-max class equals the label.

    Args:
        logits: Class scores; the predicted class is the arg-max over axis 1.
        labels: Ground-truth class indices. Any numeric dtype is accepted;
            they are cast to the dtype of the predictions before comparing.

    Returns:
        A scalar float32 tensor holding the mean accuracy.
    """
    predictions = tf.argmax(logits, axis=1)
    # tf.equal requires both operands to share a dtype and tf.argmax yields
    # int64 by default. Without this cast, int32/uint8 labels would raise.
    labels = tf.cast(labels, predictions.dtype)
    matches = tf.equal(predictions, labels)
    return tf.reduce_mean(tf.cast(matches, tf.float32))

@tf.function
def train_one_step(model, optimizer, x, y):
    """Run one gradient update on the batch (x, y).

    Args:
        model: Callable model exposing ``trainable_variables``.
        optimizer: Optimizer whose ``apply_gradients`` performs the update.
        x: Input batch fed to the model.
        y: Labels for the batch.

    Returns:
        ``(loss, accuracy)`` as scalar tensors. Both are computed from the
        logits produced before the parameter update.
    """
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as grad_tape:
        batch_logits = model(x)
        batch_loss = compute_loss(batch_logits, y)

    variables = model.trainable_variables
    gradients = grad_tape.gradient(batch_loss, variables)
    # Pair each gradient with its variable and apply the update.
    optimizer.apply_gradients(zip(gradients, variables))

    return batch_loss, compute_accuracy(batch_logits, y)

@tf.function
def test(model, x, y):
    """Evaluate the model on (x, y) without updating any parameters.

    Args:
        model: Callable model that maps inputs to logits.
        x: Input batch.
        y: Labels for the batch.

    Returns:
        ``(loss, accuracy)`` computed from a single forward pass.
    """
    outputs = model(x)
    return compute_loss(outputs, y), compute_accuracy(outputs, y)


## 实际训练

In [20]:
train_data, test_data = mnist_dataset()

# Number of training iterations. Each "epoch" below is a single full-batch
# gradient step, because the whole training set is passed to train_one_step.
NUM_EPOCHS = 50

# Convert the training arrays to tensors once. Doing it inside the loop would
# re-copy the entire dataset on every iteration for no benefit.
train_images = tf.constant(train_data[0], dtype=tf.float32)
train_labels = tf.constant(train_data[1], dtype=tf.int64)

for epoch in range(NUM_EPOCHS):
    loss, accuracy = train_one_step(model, optimizer, train_images, train_labels)
    print('epoch', epoch, ': loss', loss.numpy(), '; accuracy', accuracy.numpy())

# Evaluate on the held-out test split after training finishes.
loss, accuracy = test(model,
                      tf.constant(test_data[0], dtype=tf.float32),
                      tf.constant(test_data[1], dtype=tf.int64))

print('test loss', loss.numpy(), '; accuracy', accuracy.numpy())

epoch 0 : loss 2.4027283 ; accuracy 0.0729
epoch 1 : loss 2.234963 ; accuracy 0.17641667
epoch 2 : loss 2.0854886 ; accuracy 0.3315
epoch 3 : loss 1.9496121 ; accuracy 0.45536667
epoch 4 : loss 1.823599 ; accuracy 0.53721666
epoch 5 : loss 1.7050716 ; accuracy 0.5979
epoch 6 : loss 1.5928643 ; accuracy 0.6458167
epoch 7 : loss 1.486653 ; accuracy 0.68675
epoch 8 : loss 1.386588 ; accuracy 0.71968335
epoch 9 : loss 1.2931621 ; accuracy 0.7432333
epoch 10 : loss 1.2067496 ; accuracy 0.7619
epoch 11 : loss 1.1273943 ; accuracy 0.77676666
epoch 12 : loss 1.0548664 ; accuracy 0.78815
epoch 13 : loss 0.9887492 ; accuracy 0.7966167
epoch 14 : loss 0.9286861 ; accuracy 0.80471665
epoch 15 : loss 0.8744337 ; accuracy 0.81191665
epoch 16 : loss 0.8257808 ; accuracy 0.8186833
epoch 17 : loss 0.7823361 ; accuracy 0.82423335
epoch 18 : loss 0.7434625 ; accuracy 0.8286
epoch 19 : loss 0.7084825 ; accuracy 0.83311665
epoch 20 : loss 0.67688894 ; accuracy 0.8380833
epoch 21 : loss 0.6484126 ; accuracy