In [62]:
import tensorflow as tf
from tensorflow.keras import datasets

In [63]:
# Load the MNIST dataset; only the first (training) pair is kept, the second is discarded.
(x, y), _ = datasets.mnist.load_data()
# Print both shapes, one per line:
#   x -> 60000 images, each 28 x 28 pixels
#   y -> one label per image
print(x.shape, y.shape, sep="\n")

(60000, 28, 28)
(60000,)


In [64]:
# Labels become an int32 tensor.
y = tf.convert_to_tensor(y, dtype=tf.int32)
# Images become a float32 tensor. Pixel intensities are 0-255, so dividing by 255
# rescales them to [0, 1], which is easier to train on.
# NOTE: x is rebound to its own scaled value, so running this cell a second time
# would divide by 255 again.
x = tf.convert_to_tensor(x, dtype=tf.float32)
x = x / 255.

In [65]:
# Pair every image with its label, then group the pairs into mini-batches of 125
# (60000 / 125 = 480 batches per pass over the data).
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.batch(125)

In [66]:
# Each image is 28 * 28 pixels, i.e. 784 values once flattened.
# A batch of b images is projected down in three steps, ending with 10 outputs
# (one per digit class 0-9):
# [b, 784] => [b, 256] => [b, 128] => [b, 10]
# The parameters are tf.Variable objects so that tf.GradientTape tracks them and
# can differentiate with respect to them.

# Seed TensorFlow's global generator right before drawing the weights so that the
# initial parameters (and therefore the logged losses) are the same on every
# Restart & Run All, and re-running this cell resets the model to the same state.
tf.random.set_seed(42)

# truncated_normal draws from a normal distribution and re-draws any sample that
# lies more than two standard deviations from the mean; stddev is that standard
# deviation. Biases start at zero.
w1 = tf.Variable(tf.random.truncated_normal([784, 256], stddev=0.1))
b1 = tf.Variable(tf.zeros([256]))
w2 = tf.Variable(tf.random.truncated_normal([256, 128], stddev=0.1))
b2 = tf.Variable(tf.zeros([128]))
w3 = tf.Variable(tf.random.truncated_normal([128, 10], stddev=0.1))
b3 = tf.Variable(tf.zeros([10]))

In [67]:
# Learning rate for plain gradient descent.
lr = 1e-3
# Trainable parameters, in the same order in which their gradients are requested.
params = [w1, b1, w2, b2, w3, b3]

# Make 10 passes (epochs) over the training data.
for epoch in range(10):
    # Iterate over the mini-batches produced by train_db.
    # The loop targets are deliberately NOT named x / y: those names hold the full
    # dataset tensors, and overwriting them with the last mini-batch would break
    # any later re-run of the cells that build train_db from x and y.
    for step, (x_batch, y_batch) in enumerate(train_db):
        # [b, 28, 28] => [b, 28*28]: flatten every image into a single row.
        x_batch = tf.reshape(x_batch, [-1, 28*28])
        # Operations on tf.Variable objects inside the tape are recorded so that
        # gradients can be computed automatically afterwards.
        with tf.GradientTape() as tape:
            # [b, 784] @ [784, 256] + [256] => [b, 256] (bias is broadcast over b)
            h1 = tf.nn.relu(x_batch @ w1 + b1)
            # [b, 256] => [b, 128]
            h2 = tf.nn.relu(h1 @ w2 + b2)
            # [b, 128] => [b, 10]; the output layer has no activation.
            out = h2 @ w3 + b3
            # y_batch: [b] => [b, 10]
            y_onehot = tf.one_hot(y_batch, depth=10)
            # Mean squared error: mean((y_onehot - out)^2) over all b * 10 entries.
            loss = tf.reduce_mean(tf.square(y_onehot - out))
        grads = tape.gradient(loss, params)
        # Gradient-descent step, in place: param <- param - lr * grad
        for param, grad in zip(params, grads):
            param.assign_sub(lr * grad)
        # Report the loss of the current batch every 100 steps.
        if step % 100 == 0:
            print(epoch, step, 'loss: ', float(loss))

0 0 loss:  0.30867183208465576
0 100 loss:  0.20889152586460114
0 200 loss:  0.18648980557918549
0 300 loss:  0.176169753074646
0 400 loss:  0.16235509514808655
1 0 loss:  0.15100857615470886
1 100 loss:  0.15689125657081604
1 200 loss:  0.1524558663368225
1 300 loss:  0.15121577680110931
1 400 loss:  0.1408216506242752
2 0 loss:  0.12968459725379944
2 100 loss:  0.13755886256694794
2 200 loss:  0.13341549038887024
2 300 loss:  0.13654020428657532
2 400 loss:  0.12728823721408844
3 0 loss:  0.11574900895357132
3 100 loss:  0.12468554824590683
3 200 loss:  0.12056072056293488
3 300 loss:  0.1262807548046112
3 400 loss:  0.11767496168613434
4 0 loss:  0.1060013696551323
4 100 loss:  0.11528414487838745
4 200 loss:  0.11120573431253433
4 300 loss:  0.11860466003417969
4 400 loss:  0.11047586798667908
5 0 loss:  0.09886004030704498
5 100 loss:  0.10813496261835098
5 200 loss:  0.10391082614660263
5 300 loss:  0.11250125616788864
5 400 loss:  0.10468688607215881
6 0 loss:  0.093265369534492