#### 前章回顾
* 张量创建
* 张量进行切片索引
* 张量维度变化
* 张量数学运算

#### 本章目标
* 前向传播
* 构建3层神经网络
* $out = ReLU\{ReLU[X@W_1 + b_1]@W_2 + b_2\}@W_3 + b_3$（输出层不使用激活函数）

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
# Layer 1 parameters: 784 inputs -> 256 units.
# Weights are sampled from a truncated normal distribution (stddev 0.1);
# the bias vector starts at zero.
w1 = tf.Variable(tf.random.truncated_normal(shape=(784, 256), stddev=0.1))
b1 = tf.Variable(tf.zeros(shape=(256,)))

In [4]:
# Layer 2 parameters: 256 inputs -> 128 units.
# Truncated-normal weights (stddev 0.1) and a zero bias vector.
w2 = tf.Variable(tf.random.truncated_normal(shape=(256, 128), stddev=0.1))
b2 = tf.Variable(tf.zeros(shape=(128,)))

In [5]:
# Layer 3 (output) parameters: 128 inputs -> 10 outputs.
# Truncated-normal weights (stddev 0.1) and a zero bias vector.
w3 = tf.Variable(tf.random.truncated_normal(shape=(128, 10), stddev=0.1))
b3 = tf.Variable(tf.zeros(shape=(10,)))

In [6]:
# The forward pass works on flat vectors, so a [b, 28, 28] batch is first
# reshaped to [b, 784]; -1 lets TensorFlow infer the batch dimension.
x = tf.random.normal(shape=(2, 28, 28))
print(x.shape)
x = tf.reshape(x, shape=(-1, 28 * 28))
print(x.shape)

(2, 28, 28)
(2, 784)


In [7]:
# Layer 1 forward pass: affine transform followed by ReLU.
# `+` broadcasts the [256] bias across the batch dimension on its own, so no
# explicit tf.broadcast_to (which materializes a full [batch, 256] tensor and
# hard-codes the layer width) is needed; this matches layers 2 and 3.
h1 = x@w1 + b1
h1 = tf.nn.relu(h1)
h1.shape

TensorShape([2, 256])

In [8]:
# Layer 2 forward pass: affine transform of h1, then ReLU.
# The bias is broadcast across the batch dimension by the addition.
h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
h2.shape

TensorShape([2, 128])

In [9]:
# Layer 3 (output) forward pass: affine transform only, no activation.
out = tf.add(tf.matmul(h2, w3), b3)
out.shape

TensorShape([2, 10])

In [13]:
# Convert the ground-truth class indices into one-hot vectors of length 10.
y = tf.constant([0, 1])
y_onehot = tf.one_hot(indices=y, depth=10)
y_onehot

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)>

In [14]:
# Element-wise squared error between the one-hot labels and the network output
# (this is a squared difference, not a variance).
loss = tf.math.square(tf.math.subtract(y_onehot, out))
loss

<tf.Tensor: shape=(2, 10), dtype=float32, numpy=
array([[1.8951963 , 7.7249737 , 8.831971  , 0.47138783, 6.1189456 ,
        0.6345111 , 5.926991  , 0.14453362, 1.849305  , 2.1363316 ],
       [0.39925453, 6.5381403 , 0.06446642, 0.04693377, 0.05781179,
        1.1091652 , 0.02716741, 0.56003857, 5.799991  , 0.06695092]],
      dtype=float32)>

In [15]:
# Reduce the squared errors to a single scalar: the mean squared error (MSE)
# over all elements.
loss = tf.math.reduce_mean(input_tensor=loss)
loss

<tf.Tensor: shape=(), dtype=float32, numpy=2.5202034>

In [16]:
# Record the forward pass on a gradient tape so the loss can be differentiated
# with respect to every weight and bias.
with tf.GradientTape() as tape:
    # `+` broadcasts each bias across the batch dimension on its own, so layer 1
    # no longer needs an explicit tf.broadcast_to with a hard-coded width;
    # all three layers now use the same form.
    h1 = x@w1 + b1
    h1 = tf.nn.relu(h1)
    h2 = h1@w2 + b2
    h2 = tf.nn.relu(h2)
    # The output layer is left linear (no activation).
    out = h2@w3 + b3
    # Mean squared error between the one-hot labels and the network output.
    loss = tf.reduce_mean(tf.square(y_onehot - out))

# Gradients are returned in the same order as the variable list passed in.
grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])

# One gradient-descent step, applied in place: var <- var - lr * grad.
lr = 0.001
w1.assign_sub(lr * grads[0])
b1.assign_sub(lr * grads[1])
w2.assign_sub(lr * grads[2])
b2.assign_sub(lr * grads[3])
w3.assign_sub(lr * grads[4])
b3.assign_sub(lr * grads[5])

<tf.Variable 'UnreadVariable' shape=(10,) dtype=float32, numpy=
array([-7.4479547e-05, -5.3363625e-04, -2.7179613e-04, -4.6993489e-05,
       -2.7140911e-04, -2.5660709e-05, -2.5993667e-04,  1.1285331e-04,
        1.0484256e-04, -1.7203685e-04], dtype=float32)>