## TensorFlow 的核心概念：自动微分（Autodiff）

In [1]:
import numpy as np
import tensorflow as tf

### 1. 简单的求导

对于公式 $y = x^2 + 4x$，求 $y$ 对 $x$ 的导数，即 $\frac{dy}{dx} = 2x + 4$

In [2]:
# Differentiate y = x^2 + 4x with respect to x at x = 3.
# x is a tf.Variable, which a GradientTape watches automatically.
x = tf.Variable(3.)

# Operations executed inside the tape context are recorded for differentiation.
with tf.GradientTape() as t:
    y = x*x + 4*x

# Expected derivative: dy/dx = 2*x + 4 = 2*3 + 4 = 10
dy_dx = t.gradient(y, x)
print(dy_dx)

tf.Tensor(10.0, shape=(), dtype=float32)


### 2. 在模型中的使用

#### 构造数据

In [3]:
# Toy dataset: 3 samples with 4 features each, and one binary label per sample.
features = np.array([
    [1,2,3,4],
    [5,6,7,8],
    [9,10,11,12]
])

labels = np.array([1, 0, 1])

# Sanity-check the shapes: expect (3, 4) for features and (3,) for labels.
print(features.shape, labels.shape)

(3, 4) (3,)


#### 搭建一个model

In [4]:
# Two stacked fully connected layers: 4 input features -> 3 units (no
# activation) -> 1 unit with a sigmoid activation.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(3, input_shape=(4,)))
model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

In [5]:
# Parameters that training will update; they are already initialized
# at this point (see the values displayed below).
model.trainable_weights

[<tf.Variable 'dense/kernel:0' shape=(4, 3) dtype=float32, numpy=
 array([[-0.6656144 , -0.42939597,  0.07242769],
        [ 0.19940329, -0.5502339 , -0.87196594],
        [-0.5693209 , -0.59339136, -0.547462  ],
        [ 0.48374104,  0.4053948 ,  0.91519713]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(3,) dtype=float32, numpy=array([0., 0., 0.], dtype=float32)>,
 <tf.Variable 'dense_1/kernel:0' shape=(3, 1) dtype=float32, numpy=
 array([[-0.44360936],
        [-0.159796  ],
        [ 0.30311763]], dtype=float32)>,
 <tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32, numpy=array([0.], dtype=float32)>]

#### 自动计算微分更新参数

In [6]:
# Loss function: binary cross-entropy. from_logits=False (the default) means
# the predictions passed in are treated as probabilities, not raw logits.
loss_func = tf.keras.losses.BinaryCrossentropy(from_logits=False)

In [7]:
# Run the forward pass under a gradient tape so the loss can later be
# differentiated with respect to the model weights. persistent=True allows
# tape.gradient() to be called more than once on this tape.
with tf.GradientTape(persistent=True) as tape:
    # NOTE: despite the variable name, these are not raw logits. The model's
    # last layer applies a sigmoid, so the values are probabilities, which is
    # the input BinaryCrossentropy expects when from_logits is False (its default).
    logits = model(features)
    loss_value = loss_func(labels, logits)

# Printing does not need to happen inside the tape context.
print("logits:\n", logits)
print()
print("loss_value:\n", loss_value)

logits:
 tf.Tensor(
[[0.5969129 ]
 [0.8312506 ]
 [0.94248176]], shape=(3, 1), dtype=float32)

loss_value:
 tf.Tensor(0.784854, shape=(), dtype=float32)


In [8]:
# Differentiate the loss with respect to every trainable weight. The result
# has one gradient tensor per entry of model.trainable_weights.
gradients = tape.gradient(target=loss_value, sources=model.trainable_weights)
gradients

[<tf.Tensor: shape=(4, 3), dtype=float32, numpy=
 array([[-0.4784321 , -0.17233977,  0.32691196],
        [-0.53323925, -0.19208227,  0.36436158],
        [-0.5880464 , -0.21182479,  0.40181124],
        [-0.6428535 , -0.2315673 ,  0.43926087]], dtype=float32)>,
 <tf.Tensor: shape=(3,), dtype=float32, numpy=array([-0.05480713, -0.01974251,  0.03744964], dtype=float32)>,
 <tf.Tensor: shape=(3, 1), dtype=float32, numpy=
 array([[-0.5318511 ],
        [-1.3236315 ],
        [-0.36949193]], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.12354819], dtype=float32)>]

#### 使用计算的梯度更新模型的参数

In [9]:
# Use an optimizer to apply the gradients to model.trainable_weights.
# apply_gradients consumes (gradient, variable) pairs, hence the zip.
optimizer = tf.keras.optimizers.Adam()
optimizer.apply_gradients(grads_and_vars=zip(gradients, model.trainable_weights))

<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [10]:
# Inspect the model parameters again; the displayed values show that
# they have been updated by the optimizer step.
model.trainable_weights

[<tf.Variable 'dense/kernel:0' shape=(4, 3) dtype=float32, numpy=
 array([[-0.66461444, -0.428396  ,  0.0714277 ],
        [ 0.20040327, -0.5492339 , -0.87296593],
        [-0.56832093, -0.5923914 , -0.548462  ],
        [ 0.48474103,  0.40639478,  0.91419715]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(3,) dtype=float32, numpy=array([ 0.00099994,  0.00099984, -0.00099992], dtype=float32)>,
 <tf.Variable 'dense_1/kernel:0' shape=(3, 1) dtype=float32, numpy=
 array([[-0.44260937],
        [-0.158796  ],
        [ 0.30411762]], dtype=float32)>,
 <tf.Variable 'dense_1/bias:0' shape=(1,) dtype=float32, numpy=array([-0.00099997], dtype=float32)>]