In [2]:
import tensorflow as tf
print(tf.__version__)

2.0.0-rc0


## 自动求导机制

- GradientTape是eager模式下计算梯度用的
- watch(tensor)

作用：确保某个tensor被tape追踪

参数:tensor：一个Tensor或者一个Tensor列表

- gradient(target, sources)

作用：根据tape上面的上下文来计算某个或者某些tensor的梯度参数

target:被微分的Tensor或Tensor列表，你可以理角为经过某个函数之后的值

sources：Tensors或者Variables列表（当然可以只有一个值），你可以理解为函数的某个变量

返回：

一个列表表示各个变理的梯度值，和source中的变量列表一一对应，表明这个变量的梯度。

下面的例子可中的梯度计算部分可以更直观的理解这个函数的用法。

In [2]:
x = tf.constant(3.0)

with tf.GradientTape() as g:
    g.watch(x)
    y = x * x
    dy_dx = g.gradient(y, x) #y` = 2*x = 2*3 =6

In [3]:
dy_dx

<tf.Tensor: id=5, shape=(), dtype=float32, numpy=6.0>

## 案例1、模型自动求导

构建模型（神经网络的前向传播）->定义损失函数->定义优化函数->定义tape->模型得到预测值->前向传播得到loss->反向传播->用优化函数将计算出来的梯度更新到变量上面去

In [3]:
class MyModel(tf.keras.Model):
    
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__(name='my_model')
        self.num_classes = num_classes
        # 定义自己需要的层
        self.dense_1 = tf.keras.layers.Dense(32, activation='relu') # 隐藏层
        self.dense_2 = tf.keras.layers.Dense(num_classes) #输出层
    def call(self, inputs):
        # 定义前向传播
        # 使用在__init__定义的层
        x = self.dense_1(inputs)
        return self.dense_2(x)

In [7]:
import numpy as np
# 10 分类问题
data = np.random.random((1000, 32))
labels = np.random.random((1000,10))


In [9]:
model = MyModel(num_classes=10)

loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

with tf.GradientTape() as tape:
    predictions = model(data)
    loss = loss_object(labels, predictions)

gradients = tape.gradient(loss, model.trainable_variables) # 求梯度

optimizer.apply_gradients(zip(gradients, model.trainable_variables))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [10]:
model.trainable_variables

[<tf.Variable 'my_model/dense_6/kernel:0' shape=(32, 32) dtype=float32, numpy=
 array([[ 0.20745091,  0.15229149, -0.1540774 , ...,  0.12217597,
         -0.11634819, -0.28319484],
        [-0.08796345,  0.23935138, -0.14834969, ...,  0.01834869,
          0.28798032,  0.01716036],
        [ 0.27218992,  0.07507293,  0.11915177, ...,  0.21946742,
          0.17711297,  0.0881404 ],
        ...,
        [ 0.17850195, -0.21385773, -0.30325305, ..., -0.0405313 ,
         -0.05310488, -0.10680339],
        [ 0.03528245, -0.30543593,  0.10261664, ..., -0.05817035,
         -0.18255249,  0.01146668],
        [ 0.01756158,  0.03250366, -0.02718802, ..., -0.03306036,
          0.00696599, -0.25986782]], dtype=float32)>,
 <tf.Variable 'my_model/dense_6/bias:0' shape=(32,) dtype=float32, numpy=
 array([ 0.00099211, -0.00099979,  0.00099996,  0.00099999, -0.00100001,
         0.00100001,  0.001     , -0.00100002, -0.00100001, -0.00100002,
         0.00099994,  0.00100001, -0.00099989, -0.000985  