In [1]:
import tensorflow as tf
tf.__version__


'2.2.0'

## 1. 举个例子
$$y = 2 \cdot \mathbf{x}^\top \mathbf{x}$$


#### 【方法1】向量版

In [2]:
# Trainable 1-D float32 variable holding [0., 1., 2., 3.].
x = tf.Variable(tf.range(4, dtype=tf.float32))
x

<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([0., 1., 2., 3.], dtype=float32)>

In [3]:
# y = 2 * <x, x> = 2 * (0*0 + 1*1 + 2*2 + 3*3) = 28: twice the inner product
# of x with itself, so y is a scalar.
with tf.GradientTape() as t:
    y = tf.tensordot(x, x, axes=1) * 2
y

<tf.Tensor: shape=(), dtype=float32, numpy=28.0>

In [4]:
# Analytically y = 2 * x^T x, so dy/dx = 4x.
# Only the forward pass is recorded inside the context; the gradient is
# requested once the tape has stopped recording, as in the other cells.
with tf.GradientTape() as t:
    y = 2 * tf.tensordot(x, x, axes=1)
x_grad = t.gradient(y, x)
x_grad

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([ 0.,  4.,  8., 12.], dtype=float32)>

In [5]:
# Element-wise check that the taped gradient matches the analytic result 4x.
tf.equal(x_grad, 4 * x)

<tf.Tensor: shape=(4,), dtype=bool, numpy=array([ True,  True,  True,  True])>

In [6]:
# Another function of x: y = sum(x), whose gradient is 1 for every component.
with tf.GradientTape() as t:
    y = tf.math.reduce_sum(x)
t.gradient(target=y, sources=x)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([1., 1., 1., 1.], dtype=float32)>

In [7]:
# Back-propagating from a vector-valued y. Gradients are normally taken of a
# scalar (a loss is a scalar, not a vector). When the target is not a scalar,
# the tape differentiates the sum of its elements, so this gives the same
# result as y = tf.reduce_sum(x * x), namely 2x.
with tf.GradientTape() as t:
    y = tf.multiply(x, x)
t.gradient(y, x)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 2., 4., 6.], dtype=float32)>

#### 【方法2】

In [8]:
# (4, 1) float32 column vector [[0.], [1.], [2.], [3.]].
# Built directly as a tensor: reshaping a tf.Variable returns a plain Tensor
# anyway, so wrapping the data in a throwaway Variable first only obscures
# the fact that x is not a Variable and must be watched by the tape.
x = tf.reshape(tf.range(4, dtype=tf.float32), shape=(4, 1))
x

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[0.],
       [1.],
       [2.],
       [3.]], dtype=float32)>

In [9]:
# x is a plain Tensor here, so the tape has to be told to watch it.
# y = 2 * x^T x, a (1, 1) matrix.
with tf.GradientTape() as t:
    t.watch(x)
    y = 2 * tf.matmul(x, x, transpose_a=True)

y

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[28.]], dtype=float32)>

In [10]:
# Gradient of y = 2 * x^T x with respect to the watched tensor x (equals 4x).
dy_dx = t.gradient(target=y, sources=x)
dy_dx

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[ 0.],
       [ 4.],
       [ 8.],
       [12.]], dtype=float32)>

## 2. 分离计算（stop_gradient）与持久化梯度带

#### 【方法1】

In [11]:
# Trainable 1-D float32 variable holding [0., 1., 2., 3.].
x = tf.Variable(tf.range(4, dtype=tf.float32))
x

<tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([0., 1., 2., 3.], dtype=float32)>

In [12]:
# Move part of the computation outside the recorded graph.
# The tape is persistent so that it can be queried more than once.
with tf.GradientTape(persistent=True) as t:
    y = tf.multiply(x, x)
    # Treat the value of y as a constant rather than as a function of x.
    u = tf.stop_gradient(y)
    z = tf.multiply(u, x)
x_grad = t.gradient(target=z, sources=x)
x_grad, tf.equal(x_grad, u)

(<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 1., 4., 9.], dtype=float32)>,
 <tf.Tensor: shape=(4,), dtype=bool, numpy=array([ True,  True,  True,  True])>)

In [13]:
# dy/dx for y = x * x, read from the persistent tape `t`. The gradient is
# computed once and reused for the check against the analytic value 2x,
# instead of running the same backward pass twice.
y_grad = t.gradient(y, x)
y_grad, y_grad == 2 * x

(<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 2., 4., 6.], dtype=float32)>,
 <tf.Tensor: shape=(4,), dtype=bool, numpy=array([ True,  True,  True,  True])>)

#### 【方法2】

In [14]:
# (4, 1) float32 column vector [[0.], [1.], [2.], [3.]].
# Built directly as a tensor: reshaping a tf.Variable returns a plain Tensor
# anyway, so wrapping the data in a throwaway Variable first only obscures
# the fact that x is not a Variable and must be watched by the tape.
x = tf.reshape(tf.range(4, dtype=tf.float32), shape=(4, 1))
x

<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[0.],
       [1.],
       [2.],
       [3.]], dtype=float32)>

In [15]:
# A persistent tape allows more than one gradient() call.
with tf.GradientTape(persistent=True) as g:
    g.watch(x)  # x is a plain Tensor, so it must be watched explicitly
    y = x * x
    z = y * y

# Request the gradients after leaving the context: calling gradient() while a
# persistent tape is still recording puts the backward ops on the tape too,
# which is only useful for higher-order derivatives and costs time and memory.
dz_dx = g.gradient(z, x)  # z = x^4  ->  4x^3
dy_dx = g.gradient(y, x)  # y = x^2  ->  2x

dz_dx, dy_dx



(<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
 array([[  0.],
        [  4.],
        [ 32.],
        [108.]], dtype=float32)>,
 <tf.Tensor: shape=(4, 1), dtype=float32, numpy=
 array([[0.],
        [2.],
        [4.],
        [6.]], dtype=float32)>)

## 3. python控制流的梯度计算

In [16]:
def f(x):
    """Piecewise-linear function of ``x`` built with Python control flow.

    ``x`` is doubled until its norm reaches at least 1000. The doubled value
    is returned as is when the sum of its elements is positive, and
    multiplied by 100 otherwise. For a given input the result is ``k * x``
    for some constant ``k`` that depends on the path taken.

    An all-zero input can never reach the threshold, so the doubling loop is
    skipped for it and a zero result is returned instead of looping forever.

    Args:
        x: Scalar or tensor input (anything supporting ``*`` and accepted by
            ``tf.norm`` / ``tf.reduce_sum``).

    Returns:
        The scaled value, with the same shape as ``x``.
    """
    b = x * 2

    # The number of iterations depends on x. The lower bound ends the loop
    # for a zero input, whose norm stays 0 however often it is doubled.
    while 0 < tf.norm(b) < 1000:
        b = b * 2

    # Which branch runs also depends on x.
    if tf.reduce_sum(b) > 0:
        c = b
    else:
        c = 100 * b
    return c

#### 【方法1】

In [17]:
# Scalar random input: the tape records whichever loop count and branch
# f takes for this particular value.
x = tf.Variable(tf.random.normal([]))
with tf.GradientTape() as t:
    c = f(x)
c_grad = t.gradient(target=c, sources=x)
c_grad

<tf.Tensor: shape=(), dtype=float32, numpy=1024.0>

In [18]:
# f is linear in x along the path it took, so the gradient should equal c / x.
# c / x is a rounded floating-point division (and c itself is rounded on the
# `100 * b` branch), so compare with a relative tolerance instead of bit-exact
# equality, which can be spuriously False.
tf.abs(c_grad - c / x) <= 1e-5 * tf.abs(c_grad)

<tf.Tensor: shape=(), dtype=bool, numpy=True>

#### 【方法2】

In [19]:
# Same experiment with a (1, 1) random float32 tensor; it is not a Variable,
# so the tape has to watch it explicitly.
x = tf.random.normal(shape=(1, 1))
with tf.GradientTape() as t:
    t.watch(x)
    c = f(x)

c_grad = t.gradient(target=c, sources=x)
c_grad

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[102400.]], dtype=float32)>

In [20]:
# f is linear in x along the path it took, so the gradient should equal c / x.
# c / x is a rounded floating-point division (and c itself is rounded on the
# `100 * b` branch), so compare with a relative tolerance instead of bit-exact
# equality, which can be spuriously False.
tf.abs(c_grad - c / x) <= 1e-5 * tf.abs(c_grad)

<tf.Tensor: shape=(1, 1), dtype=bool, numpy=array([[ True]])>