# autodiff in tensorflow

In [2]:
"""
tensorflow中使用tf.GradientTape开启上下文管理器，并对Operations进行'监控'，用于自动微分。
Trainable variables：会被自动监控;
Tensors：需要手动监控，通过调用该上下文管理器的`watch` method;
"""
import tensorflow as tf
# normal Tensor
x = tf.constant(3.0)
with tf.GradientTape() as g:
    g.watch(x)
    y = x * x
    dy_dx = g.gradient(y, x)
    print(f"y=x^2, dy/dx=2x={dy_dx}")

y=x^2, dy/dx=2x=6.0


In [5]:
w = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z = w ** 3 
    dz_dw = tape.gradient(z, w)
    print(f"dz/dw={dz_dw}")

dz/dw=75.0


In [8]:
 """支持高阶导"""
x = tf.Variable(5.0)
with tf.GradientTape() as g:
    with tf.GradientTape() as gg:
        gg.watch(x)
        y = x * x
        dy_dx = gg.gradient(y, x)  # dy_dx = 2 * x
        print(f"dy/dx=2x={dy_dx}")
    d2y_dx2 = g.gradient(dy_dx, x)  # d2y_dx2 = 2
    print(f"d2y/dx2={d2y_dx2}")

dy/dx=2x=10.0
d2y/dx2=2.0


# gradient in nn.layers

In [31]:
import tensorflow.keras.layers as layers
"""dense layer"""
x = tf.Variable([[1.0, 1.0], [2.0, 2.0]])
w = tf.Variable([[2.0,], [3.0,]])
with tf.GradientTape(persistent = True) as tape:
    y = tf.matmul(x, w)
    print(y)
    dy_dw = tape.gradient(y, w)
    print(f"dy/dw={dy_dw}")
    dy_dx = tape.gradient(y, x)
    print(f"dy/dx={dy_dx}")
del tape

tf.Tensor(
[[ 5.]
 [10.]], shape=(2, 1), dtype=float32)
dy/dw=w^T=[[3.]
 [3.]]
dy/dx=[[2. 3.]
 [2. 3.]]


In [37]:
x = tf.constant([[1., 2., 3.],[4., 5., 6.],[7., 8., 9.]])
x = tf.reshape(x, [1, 3, 3, 1])
max_pool_2d = layers.MaxPooling2D(pool_size=(2, 2),strides=(1, 1), padding='valid')
max_pool_2d.build(input_shape=(1, 3, 3, 1))
with tf.GradientTape() as tape:
    tape.watch(max_pool_2d.variables)
    y = max_pool_2d(x)
    print(x)
    dy_dx = tape.gradient(y, max_pool_2d.variables)
    print(f"dy/dw={dy_dw}")

tf.Tensor(
[[[[1.]
   [2.]
   [3.]]

  [[4.]
   [5.]
   [6.]]

  [[7.]
   [8.]
   [9.]]]], shape=(1, 3, 3, 1), dtype=float32)
dy/dw=[[3.]
 [3.]]
