In [0]:
# !pip install tensorflow==2.0.0beta1

In [1]:
import numpy as np
import tensorflow as tf

# Show the TensorFlow version in use; the outputs recorded in this notebook come from 2.0.0-beta1.
print(tf.__version__)

2.0.0-beta1


## Gradient tapes

In [2]:
# Differentiate z = (sum of x)^2 with respect to the input tensor x.
x = tf.ones((2, 2))

with tf.GradientTape() as t:
    # x is a plain tensor rather than a Variable, so it is watched explicitly.
    t.watch(x)
    y = tf.reduce_sum(x)
    z = y * y

# dz/dx = 2 * y for every element of x.
dz_dx = t.gradient(z, x)
print(dz_dx.numpy())

[[8. 8.]
 [8. 8.]]


In [3]:
# Gradient with respect to an intermediate value: y is produced inside the
# tape's context, so dz/dy can be requested directly.
x = tf.ones((2, 2))

with tf.GradientTape() as t:
    t.watch(x)
    y = tf.reduce_sum(x)
    z = y * y

# dz/dy = 2 * y, a scalar.
dz_dy = t.gradient(z, y)
print(dz_dy.numpy())

8.0


In [4]:
# Demonstrates the failure mode of a non-persistent tape:
# once .gradient() has been called, the GradientTape is released, so a second
# call raises RuntimeError. The exception is the point of this cell, so it is
# caught and printed instead of aborting a "Restart & Run All".
x = tf.ones(shape=(2, 2))

with tf.GradientTape() as t:
    t.watch(x)
    y = tf.reduce_sum(x)
    z = tf.multiply(y, y)

dz_dx = t.gradient(z, x)  # first call succeeds and releases the tape

try:
    dz_dy = t.gradient(z, y)  # second call on the same tape is rejected
except RuntimeError as err:
    print("RuntimeError:", err)

RuntimeError: ignored

In [0]:
# To call .gradient() more than once, create the tape with persistent=True.
# The tape's resources then have to be released manually.
x = tf.ones((2, 2))

with tf.GradientTape(persistent=True) as t:
    t.watch(x)
    y = tf.reduce_sum(x)
    z = y * y

dz_dx = t.gradient(z, x)
dz_dy = t.gradient(z, y)

# Manual release: drop the reference to the persistent tape.
del t

## Higher-order gradients

In [6]:
# Second derivative of y = x^2 via nested tapes: the outer tape records the
# gradient computation performed by the inner one.
x = tf.Variable(2.0)

with tf.GradientTape() as t:
    with tf.GradientTape() as t2:
        y = tf.multiply(x, x)

    # Computed inside the outer tape's context so that it is itself differentiable.
    dy_dx = t2.gradient(y, x)
d2y_d2x = t.gradient(dy_dx, x)

# First derivative, then second derivative, one per line.
print(dy_dx, d2y_d2x, sep="\n")

tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)


## .watchについて

cf. tf.GradientTapeのドキュメント（argsのwatch_accessed_variablesの部分など）

デフォルトではtrainableなVariableを自動でwatchしている。細かいコントロールをしたい時は`watch_accessed_variables=False`とし、勾配を計算したい対象だけを`t.watch(...)`で明示的に指定する（watchしていない対象に対する勾配は`None`になる）。

In [0]:
# IPython help syntax (not plain Python): the trailing `?` shows the docstring of tf.GradientTape.
tf.GradientTape?

In [7]:
# Default tape: no explicit watch() calls are made for the three Variables,
# and a gradient comes back for each of them.
a, b, c = tf.Variable(2.0), tf.Variable(3.0), tf.Variable(1.0)

with tf.GradientTape() as t:
    y = a ** 2 + b * c

dy_da, dy_db, dy_dc = t.gradient(y, [a, b, c])

# One gradient per line, in the order a, b, c.
print(dy_da, dy_db, dy_dc, sep="\n")

W0721 14:24:13.977720 139852788111232 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1205: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


tf.Tensor(4.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(3.0, shape=(), dtype=float32)


In [8]:
# Automatic watching disabled: only b and c are registered with the tape,
# so the gradient with respect to a comes back as None.
a, b, c = tf.Variable(2.0), tf.Variable(3.0), tf.Variable(1.0)

with tf.GradientTape(watch_accessed_variables=False) as t:
    t.watch([b, c])
    y = a ** 2 + b * c

dy_da, dy_db, dy_dc = t.gradient(y, [a, b, c])

# One gradient per line, in the order a, b, c.
print(dy_da, dy_db, dy_dc, sep="\n")

None
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(3.0, shape=(), dtype=float32)
