In [3]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [11]:
# Record the forward computation y = x**2 on a gradient tape so that it can be
# differentiated with respect to the variable x in a later cell.
x = tf.Variable(4.0)
with tf.GradientTape() as tape:
    y = x**2

In [12]:
# Forward value recorded under the tape: x**2 evaluated at x = 4.0.
y

<tf.Tensor: shape=(), dtype=float32, numpy=16.0>

In [13]:
# Differentiate y with respect to x using the recorded tape: d(x**2)/dx = 2x.
dy_dx = tape.gradient(target=y, sources=x)

In [14]:
# Gradient value: 2 * x evaluated at x = 4.0.
dy_dx

<tf.Tensor: shape=(), dtype=float32, numpy=8.0>

In [16]:
# Initial (4, 2) weight matrix drawn from a standard normal distribution.
# A stateless sampler with an explicit seed is used so that a fresh kernel, or
# a re-run of this cell, produces the same `w` (on the same TensorFlow version
# and hardware). The original unseeded draw made every downstream gradient
# value change from run to run; outputs stored from that earlier draw will be
# replaced the next time the notebook is executed.
w = tf.Variable(tf.random.stateless_normal((4, 2), seed=(42, 0)))

In [18]:
# Bias vector with two float32 entries, initialised to ones.
b = tf.Variable(tf.ones(shape=(2,), dtype=tf.float32))

In [19]:
# A single input row with four features (shape (1, 4)); x is rebound here and
# replaces the scalar variable used in the first example.
x = tf.Variable([[10., 20., 30., 40.]])

In [21]:
# Forward pass of a linear model (x @ w + b) followed by a mean-of-squares loss.
# persistent=True keeps the tape usable for more than one gradient() call;
# later cells call it once with `loss` and once with `y` as the target.
with tf.GradientTape(persistent=True) as tape:
    y = tf.add(x @ w, b)
    loss = tf.reduce_mean(tf.pow(y, 2))

In [22]:
# Gradients of the scalar loss with respect to the weights and the bias; the
# returned list is unpacked in the same order as the sources [w, b].
dl_dw, dl_db = tape.gradient(loss, [w, b])

In [23]:
# d(loss)/dw: one entry per weight, so the shape is (4, 2).
dl_dw

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[ -661.6483 ,   -80.44242],
       [-1323.2966 ,  -160.88484],
       [-1984.945  ,  -241.32726],
       [-2646.5933 ,  -321.76968]], dtype=float32)>

In [24]:
# d(loss)/db: one entry per bias element, so the shape is (2,).
dl_db

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([-66.16483 ,  -8.044242], dtype=float32)>

In [25]:
# Second gradient() call on the persistent tape, this time with the non-scalar
# y (shape (1, 2)) as the target. The recorded result is the gradient of the
# sum of y's elements: every column of the w-gradient equals x's row, and the
# b-gradient is all ones.
tape.gradient(y, [w, b])

[<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
 array([[10., 10.],
        [20., 20.],
        [30., 30.],
        [40., 40.]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([1., 1.], dtype=float32)>]

In [28]:
# A Keras Dense layer with two ReLU units. No initializers are passed, so the
# layer falls back on its defaults.
layer = tf.keras.layers.Dense(2, activation='relu')
# x is rebound to a constant (1, 4) input row for this example.
x = tf.constant([[10., 20., 30., 40.]])

In [30]:
# Run the layer under a fresh (non-persistent) tape; the loss is the sum of the
# squared layer outputs.
with tf.GradientTape() as tape:
    y = layer(x)
    loss = tf.reduce_sum(y**2)

In [31]:
# Gradients of the loss with respect to every trainable variable of the layer.
# The recorded result holds a (4, 2) entry followed by a (2,) entry.
grad = tape.gradient(loss, layer.trainable_variables)

In [32]:
# NOTE(review): the recorded gradients are all zero. With loss = sum(y**2) and
# a non-zero input, that is consistent with both ReLU outputs being 0 for the
# weights this run happened to start from; a different initialisation may give
# non-zero values. Confirm by re-running.
grad

[<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
 array([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([0., 0.], dtype=float32)>]

In [43]:
# Non-trainable variables and constants
# x1 is an ordinary variable; x2 is explicitly marked as non-trainable.
x1 = tf.Variable(4.0)
x2 = tf.Variable(5.0, trainable=False)

In [41]:
# x3 is computed eagerly here, outside any GradientTape. The result is a plain
# tf.Tensor (see the displayed value), not a tf.Variable.
x3 = tf.add(x1, x2)
x3

<tf.Tensor: shape=(), dtype=float32, numpy=9.0>

In [42]:
# x4 is a constant tensor.
x4 = tf.constant(5.0)
x4

<tf.Tensor: shape=(), dtype=float32, numpy=5.0>

In [36]:
# Default tape with no explicit watch() calls; y combines all four values.
with tf.GradientTape() as tape:
    y = x1**2 + x2**2 + x3**2 + x4**2

In [37]:
# Request the gradient with respect to each of the four inputs; sources the
# tape did not track come back as None.
grad = tape.gradient(y, [x1, x2, x3, x4])

In [38]:
# Only x1 gets a gradient. x2 is a non-trainable variable, x3 is a plain tensor
# that was computed outside the tape (this holds regardless of x2's trainable
# flag), and x4 is a constant; none of these are watched automatically.
grad

[<tf.Tensor: shape=(), dtype=float32, numpy=8.0>, None, None, None]

In [39]:
# Same computation as before, but x2 is watched explicitly so the tape tracks
# it even though it is non-trainable. x3 and x4 are still not watched.
with tf.GradientTape() as tape:
    tape.watch(x2)  # Force the tape to watch
    y = x1**2 + x2**2 + x3**2 + x4**2

In [40]:
# x2 now receives a gradient (2 * x2); x3 and x4 remain None.
grad = tape.gradient(y, [x1, x2, x3, x4])
grad

[<tf.Tensor: shape=(), dtype=float32, numpy=8.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>,
 None,
 None]

In [44]:
# Turn off automatic watching of accessed variables and opt in to x1 only.
with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(x1)
    y = x1**2 + x2**2
    # Will only track x1, not x2

In [45]:
# Gradients with respect to the watched x1 and the unwatched x2.
grad = tape.gradient(y, [x1, x2])

In [46]:
# x1 has a gradient (2 * x1); x2 was never watched, so its entry is None.
grad

[<tf.Tensor: shape=(), dtype=float32, numpy=8.0>, None]

In [47]:
# Gradients handle branches and control flow
# x selects which branch is taken below; x1 and x2 are rebound to new
# variables for this section.
x = tf.constant(1.0)
x1 = tf.Variable(5.0)
x2 = tf.Variable(6.0)

In [48]:
# The Python `if` picks one branch while the tape is recording, so y is built
# from x1 when x > 0 and from x2 otherwise.
# NOTE(review): x is watched here, but the following cell only requests
# gradients with respect to x1 and x2.
with tf.GradientTape() as tape:
    tape.watch(x)
    if x > 0:
        y = x1**2
    else:
        y = x2**2

In [49]:
# With x = 1.0 the first branch ran, so only x1 contributes to y (2 * x1).
grad = tape.gradient(y, [x1, x2])
grad  # Knows that x1 is used, x2 is not

[<tf.Tensor: shape=(), dtype=float32, numpy=10.0>, None]

In [51]:
# Repeat the branch example with a negative x so the `else` branch is taken
# and y is built from x2 instead of x1.
# NOTE(review): x is watched here, but the following cell only requests
# gradients with respect to x1 and x2.
x = tf.constant(-1.0)
with tf.GradientTape() as tape:
    tape.watch(x)
    if x > 0:
        y = x1**2
    else:
        y = x2**2

In [52]:
# With x = -1.0 the else branch ran, so only x2 contributes to y (2 * x2).
grad = tape.gradient(y, [x1, x2])
grad  # Knows that x2 is used, x1 is not

[None, <tf.Tensor: shape=(), dtype=float32, numpy=12.0>]