In [8]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Gradient tape

In [9]:
# Scalar variable that the gradient will be taken with respect to.
x = tf.Variable(3.0)

# Operations executed inside the context are recorded on `tape`,
# so their gradients can be computed after the block has finished.
with tf.GradientTape() as tape:
    y = tf.pow(x, 2)

In [10]:
# Differentiate the recorded y with respect to x.
# The tape was created without persistent=True, so it supports a single
# gradient() call; re-run the recording cell before re-running this one.
dy_dx = tape.gradient(target=y, sources=x)

# Last expression of the cell: show the gradient as a plain NumPy value.
dy_dx.numpy()

6.0

In [11]:
# gradient tape works just as well on non-scalar tensors

# Seed the global generator so the randomly initialised weights (and every
# gradient value derived from them) are repeatable from run to run and when
# this cell is re-executed.
tf.random.set_seed(42)

w = tf.Variable(tf.random.normal((3, 2)), name='w')
b = tf.Variable(tf.zeros(2, dtype=tf.float32), name='b')
# Explicit tensor with one row of three features, rather than a nested
# Python list, so the matmul below does not rely on implicit conversion.
x = tf.constant([[1., 2., 3.]])

# persistent=True keeps the recording available so that gradient() can be
# called more than once on this tape.
with tf.GradientTape(persistent=True) as tape:
    y = x @ w + b
    loss = tf.reduce_mean(y**2)

In [12]:
# Passing a list of sources returns one gradient per source, in the same
# order, so the loss gradients for w and b can be unpacked directly.
dl_dw, dl_db = tape.gradient(loss, sources=[w, b])

In [13]:
# The gradient with respect to a source has the same shape as that source:
# print the shape of w, then the shape of its gradient, one per line.
print(w.shape, dl_dw.shape, sep='\n')

(3, 2)
(3, 2)


In [14]:
# Sources may be given as a list or as a dict; here they are keyed by name.
my_vars = dict(w=w, b=b)

# The gradients are looked up below under the same keys as the sources.
grad = tape.gradient(loss, sources=my_vars)

# Last expression of the cell: display the gradient of the loss w.r.t. b.
grad['b']

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.5528994, 3.563752 ], dtype=float32)>

# Gradients of a model

In [15]:
# `trainable_variables` gathers a layer's (or model's) trainable variables
# in one place, which is convenient when differentiating with respect to
# all of them at once.

layer = tf.keras.layers.Dense(units=2, activation='relu')
x = tf.constant([[1., 2., 3.]])

# Record the forward pass through the layer and the resulting scalar loss.
with tf.GradientTape() as tape:
    y = layer(x)
    loss = tf.reduce_mean(tf.pow(y, 2))

# One gradient per trainable variable of the layer.
grad = tape.gradient(loss, sources=layer.trainable_variables)

In [16]:
# `grad` was computed with layer.trainable_variables as its sources, so the
# two sequences line up element by element.
var_grad_pairs = zip(layer.trainable_variables, grad)

# Report each variable's name next to the shape of its gradient.
for var, g in var_grad_pairs:
    print(f'{var.name}, shape: {g.shape}')

dense/kernel:0, shape: (3, 2)
dense/bias:0, shape: (2,)
