In [1]:
### Automatic differentiation and gradient tape

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
# Eager execution is already the default in TensorFlow 2.x, where the
# top-level `tf.enable_eager_execution` alias no longer exists. Only switch it
# on explicitly when running on a 1.x release that still starts in graph mode.
if not tf.executing_eagerly():
    tf.compat.v1.enable_eager_execution()

W0902 15:13:49.946985 140735803462528 __init__.py:690] 

  TensorFlow's `tf-nightly` package will soon be updated to TensorFlow 2.0.

  Please upgrade your code to TensorFlow 2.0:
    * https://www.tensorflow.org/beta/guide/migration_guide

  Or install the latest stable TensorFlow 1.X release:
    * `pip install -U "tensorflow==1.*"`

  Otherwise your code may be broken by the change.

  


In [3]:
# TF provides the GradientTape API, which lets you automatically compute the
# gradient of a computation with respect to its input variables.

# TF records all the computations that happened inside a 'Gradient tape' context
# and computes the gradients by applying the chain rule (reverse-mode differentiation).

In [10]:
# Example 1: gradient of z = (sum of x)^2 with respect to the input x

x = tf.ones((2, 2))

# Every operation executed inside the `with` block is recorded on the tape.
with tf.GradientTape() as t:
    # x is a plain tensor, so it must be watched explicitly
    # (tf.Variable objects are traced automatically).
    t.watch(x)
    y = tf.reduce_sum(x)
    z = tf.multiply(y, y)

print('x: ', x)
print('y: ', y)
print('z: ', z)
print('-'*30)

# dz/dx = 2 * sum(x) for every element of x, i.e. 2 * 4 = 8 here.
dz_dx = t.gradient(z, x)
print('dz_dx: ', dz_dx)
print('-'*30)

# Make sure that every entry of the derivative is correct.
assert (dz_dx.numpy() == 8.0).all()
    

x:  tf.Tensor(
[[1. 1.]
 [1. 1.]], shape=(2, 2), dtype=float32)
y:  tf.Tensor(4.0, shape=(), dtype=float32)
z:  tf.Tensor(16.0, shape=(), dtype=float32)
------------------------------
dz_dx:  tf.Tensor(
[[8. 8.]
 [8. 8.]], shape=(2, 2), dtype=float32)
------------------------------


RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.

In [12]:
# Example 2: gradient with respect to an intermediate value

x = tf.ones((2, 2))

# Record the same computation as in Example 1 on a fresh tape.
with tf.GradientTape() as t:
    # x is a plain tensor, so it must be watched explicitly
    # (tf.Variable objects are traced automatically).
    t.watch(x)
    y = tf.reduce_sum(x)
    z = tf.multiply(y, y)

# The tape can also differentiate with respect to the intermediate value y:
# z = y^2, so dz/dy = 2 * y = 2 * 4 = 8.
dz_dy = t.gradient(z, y)
print('dz_dy: ', dz_dy)
assert dz_dy.numpy() == 8.0

dz_dy:  tf.Tensor(8.0, shape=(), dtype=float32)


In [16]:
# A GradientTape releases the resources it holds as soon as its gradient()
# method has been called once. To take several gradients of the same
# computation, create the tape with persistent=True, which allows gradient()
# to be called multiple times.

# Example 3: several gradients from one persistent tape

x = tf.constant(3.0)

with tf.GradientTape(persistent=True) as t:
    # x is a constant tensor, so it must be watched explicitly
    # (tf.Variable objects are traced automatically).
    t.watch(x)
    y = x * x
    z = y * y

# z = y^2 -> dz/dy = 2 * y   = 18  (y = 9)
# z = x^4 -> dz/dx = 4 * x^3 = 108 (x = 3)
dz_dy = t.gradient(z, y)
dz_dx = t.gradient(z, x)
print('dz_dy: ', dz_dy)
print('dz_dx: ', dz_dx)

# A persistent tape keeps its resources until it is garbage collected,
# so drop the reference once the gradients have been computed.
del t

dz_dy:  tf.Tensor(18.0, shape=(), dtype=float32)
dz_dx:  tf.Tensor(108.0, shape=(), dtype=float32)


In [20]:
# Example 4: Combine GradientTape with a plain Python function (loops and conditionals)

def f(x, y):
    ''' Multiply 1.0 by x once for every index i in range(y) with 1 < i < 5.

    The result is therefore x raised to the number of such indices (at most 3).
    When no index qualifies (y <= 2) the plain Python float 1.0 is returned
    unchanged; otherwise the result is whatever tf.multiply produces.
    '''
    result = 1.0
    # Only the indices 2, 3 and 4 pass the original `1 < i < 5` filter,
    # so iterate over them directly, capped by y.
    for _ in range(2, min(y, 5)):
        result = tf.multiply(result, x)
    return result

def compute_gradient(x, y):
    ''' Return the gradient of f(x, y) with respect to x.

    The call to f runs inside a fresh GradientTape that explicitly watches x,
    and the tape is then asked for d f(x, y) / d x.
    '''
    with tf.GradientTape() as tape:
        tape.watch(x)
        result = f(x, y)
    gradient = tape.gradient(result, x)
    return gradient

x = tf.convert_to_tensor(2.0)

# With x = 2:
#   y = 6 or 5 -> f multiplies by x three times: x^3, gradient 3 * x^2 = 12
#   y = 4      -> f multiplies by x twice:       x^2, gradient 2 * x   = 4
for steps, expected in ((6, 12.0), (5, 12.0), (4, 4.0)):
    assert compute_gradient(x, steps).numpy() == expected

In [25]:
# Higher-order gradients can be computed by nesting GradientTape contexts.

x = tf.Variable(1.0) # a Variable is watched automatically, no t.watch() needed
print('x: ', x)
print('-'*30)

with tf.GradientTape() as t:
    with tf.GradientTape() as t2:
        y = x * x * x
    # The first derivative is taken while the outer tape is still active,
    # so the outer tape can differentiate it once more afterwards.
    dy_dx = t2.gradient(y, x)

# y = x^3 -> dy/dx = 3 * x^2 = 3 and d2y/dx2 = 6 * x = 6 at x = 1
dy2_dx2 = t.gradient(dy_dx, x)

print('dy_dx: ', dy_dx)
print('dy2_dx2: ', dy2_dx2)
    

x:  <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=1.0>
------------------------------
dy_dx:  tf.Tensor(3.0, shape=(), dtype=float32)
dy2_dx2:  tf.Tensor(6.0, shape=(), dtype=float32)
