In [None]:
import tensorflow as tf

Illustration 1: Simple differentiation

In [None]:
def f(w1, w2):
  """Evaluate f(w1, w2) = 3*w1^2 + 2*w1*w2.

  Only the `*`, `**` and `+` operators are applied to the arguments, so any
  operands that support those operators can be passed in.
  """
  squared_term = 3 * w1 ** 2
  cross_term = 2 * w1 * w2
  return squared_term + cross_term

In [None]:
# Forward-difference estimate of df/d(w1) at the point (w1, w2) = (5, 3)
w1 = 5
w2 = 3
eps = 1e-6  # step size used for the finite difference
f_at_point = f(w1, w2)
f_shifted_w1 = f(w1 + eps, w2)
(f_shifted_w1 - f_at_point) / eps


36.000003007075065

In [None]:
# Forward-difference estimate of df/d(w2) at the same point,
# reusing w1, w2 and eps from the previous cell
f_shifted_w2 = f(w1, w2 + eps)
(f_shifted_w2 - f(w1, w2)) / eps

10.000000003174137

In [None]:
# The same two partial derivatives, obtained with tf.GradientTape()
# instead of a finite-difference estimate
w1 = tf.Variable(5.)
w2 = tf.Variable(3.)
with tf.GradientTape() as tape:
  # w1 and w2 are tf.Variable objects; no explicit tape.watch() call is made here
  z = f(w1, w2)

# One gradient per source, in the order [dz/dw1, dz/dw2]
gradients = tape.gradient(target=z, sources=[w1, w2])

gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

In [None]:
# Repeat the GradientTape computation, this time writing the expression
# 3*w1^2 + 2*w1*w2 inline inside the tape rather than calling f()
w1 = tf.Variable(5.)
w2 = tf.Variable(3.)
with tf.GradientTape() as tape:
  squared_term = 3 * (w1 ** 2)
  cross_term = 2 * (w1 * w2)
  z = squared_term + cross_term

# One gradient per source, in the order [dz/dw1, dz/dw2]
gradients = tape.gradient(target=z, sources=[w1, w2])

gradients

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]

Illustration 2: Simple differentiation with tape.watch on constant tensors

In [None]:
# Create 4 scalar float constants in one step: a = 1, b = 2, c = 3, w = 4
a, b, c, w = (tf.constant(value) for value in (1., 2., 3., 4.))

In [None]:
with tf.GradientTape() as tape:
  # w was created with tf.constant, so it is added to the watch list explicitly
  tape.watch(w)
  # Quadratic in w: y = a*w^2 + b*w + c
  quadratic_term = a * w ** 2
  linear_term = b * w
  y = quadratic_term + linear_term + c


In [None]:
# Ask the tape for dy/dw; a single (non-list) source yields a single tensor
dy_dw = tape.gradient(target=y, sources=w)
print(dy_dw)  # show the derivative

tf.Tensor(10.0, shape=(), dtype=float32)


Illustration 3: Chain Rule

In [None]:
# Input x, followed by one (weight, bias) pair per layer
x = tf.constant(1.)
w1, b1 = tf.constant(2.), tf.constant(1.)
w2, b2 = tf.constant(2.), tf.constant(1.)

In [None]:
# Several gradients are taken from this one recorded computation below,
# so the tape is created with persistent=True.
with tf.GradientTape(persistent=True) as tape:
  # The parameters are tf.constant tensors (not tf.Variable),
  # so each one is registered with the tape by hand
  for parameter in (w1, b1, w2, b2):
    tape.watch(parameter)
  # Two stacked linear layers: the output of the first feeds the second
  y1 = x * w1 + b1
  y2 = y1 * w2 + b2

In [None]:
# Partial derivatives needed to check the chain rule for w1:
# dy2/dy1 and dy2/dw1 come from one call, dy1/dw1 from a second call
dy2_dy1, dy2_dw1 = tape.gradient(target=y2, sources=[y1, w1])
dy1_dw1 = tape.gradient(target=y1, sources=w1)

In [None]:
# Gradients of y2 with respect to the second layer's weight and bias
dy2_dw2, dy2_db2 = tape.gradient(target=y2, sources=[w2, b2])

# Gradients of y1 with respect to the first layer's weight and bias
dy1_dw1, dy1_db1 = tape.gradient(target=y1, sources=[w1, b1])



In [None]:
# Validate the chain rule: (dy2/dy1) * (dy1/dw1) is printed first,
# then dy2/dw1 as returned directly by the tape, for comparison
chain_rule_product = dy2_dy1 * dy1_dw1
print(chain_rule_product)
print(dy2_dw1)

tf.Tensor(2.0, shape=(), dtype=float32)
tf.Tensor(2.0, shape=(), dtype=float32)
