In a neural network, we compute the error (loss) during forward propagation and then obtain its gradients during backward propagation. In TensorFlow, we only need to implement the forward pass; the gradients for the backward pass are computed automatically.

# tf.GradientTape

In [1]:
import tensorflow as tf
import numpy as np

In [3]:
# Coefficients of the quadratic y = a*x^2 + b*x + c, held as constant tensors.
a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
# The point at which y is differentiated, stored as a float32 Variable.
x = tf.Variable(initial_value=0.0, dtype=tf.float32, name="x")

In [4]:
# Record the forward computation of y = a*x^2 + b*x + c on the tape so that
# it can be differentiated afterwards.
with tf.GradientTape() as tape:
    y = a * x ** 2 + b * x + c

In [5]:
# Differentiate the recorded y with respect to x (2*a*x + b, i.e. -2 at x = 0).
dy_dx = tape.gradient(target=y, sources=x)

In [6]:
dy_dx

<tf.Tensor: shape=(), dtype=float32, numpy=-2.0>

In [8]:
x.assign(1.0)

<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=1.0>

In [11]:
# x now holds a new value, so the forward pass is recorded again on a fresh tape.
with tf.GradientTape() as tape:
    y = a * x ** 2 + b * x + c
# Kept as the cell's last expression so the notebook displays the gradient.
tape.gradient(target=y, sources=x)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>

With different values of x, the gradients are not the same, as expected. Note that we have to open a new tf.GradientTape for each calculation, because a tape only records the operations executed inside its own `with` block.

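We can also calculate the gradient with respect to constant tensors, such as a, b, and c in the function defined above. Constants are not watched by the tape automatically, so we must register them explicitly with tape.watch.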

In [13]:
with tf.GradientTape() as tape:
    # a, b and c are constants; register each one so the tape tracks it.
    tape.watch(a)
    tape.watch(b)
    tape.watch(c)
    y = a * x ** 2 + b * x + c

In [14]:
tape.gradient(y,[x,a,b,c])

[<tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1.0>]

In [15]:
# Move x to 10, then differentiate y with respect to x and every coefficient.
x.assign(10.0)
with tf.GradientTape() as tape:
    # The constant coefficients have to be watched explicitly.
    tape.watch(a)
    tape.watch(b)
    tape.watch(c)
    y = a * x ** 2 + b * x + c
# Kept as the cell's last expression so the notebook displays the gradients.
tape.gradient(target=y, sources=[x, a, b, c])

[<tf.Tensor: shape=(), dtype=float32, numpy=18.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=100.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=10.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1.0>]

We can nest one tf.GradientTape inside another tf.GradientTape to calculate the second derivative.

In [17]:
# Two nested tapes: the inner tape yields dy/dx, and because that gradient is
# computed inside the outer tape's context, the outer tape can differentiate
# it once more to obtain d2y/dx2.
with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape1:
        y = a * x ** 2 + b * x + c
    dy_dx = tape1.gradient(target=y, sources=x)
tape2.gradient(target=dy_dx, sources=x)

<tf.Tensor: shape=(), dtype=float32, numpy=2.0>

In [18]:
@tf.function
def get_gradient(x):
    """Evaluate y = x^2 - 2x + 1 and its first derivative at ``x``.

    Args:
        x: Value at which to evaluate; it is cast to float32 first.

    Returns:
        A ``(dy_dx, y)`` tuple holding the derivative and the function value.
    """
    point = tf.cast(x, tf.float32)
    coeff_a, coeff_b, coeff_c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)

    with tf.GradientTape() as tape:
        # `point` is a plain tensor rather than a Variable, so it is watched
        # explicitly before being used in the computation.
        tape.watch(point)
        y = coeff_a * tf.pow(point, 2) + coeff_b * point + coeff_c

    return tape.gradient(y, point), y

In [19]:
get_gradient(3)

(<tf.Tensor: shape=(), dtype=float32, numpy=4.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=4.0>)

In [20]:
get_gradient(1)

(<tf.Tensor: shape=(), dtype=float32, numpy=0.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.0>)

In [21]:
get_gradient(2)

(<tf.Tensor: shape=(), dtype=float32, numpy=2.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=1.0>)

Now we use tf.GradientTape to find the minimum of a function.

In [22]:
# Coefficients of the function to minimise: y = a*x^2 + b*x + c.
a, b, c = (tf.constant(value) for value in (1.0, -2.0, 1.0))

In the first version, we use tf.GradientTape to calculate the gradient and update x manually using a learning rate.

In [29]:
x = tf.Variable(0.0, name="x", dtype=tf.float32)
lr = tf.constant(0.01)
# Plain gradient descent: 1000 steps of x <- x - lr * dy/dx.
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = a * tf.pow(x, 2) + b * x + c
    dx = tape.gradient(y, x)
    # NOTE: y was evaluated before this update, so once the loop ends it holds
    # the loss at the previous value of x, not at the final one.
    x.assign_sub(lr * dx)

In [30]:
y

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>

In [31]:
x

<tf.Variable 'x:0' shape=() dtype=float32, numpy=0.99999857>

In the second version, we use tf.GradientTape to calculate the gradient and let an optimizer update x.

In [32]:
x = tf.Variable(0.0, name="x", dtype=tf.float32)
opt = tf.keras.optimizers.SGD(learning_rate=0.01)
# 1000 descent steps: the tape supplies the gradient, the optimizer applies it.
for _ in range(1000):
    with tf.GradientTape() as tape:
        y = a * tf.pow(x, 2) + b * x + c
    dx = tape.gradient(y, x)
    # NOTE: y was evaluated before this update, so once the loop ends it holds
    # the loss at the previous value of x, not at the final one.
    opt.apply_gradients(grads_and_vars=[(dx, x)])

In [33]:
y

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>

In [34]:
x

<tf.Variable 'x:0' shape=() dtype=float32, numpy=0.99999857>

In [35]:
x = tf.Variable(0.0,name = "x",dtype = tf.float32)

def f():
    """Return y = x^2 - 2x + 1 evaluated at the module-level variable ``x``.

    Attention: the function takes no arguments. It is handed to
    ``optimizer.minimize`` as the loss callable and reads ``x`` from the
    enclosing scope.
    """
    coeff_a, coeff_b, coeff_c = (tf.constant(v) for v in (1.0, -2.0, 1.0))
    return coeff_a * tf.pow(x, 2) + coeff_b * x + coeff_c

# Each minimize() call evaluates f, differentiates it with respect to x and
# applies one SGD update; repeat for 1000 steps.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
for _ in range(1000):
    optimizer.minimize(loss=f, var_list=[x])


In [36]:
f()

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>

In [37]:
x

<tf.Variable 'x:0' shape=() dtype=float32, numpy=0.99999857>

In [38]:
x = tf.Variable(0.0,name = "x",dtype = tf.float32)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

@tf.function
def minimizef():
    """Run 1000 SGD steps on the module-level ``x`` and return the final loss.

    Each update is applied through the module-level ``optimizer``.

    Returns:
        The value of y = x^2 - 2x + 1 at the updated ``x``.
    """
    coeff_a, coeff_b, coeff_c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)

    # Iterate over tf.range rather than Python's range inside tf.function.
    for _ in tf.range(1000):
        with tf.GradientTape() as tape:
            loss = coeff_a * tf.pow(x, 2) + coeff_b * x + coeff_c
        grad = tape.gradient(loss, x)
        optimizer.apply_gradients(grads_and_vars=[(grad, x)])

    # Re-evaluate the function so the returned value reflects the last update.
    return coeff_a * tf.pow(x, 2) + coeff_b * x + coeff_c

tf.print(minimizef())
tf.print(x)

0
0.999998569


In [41]:
x = tf.Variable(0.0,name = "x",dtype = tf.float32)
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)   

@tf.function
def fun_to_min():
    """Return y = x^2 - 2x + 1 evaluated at the module-level variable ``x``."""
    coeff_a, coeff_b, coeff_c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
    return coeff_a * tf.pow(x, 2) + coeff_b * x + coeff_c

@tf.function
def train(epoch, f, var):
    """Minimise ``f`` with respect to ``var`` using the module-level optimizer.

    Args:
        epoch: Number of ``optimizer.minimize`` steps to run.
        f: Zero-argument callable that returns the loss.
        var: Variable passed to the optimizer as the only entry of ``var_list``.

    Returns:
        The value of ``f()`` after the last step.
    """
    for _ in tf.range(epoch):
        optimizer.minimize(loss=f, var_list=[var])
    final_loss = f()
    return final_loss


tf.print(train(1000,  fun_to_min, x))
tf.print(x)

0
0.999998569
