In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import sys
import time
import sklearn
from tensorflow import keras

In [2]:
def f(x):
    """Evaluate the quadratic 3*x**2 + 2*x - 1 at ``x``.

    Only arithmetic operators are applied to ``x``, so any operand that
    supports ``*``, ``**``, ``+`` and ``-`` with floats can be passed.
    """
    quadratic_term = 3. * x ** 2
    linear_term = 2. * x
    return quadratic_term + linear_term - 1

def approximate_derivative(f, x, eps = 1e-3):
    """Estimate f'(x) numerically with a central difference.

    Args:
        f: Callable of one argument.
        x: Point at which the derivative is estimated.
        eps: Half-width of the sampling interval around ``x``.

    Returns:
        (f(x + eps) - f(x - eps)) / (2 * eps).
    """
    forward_value = f(x + eps)
    backward_value = f(x - eps)
    interval_width = 2. * eps
    return (forward_value - backward_value) / interval_width

# Sanity check: analytically f'(x) = 6x + 2, so the estimate at x = 2
# should be close to 14.
print(approximate_derivative(f, 2.))


13.99999999999757


In [16]:
def g(x1, x2):
    """Evaluate the two-variable function (x1 + 5) * x2**2."""
    shifted_x1 = x1 + 5.
    squared_x2 = x2 ** 2
    return shifted_x1 * squared_x2

def approximate_gradient(g, x1, x2, eps = 1e-3):
    """Estimate both partial derivatives of a two-argument function.

    Each partial derivative is obtained by holding the other argument fixed
    and delegating to ``approximate_derivative``.

    Args:
        g: Callable of two arguments.
        x1: Value of the first argument.
        x2: Value of the second argument.
        eps: Step forwarded to ``approximate_derivative``.

    Returns:
        Tuple ``(dg/dx1, dg/dx2)`` evaluated at ``(x1, x2)``.
    """
    def along_x1(x):
        # x2 stays fixed; only the first argument varies.
        return g(x, x2)

    def along_x2(x):
        # x1 stays fixed; only the second argument varies.
        return g(x1, x)

    return (
        approximate_derivative(along_x1, x1, eps),
        approximate_derivative(along_x2, x2, eps),
    )

# Sanity check: analytically dg/dx1 = x2**2 and dg/dx2 = 2 * (x1 + 5) * x2,
# so the estimate at (2, 3) should be close to (9, 42).
print(approximate_gradient(g, 2., 3.))

(8.999999999993236, 41.999999999994486)


In [17]:
# A default (non-persistent) tape can serve only one gradient() call, so the
# second call below is expected to raise RuntimeError.
x1, x2 = tf.Variable(2.), tf.Variable(3.)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(z, x1)
print(dz_x1)
try:
    dz_x2 = tape.gradient(z, x2)
except RuntimeError as ex:
    # The exception is the point of this cell: show its message and move on.
    print(ex)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [18]:
# persistent=True lets gradient() be called several times on the same tape.
x1, x2 = tf.Variable(2.), tf.Variable(3.)
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)

dz_x1 = tape.gradient(z, x1)
dz_x2 = tape.gradient(z, x2)
print(dz_x1, dz_x2, sep="\n")

# A persistent tape is not released by gradient(); drop the reference once
# it is no longer needed.
del tape

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(42.0, shape=(), dtype=float32)


In [19]:
# Passing a list of sources to a single gradient() call returns a list with
# one gradient per source, so a non-persistent tape is sufficient.
x1, x2 = tf.Variable(2.), tf.Variable(3.)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)


[<tf.Tensor: id=317, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=323, shape=(), dtype=float32, numpy=42.0>]


In [20]:
# Same computation as with variables, but with constants as inputs: the tape
# does not track them on its own, so both gradients come back as None.
x1, x2 = tf.constant(2.), tf.constant(3.)
with tf.GradientTape() as tape:
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

[None, None]


In [21]:
x1, x2 = tf.constant(2.), tf.constant(3.)
with tf.GradientTape() as tape:
    # Constants cannot be differentiated by default; asking the tape to
    # watch them makes their gradients available.
    tape.watch([x1, x2])
    z = g(x1, x2)

dz_x1x2 = tape.gradient(z, [x1, x2])
print(dz_x1x2)

[<tf.Tensor: id=339, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=345, shape=(), dtype=float32, numpy=42.0>]


In [22]:
x = tf.Variable(5.)
with tf.GradientTape() as tape:
    # Only the forward computations need to be recorded.
    z1 = 3 * x + 5
    z2 = x ** 2

# Request the gradient after leaving the recording context; calling it inside
# the `with` block is only needed when the gradient computation itself has to
# be differentiated (higher-order gradients).
dz_x = tape.gradient([z1, z2], x)

# With a list of targets the result is the sum of the individual gradients:
# dz1/dx + dz2/dx = 3 + 2 * x = 13 at x = 5.
print(dz_x)

tf.Tensor(13.0, shape=(), dtype=float32)


In [24]:
# Second-order derivatives of g via nested tapes: the inner tape produces the
# first-order gradients, and the outer tape records that gradient computation
# so it can be differentiated again.
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)
with tf.GradientTape(persistent = True) as outer_tape:
    with tf.GradientTape(persistent = True) as inner_tape:
        z= g(x1, x2)
    # Must run inside the outer tape's context so the outer tape records it.
    inner_grads = inner_tape.gradient(z, [x1, x2])
# outer_tape.gradient is called once per first-order gradient, which is why
# the outer tape has to be persistent.
outer_grads = [outer_tape.gradient(inner_grad, [x1, x2]) 
               for inner_grad in inner_grads]
# Persistent tapes are not released by gradient(); drop the references.
del inner_tape
del outer_tape

# Rows follow inner_grads (dz/dx1, dz/dx2); columns follow [x1, x2].
# The first entry is None because dz/dx1 = x2 ** 2 does not depend on x1.
print(outer_grads)

[[None, <tf.Tensor: id=414, shape=(), dtype=float32, numpy=6.0>], [<tf.Tensor: id=425, shape=(), dtype=float32, numpy=6.0>, <tf.Tensor: id=423, shape=(), dtype=float32, numpy=14.0>]]


In [27]:
# Plain gradient descent on f, starting from x = 0.
# f'(x) = 6x + 2, so the minimum is at x = -1/3.
learning_rate = 0.1
x = tf.Variable(0.)

for _ in range(50):
    with tf.GradientTape() as tape:
        # Record only the forward pass.
        z = f(x)
    # Compute the gradient and update the variable outside the recording
    # context, so the update step is never part of what the tape tracks.
    dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dx)
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>


In [28]:
# Same minimisation of f as the manual loop, but the update step is delegated
# to a Keras SGD optimizer.
learning_rate = 0.1
x = tf.Variable(0.)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# recent Keras releases no longer accept.
optimizer = keras.optimizers.SGD(learning_rate = learning_rate)

for _ in range(50):
    with tf.GradientTape() as tape:
        # Record only the forward pass.
        z = f(x)
    # Gradient computation and the parameter update happen outside the
    # recording context.
    dx = tape.gradient(z, x)
    # apply_gradients takes (gradient, variable) pairs.
    optimizer.apply_gradients([(dx, x)])
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
