In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression in a cell, not only the last one.
# Later cells rely on this to display a bare expression placed mid-cell.
InteractiveShell.ast_node_interactivity = 'all'

In [35]:
# Intro to tf.GradientTape: by default the tape tracks tf.Variable inputs, not plain tf.Tensor ones.
# tape.gradient(target, sources) accepts the sources as a single variable, a list, a dict, ...
# and returns the gradients in the same structure.

# scalar variable input
scalar = tf.Variable(3.0)

# rank-2 weight matrix (3 features x 2 outputs) and rank-1 bias vector
w = tf.Variable(tf.random.normal((3,2)), dtype = tf.float32, name='w')
b = tf.Variable(tf.zeros(2, dtype=tf.float32), name='b')
x = [[1,2,3]]  # one sample with three features; y1 below comes out as float32

# persistent=True because gradient() is called several times on this tape below.
with tf.GradientTape(persistent=True) as tape:
    y = scalar**2
    y1 = x@w+b
    loss = tf.reduce_mean(y1**2)

print(y1.dtype)
print("dy/dscalar: ",tape.gradient(y,scalar).numpy())

# d(loss)/dw and d(loss)/db, requested once as a list and once as a dictionary
lists = tape.gradient(loss,[w,b])
dictionary = tape.gradient(loss, {'w':w, 'b':b})
del tape  # drop the reference so the persistent tape's resources can be released

# Unpack the list result so the shape checks below do not depend on names
# left over from an earlier kernel session.
dy_dw, dy_db = lists

print(w.shape)
# A gradient always has the shape of the variable it was taken with respect to.
assert dy_dw.shape == w.shape, f"{w.shape} not compatible with dw shape {dy_dw.shape}"
assert dy_db.shape == b.shape, f"{b.shape} not compatible with db shape {dy_db.shape}"
print("dictionary ", dictionary)
print("lists: ",lists)

<dtype: 'float32'>
dy/dscalar:  6.0
(3, 2)
dictionary  {'w': <tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[  1.5121956,  -4.5140023],
       [  3.0243912,  -9.028005 ],
       [  4.5365868, -13.542007 ]], dtype=float32)>, 'b': <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 1.5121956, -4.5140023], dtype=float32)>}
lists:  [<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[  1.5121956,  -4.5140023],
       [  3.0243912,  -9.028005 ],
       [  4.5365868, -13.542007 ]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 1.5121956, -4.5140023], dtype=float32)>]


In [54]:
# Gradients with respect to a model
# Usually we want the gradient of a loss with respect to the model's trainable variables.
# Those tf.Variable objects are commonly collected in a tf.Module (or a derived class such as
# layers.Layer or keras.Model) and can be accessed through Module.trainable_variables.

# One layer with 2 units fed a single sample of three features,
# so each unit has three weights (w1, w2, w3) plus a bias.
layer = tf.keras.layers.Dense(units=2, activation='relu')
x = tf.constant([[1, 2, 3]], dtype=tf.float32)

# persistent=True because gradient() is called twice on this tape below.
with tf.GradientTape(persistent=True) as tape0:
    y = layer(x)
    loss = tf.reduce_mean(y**2)

grad = tape0.gradient(loss, layer.trainable_variables)
# Gradient of the loss with respect to the layer output itself.
print(tape0.gradient(loss, y))

# Bare expression: shown as rich output, between the print above and the prints below.
grad
print(y)
print("layer.trainable_variables: ", layer.trainable_variables)
# Each gradient has the same shape as the variable it belongs to.
for variable, gradient in zip(layer.trainable_variables, grad):
    print(f'{variable.name}, shape: {gradient.shape}')

tf.Tensor([[3.348121 0.      ]], shape=(1, 2), dtype=float32)


[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
 array([[ 3.348121,  0.      ],
        [ 6.696242,  0.      ],
        [10.044363,  0.      ]], dtype=float32)>,
 <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.348121, 0.      ], dtype=float32)>]

tf.Tensor([[3.348121 0.      ]], shape=(1, 2), dtype=float32)
layer.trainable_variables:  [<tf.Variable 'dense_16/kernel:0' shape=(3, 2) dtype=float32, numpy=
array([[-0.09771031,  0.4348526 ],
       [ 0.72272885, -0.6991186 ],
       [ 0.6667912 , -0.8456247 ]], dtype=float32)>, <tf.Variable 'dense_16/bias:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>]
dense_16/kernel:0, shape: (3, 2)
dense_16/bias:0, shape: (2,)


In [48]:
# Controlling what the tape watches.
# By default the tape only watches tf.Variable(value, trainable=True).
# An expression such as tf.Variable + tf.Tensor yields a plain tf.Tensor, which is not watched
# automatically; tape.watch(tensor) forces the tape to track it.

x = tf.constant(3.0)  # a tf.Tensor, not a tf.Variable
with tf.GradientTape() as tape:
    tape.watch(x)  # without this call the gradient below would be None
    y = x**2

dy_dx = tape.gradient(y, x)
print(dy_dx)

# Further customization: tf.GradientTape(persistent=True, watch_accessed_variables=False)

tf.Tensor(6.0, shape=(), dtype=float32)


In [64]:
# Gradients of multiple targets

x = tf.Variable(2.0)
x0 = tf.linspace(-10.0, 10.0, 200+1)

with tf.GradientTape() as tape:
    y0 = x**2
    y1 = 1 / x
    y = x * [3., 4.]
    # x0 is a plain tensor, so it has to be watched explicitly to be recorded.
    # It always holds 201 points, so no emptiness guard is needed.
    # No gradient of ys is requested in this cell.
    tape.watch(x0)
    ys = tf.nn.sigmoid(x0)

# Passing several targets at once returns the SUM of their gradients:
# dy0/dx + dy1/dx + dy/dx = 2*x - 1/x**2 + (3 + 4) = 4 - 0.25 + 7 = 10.75 at x = 2
print(tape.gradient({'y0': y0, 'y1': y1, 'y': y}, x).numpy())



10.75


In [None]:
# Hacks / gotchas:
# x = tf.Variable(...)
# x + 1 returns a tf.Tensor, which is NOT watched by default
# x.assign_add(1) updates x in place, so x stays a tf.Variable
# integer and string dtypes are not differentiable -> the gradient can come back as None
# if you want zeros instead of None -> tape.gradient(z, x, unconnected_gradients=tf.UnconnectedGradients.ZERO)
