<a href="https://colab.research.google.com/github/nakib103/tensorflow/blob/master/auto_differanciation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
import tensorflow as tf
import numpy as np

In [3]:
# Tensorflow keep track of operations order during the forward pass. During the backward pass, 
# TensorFlow traverses this list of operations in reverse order to compute gradients.

w = tf.Variable([2., 3.], name='weight')
b = tf.Variable([5.], name='bias')
x = [[1., 2.],
     [5., 8.]]

with tf.GradientTape(persistent=True) as tape:
  y = x * w + b
  loss = tf.reduce_mean(y)

[dw, db] = tape.gradient(loss, [w, b])

# the shape of the gradient is the shape of the source 
print(dw, db)

# the gradient gives same output format as provided in source
my_vars = {
    'w' : tf.Variable([2., 3.], name='w'),
    'b' : tf.Variable([5.], name='b')
}

grad = tape.gradient(loss, my_vars)
print(grad['w'], grad['b'])

tf.Tensor([1.5 2.5], shape=(2,), dtype=float32) tf.Tensor([1.], shape=(1,), dtype=float32)
None None


In [7]:
# using with a model

layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 4., 2.]])

with tf.GradientTape() as tape:
  y = layer(x)
  loss = tf.reduce_mean(y**2)

grad = tape.gradient(loss, layer.trainable_variables)
print(grad)

[<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[ 3.6528475,  6.37145  ],
       [14.61139  , 25.4858   ],
       [ 7.305695 , 12.7429   ]], dtype=float32)>, <tf.Tensor: shape=(2,), dtype=float32, numpy=array([3.6528475, 6.37145  ], dtype=float32)>]


In [11]:
# what is watched

# A trainable variable
x0 = tf.Variable(3.0, name='x0')
# Not trainable
x1 = tf.Variable(3.0, name='x1', trainable=False)
# Not a Variable: A variable + tensor returns a tensor.
x2 = tf.Variable(2.0, name='x2') + 1.0
# Not a variable
x3 = tf.constant(3.0, name='x3')

with tf.GradientTape(watch_accessed_variables=False) as tape:
  tape.watch(x2)
  y = (x0**2) + (x1**2) + (x2**2)

watched = [var.name for var in tape.watched_variables()]
print(watched)

grad = tape.gradient(y, [x0, x1, x2, x3])

for g in grad:
  print(g)

[]
None
None
tf.Tensor(6.0, shape=(), dtype=float32)
None


In [20]:
# gradient for intermediate representation

x = tf.Variable([2., 5.])
with tf.GradientTape(persistent=True) as tape:
  y = x + 1
  z = y**3

print(tape.gradient(z, y))
print(tape.gradient(y, x))
del tape

tf.Tensor([ 27. 108.], shape=(2,), dtype=float32)
tf.Tensor([1. 1.], shape=(2,), dtype=float32)


In [24]:
# if target of the gradient is not scalar the sum of gradient is shown

x = tf.Variable(2.)
with tf.GradientTape() as tape:
  y = x * [1., 5.]

print(tape.gradient(y, x))

with tf.GradientTape() as tape:
  y0 = x ** 2
  y1 = 1 / x

print(tape.gradient([y0, y1], x))

# for single gradient use Jacobians

tf.Tensor(6.0, shape=(), dtype=float32)
tf.Tensor(3.75, shape=(), dtype=float32)


In [33]:
# control flow -- check
# Getting a None gradient

# when target and source is not connected
x = tf.Variable([3., 5.])
y = tf.Variable(5.)

with tf.GradientTape() as tape:
  z = y * y

print(tape.gradient(z, x))

# for tensors
# did calculation outside of tensorflow
with tf.GradientTape() as tape:
  y = np.mean(x ** 2)
  z = tf.reduce_mean(y)

print(tape.gradient(z, x))

# the target or source tensor are not type float32
x = tf.cast(x, tf.int32)

with tf.GradientTape() as tape:
  y = x ** 2

print(tape.gradient(y, x))

# gradient from stateful object
x0 = tf.Variable(3.0)
x1 = tf.Variable(0.0)

with tf.GradientTape() as tape:
  # x1 = x1 + x0        -- this works
  x1.assign_add(x0)     # -- this does not work
  # The tape starts recording from x1.
  y = x1**2   # y = (x1 + x0)**2

# This doesn't work.
print(tape.gradient(y, x0))   #dy/dx0 = 2*(x1 + x2)

# Some operation registered as non-differeantiable or no gradient

None
None
None
tf.Tensor(6.0, shape=(), dtype=float32)


In [34]:
# to receive 0 instead of None
x = tf.Variable([2., 2.])
y = tf.Variable(3.)

with tf.GradientTape() as tape:
  z = y**2
print(tape.gradient(z, x, unconnected_gradients=tf.UnconnectedGradients.ZERO))

tf.Tensor([0. 0.], shape=(2,), dtype=float32)
