# The Gradient Tape

This notebook provides an introduction to how automatic differentiation is achieved in 
Tensorflow. 

In [1]:
import tensorflow as tf

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

2024-11-14 14:37:08.638728: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731591428.712428  545418 cuda_dnn.cc:8498] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731591428.731300  545418 cuda_blas.cc:1410] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-14 14:37:08.870798: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Create a tensorflow variable. All operations with TF-variables can be tracked
x = tf.Variable(4.0)

# Tensorflow uses the so-called gradient tape to record operations
with tf.GradientTape() as tape:
    y = x**2
# Find the gradient of y with respect to x. dy_dx = 2x, x=4 => dy_dx = 8
dy_dx = tape.gradient(y, x)
dy_dx.numpy()

np.float32(8.0)

In [None]:
# This block demonstrates how gradient descent (optimizers) use the gradient tape to 
# update model weights during training. 

# Define a simple model with a single neuron
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])

# Define an optimizer i.e. the gradient descent algorithm to use. In this case we are using 
# stochastic gradient descent
optimizer = tf.optimizers.SGD(learning_rate=0.01)

# Sample data
x = tf.constant([[2.0]])  # Input
y_true = tf.constant([[5.0]])  # Target output


# Forward pass and backpropagation with GradientTape
with tf.GradientTape() as tape:
    y_pred = model(x)  # Model prediction
    loss = tf.losses.mse(y_true, y_pred)  # Compute loss

# Compute the gradient of the loss function with respect to the weights/biases in the model
gradients = tape.gradient(loss, model.trainable_variables)  # Compute gradients
print(gradients)

# Update the parameters in the model 
optimizer.apply_gradients(zip(gradients, model.trainable_variables)) 


[<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[-9.295926]], dtype=float32)>, <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-4.647963], dtype=float32)>]


<KerasVariable shape=(), dtype=int64, path=SGD/iteration>