In [None]:
import tensorflow as tf
import numpy as np

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Cap TensorFlow's allocation on the first GPU at 1GB (memory_limit=1024)
    try:
        first_gpu_config = tf.config.LogicalDeviceConfiguration(memory_limit=1024)
        tf.config.set_logical_device_configuration(gpus[0], [first_gpu_config])
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Virtual devices can only be configured before the GPUs have been initialized
        print(err)

We first create a set of 20 values randomly sampled between 0 and 10. We then create targets by multiplying the values by $\pi$.

So our dataset is sampled from a function $f(x) = \pi x$

Now we want to approximate this function with a linear model that has the form $f_{model}(x) = a x$, where $a$ is the parameter that we want to learn.

For this we use the 20 data points, initialize the parameter $a$ to a value far from $\pi$ and then do gradient descent with the individual examples (also called stochastic gradient descent). We show the entire dataset to the model twice, or in other words we train for two epochs. For gradient descent, we use TensorFlow's gradient tape for automatic differentiation to obtain loss gradients with respect to the trainable parameter $a$.


What is shown in this notebook regarding the use of tf.GradientTape() can be used for any data and any model. Indeed, we can obtain the gradients with respect to millions of parameters with the same general structure; in that case we get the list of trainable variables from the ".trainable_variables" attribute of a tf.keras.Model instance.

In [4]:
# univariate linear model without a bias term: f_model(x) = parameter * x
def model(x, parameter):
    """Return the prediction of the bias-free linear model.

    Args:
        x: input value(s) to the model.
        parameter: the slope that scales the input.

    Returns:
        The product ``x * parameter``.
    """
    prediction = x * parameter
    return prediction

In [5]:
# generate data (X) and targets (Y)

# fix the global seed right before sampling so the dataset is reproducible
tf.random.set_seed(42)

# 20 inputs drawn uniformly between 0 and 10; the targets follow f(x) = pi * x
X = tf.random.uniform((20, 1), minval=0, maxval=10)
Y = X * np.pi

In [6]:
# start the trainable estimate at a value far away from pi
parameter_estimate = tf.Variable(initial_value=7.5, dtype=tf.float32, trainable=True)

# step size applied to every gradient descent update
learning_rate = tf.constant(value=0.005, dtype=tf.float32)

In [7]:
# iterate over epochs (each epoch is one full pass over the dataset)
for epoch in range(2):

    # iterate over individual training examples (stochastic gradient descent)
    for x, y in zip(X, Y):

        # within GradientTape context manager, calculate loss between targets and prediction
        with tf.GradientTape() as tape:

            prediction = model(x, parameter_estimate)

            # squared error of this single example
            loss = (prediction - y)**2

        # outside of context manager, obtain gradients with respect to list of trainable variables
        # (the result is a list with one gradient per variable, in the same order)
        gradients = tape.gradient(loss, [parameter_estimate])

        # gradient descent step: subtract the gradient scaled by the learning rate.
        # The gradient is taken out of the list first, so we multiply tensor by tensor
        # instead of relying on an implicit list-to-tensor conversion, and the variable
        # is updated in place with assign_sub.
        parameter_estimate.assign_sub(learning_rate * gradients[0])

In [8]:
# print the value of the parameter estimate after training
tf.print(parameter_estimate)

3.14159274


In [11]:
# compare with a small tolerance instead of exact equality: the float32 estimate
# produced by gradient descent cannot be relied on to match pi bit-for-bit
print(tf.abs(parameter_estimate - np.pi) < 1e-5)

tf.Tensor(True, shape=(), dtype=bool)
