In [48]:
import tensorflow as tf
import numpy as np

# Using loss functions and optimizers in TensorFlow

TensorFlow has many built-in functions and classes that are very convenient to use.

In [tf.keras.losses](https://www.tensorflow.org/api_docs/python/tf/keras/losses/) we find loss functions, and in [tf.keras.optimizers](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers) we find optimizers, including standard (stochastic) gradient descent.

## Mean Squared Error

In [49]:
# Size of the toy batch: 8 examples, each with 5 predicted values.
BATCH_SIZE = 8
N_PREDICTED_FEATURES = 5

# Random stand-ins for ground truth and model output; both tensors have
# shape (BATCH_SIZE, N_PREDICTED_FEATURES).
targets = tf.random.uniform(shape=(BATCH_SIZE, N_PREDICTED_FEATURES))
predictions = tf.random.uniform(shape=(BATCH_SIZE, N_PREDICTED_FEATURES))

# The functional MSE is evaluated per example, so the result still has one
# entry for every element of the batch.
mean_squared_error = tf.keras.losses.MSE(y_true=targets, y_pred=predictions)
print("error for each training example in the batch:\n", mean_squared_error.numpy(), "\n")

# Averaging over all remaining axes (the default) collapses the per-example
# errors into a single scalar for the batch.
average_mse = tf.reduce_mean(mean_squared_error)
print("average mse for the batch:\n", average_mse.numpy())

error for each training example in the batch:
 [0.1370967  0.15011537 0.2427506  0.12684295 0.1503181  0.15855145
 0.16248909 0.10417567] 

average mse for the batch:
 0.15404248


## Categorical CrossEntropy

In [50]:
# One-hot ground truth: five examples, three classes.
labels = tf.constant(
    [[0, 1, 0],
     [0, 0, 1],
     [1, 0, 0],
     [1, 0, 0],
     [0, 1, 0]],
    dtype=tf.float32,
)

# Random values stand in for raw network output; softmax turns them into a
# categorical probability distribution over the labels.
predictions = tf.nn.softmax(tf.random.normal(labels.shape))

# Calculate categorical crossentropy on the softmax probabilities. The loss
# object is created once and then called like a function; the printed result
# is a single number for the whole batch.
CCE_loss = tf.keras.losses.CategoricalCrossentropy()
batch_loss = CCE_loss(y_true=labels, y_pred=predictions)

print("CCE loss between predicted label probabilities and ground truth labels is:\n", batch_loss.numpy())

CCE loss between predicted label probabilities and ground truth labels is:
 1.5270935


## Binary CrossEntropy

In [51]:
# Binary ground truth for eight examples.
labels = tf.constant([1, 0, 0, 1, 0, 0, 1, 0], dtype=tf.float32)

# Random values stand in for the predicted probability of the positive label,
# one per example.
predictions = tf.random.uniform(shape=labels.shape)

# Create the loss object once, then call it on (ground truth, prediction).
BCE_loss = tf.keras.losses.BinaryCrossentropy()
batch_loss = BCE_loss(y_true=labels, y_pred=predictions)

print("BCE loss between predicted label probabilities and ground truth labels is:\n", batch_loss.numpy())

BCE loss between predicted label probabilities and ground truth labels is:
 0.96125954


# Optimizers

In [63]:

# choose optimizer and loss

# Plain gradient descent: momentum is switched off.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.0)

# Functional form of the mean squared error (called as loss_function(y_true, y_pred)).
loss_function = tf.keras.losses.MSE

# create data
# Float endpoints are used because tf.linspace is documented for floating-point
# start/stop values; integer literals rely on version-specific conversion.
x = tf.linspace(0.0, 100.0, 128)
x = tf.cast(x, tf.float32)  # keep the float32 dtype explicit

# Targets follow y = pi * x, so the parameter to recover is pi.
y = x * np.pi

def model(x, parameter):
    """Linear univariate model without a bias term.

    Args:
        x: Input value(s) to be scaled.
        parameter: The single multiplicative weight of the model.

    Returns:
        The product ``x * parameter``.
    """
    prediction = x * parameter
    return prediction


# NOTE: this variable is not read by the training loop below; the step size that
# is actually applied is the one the optimizer was constructed with.
learning_rate = 0.001

# initialize parameter variable to a value far away from pi
parameter_estimate = tf.Variable(520.5, trainable=True, dtype=tf.float32)

print("before training:", parameter_estimate)

# number of full passes over the training examples
N_EPOCHS = 1

# iterate over epochs
for epoch in range(N_EPOCHS):

    # iterate over training examples
    # The loop variables deliberately get their own names: reusing `x` and `y`
    # here would rebind the dataset tensors to a single example, so any further
    # epoch (or later cell) would no longer see the full data.
    for x_example, y_example in zip(x, y):

        x_batch = tf.expand_dims(x_example, axis=0)  # add batch dimension (of 1)
        y_batch = tf.expand_dims(y_example, axis=0)

        # within GradientTape context manager, calculate loss between targets and prediction
        with tf.GradientTape() as tape:
            prediction = model(x_batch, parameter_estimate)
            loss = tf.reduce_mean(loss_function(y_batch, prediction))

        # outside of context manager, obtain gradients with respect to list of trainable variables
        gradients = tape.gradient(loss, [parameter_estimate])

        # apply gradients with optimizer
        optimizer.apply_gradients(zip(gradients, [parameter_estimate]))

print("after training: ", parameter_estimate)

before training: <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=520.5>
after training:  <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=3.1415927>
