In [1]:
import tensorflow as tf
import numpy as np
from tensorflow import keras

# Tensors and Variables

Both are similar to NumPy arrays and offer similar functionality, but:

* Tensors are immutable
* Variables are mutable

In [2]:
# An immutable 2x3 float32 tensor built from a nested Python list
tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

<tf.Tensor: id=0, shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [3]:
# The mutable counterpart: a variable holding the same 2x3 float32 values
tf.Variable([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

Tensors of different dtypes cannot be added together (even float64 and float32 don't add!)

In [4]:
# This will not run: adding a float32 tensor to an int32 tensor raises an error
# tf.constant([[1., 2., 3.], [4., 5., 6.]]) + tf.constant(65)

All functions for tensors and variables on pg 380 (things like square, exp, max, multiply, etc...)

Variables, being mutable, can be modified in place (this is how the weights of neural networks are updated).

In [5]:
# Double every entry in place: assign() overwrites the values stored in the variable
v = tf.Variable([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
v.assign(v * 2)
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

In [6]:
# Overwrite a single entry (row 0, column 1) by calling assign() on a slice of the variable
v = tf.Variable([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
v[0, 1].assign(24)
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[ 1., 24.,  3.],
       [ 4.,  5.,  6.]], dtype=float32)>

Other data structures on pg 383.

# Customizing Models and Training Algorithms

## Custom Loss Functions

We can implement loss functions other than the simple MSE.

In [7]:
def huber_fn(y_true, y_pred):
    """Element-wise Huber loss with a fixed threshold of 1.

    Errors whose absolute value is below 1 get the quadratic loss
    ``error**2 / 2``; all other errors get the linear loss ``|error| - 0.5``.
    """
    residual = y_true - y_pred
    abs_residual = tf.abs(residual)
    quadratic = tf.square(residual) / 2
    linear = abs_residual - 0.5
    return tf.where(abs_residual < 1, quadratic, linear)

In [8]:
# Sample targets (integers) and predictions (floats) for trying out huber_fn
a = np.array([3, 4, 6, 8, 4])
b = np.array([3.1, 4.5, 7.8, 8.1, 4.2])

In [9]:
# Element-wise Huber loss for each (a, b) pair
huber_fn(a, b)

<tf.Tensor: id=47, shape=(5,), dtype=float64, numpy=array([0.005, 0.125, 1.3  , 0.005, 0.02 ])>

Can use this loss function when compiling the model

In [10]:
# model.compile(loss=huber_fn, optimizer='nadam')
# model.fit(X_train, y_train, [...])

## Saving and Loading Models with Custom Components

Need to specify function when loading the model.

In [11]:
# model = keras.models.load_model('my_model.h5', custom_objects={'huber_fn': huber_fn})

What if the loss function has hyperparameters, though? A simple way of implementing this is a function that returns a function:

In [12]:
def create_huber(threshold=1.0):
    """Build a Huber loss function bound to the given threshold.

    The returned ``huber_fn(y_true, y_pred)`` is quadratic (``error**2 / 2``)
    where ``|error| < threshold`` and linear
    (``threshold * |error| - threshold**2 / 2``) everywhere else.
    """

    def huber_fn(y_true, y_pred):
        residual = y_true - y_pred
        abs_residual = tf.abs(residual)
        quadratic = tf.square(residual) / 2
        linear = threshold * abs_residual - threshold**2 / 2
        return tf.where(abs_residual < threshold, quadratic, linear)

    return huber_fn

Then compile the model as such.

In [13]:
# model.compile(loss=create_huber(2.0), optimizer='nadam')

The problem with this is that when you save the model the threshold isn't saved. This means the threshold needs to be specified again when you load the model. There is a way around this:


In [14]:
class HuberLoss(keras.losses.Loss):
    """Huber loss whose threshold is exposed through ``get_config``.

    Args:
        threshold: absolute-error boundary between the quadratic and the
            linear branch of the loss.
        **kwargs: forwarded unchanged to ``keras.losses.Loss``.
    """

    def __init__(self, threshold=1.0, **kwargs):
        self.threshold = threshold
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        """Return the element-wise Huber loss of ``y_true - y_pred``."""
        residual = y_true - y_pred
        abs_residual = tf.abs(residual)
        quadratic = tf.square(residual) / 2
        linear = self.threshold * abs_residual - self.threshold**2 / 2
        return tf.where(abs_residual < self.threshold, quadratic, linear)

    def get_config(self):
        """Return the base-class config extended with the ``threshold`` entry."""
        return dict(super().get_config(), threshold=self.threshold)

What we've done is create a subclass of the `keras.losses.Loss` class and override some of its methods:
* `__init__` saves the threshold value on the instance
* `call` computes the element-wise loss (as Keras loss functions should)
* `get_config` returns a dictionary mapping each hyperparameter name to its value.

Then to save and load the model:

In [15]:
# model.compile(loss=HuberLoss(2.), optimizer='nadam')

# model = keras.models.load_model("model", custom_objects={"HuberLoss": HuberLoss})

It will automatically get the hyperparameter values using the get_config method (this is an implementation detail of Keras).

## Custom Activation Functions, Initializers, Regularizers, and Constraints

For these other custom requirements, the functions just need to be written in the correct format:

In [16]:
def my_softplus(z):
    """Softplus activation, log(exp(z) + 1); the return value is just tf.nn.softplus(z)."""
    return tf.math.log(1.0 + tf.exp(z))

def my_glorot_initializer(shape, dtype=tf.float32):
    """Draw initial weights from a zero-mean normal distribution scaled by the shape.

    The standard deviation is ``sqrt(2 / (shape[0] + shape[1]))``, i.e. it is
    normalized by the first two dimensions of the weight shape (fan-in plus
    fan-out for a 2-D kernel).

    Args:
        shape: shape of the weight tensor to create; must have at least two
            dimensions.
        dtype: dtype of the returned tensor.

    Returns:
        A tensor of the given shape and dtype.
    """
    # Keep stddev a plain Python scalar. tf.sqrt() on a Python float yields a
    # float32 tensor, and tf.random.normal() rejects a stddev tensor whose
    # dtype differs from `dtype`, so any non-float32 dtype used to fail.
    stddev = (2. / (shape[0] + shape[1])) ** 0.5
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)

def my_l1_regularizer(weights):
    """Return the sum of the absolute values of ``0.01 * weights``."""
    scaled = 0.01 * weights
    return tf.reduce_sum(tf.abs(scaled))

def my_positive_weights(weights):
    """Replace negative weights with zero; the return value is just tf.nn.relu(weights)."""
    is_negative = weights < 0.
    zeros = tf.zeros_like(weights)
    return tf.where(is_negative, zeros, weights)

Then it's possible to build layers using these custom functions:

In [23]:
# Dense layer that uses a custom activation, initializer, regularizer and constraint
layer = keras.layers.Dense(
    30,
    activation=my_softplus,
    kernel_initializer=my_glorot_initializer,
    kernel_regularizer=my_l1_regularizer,
    kernel_constraint=my_positive_weights,
)

What if the function has hyperparameters that need to be saved (the same problem as before)? Then we need to use the subclassing API:

In [24]:
class MyL1Regularizer(keras.regularizers.Regularizer):
    """L1 regularizer whose scaling factor is exposed through ``get_config``.

    Args:
        factor: multiplier applied to the weights before taking absolute values.
    """

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, weights):
        """Return the sum of the absolute values of ``factor * weights``."""
        scaled = self.factor * weights
        return tf.reduce_sum(tf.abs(scaled))

    def get_config(self):
        """Return the hyperparameters as a dictionary."""
        return dict(factor=self.factor)

We have overridden some methods of the superclass.

**Note that you need to implement `call()` for losses, layers, activation functions, and models, but `__call__()` for regularizers, initializers, and constraints. Things are different for metrics.**

## Custom Metrics

We need to be careful here. The way metrics work in TensorFlow is that they compute the metric for each batch and then calculate a running mean. But this runs into problems, such as the following:

*Example*: Suppose we are doing binary classification. In the first batch the model makes 5 positive predictions, 4 of which are correct. In the second batch the model makes 3 positive predictions, but they are all incorrect. The running mean of the per-batch *precision* scores (80% and 0%) is 40%, but the precision over all samples is 4/8 = 50%. To keep track of this correctly we need the `keras.metrics.Precision` class:

In [32]:
# First batch: the second argument holds 5 positive predictions, 4 of which
# match a positive label in the first argument
precision = keras.metrics.Precision()
precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])

<tf.Tensor: id=399, shape=(), dtype=float32, numpy=0.8>

In [33]:
# Second batch: 3 positive predictions, none of which matches a positive label
precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0])

<tf.Tensor: id=446, shape=(), dtype=float32, numpy=0.5>

Each time it is called it updates a running precision. The result() method returns the precision accumulated so far.

In [34]:
# Precision accumulated over all batches passed to `precision` so far
precision.result()

<tf.Tensor: id=455, shape=(), dtype=float32, numpy=0.5>

There are some other methods to this as well.

**We will now create a streaming metric using the subclass API**

In [35]:
class HuberMetric(keras.metrics.Metric):
    """Streaming mean of the Huber loss over every instance seen so far.

    State is kept in two weights: ``total`` (sum of the element-wise Huber
    losses) and ``count`` (number of elements of ``y_true`` seen).

    Args:
        threshold: absolute-error boundary between the quadratic and the
            linear branch of the Huber loss.
        **kwargs: forwarded unchanged to ``keras.metrics.Metric``.
    """

    def __init__(self, threshold=1.0, **kwargs):
        super().__init__(**kwargs) # handles base args (e.g., dtype)
        self.threshold = threshold
        # Pass the weight name by keyword: it stays correct on Keras versions
        # where the first positional parameter of add_weight() is the shape.
        self.total = self.add_weight(name="total", initializer="zeros")
        self.count = self.add_weight(name="count", initializer="zeros")

    def huber_fn(self, y_true, y_pred): # workaround
        """Return the element-wise Huber loss using ``self.threshold``."""
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss  = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add one batch to the running totals.

        ``sample_weight`` is accepted but not used by this metric.
        """
        metric = self.huber_fn(y_true, y_pred)
        # Cast to the dtype of the state variables: assign_add() requires
        # matching dtypes, which a hard-coded float32 (or a float64 batch)
        # does not guarantee when the metric is built with another dtype.
        self.total.assign_add(tf.cast(tf.reduce_sum(metric), self.total.dtype))
        self.count.assign_add(tf.cast(tf.size(y_true), self.count.dtype))

    def result(self):
        """Return ``total / count``, or 0 if no batch has been seen yet."""
        # divide_no_nan avoids a NaN (0 / 0) before the first update_state().
        return tf.math.divide_no_nan(self.total, self.count)

    def get_config(self):
        """Return the base-class config extended with the ``threshold`` entry."""
        base_config = super().get_config()
        return {**base_config, "threshold": self.threshold}

* The add_weight calls in *init* create the variables that keep track of the metric's state over multiple batches. In this case they are the sum of the Huber losses (total) and the number of instances seen so far (count)

* The *huber_fn* function computes the huber loss over all instances in the batch

* The *update_state* updates the total and count

* The *result* returns the mean Huber loss over all instances seen so far (total / count)

* The *get_config* ensures the threshold gets saved along with the model

Of course in our previous example above the metric cannot simply be averaged over batches.