# Notes on Chapter 12 of *Hands-On Machine Learning with Scikit-Learn, Keras, & TensorFlow*, 3rd edition, by Aurélien Géron

Reduce the number of log messages displayed by TensorFlow.

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [2]:
import itertools
import time

import keras
from keras import layers
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
import tensorflow as tf
import tensorflow.experimental.numpy as tnp

## Basic tensor operations

In [3]:
x = tf.constant([[1,2],[3,4],[5,6]], dtype=tf.float16)

In [4]:
x

<tf.Tensor: shape=(3, 2), dtype=float16, numpy=
array([[1., 2.],
       [3., 4.],
       [5., 6.]], dtype=float16)>

In [5]:
3 * x

<tf.Tensor: shape=(3, 2), dtype=float16, numpy=
array([[ 3.,  6.],
       [ 9., 12.],
       [15., 18.]], dtype=float16)>

In [6]:
x @ tf.transpose(x)

<tf.Tensor: shape=(3, 3), dtype=float16, numpy=
array([[ 5., 11., 17.],
       [11., 25., 39.],
       [17., 39., 61.]], dtype=float16)>

In [7]:
tf.constant(23)

<tf.Tensor: shape=(), dtype=int32, numpy=23>

Keras also has a tensor library in keras.ops

In [8]:
y = keras.ops.array([[1,2],[3,4],[5,6]])
y

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1, 2],
       [3, 4],
       [5, 6]], dtype=int32)>

In [9]:
[[1,2],[3,4],[5,6]] * keras.ops.arange(2,4)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[ 2,  6],
       [ 6, 12],
       [10, 18]], dtype=int32)>

Note that TensorFlow does not perform implicit type promotion by default, due to performance concerns:

In [10]:
try:
    tf.constant(3) * tf.constant(2.)
except Exception as e:
    print(type(e), ':', e)
else:
    assert False # (unreached)

<class 'tensorflow.python.framework.errors_impl.InvalidArgumentError'> : cannot compute Mul as input #1(zero-based) was expected to be a int32 tensor but is a float tensor [Op:Mul] name: 


Thus manual casts are required for this type of code:

In [11]:
tf.cast(tf.constant(3), tf.float32) * tf.constant(2.)

<tf.Tensor: shape=(), dtype=float32, numpy=6.0>

TensorFlow also includes a more comprehensive NumPy emulation library in `tf.experimental.numpy`. To use it, first enable NumPy-like behavior:

In [12]:
# Switch on NumPy-style semantics (type promotion, extra tensor methods) via
# the public entry point on tf.experimental.numpy (imported above as `tnp`)
# instead of importing from the private `tensorflow.python` package.
tnp.experimental_enable_numpy_behavior()

This enables automatic type promotion, in addition to adding more numpy-like member functions (e.g. ravel and reshape) to tf tensors. 

In [13]:
tf.constant(3) * tf.constant(2.) # previously an error

<tf.Tensor: shape=(), dtype=float64, numpy=6.0>

In [14]:
x = tnp.arange(12).reshape((3,4))
x

<tf.Tensor: shape=(3, 4), dtype=int64, numpy=
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])>

In [15]:
x[tnp.newaxis, 1, :]

<tf.Tensor: shape=(1, 4), dtype=int64, numpy=array([[4, 5, 6, 7]])>

Note that, unlike in NumPy (and as in JAX, whose arrays are also immutable), some things such as direct item assignment are still not allowed:

In [16]:
try:
    x[1,1] = 100
except Exception as e:
    print(type(e), ':', e)
else:
    assert False # (unreached)

<class 'TypeError'> : 'tensorflow.python.framework.ops.EagerTensor' object does not support item assignment


Mutable tensors need to be declared with tf.Variable:

In [17]:
x = tf.Variable([[1,2],[3,4]])
x

<tf.Variable 'Variable:0' shape=(2, 2) dtype=int32, numpy=
array([[1, 2],
       [3, 4]], dtype=int32)>

In [18]:
x.assign( 2*x )
x

<tf.Variable 'Variable:0' shape=(2, 2) dtype=int32, numpy=
array([[2, 4],
       [6, 8]], dtype=int32)>

In [19]:
x.scatter_nd_update([[0,0], [1,1,]], [23, 42])
x

<tf.Variable 'Variable:0' shape=(2, 2) dtype=int32, numpy=
array([[23,  4],
       [ 6, 42]], dtype=int32)>

### Custom objects

Custom objects such as loss functions can be used, but they can create complications when loading and saving models. In particular, you'll need to provide a dictionary at load time with the custom components, which can be either functions or objects that implement a get_config function.

In [20]:
m = keras.models.Sequential([
    layers.Input((8,)),
    layers.Dense(12),
    layers.Activation('softmax')
])

In [21]:
def my_loss(y_true, y_predicted):
    """Huber-style loss with a threshold of 1, computed element-wise.

    Residuals with magnitude at most 1 are penalized quadratically
    (``residual**2 / 2``); larger residuals are penalized linearly
    (``|residual| - 0.5``). The two branches meet at ``|residual| == 1``.

    Args:
        y_true: Target values.
        y_predicted: Model outputs, broadcastable against ``y_true``.

    Returns:
        A tensor of non-negative per-element losses.
    """
    residual = y_true - y_predicted
    abs_residual = tf.abs(residual)
    # The linear branch uses |residual| - 0.5 rather than the signed residual:
    # the signed value would produce negative losses for large negative
    # residuals and a jump at the threshold.
    return tf.where(abs_residual > 1, abs_residual - 0.5, tf.square(residual) / 2)

class MyLRSchedule(keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule that divides the initial rate by ``step + 1``.

    ``alpha`` is stored and serialized by ``get_config`` but is not read by
    ``__call__``.
    """

    def __init__(self, initial_learning_rate, alpha, **kwargs):
        self.initial_learning_rate = initial_learning_rate
        self.alpha = alpha
        # Remaining keyword arguments are forwarded to the base class; this
        # example does not pass any.
        super().__init__(**kwargs)

    def __call__(self, step):
        """Return the learning rate to use at ``step``."""
        denominator = step + 1
        return self.initial_learning_rate / denominator

    def get_config(self):
        """Return the constructor arguments as a plain dict for serialization."""
        config = dict(
            initial_learning_rate=self.initial_learning_rate,
            alpha=self.alpha,
        )
        return config

In [22]:
m.compile(
    loss=my_loss,
    optimizer=keras.optimizers.SGD(learning_rate=MyLRSchedule(0.002, 0.999))
)
m.save('ch13_custom_objects.keras')

Load will fail without custom objects

In [23]:
# Loading without the custom objects is expected to raise; the try body holds
# only the call under test, and the else branch flags an unexpected success
# (same try/except/else shape as the other expected-failure cells).
try:
    m1 = keras.models.load_model('ch13_custom_objects.keras')
except TypeError as e:
    print(type(e), e)
else:
    assert False # (unreached)

<class 'TypeError'> <class 'keras.src.optimizers.sgd.SGD'> could not be deserialized properly. Please ensure that components that are Python object instances (layers, models, etc.) returned by `get_config()` are explicitly deserialized in the model's `from_config()` method.

config={'module': 'keras.optimizers', 'class_name': 'SGD', 'config': {'name': 'SGD', 'learning_rate': {'module': None, 'class_name': 'MyLRSchedule', 'config': {'initial_learning_rate': 0.002, 'alpha': 0.999}, 'registered_name': 'MyLRSchedule'}, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'momentum': 0.0, 'nesterov': False}, 'registered_name': None}.

Exception encountered: Could not locate class 'MyLRSchedule'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': None, 'cla

Custom objects can be passed either as an extra parameter to `load_model` or via a `custom_object_scope` context manager:

In [24]:
custom_objects = {
    'my_loss': my_loss,
    'MyLRSchedule': MyLRSchedule
}

m1 = keras.models.load_model(
    'ch13_custom_objects.keras',
    custom_objects=custom_objects
)

with keras.saving.custom_object_scope(custom_objects):
    m2 = keras.models.load_model('ch13_custom_objects.keras')

### Custom layers

Simple layers without weights can be constructed with a lambda layer

In [25]:
square_layer = keras.layers.Lambda(lambda x : tf.square(x))

More complex layers can be implemented by deriving from the Layer class

In [26]:
class DenseAndFlatten(layers.Layer):
    """Minimal fully connected layer computing ``activation(X @ W + b)``.

    A stripped-down counterpart of a dense layer. Despite the name, ``call``
    performs no flattening.
    """

    def __init__(self, units, activation=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        # Resolve the activation identifier into a callable once, up front.
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        """Create the kernel ``W`` and bias ``b``.

        Runs the first time this instance is used; the last entry of
        ``batch_input_shape`` sets the number of rows of ``W``.
        """
        n_inputs = batch_input_shape[-1]
        self.W = self.add_weight(
            name="W",
            shape=(n_inputs, self.units),
            initializer="glorot_normal",
        )
        self.b = self.add_weight(
            name="b",
            shape=(self.units,),
            initializer="zeros",
        )

    def call(self, X):
        """Apply the affine map followed by the activation."""
        pre_activation = X @ self.W + self.b
        return self.activation(pre_activation)

    def get_config(self):
        """Return the layer configuration; only needed for save/load support."""
        config = {
            "units": self.units,
            "activation": keras.activations.serialize(self.activation),
        }
        # Base-class entries are merged last, so they win on any key clash.
        config.update(super().get_config())
        return config

Models are a subclass of layers, and custom models can be defined in a similar fashion. This can sometimes be useful when the loss function needs to contain a term computed from internal variables of the model, in which case you can call `Model.add_loss` to add a term to the loss function (typically as part of the `call` method).

## GradientTape and Autodiff

The GradientTape class records operations so that their gradients can be computed by automatic differentiation:

In [27]:
def f(x, y):
    """Evaluate ``x - 2*y + x*y`` for the given ``x`` and ``y``."""
    linear_part = x - 2 * y
    cross_term = x * y
    return linear_part + cross_term

x1 = tf.Variable(42.)
y1 = tf.Variable(23.)

with tf.GradientTape() as tape:
    z1 = f(x1, y1)

tape.gradient(z1, [x1, y1])

[<tf.Tensor: shape=(), dtype=float32, numpy=24.0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=40.0>]

By default gradient tapes can only be queried once:

In [28]:
with tf.GradientTape() as tape:
    z1 = f(x1, y1)

print(tape.gradient(z1, x1))

try:
    print(tape.gradient(z1, y1))
except RuntimeError as e:
    print(type(e), ':', e)
else:
    assert False # (unreached)

tf.Tensor(24.0, shape=(), dtype=float32)
<class 'RuntimeError'> : A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)


This behavior can be controlled with the `persistent` parameter:

In [29]:
with tf.GradientTape(persistent=True) as tape:
    z1 = f(x1, y1)

print(tape.gradient(z1, x1))
print(tape.gradient(z1, y1))

tf.Tensor(24.0, shape=(), dtype=float32)
tf.Tensor(40.0, shape=(), dtype=float32)


Only variables are tracked by default:

In [30]:
c1 = tf.constant(7.)

with tf.GradientTape() as tape:
    z1 = c1 * f(x1, y1)

print(tape.gradient(z1, c1))

None


Things like constants can be tracked by adding them with the `watch` function:

In [31]:
with tf.GradientTape() as tape:
    tape.watch(c1)
    z1 = c1 * f(x1, y1)

print(tape.gradient(z1, c1))

tf.Tensor(962.0, shape=(), dtype=float32)
