In [1]:
import logging
import numpy as np

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers

In [2]:
logging_format = '%(message)s'
logging.basicConfig(format=logging_format, level=logging.INFO)

# Using TensorFlow like NumPy

## Tensors and Operations

In [3]:
# Create a matrix
tf.constant([[1., 2., 3.], [4., 5., 6.]])

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [4]:
# Create a scalar
tf.constant(42)

<tf.Tensor: shape=(), dtype=int32, numpy=42>

In [5]:
t = tf.constant([[1., 2., 3.], [4., 5., 6.]])

logging.info(f'The tensor t has shape {t.shape}, and dtype {t.dtype}')

The tensor t has shape (2, 3), and dtype <dtype: 'float32'>


In [6]:
logging.info(f'It is possible to sum 10 to the tensor t, that is equal to the tensor {t + 10}')

It is possible to sum 10 to the tensor t, that is equal to the tensor [[11. 12. 13.]
 [14. 15. 16.]]


In [7]:
logging.info(f'We can also apply the dot product to itself and the result is {t @ tf.transpose(t)}')

We can also apply the dot product to itself and the result is [[14. 32.]
 [32. 77.]]


## Tensors and NumPy

In [8]:
a = np.array([2., 4., 5.])
tf.constant(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([2., 4., 5.])>

In [9]:
t.numpy()

array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)

In [10]:
tf.square(a)

<tf.Tensor: shape=(3,), dtype=float64, numpy=array([ 4., 16., 25.])>

In [11]:
np.square(t)

array([[ 1.,  4.,  9.],
       [16., 25., 36.]], dtype=float32)

In [12]:
# TensorFlow does not convert types automatically: adding a float32 tensor to
# an int32 tensor raises an error instead of silently upcasting.
try:
    tf.constant(2.) + tf.constant(40)
# Catch only the dtype-mismatch errors so that unrelated failures (and
# KeyboardInterrupt) are not silently swallowed.
except (tf.errors.InvalidArgumentError, TypeError):
    logging.info('The operation cannot be performed due to difference in the types of the variables.')

The operation cannot be performed due to difference in the types of the variables.


In [13]:
t2 = tf.constant(40., dtype=tf.float64)

tf.constant(2.) + tf.cast(t2, tf.float32)

<tf.Tensor: shape=(), dtype=float32, numpy=42.0>

## Variables

In [14]:
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])
v

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

The ```tf.Variable``` behaves like ```tf.Tensor``` but it can be modified.

In [15]:
# Re-create the variable so this cell does not depend on earlier mutations.
v = tf.Variable([[1., 2., 3.], [4., 5., 6.]])

# assign() overwrites the whole variable in place.
v.assign(2 * v)
logging.info(f'The vector v multiplied by 2: \n{v}\n')

# Indexing followed by assign() updates a single element.
v[0, 1].assign(42)
logging.info(f'The vector v assigned 42 in [0, 1]: \n{v}\n')

# Slicing followed by assign() updates a whole column.
v[:, 2].assign([0., 1.])
logging.info(f'The vector v replacing the 3rd column: \n{v}\n')

# scatter_nd_update() writes one value per listed index.
v.scatter_nd_update(indices=[[0, 0], [1, 2]], updates=[100., 200.])
logging.info(f'The vector v replacing the instances [0, 0] and [1, 2]: \n{v}\n')

The vector v multiplied by 2: 
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

The vector v assigned 42 in [0, 1]: 
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  6.],
       [ 8., 10., 12.]], dtype=float32)>

The vector v replacing the 3rd column: 
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[ 2., 42.,  0.],
       [ 8., 10.,  1.]], dtype=float32)>

The vector v replacing the instances [0, 0] and [1, 2]: 
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[100.,  42.,   0.],
       [  8.,  10., 200.]], dtype=float32)>



# Customizing Models and Training Algorithm

## Custom Loss Function

In [16]:
# Create a loss function for the Huber Loss

def huber_fn(y_true, y_pred):
    """Element-wise Huber loss with a fixed threshold of 1.

    Where the absolute error is below 1 the loss is half the squared error;
    elsewhere it is the absolute error minus 0.5.
    """
    residual = y_true - y_pred
    abs_residual = tf.abs(residual)
    return tf.where(abs_residual < 1,
                    tf.square(residual) / 2,
                    abs_residual - 0.5)

## Saving and Loading Models That Contain Custom Components

In [17]:
# If we need to set the threshold dynamically, we can create a function that returns the function

def huber_loss_fn(threshold=1.0):
    """Build a Huber loss function for the given threshold.

    Args:
        threshold: absolute error below which the loss is quadratic.

    Returns:
        A function ``huber_fn(y_true, y_pred)`` computing the element-wise
        Huber loss: half the squared error where the absolute error is below
        ``threshold``, and ``threshold * |error| - threshold**2 / 2`` elsewhere.
    """
    def huber_fn(y_true, y_pred):
        residual = y_true - y_pred
        abs_residual = tf.abs(residual)
        quadratic = tf.square(residual) / 2
        linear = threshold * abs_residual - threshold**2 / 2
        return tf.where(abs_residual < threshold, quadratic, linear)

    return huber_fn

Alternatively, we can create a subclass of ```keras.losses.Loss``` to implement a method ```get_config()``` to save the hyperparameter configuration:

In [18]:
class HuberLoss(keras.losses.Loss):
    """Huber loss with a configurable threshold that is saved with the model.

    The loss is half the squared error where the absolute error is below
    ``threshold`` and ``threshold * |error| - threshold**2 / 2`` elsewhere.
    """

    def __init__(self, threshold=1.0, **kwargs):
        """Store the threshold and forward the remaining arguments to the base class."""
        self.threshold = threshold
        super().__init__(**kwargs)

    def call(self, y_true, y_pred):
        """Return the element-wise Huber loss between ``y_true`` and ``y_pred``."""
        error = y_true - y_pred
        is_small_error = tf.abs(error) < self.threshold
        squared_loss = tf.square(error) / 2
        linear_loss = self.threshold * tf.abs(error) - self.threshold**2 / 2
        return tf.where(is_small_error, squared_loss, linear_loss)

    def get_config(self):
        """Return the base configuration extended with the ``threshold`` hyperparameter."""
        base_config = super().get_config()
        # Fixed: the attribute is `threshold`; the previous `self.treshold`
        # raised AttributeError whenever the loss was serialized.
        return {**base_config, 'threshold': self.threshold}

## Custom Activation Functions, Initializers, Regularizers and Constraints

In [19]:
def my_softplus(z):
    """Custom activation: log(exp(z) + 1)."""
    exp_z = tf.exp(z)
    return tf.math.log(exp_z + 1)  # Equivalent to tf.nn.softplus(z)

def my_glorot_initializer(shape, dtype=tf.float32):
    """Custom initializer: normal samples with stddev sqrt(2 / (shape[0] + shape[1]))."""
    dim_sum = shape[0] + shape[1]
    stddev = tf.sqrt(2. / dim_sum)
    return tf.random.normal(shape, stddev=stddev, dtype=dtype)

def my_l1_regularizer(weights):
    """Custom regularizer: sum of the absolute values of ``0.01 * weights``."""
    scaled_weights = 0.01 * weights
    return tf.reduce_sum(tf.abs(scaled_weights))

def my_positive_weights(weights):
    """Custom constraint: replace every negative weight with zero."""
    is_negative = weights < 0.
    zeros = tf.zeros_like(weights)
    return tf.where(is_negative, zeros, weights)  # equivalent to tf.nn.relu(weights)

# A dense layer wired up with all four custom components defined above.
layer = layers.Dense(
    units=30,
    activation=my_softplus,
    kernel_initializer=my_glorot_initializer,
    kernel_regularizer=my_l1_regularizer,
    kernel_constraint=my_positive_weights,
)

If the function has a hyperparameter we want to save, we can always create a subclass to allow keras to save this configuration after training:

In [20]:
class MyL1Regularizer(keras.regularizers.Regularizer):
    """L1 regularizer whose scaling factor is exposed through ``get_config``."""

    def __init__(self, factor):
        self.factor = factor

    def __call__(self, weights):
        """Return the sum of the absolute values of ``factor * weights``."""
        scaled_weights = self.factor * weights
        return tf.reduce_sum(tf.abs(scaled_weights))

    def get_config(self):
        """Return the hyperparameters needed to re-create this regularizer."""
        return dict(factor=self.factor)

## Custom Metrics

Precision is equal to the number of true positives divided by the total number of positive predictions (true positives plus false positives). To update the metric gradually, batch after batch, it must keep track of the running counts of true positives and false positives. Such metrics are called *streaming metrics*.

In [21]:
# The metric object keeps its state between calls.
precision = keras.metrics.Precision()

# First batch: 5 positive predictions, 4 of them correct -> 0.8.
precision([0, 1, 1, 1, 0, 1, 0, 1], [1, 1, 0, 1, 0, 1, 0, 1])
logging.info(precision.result())

# Second batch: 3 positive predictions, none correct.
# The result covers both batches so far: 4 correct out of 8 -> 0.5.
precision([0, 1, 0, 0, 1, 0, 1, 1], [1, 0, 1, 1, 0, 0, 0, 0])
logging.info(precision.result())

tf.Tensor(0.8, shape=(), dtype=float32)
tf.Tensor(0.5, shape=(), dtype=float32)


In [22]:
logging.info(precision.variables)

[<tf.Variable 'true_positives:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>, <tf.Variable 'false_positives:0' shape=(1,) dtype=float32, numpy=array([4.], dtype=float32)>]


If we need to create such a streaming metric ourselves, we can subclass the ```keras.metrics.Metric``` class.

In [23]:
class HuberMetric(keras.metrics.Metric):
    """Streaming metric: mean Huber loss over all values seen so far.

    The running sum of the loss and the running count of values are kept in
    two weights, so the result can be updated batch after batch.
    """

    def __init__(self, threshold=1.0, **kwargs):
        """Create the metric state for the given Huber ``threshold``."""
        super().__init__(**kwargs)
        self.threshold = threshold
        # Fixed: the factory defined in this notebook is `huber_loss_fn`;
        # `create_huber` does not exist here and raised NameError.
        self.huber_fn = huber_loss_fn(threshold)
        # Pass the name by keyword so it cannot be mistaken for another
        # positional parameter of add_weight().
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate the Huber loss of one batch.

        ``sample_weight`` is accepted for interface compatibility but is not
        used by this implementation.
        """
        metric = self.huber_fn(y_true, y_pred)
        self.total.assign_add(tf.reduce_sum(metric))
        self.count.assign_add(tf.cast(tf.size(y_true), tf.float32))

    def result(self):
        """Return the accumulated loss divided by the accumulated count."""
        return self.total / self.count

    def get_config(self):
        """Return the base configuration extended with the ``threshold`` hyperparameter."""
        base_config = super().get_config()
        return {**base_config, 'threshold': self.threshold}

## Custom Layers

We can create a custom stateful layer by creating a subclass of the ```keras.layers.Layer``` class:

In [24]:
from tensorflow.python.ops.gen_string_ops import UnicodeDecodeWithOffsets
class MyDense(keras.layers.Layer):
    """Simplified fully connected layer: ``activation(x @ kernel + bias)``."""

    def __init__(self, units, activation=None, **kwargs):
        """Store the number of units and resolve the activation function."""
        super().__init__(**kwargs)
        self.units = units
        self.activation = keras.activations.get(activation)

    def build(self, batch_input_shape):
        """Create the kernel and bias once the last input dimension is known."""
        self.kernel = self.add_weight(
            name='kernel', shape=[batch_input_shape[-1], self.units],
            initializer='glorot_normal')
        self.bias = self.add_weight(
            name='bias', shape=[self.units], initializer='zeros')
        super().build(batch_input_shape)

    def call(self, x):
        """Apply the affine transformation followed by the activation."""
        return self.activation(x @ self.kernel + self.bias)

    def compute_output_shape(self, batch_input_shape):
        """Return the input shape with its last dimension replaced by ``units``.

        Fixed: this method was misspelled ``comput_output_shape``, so Keras
        never called it.
        """
        # Wrapping in TensorShape also accepts plain tuples/lists as input.
        leading_dims = tf.TensorShape(batch_input_shape).as_list()[:-1]
        return tf.TensorShape(leading_dims + [self.units])

    # Backward-compatible alias for the original (misspelled) method name.
    comput_output_shape = compute_output_shape

    def get_config(self):
        """Return the base configuration extended with ``units`` and ``activation``."""
        base_config = super().get_config()
        return {**base_config, 'units': self.units,
                'activation': keras.activations.serialize(self.activation)}

## Custom Models

In order to create a custom model, we need to subclass the ```keras.models.Model``` and implement the ```call()``` method:

In [25]:
# Create a custom layer

class ResidualBlock(keras.layers.Layer):
    """Stack of dense ELU layers whose output is added back to the block input."""

    def __init__(self, n_layers, n_neurons, **kwargs):
        super().__init__(**kwargs)
        dense_stack = []
        for _ in range(n_layers):
            dense_stack.append(
                keras.layers.Dense(n_neurons, activation='elu',
                                   kernel_initializer='he_normal'))
        self.hidden = dense_stack

    def call(self, inputs):
        """Run the inputs through every hidden layer, then add the skip connection."""
        outputs = inputs
        for dense in self.hidden:
            outputs = dense(outputs)
        return inputs + outputs

# Now, create a custom model

class ResidualRegressor(keras.Model):
    """Regressor: a dense layer, two residual blocks and a dense output layer."""

    def __init__(self, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = keras.layers.Dense(
            30, activation='elu', kernel_initializer='he_normal')
        self.block1 = ResidualBlock(2, 30)
        self.block2 = ResidualBlock(2, 30)
        self.out = keras.layers.Dense(output_dim)

    def call(self, inputs):
        """Apply hidden1, then block1 four times, then block2 and the output layer."""
        features = self.hidden1(inputs)
        for _ in range(4):
            features = self.block1(features)
        features = self.block2(features)
        return self.out(features)

## Computing Gradients Using Autodiff

TensorFlow implements the gradients using autodiff, and makes this simple:

In [26]:
def f(w1, w2):
    """Return 3 * w1**2 + 2 * w1 * w2."""
    quadratic_term = 3 * w1 ** 2
    cross_term = 2 * w1 * w2
    return quadratic_term + cross_term

w1, w2 = tf.Variable(5.), tf.Variable(3.)
# Operations executed inside the tape context are recorded for differentiation.
with tf.GradientTape() as tape:
    z = f(w1, w2)

# Partial derivatives of z with respect to w1 and w2:
# dz/dw1 = 6 * w1 + 2 * w2 = 36 and dz/dw2 = 2 * w1 = 10.
gradients = tape.gradient(z, [w1, w2])


logging.info(gradients)

[<tf.Tensor: shape=(), dtype=float32, numpy=36.0>, <tf.Tensor: shape=(), dtype=float32, numpy=10.0>]


In [27]:
def f(w1, w2):
    """Return 3 * w1**2 + 2 * w1 * w2, with the second term wrapped in tf.stop_gradient."""
    quadratic_term = 3 * w1 ** 2
    blocked_term = tf.stop_gradient(2 * w1 * w2)
    return quadratic_term + blocked_term

# Reuses w1 and w2 from the previous cell.
with tf.GradientTape() as tape:
    z = f(w1, w2)

# Only the 3 * w1**2 term contributes: dz/dw1 = 6 * w1 = 30,
# and the gradient with respect to w2 is None.
gradients = tape.gradient(z, [w1, w2])

logging.info(gradients)

[<tf.Tensor: shape=(), dtype=float32, numpy=30.0>, None]


# TensorFlow Functions and Graphs

In [28]:
def cube(x):
    """Return x raised to the third power."""
    return pow(x, 3)

# Convert the function to a TensorFlow function
tf_cube = tf.function(cube)

# Alternatively, we can use tf.function as a decorator

@tf.function
def tf_cube(x):
    """TF Function version of ``cube``: return x raised to the third power."""
    cubed = x ** 3
    return cubed

# Exercises

## Question 1 - 11

1. How would you describe TensorFlow in a short sentence? What are its main features? Can you name other popular Deep Learning libraries?

> TensorFlow is a Python library that performs numerical computations and is optimized for machine learning problems. Its main features are that it is optimized for fast, memory-efficient computation, it is designed with GPU support, and its computation graphs can be exported to a portable format. Other popular Deep Learning libraries include PyTorch, MXNet, Microsoft Cognitive Toolkit, Theano, Caffe2, and Chainer.

2. Is TensorFlow a drop-in replacement for NumPy? What are the main differences
between the two?

> TensorFlow is not intended to be a replacement for NumPy, even though they have many similarities. TensorFlow is oriented toward memory-efficient, optimized computation, while NumPy is a more general numerical library that works with arrays, matrices and tensors.
>
> The functions in TensorFlow do not always have the same names as in NumPy, so it is not possible to simply substitute one library for the other. Furthermore, NumPy arrays are mutable, which is not the case for TensorFlow tensors.

3. Do you get the same result with tf.range(10) and tf.constant(np.arange(10))?

> Both return a one-dimensional tensor containing the integers from 0 to 9. However, TensorFlow uses 32-bit integers by default, whereas NumPy uses 64-bit integers by default, so the resulting dtypes differ.

4. Can you name six other data structures available in TensorFlow, beyond regular tensors?

> Queues, Sets, Sparse Tensors, tensor arrays, string tensors, and ragged tensors

5. A custom loss function can be defined by writing a function or by subclassing the ```keras.losses.Loss``` class. When would you use each option?

> Writing a function is a great default if the thresholds or any other parameters are fixed. We would create a custom loss function by subclassing the ```keras.losses.Loss``` class whenever we want to implement a ```get_config()``` method to save the configuration, i.e., when we want to save the values of the hyperparameters of the loss function.

6. Similarly, a custom metric can be defined in a function or a subclass of
```keras.metrics.Metric```. When would you use each option?

> The regular function is a good default to implement a custom metric. However, if we need to implement a streaming metric, i.e., a metric that is gradually updated at each batch, and/or if we need to save the hyperparameters of the metric by implementing the method ```get_config()```, the best option is to create the custom metric by subclassing the ```keras.metrics.Metric```. 

7. When should you create a custom layer versus a custom model?

> As a general rule, we want to be able to distinguish the different components of our model. That is why a custom layer should be a reusable building block: one that is either repeated within the model or that contains loops or other custom logic. We should implement a custom model when we want to assemble those layers (including custom layers) into a complete architecture, which keeps the code cleaner and easier to understand.

8. What are some use cases that require writing your own custom training loop?

> Normally, using the ```fit()``` method offers the flexibility required. However, if more flexibility is required, we can implement a custom training loop. Some cases may be when needing to print specific information, if we want to implement a different optimizer, or when trying to create a more complex network architecture.

9. Can custom Keras components contain arbitrary Python code, or must they be
convertible to TF Functions?

> By default, Keras automatically converts custom components to TF Functions, so these functions must be convertible to TF Functions by following the established rules. However, it is possible to tell Keras not to convert them by setting ```dynamic=True``` when creating a custom Keras component.

10. What are the main rules to respect if you want a function to be convertible to a TF Function?

> - If any external library is called, it will not be part of the graph: the call runs only while the function is being traced.

11. When would you need to create a dynamic Keras model? How do you do that?
Why not make all your models dynamic?

## Questions 12 - 13

The solutions for the questions 12 and 13 are in the file ex_12