In [2]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

## 1. Defining Custom Layers 

The fundamental data structure in neural networks is the layer. A Layer is an object that encapsulates some state (weights) and some computation (a forward pass). 

`tf.keras.layers.Layer` is the base class of all Keras layers, and it inherits from `tf.Module`

#### a)  Define a Layer

In [3]:
class MyDense(tf.keras.layers.Layer):
    """Dense layer followed by ReLU, with weights created at construction time.

    Args:
        in_features: Size of the last axis of the inputs.
        out_features: Size of the last axis of the outputs.
        **kwargs: Forwarded to the base Keras layer (e.g. `name`).
    """

    def __init__(self, in_features, out_features, **kwargs):
        super().__init__(**kwargs)
        # Kernel is initialised from a random-normal sample, bias from zeros.
        kernel_init = tf.random.normal([in_features, out_features])
        self.w = tf.Variable(kernel_init, name='w')
        bias_init = tf.zeros([out_features])
        self.b = tf.Variable(bias_init, name='b')

    def call(self, x):
        """Return `relu(x @ w + b)`."""
        pre_activation = tf.matmul(x, self.w) + self.b
        return tf.nn.relu(pre_activation)


In [5]:
# Build the layer: 2 input features are mapped to 4 output features
simple_layer = MyDense(in_features=2, out_features=4, name="simple")

# Apply the layer to a random batch of shape (4, 2)
x = tf.random.normal(shape=(4, 2))
y = simple_layer(x)

print(y)

tf.Tensor(
[[0.12239385 0.         0.         1.1331159 ]
 [0.         1.4213165  0.28176796 0.01418395]
 [0.22830437 0.         0.         0.        ]
 [0.         0.41254354 0.47291455 0.        ]], shape=(4, 4), dtype=float32)


#### b) Build Method 

It is often convenient to delay creating variables until the input shape is fixed.

In [6]:
class MyDense(tf.keras.layers.Layer):
    """Linear layer whose weights are created lazily in `build`.

    Args:
        units: Size of the last axis of the outputs.
        **kwargs: Forwarded to the base Keras layer (e.g. `name`).
    """

    def __init__(self, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create `w` and `b` once the size of the input's last axis is known."""
        in_features = input_shape[-1]
        kernel_init = tf.random.normal([in_features, self.units])
        self.w = tf.Variable(kernel_init, name='w')
        self.b = tf.Variable(tf.zeros([self.units]), name='b')

    def call(self, inputs):
        """Return `inputs @ w + b` (no activation)."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

In [7]:
# Build the layer; only the output width is given, the input width is inferred later
flexible_layer = MyDense(units=4, name="simple")

# The first call on a (2, 2) batch triggers `build` and creates the weights
x = tf.random.normal(shape=(2, 2))
y = flexible_layer(x)

print(y)

tf.Tensor(
[[ 1.2818995   3.6266985  -2.1463177  -1.3724216 ]
 [-0.45315763 -0.60562986  1.2862213   0.23369679]], shape=(2, 4), dtype=float32)


In [8]:
# Now that the layer has been called once, its variables exist and can be inspected

flexible_layer.variables

[<tf.Variable 'simple/w:0' shape=(2, 4) dtype=float32, numpy=
 array([[ 0.09635675,  1.4592559 ,  0.764034  , -0.54429656],
        [ 0.88694566,  2.3231995 , -1.6301693 , -0.880391  ]],
       dtype=float32)>,
 <tf.Variable 'simple/b:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

In [9]:
# Individual variables can also be accessed as attributes of the layer

flexible_layer.b

<tf.Variable 'simple/b:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>

#### c) Non-trainable weights 

By default, the variables in a layer are trainable, i.e. they will be tracked by the Gradient Tape and updated during backpropagation. However, we can also mark certain weights as non-trainable.

In [10]:
class MyDense(tf.keras.layers.Layer):
    """Linear layer with a trainable kernel and a non-trainable bias.

    Args:
        units: Size of the last axis of the outputs.
        **kwargs: Forwarded to the base Keras layer (e.g. `name`).
    """

    def __init__(self, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        """Create the weights once the size of the input's last axis is known."""
        kernel_shape = [input_shape[-1], self.units]
        # The kernel is marked trainable ...
        self.w = tf.Variable(
            tf.random.normal(kernel_shape), name='w', trainable=True)
        # ... while the bias is explicitly marked as non-trainable.
        self.b = tf.Variable(
            tf.zeros([self.units]), name='b', trainable=False)

    def call(self, inputs):
        """Return `inputs @ w + b` (no activation)."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

In [11]:
# Instantiate the layer with 16 output units
my_new_layer = MyDense(units=16)

# The first call on a (2, 2) batch builds the weights
x = tf.random.normal(shape=(2, 2))
y = my_new_layer(x)

y.shape

TensorShape([2, 16])

In [12]:
# Count all weights of the layer and the subset that is non-trainable
print("weights:", len(my_new_layer.weights))
print("non-trainable weights:", len(my_new_layer.non_trainable_weights))


# The bias `b` was created with trainable=False, so it is listed under
# non_trainable_weights and not under trainable_weights:
print("\n trainable_weights:", my_new_layer.trainable_weights)
print("\n non trainable_weights:", my_new_layer.non_trainable_weights)

weights: 2
non-trainable weights: 1

 trainable_weights: [<tf.Variable 'my_dense/w:0' shape=(2, 16) dtype=float32, numpy=
array([[ 1.2969841 ,  0.610474  , -1.5344253 ,  1.0515987 ,  0.23048133,
        -0.45529392,  1.4825242 , -0.24479723,  0.37186486, -1.1000478 ,
         0.47395763, -0.34441286,  1.3781023 , -0.566258  ,  0.8011622 ,
        -1.2585021 ],
       [-0.42209846,  1.3883487 ,  1.7076505 , -0.6812025 , -0.35678172,
        -1.0544008 , -0.20595   ,  0.9043317 , -0.45903835,  0.8311017 ,
        -0.313266  ,  1.6771985 , -0.1826686 ,  0.3008226 , -0.8782128 ,
         1.1014637 ]], dtype=float32)>]

 non trainable_weights: [<tf.Variable 'my_dense/b:0' shape=(16,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)>]


#### d) training arg in call()  

In [13]:
class CustomDropout(tf.keras.layers.Layer):
    """Dropout layer that is only active when called with a truthy `training`.

    Args:
        rate: Drop rate passed to `tf.nn.dropout`.
        **kwargs: Forwarded to the base Keras layer (e.g. `name`).
    """

    def __init__(self, rate, **kwargs):
        super().__init__(**kwargs)
        self.rate = rate

    def call(self, inputs, training=None):
        """Apply dropout when `training` is truthy, otherwise return `inputs` as-is."""
        # Guard clause: with training=False/None the layer is the identity.
        if not training:
            return inputs
        return tf.nn.dropout(inputs, rate=self.rate)

In [14]:
# Drop rate of 0.5 for the demonstration
dropout = CustomDropout(rate=0.5)

x = tf.random.normal((2,2))
print('input: ', x)


# During training: dropout is applied to the input
output_during_training = dropout(x, training=True)
print('\n output_during_training: ', output_during_training)


# During inference: the input is returned unchanged
output_during_inference = dropout(x, training=False)
print('\n output_during_inference: ', output_during_inference)

input:  tf.Tensor(
[[-1.0439885   0.01639184]
 [-0.2634523  -1.0460414 ]], shape=(2, 2), dtype=float32)

 output_during_training:  tf.Tensor(
[[-2.087977    0.03278367]
 [-0.         -2.0920827 ]], shape=(2, 2), dtype=float32)

 output_during_inference:  tf.Tensor(
[[-1.0439885   0.01639184]
 [-0.2634523  -1.0460414 ]], shape=(2, 2), dtype=float32)


#### e) Recursively composable

It is also possible to compose a layer out of other layers. The outer layer will automatically track the weights of the inner layers.

In [15]:
# Reuse the `MyDense` layer (the variant with a `build` method) defined above.


class MLPBlock(tf.keras.layers.Layer):
    """Three stacked `MyDense` layers (32 -> 32 -> 1 units) with ReLU in between.

    The inner layers are stored as attributes, so the outer layer tracks
    their weights automatically.

    Args:
        **kwargs: Forwarded to the base Keras layer (e.g. `name`), matching
            the other custom layers in this notebook.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.dense_1 = MyDense(32)
        self.dense_2 = MyDense(32)
        self.dense_3 = MyDense(1)

    def call(self, inputs):
        """Run the inputs through the three layers; no activation on the last one."""
        x = self.dense_1(inputs)
        x = tf.nn.relu(x)
        x = self.dense_2(x)
        x = tf.nn.relu(x)
        return self.dense_3(x)


# Instantiating the block does not create any weights yet
mlp = MLPBlock()
y = mlp(tf.ones(shape=(3, 64)))  # The first call to the `mlp` will create the weights
# Each inner MyDense layer contributes a kernel `w` and a bias `b`
print("weights:", len(mlp.weights))
print("trainable weights:", len(mlp.trainable_weights))
print("y.shape: ", y.shape)

weights: 6
trainable weights: 3
y.shape:  (3, 1)


## 2. Defining Models: Three Levels of abstraction

Given a set of (either predefined or custom-defined) layers, we can begin composing them into a DAG to define a model. A `tf.keras.Model` is similar to a `tf.keras.layers.Layer`, except that models come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines.

#### a) Sequential

A Sequential model is appropriate for a plain stack of layers where each layer has exactly one input tensor and one output tensor.

In [16]:
# There are two equivalent ways to define a Sequential model:

# 1. Pass the list of layers to the constructor

model = tf.keras.Sequential(
    [
        tf.keras.layers.InputLayer(input_shape=(4,)),
        tf.keras.layers.Dense(32),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Dense(16),
        tf.keras.layers.ReLU(),
        tf.keras.layers.Dense(1)
    ]
)


# 2. Or instantiate an empty Sequential model and append layers one by one with .add()
#    (this rebinds `model`, so the model from option 1 is discarded)
model = tf.keras.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(4,)))
model.add(tf.keras.layers.Dense(32))
model.add(tf.keras.layers.ReLU())
model.add(tf.keras.layers.Dense(16))
model.add(tf.keras.layers.ReLU())
model.add(tf.keras.layers.Dense(1))

In [17]:
# Now we can call the model on an input tensor: a batch of 16 samples with 4 features each
x = tf.ones((16, 4))
y = model(x)

print(y.shape)

(16, 1)


In [18]:
# The summary method prints each layer with its output shape and parameter count
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 32)                160       
_________________________________________________________________
re_lu_2 (ReLU)               (None, 32)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 16)                528       
_________________________________________________________________
re_lu_3 (ReLU)               (None, 16)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 17        
Total params: 705
Trainable params: 705
Non-trainable params: 0
_________________________________________________________________


#### b) Functional API 

The __Functional API__ is more flexible than Sequential, and comes in handy specifically when the model has a non-linear topology, shared layers, and/or multiple inputs or outputs.

First, let's redefine the above model using the Functional API.

In [19]:
# Symbolic input describing samples with 4 features
inputs = tf.keras.Input(shape=(4,))

# Each layer is called on the output of the previous one
x = tf.keras.layers.Dense(32)(inputs)
x = tf.keras.layers.ReLU()(x)
x = tf.keras.layers.Dense(16)(x)
x = tf.keras.layers.ReLU()(x)

outputs = tf.keras.layers.Dense(1)(x)


# The model is defined by its input and output tensors
model = tf.keras.Model(inputs=inputs, outputs=outputs, name="functional_model")

In [20]:
# Print the layers of the functional model with output shapes and parameter counts
model.summary()

Model: "functional_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 4)]               0         
_________________________________________________________________
dense_6 (Dense)              (None, 32)                160       
_________________________________________________________________
re_lu_4 (ReLU)               (None, 32)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 16)                528       
_________________________________________________________________
re_lu_5 (ReLU)               (None, 16)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 1)                 17        
Total params: 705
Trainable params: 705
Non-trainable params: 0
____________________________________________________

With the Functional API, it's easy to define more complex topologies. Let's define a model with multiple inputs and outputs.

Let's say we want a model that takes in a few weather data variables on any given day to predict temperature and humidity for the same day:

Inputs:

- Pressure
- Precipitation
- Clouds
- Wind


Outputs:

- Temperature
- Humidity

In [21]:
# Now let's build this model

# One scalar input per weather variable
pressure_input = tf.keras.layers.Input(shape=(1,), name='pressure')
precipitation_input = tf.keras.layers.Input(shape=(1,), name='precipitation')
clouds_input = tf.keras.layers.Input(shape=(1,), name='clouds')
wind_input = tf.keras.layers.Input(shape=(1,), name='wind')


# Let's pass pressure and precipitation through one stack of dense layers, and clouds and wind through another
x = tf.keras.layers.concatenate([pressure_input, precipitation_input])
x = tf.keras.layers.Dense(units=32, activation='relu')(x)
x = tf.keras.layers.Dense(units=16, activation='relu')(x)


y = tf.keras.layers.concatenate([clouds_input, wind_input])
y = tf.keras.layers.Dense(units=32, activation='relu')(y)
y = tf.keras.layers.Dense(units=16, activation='relu')(y)


# Let's merge the two branches and send the result through a few more layers
z = tf.keras.layers.concatenate([x,y])
z = tf.keras.layers.Dense(units=32, activation='relu')(z)
z = tf.keras.layers.Dense(units=16, activation='relu')(z)

# Finally, split again into two outputs (one single-unit head each)
temperature = tf.keras.layers.Dense(units=1)(z)
humidity = tf.keras.layers.Dense(units=1)(z)


model = tf.keras.Model(inputs=[pressure_input, precipitation_input, clouds_input, wind_input], 
                       outputs=[temperature, humidity], name="multi_input_output_model")

In [22]:
# We can print the summary, but for a branched model the text table makes the graph hard to visualize
model.summary()

Model: "multi_input_output_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
pressure (InputLayer)           [(None, 1)]          0                                            
__________________________________________________________________________________________________
precipitation (InputLayer)      [(None, 1)]          0                                            
__________________________________________________________________________________________________
clouds (InputLayer)             [(None, 1)]          0                                            
__________________________________________________________________________________________________
wind (InputLayer)               [(None, 1)]          0                                            
___________________________________________________________________________

In [23]:
# Luckily we can also plot the model
# NOTE: plot_model needs pydot and graphviz installed; otherwise it only reports that they are missing
tf.keras.utils.plot_model(model, show_shapes=True)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


#### c) Subclassing 

In [24]:
class FCN(tf.keras.Model):
    """Fully connected network: Dense(32) -> ReLU -> Dense(16) -> ReLU -> Dense(1).

    The layers are created in `__init__` and wired together in `call`;
    a single ReLU layer instance is reused after both hidden layers.
    """

    def __init__(self):
        super().__init__()
        self.dense_1 = tf.keras.layers.Dense(32)
        self.dense_2 = tf.keras.layers.Dense(16)
        self.dense_3 = tf.keras.layers.Dense(1)
        self.relu = tf.keras.layers.ReLU()

    def call(self, inputs):
        """Forward pass; the output layer has no activation."""
        hidden_1 = self.relu(self.dense_1(inputs))
        hidden_2 = self.relu(self.dense_2(hidden_1))
        return self.dense_3(hidden_2)

In [25]:
# Instantiate the subclassed model
model = FCN()


# Call the model on an input tensor: a batch of 16 samples with 4 features each
x = tf.ones((16, 4))
y = model(x)

print(y.shape)

(16, 1)


In [26]:
# Print the summary of the subclassed model (possible now that it has been called on an input)

model.summary()

Model: "fcn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_17 (Dense)             multiple                  160       
_________________________________________________________________
dense_18 (Dense)             multiple                  528       
_________________________________________________________________
dense_19 (Dense)             multiple                  17        
_________________________________________________________________
re_lu_6 (ReLU)               multiple                  0         
Total params: 705
Trainable params: 705
Non-trainable params: 0
_________________________________________________________________


## 3. Training: Three Levels of abstraction

For this exercise, we will fix the model architecture (a small CNN) and train it on the MNIST dataset. 

In [27]:
# Prepare Dataset

from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

NUM_CLASSES = 10     # one class per digit label
NUM_TRAIN = 50000    # samples kept for training; the remainder is used for validation

(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Add a trailing channel axis and scale the pixel values by 1/255.
# Using -1 lets NumPy infer the number of samples instead of hard-coding it.
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255

# Hold out everything after the first NUM_TRAIN samples as the validation set
train_images, val_images = train_images[:NUM_TRAIN], train_images[NUM_TRAIN:]
train_labels, val_labels = train_labels[:NUM_TRAIN], train_labels[NUM_TRAIN:]

# One-hot encode the labels. Passing num_classes explicitly guarantees the same
# width for every split, rather than inferring it from the largest label present.
train_labels = to_categorical(train_labels, num_classes=NUM_CLASSES)
val_labels = to_categorical(val_labels, num_classes=NUM_CLASSES)
test_labels = to_categorical(test_labels, num_classes=NUM_CLASSES)

In [33]:
# Define Model: three conv layers (with max-pooling after the first two),
# followed by a dense classifier with a 10-way softmax output

model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ]
)


model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 13, 13, 32)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 11, 11, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 5, 5, 64)          0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 3, 64)          36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 576)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 64)               

#### a) Model.fit() method 

To use the built-in methods (`Model.fit()`, `Model.evaluate()`, `Model.predict()`), we simply need to specify the
- optimizer
- loss
- metrics

and compile the model.

In [34]:
model.compile(
    # Optimizer used to update the weights
    optimizer = tf.keras.optimizers.Adam(),
    # Loss function to minimize (labels were one-hot encoded with to_categorical)
    loss = tf.keras.losses.CategoricalCrossentropy(),
    # List of metrics to monitor during training and evaluation
    metrics = [tf.keras.metrics.CategoricalAccuracy()],
)

The `.fit()` method will accept `numpy arrays`, `tf.data.Dataset` objects and `data generators`. Here we will input the MNIST data as a numpy array.

The `.fit()` method can slice the data into batches, and will iterate over the entire dataset for a given number of epochs. Additionally, after each epoch it will evaluate on a hold-out validation set if specified.

In [35]:
# Train for 2 epochs in batches of 64, evaluating on the validation split along the way
history = model.fit(
    train_images,
    train_labels,
    batch_size=64,
    epochs=2,
    validation_data=(val_images, val_labels),
)

Epoch 1/2
Epoch 2/2


The returned history object holds a record of the loss and metric values recorded at the end of each epoch during training:

In [37]:
# Dict mapping each loss/metric name to its list of per-epoch values
history.history

{'loss': [0.19717292487621307, 0.054348211735486984],
 'categorical_accuracy': [0.9392399787902832, 0.983020007610321],
 'val_loss': [0.07050801813602448, 0.05096294358372688],
 'val_categorical_accuracy': [0.9803000092506409, 0.9854000210762024]}

After training, we can call the `evaluate` or `predict` methods on a test set.

In [42]:
# Evaluate on the test set; reports the loss followed by the compiled metric
model.evaluate(test_images, test_labels)



[0.04175985977053642, 0.986299991607666]

In [44]:
# Run the model on the test images (the final layer is a 10-unit softmax)
predictions = model.predict(test_images, verbose=1)



In [45]:
# One row per test image, one column per output unit
predictions.shape

(10000, 10)

#### b) Customizing what happens in Model.fit()

#### c) Training Loop from scratch