# 7.1 Going beyond the Sequential model: the Keras functional API

## 7.1.1 Introduction to the functional API

In [None]:
from keras import Input, layers

# A symbolic input tensor whose samples are 32-dimensional vectors.
input_tensor = Input(shape=(32,))

# A layer instance is a callable object: it behaves like a function on tensors.
dense = layers.Dense(units=32, activation="relu")

# Calling the layer on a tensor returns a new tensor.
output_tensor = dense(input_tensor)

Side by side, here is a simple Sequential model and its equivalent in the functional API:

In [None]:
from keras.models import Sequential, Model
from keras import layers
from keras import Input

# Sequential version: the stack of layers is handed to the constructor.
seq_model = Sequential([
    layers.Dense(32, activation="relu", input_shape=(64,)),
    layers.Dense(32, activation="relu"),
    layers.Dense(10, activation="softmax"),
])

# Functional version: create the same three layers, then wire them together
# by calling each one on the tensor produced by the previous one.
input_tensor = Input(shape=(64,))
first_hidden = layers.Dense(32, activation="relu")
second_hidden = layers.Dense(32, activation="relu")
classifier = layers.Dense(10, activation="softmax")

x = second_hidden(first_hidden(input_tensor))
output_tensor = classifier(x)

# The Model is defined by its input tensor and its output tensor.
model = Model(inputs=input_tensor, outputs=output_tensor)

model.summary()

The reason it works is that output_tensor was obtained by repeatedly transforming input_tensor. If you tried to build a model from inputs and outputs that weren't related, you'd get a RuntimeError

In [None]:
import numpy as np

# Configure the model for training.
model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

# Random dummy data: 1000 samples of 64 features, with 10 target values each.
x_train = np.random.random(size=(1000, 64))
y_train = np.random.random(size=(1000, 10))

# Train for 10 epochs in mini-batches of 128 samples.
model.fit(x_train, y_train, batch_size=128, epochs=10)

# Evaluate on the same data that was used for training.
score = model.evaluate(x_train, y_train)

## 7.1.2 Multi-input models

In [None]:
# Functional API implementation of a two-input question-answering model

from keras.models import Model
from keras import layers
from keras import Input

text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

# The text input is a variable-length sequence of integers.
text_input = Input(shape=(None,), dtype="int32", name="text")

# Embedding takes (input_dim, output_dim): the vocabulary size comes first,
# then the size of the embedding vectors (64 here).
embedded_text = layers.Embedding(text_vocabulary_size, 64)(text_input)

# Encodes the sequence of vectors into a single vector via an LSTM.
encoded_text = layers.LSTM(32)(embedded_text)

# Same pipeline for the question, with its own embedding and LSTM layers.
question_input = Input(shape=(None,),
                       dtype="int32",
                       name="question")

embedded_question = layers.Embedding(question_vocabulary_size, 32)(question_input)

encoded_question = layers.LSTM(16)(embedded_question)

# Concatenates the encoded question and encoded text.
concatenated = layers.concatenate([encoded_text, encoded_question], axis=-1)

# Adds a softmax classifier over the answer vocabulary on top.
answer = layers.Dense(answer_vocabulary_size, activation="softmax")(concatenated)

# At model instantiation, you specify the two inputs and the output.
model = Model([text_input, question_input], answer)
model.compile(optimizer="rmsprop",
              loss="categorical_crossentropy",
              metrics=["acc"])

In [None]:
# Feeding data to a multi-input model

import numpy as np

num_samples = 1000
max_length = 100

# Generates dummy Numpy data: integer token ids for the text and the question.
text = np.random.randint(1, text_vocabulary_size, size=(num_samples, max_length))

question = np.random.randint(1, question_vocabulary_size, size=(num_samples, max_length))

# Answers are one-hot encoded, not integers: draw one random class index per
# sample and set exactly that column to 1. The result is stored in `answers`
# (plural) so it matches the fit() calls below and does not overwrite the
# `answer` output tensor of the model.
answer_indices = np.random.randint(0, answer_vocabulary_size, size=num_samples)
answers = np.zeros((num_samples, answer_vocabulary_size), dtype="float32")
answers[np.arange(num_samples), answer_indices] = 1.0

# Fitting using a list of inputs.
model.fit([text, question], answers, epochs=10, batch_size=128)

# Fitting using a dictionary of inputs (only if inputs are named).
model.fit({"text": text, "question": question}, answers, epochs=10, batch_size=128)

## 7.1.3 Multi-output models

In [None]:
# Functional API implementation of a three-output model

vocabulary_size = 50000
num_income_groups = 10

# Variable-length sequences of integer token ids.
posts_input = Input(shape=(None,), dtype="int32", name="posts")

# Embedding takes (input_dim, output_dim): vocabulary size first, then the
# embedding dimension (256 here).
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)

# Shared 1D convolutional trunk used by all three prediction heads.
x = layers.Conv1D(128, 5, activation="relu")(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation="relu")(x)
x = layers.Conv1D(256, 5, activation="relu")(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation="relu")(x)
x = layers.Conv1D(256, 5, activation="relu")(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation="relu")(x)

# The output layers are named so losses and targets can be given by name.
age_prediction = layers.Dense(1, name="age")(x)
income_prediction = layers.Dense(num_income_groups,
                                 activation="softmax",
                                 name="income")(x)

gender_prediction = layers.Dense(1, activation="sigmoid", name="gender")(x)

model = Model(posts_input, [age_prediction, income_prediction, gender_prediction])

In [None]:
# Compilation options of a multi-output model: multiple losses

# Option 1: a list of losses, in the same order as the model's outputs.
model.compile(
    loss=["mse", "categorical_crossentropy", "binary_crossentropy"],
    optimizer="rmsprop",
)

# Option 2: a mapping from output-layer name to loss (only possible when the
# output layers are named).
model.compile(
    loss=dict(
        age="mse",
        income="categorical_crossentropy",
        gender="binary_crossentropy",
    ),
    optimizer="rmsprop",
)

In [None]:
# Compilation options of a multi-output model: loss weighting

# List form: losses and weights are given in the order of the model's outputs.
# The age loss is "mse" so that this call matches the dictionary form below.
model.compile(optimizer="rmsprop",
              loss=["mse", "categorical_crossentropy", "binary_crossentropy"],
              loss_weights=[0.25, 1., 10.])

# Dictionary form: same configuration, keyed by output-layer name.
model.compile(optimizer="rmsprop",
              loss={"age": "mse",
                    "income": "categorical_crossentropy",
                    "gender": "binary_crossentropy"},
              loss_weights={"age": 0.25,
                            "income": 1.,
                            "gender": 10.})

In [None]:
# Feeding data to a multi-output model
# (posts, age_targets, income_targets and gender_targets are not defined in
# this cell; they are expected to be Numpy arrays prepared elsewhere.)

# Targets given as a list, in the same order as the model's outputs.
model.fit(
    posts,
    [age_targets, income_targets, gender_targets],
    batch_size=64,
    epochs=10,
)

# Targets given as a mapping keyed by output-layer name.
model.fit(
    posts,
    dict(age=age_targets, income=income_targets, gender=gender_targets),
    batch_size=64,
    epochs=10,
)

## 7.1.4 Directed acyclic graphs of layers

In [None]:
# Inception-style module: four parallel branches applied to the same input x.
# NOTE(review): assumes x is a 4D image-feature tensor defined elsewhere — confirm.
#
# Every strided or 3x3 operation uses padding="same" so that all four branches
# end up with the same height and width. With the default "valid" padding the
# branches shrink by different amounts and the final concatenation fails.

# Branch a: a single strided 1x1 convolution.
branch_a = layers.Conv2D(128, 1, activation="relu", strides=2, padding="same")(x)

# Branch b: 1x1 convolution followed by a strided 3x3 convolution.
branch_b = layers.Conv2D(128, 1, activation="relu")(x)
branch_b = layers.Conv2D(128, 3, activation="relu", strides=2, padding="same")(branch_b)

# Branch c: strided average pooling followed by a 3x3 convolution.
branch_c = layers.AveragePooling2D(3, strides=2, padding="same")(x)
branch_c = layers.Conv2D(128, 3, activation="relu", padding="same")(branch_c)

# Branch d: 1x1 convolution, 3x3 convolution, then a strided 3x3 convolution.
branch_d = layers.Conv2D(128, 1, activation="relu")(x)
branch_d = layers.Conv2D(128, 3, activation="relu", padding="same")(branch_d)
branch_d = layers.Conv2D(128, 3, activation="relu", strides=2, padding="same")(branch_d)

# Concatenates the branch outputs along the last axis to obtain the module output.
output = layers.concatenate([branch_a, branch_b, branch_c, branch_d], axis=-1)

How to implement a residual connection in Keras

In [None]:
# Placeholder: replace `...` with a real 4D tensor. Because the convolutions
# below produce 128 filters with padding="same", x must also have 128 channels
# for the element-wise addition at the end to be valid.
x = ...

# Applies a stack of three shape-preserving convolutions to x.
y = layers.Conv2D(128, 3, activation="relu", padding="same")(x)
y = layers.Conv2D(128, 3, activation="relu", padding="same")(y)
y = layers.Conv2D(128, 3, activation="relu", padding="same")(y)

# Adds the original x back to the output features.
y = layers.add([y, x])

## 7.1.5 Layer weight sharing

In [None]:
# Instantiates a single LSTM layer once; it is reused for both inputs below,
# so both branches share the same weights.
lstm = layers.LSTM(32)

# Left branch: variable-length sequences of 128-dimensional vectors.
left_input = Input(shape=(None, 128))
left_output = lstm(left_input)

# Right branch: calling the same layer instance again reuses its weights.
right_input = Input(shape=(None, 128))
right_output = lstm(right_input)

# Binary classifier on top of the two concatenated encodings.
merged = layers.concatenate([left_output, right_output], axis=-1)
predictions = layers.Dense(1, activation="sigmoid")(merged)

model = Model([left_input, right_input], predictions)

# A model must be compiled before it can be trained; binary cross-entropy
# matches the single sigmoid output.
model.compile(optimizer="rmsprop", loss="binary_crossentropy")

# left_data, right_data and targets are not defined in this cell; they are
# expected to be prepared elsewhere.
model.fit([left_data, right_data], targets)

## 7.1.6 Models as layers

In [None]:
from keras import layers
from keras import applications
from keras import Input

# The base image-processing model is the Xception network (convolutional base
# only, no pretrained weights).
xception_base = applications.Xception(weights=None,
                                      include_top=False)

# The inputs are 250 x 250 RGB images.
left_input = Input(shape=(250, 250, 3))
right_input = Input(shape=(250, 250, 3))

# Calls the same model twice, so both inputs go through the same weights.
# The right-branch result gets its own name so the `right_input` tensor is
# not overwritten and can still be used as a model input.
left_features = xception_base(left_input)
right_features = xception_base(right_input)

# The merged features contain information from both image feeds.
merged_features = layers.concatenate([left_features, right_features], axis=-1)

# 7.2 Inspecting and monitoring deep-learning models using Keras callbacks and TensorBoard

## 7.2.1 Using callbacks to act on a model during training

In [None]:
# The ModelCheckpoint and EarlyStopping callbacks

import keras

callbacks_list = [
    # Interrupts training when improvement stops.
    keras.callbacks.EarlyStopping(
        # Monitors the model's validation accuracy. The metric is registered
        # as "acc" in compile() below, so its validation counterpart is
        # "val_acc"; plain "acc" would watch the training accuracy instead.
        monitor="val_acc",
        # Interrupts training when accuracy has stopped improving for more
        # than one epoch.
        patience=1,
    ),
    # Saves the model to disk during training.
    keras.callbacks.ModelCheckpoint(
        filepath="my_model.h5",  # Path to the destination model file
        # With save_best_only=True the file is only overwritten when val_loss
        # has improved, so it always holds the best model seen so far.
        monitor="val_loss",
        save_best_only=True,
    ),
]

# Accuracy is monitored by a callback, so it must be part of the metrics.
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["acc"])

# Both callbacks monitor validation quantities, so validation_data is passed.
# x, y, x_val and y_val are not defined in this cell.
model.fit(x, y,
          epochs=10,
          batch_size=32,
          callbacks=callbacks_list,
          validation_data=(x_val, y_val))

In [None]:
# The ReduceLROnPlateau callback

callbacks_list = [
    keras.callbacks.ReduceLROnPlateau(
        # Monitors the model's validation loss ("loss" alone would watch the
        # training loss instead).
        monitor="val_loss",
        # Divides the learning rate by 10 when triggered.
        factor=0.1,
        # The callback is triggered after the validation loss has stopped
        # improving for 10 epochs.
        patience=10,
    )
]

# The callback monitors the validation loss, so validation_data is passed.
# x, y, x_val and y_val are not defined in this cell.
model.fit(x, y,
          epochs=10,
          batch_size=32,
          callbacks=callbacks_list,
          validation_data=(x_val, y_val))

In [None]:
# Writing own callback

import keras
import numpy as np

class ActivationLogger(keras.callbacks.Callback):
    """Callback that saves layer activations for one validation sample per epoch.

    At the end of every epoch, the outputs of all layers of the model are
    computed for the first validation sample and written to
    ``activations_at_epoch_<epoch>.npz`` in the current working directory.
    """

    def set_model(self, model):
        """Store the model and build a companion model exposing every layer output.

        Args:
            model: The model whose layer activations should be recorded.
        """
        self.model = model
        layer_outputs = [layer.output for layer in model.layers]
        # Maps the model's input to the output of each of its layers.
        self.activations_model = keras.models.Model(model.input, layer_outputs)

    def on_epoch_end(self, epoch, logs=None):
        """Compute and save the activations for the first validation sample.

        Args:
            epoch: Epoch index; used to build the output file name.
            logs: Unused.

        Raises:
            RuntimeError: If no validation data is available on the callback.
        """
        # getattr() so that a missing attribute yields the intended
        # RuntimeError rather than an AttributeError.
        validation_data = getattr(self, "validation_data", None)
        if validation_data is None:
            raise RuntimeError("Requires validation_data.")

        # First input sample of the validation data, kept as a batch of size 1.
        validation_sample = validation_data[0][0:1]
        activations = self.activations_model.predict(validation_sample)

        # .npz is a binary format, so the file is opened in "wb" mode; the
        # context manager closes it even if saving fails.
        filename = "activations_at_epoch_" + str(epoch) + ".npz"
        with open(filename, "wb") as f:
            if isinstance(activations, (list, tuple)):
                # One array per layer (stored as arr_0, arr_1, ...); layers
                # generally have different output shapes, so they cannot be
                # packed into a single array.
                np.savez(f, *activations)
            else:
                np.savez(f, activations)


# Backward-compatible alias for the original (misspelled) class name.
ActivationLonger = ActivationLogger

## 7.2.2 Introduction to TensorBoard: the TensorFlow visualization framework

In [None]:
# Text-classification model to use with TensorBoard

from keras.datasets import imdb
from keras.preprocessing import sequence

max_features = 2000  # Number of words to consider as features
max_len = 500  # Cuts off texts after this number of words

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Pads/truncates every review to exactly max_len tokens. The name used here
# must match the variable defined above (`max_len`).
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

model = keras.models.Sequential()
model.add(layers.Embedding(max_features, 128, input_length=max_len, name="embed"))
model.add(layers.Conv1D(32, 7, activation="relu"))
model.add(layers.MaxPooling1D(5))
model.add(layers.Conv1D(32, 7, activation="relu"))
model.add(layers.GlobalMaxPooling1D())
# Sigmoid output: binary_crossentropy (as configured below) expects a
# probability in [0, 1], not an unbounded linear output.
model.add(layers.Dense(1, activation="sigmoid"))
model.summary()
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["acc"])


In [None]:
# Training the model with a TensorBoard callback

callbacks = [
    keras.callbacks.TensorBoard(
        log_dir="my_log_dir",  # Log files will be written at this location
        histogram_freq=1,  # Records activation histograms every 1 epoch
        embeddings_freq=1,  # Records embedding data every 1 epoch
    )
]

# 20% of the training data is held out for validation.
history = model.fit(x_train, y_train,
                    epochs=20,
                    batch_size=128,
                    validation_split=0.2,
                    callbacks=callbacks)

# 7.3 Getting the most out of your models

## 7.3.1 Advanced architecture patterns

In [None]:
# Batch Normalization

# Illustrative snippet: centers the data on 0 and scales it to unit standard
# deviation. Each `...` is a placeholder for the axis (or axes) to normalize
# over and must be replaced before running; `data` is not defined in this cell.
normalized_data = (data - np.mean(data, axis=...)) / np.std(data, axis=...)

In [None]:
# The BatchNormalization layer is typically used after a convolutional or densely connected layer
# (conv_model and dense_model are not defined in this cell.)

# After a Conv2D layer; the keyword argument is `activation` (singular).
conv_model.add(layers.Conv2D(32, 3, activation="relu"))
conv_model.add(layers.BatchNormalization())

# After a Dense layer.
dense_model.add(layers.Dense(32, activation="relu"))
dense_model.add(layers.BatchNormalization())

In [None]:
# Depthwise Separable Convolution

height = 64
width = 64
channels = 3
num_classes = 10

# The whole stack is handed to the Sequential constructor in one go.
model = Sequential([
    # Block 1: two separable convolutions followed by 2x2 max pooling.
    layers.SeparableConv2D(32, 3,
                           activation="relu",
                           input_shape=(height, width, channels,)),
    layers.SeparableConv2D(64, 3, activation="relu"),
    layers.MaxPooling2D(2),

    # Block 2: same pattern.
    layers.SeparableConv2D(64, 3, activation="relu"),
    layers.SeparableConv2D(128, 3, activation="relu"),
    layers.MaxPooling2D(2),

    # Block 3: two separable convolutions, then global average pooling.
    layers.SeparableConv2D(64, 3, activation="relu"),
    layers.SeparableConv2D(128, 3, activation="relu"),
    layers.GlobalAveragePooling2D(),

    # Classifier head with one softmax unit per class.
    layers.Dense(32, activation="relu"),
    layers.Dense(num_classes, activation="softmax"),
])

model.compile(loss="categorical_crossentropy", optimizer="rmsprop")

## 7.3.3 Model ensembling

In [None]:
# Use four different models to compute initial predictions
# (model_a..model_d and x_val are not defined in this cell.)

preds_a, preds_b, preds_c, preds_d = (
    member.predict(x_val)
    for member in (model_a, model_b, model_c, model_d)
)

# Equal-weight average of the four prediction arrays.
final_preds = 0.25 * (preds_a + preds_b + preds_c + preds_d)

In [None]:
# Predictions of the four models on the validation inputs
# (model_a..model_d and x_val are not defined in this cell.)
preds_a, preds_b, preds_c, preds_d = (
    member.predict(x_val)
    for member in (model_a, model_b, model_c, model_d)
)

# Weighted average with one fixed weight per model; the weights sum to 1.
final_preds = (
    0.5 * preds_a
    + 0.25 * preds_b
    + 0.1 * preds_c
    + 0.15 * preds_d
)