# Deep Learning with Python: Keras Functional API

#### Introduction to the Functional API

In [1]:
from keras import Input, layers
input_tensor = Input(shape=(32,))

dense = layers.Dense(32, activation='relu') # A layer is a function

# A layer may be called  on a tensor, and it returns a tensor
output_tensor = dense(input_tensor)

Using TensorFlow backend.


### Sequential Model: Standard VS Functional API

In [2]:
from keras.models import Sequential, Model
from keras import layers
from keras import Input

seq_model= Sequential()
seq_model.add(layers.Dense(32, activation='relu', input_shape=(64,)))
seq_model.add(layers.Dense(32, activation='relu'))
seq_model.add(layers.Dense(10, activation='softmax'))

input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor =  layers.Dense(10, activation='softmax')(x)

model = Model(input_tensor, output_tensor)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_6 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_7 (Dense)              (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


When it comes to compiling, training, or evaluating such an instance of Model, the API is the same as that of Sequential:

In [6]:
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

import numpy as np

# Genrate dummy numpy data to train on
x_train = np.random.random((1000,64))
y_train=  np.random.random((1000,10))

model.fit(x_train, y_train, epochs=10, batch_size=128)

score = model.evaluate(x_train, y_train)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Multi-Input Models: API Implemenatino of a two-input question-answering model

Following is an example of how you can build such a model with the functional API. You set up two independent branches, encoding the text input and the question input as representation vectors; then, concatenate these vectors; and finally, add a softmax classifier on top of the concatenated representations.

In [24]:
from keras.models import Model
from keras import layers
from keras import Input

text_vocabulary_size=10000
question_vocabulary_size=10000
answer_vocabulary_size=500

# The text input is a variable length sequence of integers. 
# Note that you can optionally name the inputs
text_input = Input(shape=(None,), dtype='int32', name='text')

# Embeds the inputs into a sequence of vectors of size 64
embedded_text = layers.Embedding(64, text_vocabulary_size)(text_input)

# Encodes the vectors in a single vector via an LSTM
encoded_text = layers.LSTM(32)(embedded_text)

# Same process (with diffesent instances) for the question
question_input = Input(shape=(None,), dtype='int32',
                      name='question')

embedding_question = layers.Embedding(32, question_vocabulary_size)(question_input)

encoded_question = layers.LSTM(16)(embedding_question)

# Concatenates the encoded question and encoded text
concatenated = layers.concatenate([encoded_text, encoded_question],
                                  axis=1)


# adds softmax classifier on top
answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated)

# At model instantiation, you specify the two intups and the output
model = Model([text_input, question_input], answer)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
             metrics=['acc'])


 Now, how do you train this two-input model? There are two possible APIs: you can feed the model a list of Numpy arrays as inputs, or you can feed it a dictionary that maps input names to Numpy arrays. Naturally, the latter option is available only if you give names to your inputs.

### Feeding data to Multi-Input Model

In [25]:
import numpy as np
num_samples = 1000
max_length = 100

# Generat dummy Numpy data
text = np.random.randint(1, text_vocabulary_size, 
                         size=(num_samples, max_length))
question = np.random.randint(1, question_vocabulary_size,
                             size=(num_samples, max_length))

# Answers are one-hot encoded not integers
answers = np.random.randint(0, 1,
                            size=(num_samples, answer_vocabulary_size))

# Fitting using a list of inputs
model.fit([text, question], answers, epochs=10, batch_size=128)

# Fitting using a dictionary of inputs(only if inputs are named)
model.fit({'text':text, 'question':question}, answers, epochs=10,
         batch_size=128)

Epoch 1/10


InvalidArgumentError: indices[0,0] = 7881 is not in [0, 64)
	 [[Node: embedding_14/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/cpu:0"](embedding_14/embeddings/read, _recv_text_7_0)]]

Caused by op 'embedding_14/Gather', defined at:
  File "/Users/Kavi/anaconda/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/Kavi/anaconda/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-24-14dd3524a495>", line 14, in <module>
    embedded_text = layers.Embedding(64, text_vocabulary_size)(text_input)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/keras/engine/topology.py", line 603, in __call__
    output = self.call(inputs, **kwargs)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/keras/layers/embeddings.py", line 134, in call
    out = K.gather(self.embeddings, inputs)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 1184, in gather
    return tf.gather(reference, indices)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1359, in gather
    validate_indices=validate_indices, name=name)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
    op_def=op_def)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/Kavi/anaconda/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): indices[0,0] = 7881 is not in [0, 64)
	 [[Node: embedding_14/Gather = Gather[Tindices=DT_INT32, Tparams=DT_FLOAT, validate_indices=true, _device="/job:localhost/replica:0/task:0/cpu:0"](embedding_14/embeddings/read, _recv_text_7_0)]]


### Multi-Output Model: Keras API implementation of a three-output model

In the same way, you can use the functional API to build models with multiple outputs (or multiple heads). A simple example is a network that attempts to simultaneously predict different properties of the data, such as a network that takes as input a series of social media posts from a single anonymous person and tries to predict attributes of that person, such as age, gender, and income level (see figure 7.7).

In [30]:
from keras import layers
from keras import Input
from keras.models import Model

vocabulary_size = 5000
num_income_groups =10

post_input = Input(shape=(None, ), dtype='int32', name='posts')
embedded_posts = layers.Embedding(256, vocabulary_size)(post_input)

x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

# Note that the output layer's are given names
age_prediction =layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(num_income_groups, activation='softmax',
                                name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(post_input, [age_prediction, income_prediction,
                          gender_prediction])

Importantly, training such a model requires the ability to specify different loss func- tions for different heads of the network: for instance, age prediction is a scalar regres- sion task, but gender prediction is a binary classification task, requiring a different training procedure. But because gradient descent requires you to minimize a scalar, you must combine these losses into a single value in order to train the model. The simplest way to combine different losses is to sum them all. In Keras, you can use either a list or a dictionary of losses in compile to specify different objects for different outputs; the resulting loss values are summed into a global loss, which is minimized during training.

### Compiliation of a mult-output model: Multiple Losses

In [35]:
model.compile(optimizer='rmsprop', loss=['mse', 
                                         'categorical_crossentropy',
                                        'binary_crossentropy'])
# An equivalent if you give names to the output layer
model.compile(optimizer='rmsprop', loss={'age':'mse',
                                        'income':'categorical_crossentropy',
                                        'gender':'binary_crossentropy'})



Note that very imbalanced loss contributions will cause the model representations to be optimized preferentially for the task with the largest individual loss, at the expense of the other tasks. To remedy this, you can assign different levels of importance to the loss values in their contribution to the final loss. This is useful in particular if the losses’ values use different scales. For instance, the mean squared error (MSE) loss used for the age-regression task typically takes a value around 3–5, whereas the cross- entropy loss used for the gender-classification task can be as low as 0.1. In such a situa- tion, to balance the contribution of the different losses, you can assign a weight of 10 to the crossentropy loss and a weight of 0.25 to the MSE loss.

### Compilation options of a multi-output model: loss weighting

In [36]:
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 
                    'binary_crossentropy'],
              loss_weights=[0.25, 1.0, 10.])
              
model.compile(optimizer='rmsprop', loss={'age':'mse',
                                        'income':'categorical_crossentropy',
                                        'gender':'binary_crossentropy'},
             loss_weights={'age':5, 'income':1.,
                          'gender': 10.})



In [38]:
model.fit(posts, [age_targets, income_targets, gender_targets],
         epochs=10, batch_size=64)

model.fit(posts, {'age':age_targets, 
                  'income':income_targets, 
                  'gender':gender_targets},
         epochs=10, batch_size=64)

NameError: name 'posts' is not defined

### Inception Model

In [39]:
from keras import layers

# Every branch has the same stride value (2), which is necessary to keep 
# all branch outputs the same size so you can concatenate them

# In this branch the striding occurs in the spatial convolutional layer
branch_a = layers.Conv2D(128, 1, activation='relu', strides=2)(x)

branch_b = layers.Conv2D(128, 1, activation='relu')(x)
branch_b = layers.Conv2D(128, 3, activation='relu', strides=2)(branch_b)

# In this branch, the striding occurs in the average pooling layer
branch_c = layers.AveragePooling2D(3, strides=2)(x)
branch_c = layers.Conv2D(128, 3, activation='relu')(branch_c)

branch_d = layers.Conv2D(128, 1, activation='relu')(x)
branch_d = layers.Conv2D(128, 1, activation='relu')(branch_d)
branch_d = layers.Conv2D(128, 1, activation='relu', strides=2)(branch_d)

# Concatenates the branch outputs to obtain the module output
output = layers.concatenate([branch_a, branch_c, branch_d], axis=1)

ValueError: Input 0 is incompatible with layer conv2d_1: expected ndim=4, found ndim=2

Note that the full Inception V3 architecture is available in Keras as keras.applications .inception_v3.InceptionV3

### Residual Connection

This example assumes the exis- tence of a 4D input tensor x:

In [40]:
from keras import layers

#x = ... 

# Applies a transformation to x
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)

# Add the original x back to the output features
y = layers.add([y,x])

ValueError: Input 0 is incompatible with layer conv2d_2: expected ndim=4, found ndim=2

And the following implements a residual connection when the feature-map sizes dif- fer, using a linear residual connection (again, assuming the existence of a 4D input
tensor x):

In [None]:
from keras import layers

x = ...
y = layers.Conv2D(128, 3, activation='relu',padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
y = layers.MaxPooling2D(2, strides=2)(y)

# Use a 1x1 convolution to linearly downsample the original x tensor 
# to the same shape as y
residual = layers.Conv2D(128, 1, strides=2, padding='same')(x)

# Adds the residual tensor back to the output features
y = layers.add([y, residual])

### Layer Weight Sharing

In [None]:
from keras import layers
from keras import Inpyt
from keras.models import Mode

lstm = layers.LSTM(32)

left_input = Input(shape=(None, 128))
left_output = lstm(left_input)

right_input = Input(shape=(None, 128))
right_output = lstm(right_output)

merged = layer.concatenate([left_output, right_output], axis=-1)
prediction = layers.Dense(1, activation='sigmoid')(merged)

model = Model([left_output, right_input], predictions)
model.fit([left_data, right_data], targets)

Naturally, a layer instance may be used more than once—it can be called arbitrarily many times, reusing the same set of weights every time.

### Models as layers

One simple practical example of what you can build by reusing a model instance is a vision model that uses a dual camera as its input: two parallel cameras, a few centi- meters (one inch) apart. Such a model can perceive depth, which can be useful in many applications. You shouldn’t need two independent models to extract visual features from the left camera and the right camera before merging the two feeds. Such low-level processing can be shared across the two inputs: that is, done via layers that use the same weights and thus share the same representations. Here’s how you’d implement a Siamese vision model (shared convolutional base) in Keras:

In [None]:
from keras import layers
from keras import application
from keras import Input

# The base image-processing model is the Xception network 
#(convolutional base only)
xception_base = application.Xception(weights=None, include_top=False)

# The inputs are 250 x 250 RGB images
left_input = Input(shape=(250, 250, 3))
right_input = Input(shape=(250, 250, 3))

# Calls the same vision model twice
left_features = xception_base(left_input)
right_featurse = xception_base(right_input)

# The merged features contain information from the right visual feed
# and the left visual feed
merged_features = layers.concatenate([left_features, right_input], axis=1)

### Keras Callback: Model Checkpointing and Early Stopping Callbacks

In [1]:
import keras


callbacks_list = [keras.callbacks.EarlyStopping(monitor='acc', patience=1),
                  keras.callbacks.ModelCheckpoint(filepath='my_model.h5', 
                                                  monitor='val_loss',save_best_only=True,)]
model.compile(optimizer ='rmsprop',
             loss='binary_crossentropy',
             metrics=['acc'])

model.fit(x, y, epochs=10, batch_size=32,
         callbacks=callbacks_list,
         validation_data=(x_val, y_val))
                  
                  

Using TensorFlow backend.


NameError: name 'model' is not defined

### The ReducetLROnPlateau Callback


You can use this callback to reduce the learning rate when the validation loss has stopped improving. Reducing or increasing the learning rate in case of a loss plateau is is an effective strategy to get out of local minima during training. The following exam- ple uses the ReduceLROnPlateau callback

In [None]:
callbacks_list = [
    keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=10)
]

model.fit(x, y, epochs=10, batch_size=32,
         callbacks=callbacks_list,
         validation_data=(x_val, y_val))

### TensorBoard

In [4]:
import keras
from keras import layers
from keras.datasets import imdb
from keras.preprocessing import sequence

max_features = 2000
max_len = 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

model = keras.models.Sequential()
model.add(layers.Embedding(max_features, 128,
                          input_length=max_len,
                          name='embed'))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.MaxPooling1D(5))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.GlobalMaxPooling1D())
model.add(layers.Dense(1))
model.summary()
model.compile(optimizer='rmsprop',
             loss='binary_crossentropy',
             metrics=['acc'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embed (Embedding)            (None, 500, 128)          256000    
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 494, 32)           28704     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 98, 32)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 92, 32)            7200      
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 291,937
Trainable params: 291,937
Non-trainable params: 0
_________________________________________________________________


### Creating a directory  for TensorBoard Log files

In [5]:
! mkdir my_log_dir

Let’s launch the training with a TensorBoard callback instance. This callback will write
log events to disk at the specified location.

### Training the model with a TensorBoard callback

In [6]:
callbacks = [
    keras.callbacks.TensorBoard(
    log_dir='my_log_dir',
    histogram_freq=1,
    embeddings_freq=1,)
]

history = model.fit(x_train, y_train, 
                   epochs=20,
                   batch_size=128,
                   validation_split=0.2,
                   callbacks=callbacks)

AttributeError: module 'pandas' has no attribute 'computation'

At this point, you can launch the TensorBoard server from the command line, instructing it to read the logs the callback is currently writing. The tensorboard utility should have been automatically installed on your machine the moment you installed TensorFlow (for example, via pip):

In [None]:
!tensorboard --logdir=my_log_dir


You can then browse to http://localhost:6006 and look at your model training (see figure 7.10).

### Keras Graph Layers

Note that Keras also provides another, cleaner way to plot models as graphs of layers rather than graphs of TensorFlow operations: the utility keras.utils.plot_model. Using it requires that you’ve installed the Python pydot and pydot-ng libraries as well as the graphviz library. Let’s take a quick look:


In [None]:
from keras.utils import plot_model

plot_model(model, to_file='model.png')

You also have the option of displaying shape information in the graph of layers. This example visualizes model topology using plot_model and the show_shapes option

In [8]:
from keras.utils import plot_model
plot_model(model, show_shapes=True, to_file='model.png')

ImportError: Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.

### Batch Normalization

Normalization is a broad category of methods that seek to make different samples seen by a machine-learning model more similar to each other, which helps the model learn and generalize well to new data. The most common form of data normalization is one you’ve seen several times in this book already: centering the data on 0 by subtracting the mean from the data, and giving the data a unit standard deviation by dividing the data by its standard deviation. In effect, this makes the assumption that the data fol- lows a normal (or Gaussian) distribution and makes sure this distribution is centered and scaled to unit variance:

In [None]:
normalized data = (data - np.mean(data, axis=...)) // np.std(data, axis=...) 

The BatchNormalization layer is typically used after a convolutional or densely
connected layer:

In [None]:
# After a Conv layer
conv_model.add(layers.Conv2D(32, 3, activation='relu'))
conv_model.add(layers.BatchNormalization())

# After a Dense layer
conv_model.add(layers.Dense(32, activation='relu'))
conv_model.add(layers.BatchNormalization())

### Depthwise Seperable Convolution

What if I told you that there’s a layer you can use as a drop-in replacement for Conv2D that will make your model lighter (fewer trainable weight parameters) and faster (fewer floating-point operations) and cause it to perform a few percentage points bet- ter on its task? That is precisely what the depthwise separable convolution layer does (SeparableConv2D). This layer performs a spatial convolution on each channel of its input, independently, before mixing output channels via a pointwise convolution (a 1 × 1 convolution), as shown in figure 7.16. This is equivalent to separating the learn- ing of spatial features and the learning of channel-wise features, which makes a lot of sense if you assume that spatial locations in the input are highly correlated, but differ- ent channels are fairly independent. It requires significantly fewer parameters and involves fewer computations, thus resulting in smaller, speedier models. And because it’s a more representationally efficient way to perform convolution, it tends to learn better representations using less data, resulting in better-performing models.

These advantages become especially important when you’re training small models from scratch on limited data. For instance, here’s how you can build a lightweight, depthwise separable convnet for an image-classification task (softmax categorical clas- sification) on a small dataset:


In [11]:
from keras.models import Sequential, Model
from keras import layers

height = 64
width = 64
channels = 3
num_classes = 10

model = Sequential()
model.add(layers.SeparableConv2D(32, 3, activation='relu',
                                input_shape=(height, width, channels,)))
model.add(layers.SeparableConv2D(64, 3, activation='relu'))
model.add(layers.MaxPooling2D(2))

model.add(layers.SeparableConv2D(64, 3, activation='relu'))
model.add(layers.SeparableConv2D(128, 3, activation='relu'))
model.add(layers.MaxPooling2D(2))

model.add(layers.SeparableConv2D(64, 3, activation='relu'))
model.add(layers.SeparableConv2D(128, 3, activation='relu'))
model.add(layers.GlobalAveragePooling2D(2))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(num_classes, activation='softmax'))
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

TypeError: separable_conv2d() got an unexpected keyword argument 'data_format'

### Model Ensembling

In [None]:
preds_a = model_a.predict(x_val)
preds_b = model_b.predict(x_val)
preds_c = model_c.predict(x_val)
preds_d = model_d.predict(x_val)

final_preds = 0.25 * (preds_a + preds_b + preds_c + preds_d)

A smarter way to ensemble classifiers is to do a weighted average, where the weights are learned on the validation data—typically, the better classifiers are given a higher weight, and the worse classifiers are given a lower weight. To search for a good set of ensembling weights, you can use random search or a simple optimization algo- rithm such as Nelder-Mead:

In [None]:
preds_a = model_a.predict(x_val)
preds_b = model_b.predict(x_val)
preds_c = model_c.predict(x_val)
preds_d = model_d.predict(x_val)

final_preds = 0.5 * preds_a + 0.25 * preds_b + 0.1 * preds_c + 0.15 * preds_d