# Ch 3b: Design Patterns 7 and 8

Design Pattern 7: Ensembles

Design Pattern 8: Cascade

# Design 7: Ensembles

Common ensemble methods are boosting, bagging, and stacking. This notebook focuses only on stacking.

Stacking combines the outputs of several initial models, usually of different types, by feeding them as features into a secondary meta-model. The meta-model learns how best to combine those outputs to decrease the training error. Stacking can be applied to any problem type.

## Stacking model

In [1]:
import os

import pandas as pd
import tensorflow as tf

from tensorflow import keras
from tensorflow import feature_column as fc
from tensorflow.keras import layers, models, Model

In [2]:
# Load the training split into pandas; `df` is used further down to derive
# the mother_race vocabulary for the categorical feature columns.
df = pd.read_csv('data/babyweight_train.csv')
# Preview the first rows (shown as the cell output).
df.head()

Unnamed: 0,weight_pounds,is_male,mother_age,plurality,gestation_weeks,mother_race
0,7.749249,False,12,Single(1),40,1.0
1,7.561856,True,12,Single(1),40,2.0
2,7.18707,False,12,Single(1),34,3.0
3,6.375769,True,12,Single(1),36,2.0
4,7.936641,False,12,Single(1),35,


Create `tf.data` input pipeline

In [3]:
# Column name -> default value handed to the CSV reader for that column.
# String defaults make is_male, plurality and mother_race string columns;
# float defaults make the remaining columns floats.
_COLUMN_DEFAULTS = {
    'weight_pounds': 0.0,
    'is_male': "null",
    'mother_age': 0.0,
    'plurality': "null",
    'gestation_weeks': 0.0,
    'mother_race': "0",
}

# Label and feature columns to read from the CSV files.
CSV_COLUMNS = list(_COLUMN_DEFAULTS)

# Name of the label (target) column.
LABEL_COLUMN = 'weight_pounds'

# Per-column defaults, aligned one-to-one with CSV_COLUMNS.
DEFAULTS = [[default] for default in _COLUMN_DEFAULTS.values()]

In [4]:
def get_dataset(file_path):
    """Build a batched `tf.data` dataset of (features, label) from a CSV file.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The dataset produced by `make_csv_dataset`: batches of 15 rows with
        LABEL_COLUMN split off as the label, read for a single epoch.
    """
    reader_options = dict(
        batch_size=15,
        label_name=LABEL_COLUMN,
        select_columns=CSV_COLUMNS,
        column_defaults=DEFAULTS,
        num_epochs=1,
        # Per the tf.data docs, rows that fail to parse are skipped rather
        # than raising.
        ignore_errors=True,
    )
    return tf.data.experimental.make_csv_dataset(file_path, **reader_options)


# Training and evaluation splits share the same reader configuration.
train_data = get_dataset("./data/babyweight_train.csv")
test_data = get_dataset("./data/babyweight_eval.csv")

In [5]:
def show_batch(dataset):
    """Print each feature of the first batch of `dataset` as a NumPy array.

    Args:
        dataset: A dataset yielding (feature dict, label) pairs.
    """
    first_batch_only = dataset.take(1)
    for features, _label in first_batch_only:
        for name, tensor in features.items():
            print(f"{name}: {tensor.numpy()}")


show_batch(train_data)

is_male: [b'False' b'True' b'True' b'True' b'True' b'False' b'False' b'False'
 b'True' b'False' b'True' b'True' b'True' b'False' b'True']
mother_age: [16. 18. 17. 16. 14. 17. 17. 16. 17. 16. 17. 17. 16. 16. 17.]
plurality: [b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)'
 b'Single(1)' b'Single(1)' b'Twins(2)' b'Twins(2)' b'Single(1)'
 b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)' b'Single(1)']
gestation_weeks: [41. 38. 39. 38. 38. 41. 38. 37. 34. 41. 39. 37. 39. 33. 37.]
mother_race: [b'0' b'0' b'2.0' b'1.0' b'0' b'2.0' b'1.0' b'1.0' b'0' b'1.0' b'0' b'1.0'
 b'1.0' b'0' b'0']


Create feature columns

In [6]:
# Numeric features are fed to the model as plain numeric columns.
numeric_columns = [
    fc.numeric_column(name) for name in ('mother_age', 'gestation_weeks')
]

# Vocabulary of string values for each categorical feature.
# NOTE(review): `unique()` also returns NaN, which becomes the string 'nan'
# here, while the CSV pipeline fills a missing mother_race with "0" and a
# missing is_male/plurality with "null" (see DEFAULTS). Those fill values are
# not in the vocabularies below -- confirm this is intended.
CATEGORIES = {
    'plurality': ["Single(1)", "Twins(2)", "Triplets(3)",
                  "Quadruplets(4)", "Quintuplets(5)", "Multiple(2+)"],
    'is_male': ["True", "False", "Unknown"],
    'mother_race': [str(race) for race in df.mother_race.unique()],
}

# Wrap every vocabulary-based categorical column in an indicator column so
# it can be consumed by a dense layer.
categorical_columns = [
    fc.indicator_column(
        fc.categorical_column_with_vocabulary_list(
            key=feature,
            vocabulary_list=vocab,
        )
    )
    for feature, vocab in CATEGORIES.items()
]

Create ensemble models

In [7]:
# One scalar Keras input per raw feature: float32 for the numeric columns,
# string for the categorical ones.
inputs = {
    colname: layers.Input(name=colname, shape=(), dtype=dtype)
    for colname, dtype in [
        ('mother_age', 'float32'),
        ('gestation_weeks', 'float32'),
        ('is_male', 'string'),
        ('plurality', 'string'),
        ('mother_race', 'string'),
    ]
}

# All three members share the same feature-column transformation.
dnn_inputs = layers.DenseFeatures(categorical_columns + numeric_columns)(inputs)

# NOTE(review): every member ends in a ReLU output unit. model_1's recorded
# test RMSE (~7.36) is about what a constant-zero prediction would give --
# confirm whether a linear output was intended for this regression.

# model_1: two hidden layers (50 -> 30)
model1_h1 = layers.Dense(units=50, activation='relu')(dnn_inputs)
model1_h2 = layers.Dense(units=30, activation='relu')(model1_h1)
model1_output = layers.Dense(units=1, activation='relu')(model1_h2)
model_1 = Model(inputs=inputs, outputs=model1_output, name='model_1')

# model_2: two hidden layers (64 -> 32)
model2_h1 = layers.Dense(units=64, activation='relu')(dnn_inputs)
model2_h2 = layers.Dense(units=32, activation='relu')(model2_h1)
model2_output = layers.Dense(units=1, activation='relu')(model2_h2)
model_2 = Model(inputs=inputs, outputs=model2_output, name='model_2')

# model_3: a single hidden layer (32)
model3_h1 = layers.Dense(units=32, activation='relu')(dnn_inputs)
model3_output = layers.Dense(units=1, activation='relu')(model3_h1)
model_3 = Model(inputs=inputs, outputs=model3_output, name='model_3')

In [8]:
def fit_model(model):
    """Compile, train, evaluate and return `model`.

    The model is compiled with MSE loss and the Adam optimizer, trained for
    one epoch on the shuffled `train_data`, and evaluated on `test_data`;
    the test loss and RMSE are printed.

    Args:
        model: The Keras model to train. It is modified in place.

    Returns:
        The same `model` object, now trained.
    """
    mse_loss = tf.keras.losses.MeanSquaredError()
    model.compile(loss=mse_loss, optimizer='adam', metrics=['mse'])

    # Train on the shuffled training data for one epoch.
    shuffled_train = train_data.shuffle(500)
    model.fit(shuffled_train, epochs=1)

    # evaluate() returns the loss followed by the compiled 'mse' metric.
    eval_loss, eval_mse = model.evaluate(test_data)
    rmse = eval_mse**0.5
    print(f"\n\n{model.name}:\n Test Loss {eval_loss}, Test RMSE{rmse}")

    return model

In [9]:
# Create the directory that will hold the saved ensemble members.
try:
    os.makedirs('models')
except FileExistsError:
    # Only "already there" is expected and harmless; any other failure
    # (e.g. a permission error) must surface instead of being reported as
    # an existing directory.
    print('directory already exists')

directory already exists


Train each neural network and save the trained model to file

In [10]:
# Ensemble members, in the order used for the saved file names.
members = [model_1, model_2, model_3]

# Number of members; later cells use it to reload and stack the models.
n_members = len(members)

# Fit each member and save it as models/model_<k>.h5, with k counted from 1.
for i, member in enumerate(members):
    # fit_model trains in place and returns the same model
    model = fit_model(member)

    filename = f"models/model_{i + 1}.h5"
    # The path ends in .h5 (an HDF5 file), so state the HDF5 format explicitly
    # instead of passing a format that contradicts the file name.
    model.save(filename, save_format='h5')
    print(f"Saved {filename}\n")



model_1:
 Test Loss 54.193626403808594, Test RMSE7.361632047569927
Saved models/model_1.h5



model_2:
 Test Loss 2.330007791519165, Test RMSE1.5264363044422014
Saved models/model_2.h5



model_3:
 Test Loss 1.3805581331253052, Test RMSE1.1749715456662366
Saved models/model_3.h5



### Load the trained models and create the stacked ensemble model

Loads the models trained above and returns them as a list

In [22]:
def load_models(n_models):
    """Load the saved ensemble members from disk.

    Args:
        n_models: Number of members to load; member k (1-based) is read
            from "models/model_<k>.h5".

    Returns:
        A list of the loaded Keras models, ordered by member number.
    """
    all_models = []
    for member_id in range(1, n_models + 1):
        filename = f"models/model_{member_id}.h5"
        all_models.append(models.load_model(filename))
        print(f">loaded {filename}")

    return all_models


# Replace the in-memory members with the copies reloaded from disk.
members = load_models(n_members)

>loaded models/model_1.h5
>loaded models/model_2.h5
>loaded models/model_3.h5


Freeze the layers of the pre-trained members, since their weights will no longer be updated during the stacking stage. Only the stacked ensemble's new layers are trainable; they learn how best to combine the members' predictions.

In [23]:
# Freeze every layer of every loaded member and prefix each layer name with
# "ensemble_<k>_" (k is the 1-based member number), so that layer names stay
# unique once all members are combined into a single model.
for member_id, member in enumerate(members, start=1):
    name_prefix = f"ensemble_{member_id}_"
    for layer in member.layers:
        # weights of the pre-trained members must not be updated any more
        layer.trainable = False
        # `name` is not assigned directly here; the private `_name`
        # attribute is overwritten instead
        layer._name = name_prefix + layer.name

Create the Stacked Ensemble model - also a NN

In [24]:
# The stacked model takes the inputs of every member.
member_inputs = [model.input for model in members]

# Concatenate the outputs of all members into one tensor that the
# meta-model consumes as its features.
member_outputs = [model.output for model in members]
merge = layers.concatenate(member_outputs)

# Meta-model: a funnel of dense layers (30 -> 20 -> 10 -> 5 -> 1), each one
# fed by the previous layer. h4 must consume h3 and the output must consume
# h4; otherwise the 5-unit layer is built but never part of the model.
h1 = layers.Dense(30, activation='relu')(merge)
h2 = layers.Dense(20, activation='relu')(h1)
h3 = layers.Dense(10, activation='relu')(h2)
h4 = layers.Dense(5, activation='relu')(h3)
ensemble_output = layers.Dense(1, activation='relu')(h4)
ensemble_model = Model(inputs=member_inputs, outputs=ensemble_output)

# Plot the graph of the ensemble (needs pydot and graphviz to be installed).
tf.keras.utils.plot_model(ensemble_model, show_shapes=True, to_file='ensemble_graph.png')

# Compile with MSE as both the loss and the reported metric.
ensemble_model.compile(loss='mse', optimizer='adam', metrics=['mse'])

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [25]:
# Raw feature names that every ensemble member expects.
FEATURES = [
    "is_male",
    "mother_age",
    "plurality",
    "gestation_weeks",
    "mother_race",
]


def stack_features(features, label):
    """Duplicate each raw feature once per ensemble member.

    For every name in FEATURES and every member k (1..n_members), the key
    "ensemble_<k>_<feature>" is added to `features`, pointing at the same
    value as the raw feature. The original keys are kept.

    Args:
        features: Dict mapping feature name to its value; modified in place.
        label: The label for the batch, passed through unchanged.

    Returns:
        The (features, label) pair, with `features` extended as described.
    """
    for feature in FEATURES:
        for member_id in range(1, n_members + 1):
            features[f"ensemble_{member_id}_{feature}"] = features[feature]

    return features, label


# Training data with the per-member feature copies added.
ensemble_data = train_data.map(stack_features).repeat(1)

In [26]:
# Train the stacked ensemble for one epoch on the shuffled, stacked data.
ensemble_model.fit(ensemble_data.shuffle(500), epochs=1)



<keras.callbacks.History at 0x2749d4040a0>

In [27]:
# Evaluate on the test split, applying the same feature stacking as for
# training; the result unpacks into the loss and the 'mse' metric.
val_loss, val_mse = ensemble_model.evaluate(test_data.map(stack_features))



In [None]:
# RMSE is the square root of the MSE metric returned by evaluate().
print(f"Validation RMSE: {val_mse ** 0.5}")

---

# Design 8: Cascade

Train a model to predict the distance that a bicycle will be ridden. Rides that last longer than 4 hours are rare.

* __Model 1__: Classifies trips into Typical trips and Long trips
* __Model 2__: Regression model 1 for Typical trips
* __Model 3__: Regression model 2 for Long trips

Combine both models at the end