# Ensembling
Ensemble multiple models as one?

**Conclusion:** Simple average ensembling gives a boost over the best single model on the Librivox test set.

In [22]:
import keras
from keras import layers
from keras import Input
from keras.models import Model
from keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt
import kapre
from utils import DataFeed
import sys
sys.path.append('../preprocessing')

## Load Data

In [2]:
# Load the LibriVox test subset from the preprocessed-data directory.
# num=-1 presumably means "use every sample" — TODO confirm against DataFeed.
data_path = '../preprocessing/preprocessed_data'
data, labels = DataFeed.Dataset.create(
    data_path,
    ['test/librivox'],
    num=-1,
    shuffle=True,
)
data.shape

(1080, 1, 80000)

In [9]:
# Listen to one clip as a sanity check of the loaded data.
# rate=16000 presumably matches the sample rate of the preprocessed audio
# (each clip has 80000 samples) — TODO confirm.
# NOTE(review): this import lives outside the notebook's top import cell.
import IPython.display as ipd
ipd.Audio(data[-6], rate=16000)

## Ensemble manually

In [27]:
# Paths of the saved Keras models that take part in the ensemble.
models = [
    'conv2d_to_rnn/conv2d_to_rnn.h5',
    'berlin_net/berlin_net.h5',
    'conv2d_deep/conv2d_deep.h5',
    'conv1d/conv1d.h5',
]

In [9]:
def acc(preds, labels):
    """Return the fraction of rows where prediction and label agree.

    Both arguments are reduced with argmax over their last axis, and the
    resulting class indices are compared element-wise.

    Args:
        preds: Array of per-class scores.
        labels: Array of the same shape as `preds` holding the reference
            scores (e.g. one-hot targets).

    Returns:
        Mean of the element-wise equality between the two argmax results.
    """
    expected_classes = np.argmax(labels, axis=-1)
    predicted_classes = np.argmax(preds, axis=-1)
    matches = expected_classes == predicted_classes
    return np.mean(matches)

In [10]:
def ensemble(models):
    """Evaluate a simple averaging ensemble of saved Keras models.

    Every model file is loaded in turn and run on the module-level `data`;
    its accuracy against the module-level `labels` is printed. The
    individual predictions are then averaged and the accuracy of that
    average is printed as well.

    Args:
        models: Iterable of paths to saved Keras model files.

    Returns:
        Array with the same shape as `labels` containing the mean of the
        individual model predictions.

    Raises:
        ValueError: If `models` is empty.
    """
    models = list(models)
    if not models:
        raise ValueError("ensemble() needs at least one model path")

    # The saved models are loaded with these kapre layers passed as
    # custom_objects so load_model can resolve them by name.
    custom_objects = {'Melspectrogram': kapre.time_frequency.Melspectrogram,
                      'Normalization2D': kapre.utils.Normalization2D}

    preds = np.zeros(labels.shape)
    for mpath in models:
        model = load_model(mpath, custom_objects=custom_objects)
        mpreds = model.predict(x=data, verbose=0)
        print(mpath + ": Individual Acc: " + str(acc(mpreds, labels)))
        preds += mpreds

    # Turn the running sum into the mean. The argmax (and therefore the
    # printed accuracy) is unaffected by dividing by a positive constant.
    preds /= len(models)
    print("Ensemble Acc: " + str(acc(preds, labels)))

    # Return the averaged predictions so that `preds = ensemble(...)` is
    # not silently bound to None.
    return preds

## Evaluate

In [11]:
# Evaluate the ensemble built from every path in `models`; the per-model
# accuracies and the ensemble accuracy are printed by ensemble().
preds = ensemble(models)

conv1d/conv1d.h5: Individual Acc: 0.7944444444444444
int_axis=0 passed but is ignored, str_axis is used instead.
conv2d_to_rnn/conv2d_to_rnn.h5: Individual Acc: 0.9083333333333333
int_axis=0 passed but is ignored, str_axis is used instead.
berlin_net/berlin_net.h5: Individual Acc: 0.8935185185185185
conv2d_deep/conv2d_deep.h5: Individual Acc: 0.7675925925925926
Ensemble Acc: 0.9453703703703704


In [12]:
# Evaluate the ensemble again without the last entry of `models`.
# NOTE(review): the recorded output below omits conv2d_deep, but with the
# `models` list as defined in this notebook the last entry is conv1d — the
# output appears to come from a different list order; re-run to confirm.
preds = ensemble(models[:-1])

conv1d/conv1d.h5: Individual Acc: 0.7944444444444444
int_axis=0 passed but is ignored, str_axis is used instead.
conv2d_to_rnn/conv2d_to_rnn.h5: Individual Acc: 0.9083333333333333
int_axis=0 passed but is ignored, str_axis is used instead.
berlin_net/berlin_net.h5: Individual Acc: 0.8935185185185185
Ensemble Acc: 0.9435185185185185


## Ensemble with keras

In [25]:
def get_ensembled_model(models):
    """Build one Keras model that averages the outputs of several saved models.

    Approach taken from
    https://medium.com/randomai/ensemble-and-store-models-in-keras-2-x-b881a6d7693f

    Args:
        models: Iterable of paths to saved Keras model files. The shared
            input takes its shape from the first loaded model, so all
            models are assumed to accept the same input shape.

    Returns:
        A Keras `Model` named 'ensemble' that feeds a single input to every
        loaded model and outputs the average of their outputs. When only
        one path is given, the output is that model's output unchanged.

    Raises:
        ValueError: If `models` is empty.
    """
    models = list(models)
    if not models:
        raise ValueError("get_ensembled_model() needs at least one model path")

    # The saved models are loaded with these kapre layers passed as
    # custom_objects so load_model can resolve them by name.
    custom_objects = {'Melspectrogram': kapre.time_frequency.Melspectrogram,
                      'Normalization2D': kapre.utils.Normalization2D}

    # load all models
    loaded_models = []
    for mpath in models:
        model = load_model(mpath, custom_objects=custom_objects)
        # Rename each sub-model to its path so the nested models get distinct,
        # recognisable names in the combined graph.
        model.name = mpath
        loaded_models.append(model)

    # One shared input; drop the batch dimension of the first model's input shape.
    model_input = Input(shape=loaded_models[0].input_shape[1:])
    y_models = [model(model_input) for model in loaded_models]

    # averaging outputs; the Keras merge layer needs at least two tensors,
    # so a single model is passed through as-is.
    if len(y_models) == 1:
        y_average = y_models[0]
    else:
        y_average = layers.average(y_models)

    # build model from same input and avg output
    model_ens = Model(inputs=model_input, outputs=y_average, name='ensemble')

    return model_ens

In [26]:
# Build the combined model from every path in `models`, print its layer
# summary, and save it as 'model_ens.h5' in the current working directory.
# NOTE(review): reloading the saved file presumably needs the same kapre
# custom_objects that were used when loading the sub-models — confirm.
model_ens = get_ensembled_model(models)
model_ens.summary()
model_ens.save('model_ens.h5')

int_axis=0 passed but is ignored, str_axis is used instead.
int_axis=0 passed but is ignored, str_axis is used instead.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1, 80000)     0                                            
__________________________________________________________________________________________________
conv1d/conv1d.h5 (Sequential)   (None, 3)            205235      input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_to_rnn/conv2d_to_rnn.h5  (None, 3)            328371      input_1[0][0]                    
__________________________________________________________________________________________________
berlin_net/berlin_net.h5 (Seque (None, 3)            5235551     input_1[0][0]          