# Ensemble learning

In [1]:
model_names = [
    'vgg16-keras', 
    'vgg19-keras', 
    'resnet50-keras',
    'incv3-keras',   
    'Inception_v3'
]

classifier_filepath = {
    'incv3-keras'    : 'classifiers/7577-incv3-keras.pkl',
    'vgg16-keras'    : 'classifiers/8515-vgg16-keras.pkl',
    'vgg19-keras'    : 'classifiers/8654-vgg19-keras.pkl',
    'Inception_v3'   : 'classifiers/9061-Inception_v3.pkl',
    'resnet50-keras' : 'classifiers/9130-resnet50-keras.pkl'
}

import numpy as np
data = dict()
for model_name in model_names:
    data[model_name] = np.load('features/CIFAR10_{model}_features.npz'.format(model=model_name)) 

In [2]:
!ls -1 classifiers/*.pkl

classifiers/7577-incv3-keras.pkl
classifiers/8515-vgg16-keras.pkl
classifiers/8654-vgg19-keras.pkl
classifiers/9061-Inception_v3.pkl
classifiers/9130-resnet50-keras.pkl


In [3]:
from sklearn.externals import joblib

clf = dict()
for name in model_names:
    clf[name] = joblib.load(classifier_filepath[name])

In [4]:
clf['resnet50-keras'].get_params()

{'C': 0.01,
 'class_weight': None,
 'dual': True,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'loss': 'squared_hinge',
 'max_iter': 1000,
 'multi_class': 'ovr',
 'penalty': 'l2',
 'random_state': None,
 'tol': 0.0001,
 'verbose': 1}

Let us remind the classifiers scores on training dataset from CIFAR10

    incv3    => 7577
    vgg16    => 8515
    vgg19    => 8654
    Incv3    => 9061
    resnet50 => 9130

## Majority voting

In [5]:
import myutils
import numpy as np

In [6]:
_, data_testing = myutils.load_CIFAR_dataset(shuffle=False)

y_testing = np.array( data_testing )[:,1]
n_testing = y_testing.shape[0]

In [7]:
def most_common(lst):
    return max(set(lst), key=lst.count)

y_predictions = dict()
for name in model_names:
    y_predictions[name] = clf[name].predict( data[name]['features_testing'] )

In [8]:
y_ensembled = [ most_common( [ y_predictions[name][i] for name in model_names ] ) for i in range(n_testing) ]

In [9]:
np.sum(y_ensembled == y_testing)

9180

So simple majority voting increases our best result to **91.80%**