# Ensemble learning

In [62]:
model_names = [
    'vgg16-keras', 
    'vgg19-keras', 
    'resnet50-keras',
    'incv3-keras',   
    'Inception_v3'
]

classifier_filepath = {
    'incv3-keras'    : 'classifiers/7577-incv3-keras.pkl',
    'vgg16-keras'    : 'classifiers/8515-vgg16-keras.pkl',
    'vgg19-keras'    : 'classifiers/8654-vgg19-keras.pkl',
    'Inception_v3'   : 'classifiers/9061-Inception_v3.pkl',
    'resnet50-keras' : 'classifiers/9130-resnet50-keras.pkl'
}

import numpy as np
data = dict()
for model_name in model_names:
    data[model_name] = np.load('features/CIFAR10_{model}_features.npz'.format(model=model_name)) 

In [63]:
!ls -1 classifiers/*.pkl

'ls' is not recognized as an internal or external command,
operable program or batch file.


In [64]:
from sklearn.externals import joblib

clf = dict()
for name in model_names:
    clf[name] = joblib.load(classifier_filepath[name])

In [65]:
clf['resnet50-keras'].get_params()

{'C': 0.01,
 'class_weight': None,
 'dual': True,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'loss': 'squared_hinge',
 'max_iter': 1000,
 'multi_class': 'ovr',
 'penalty': 'l2',
 'random_state': None,
 'tol': 0.0001,
 'verbose': 1}

Let us remind the classifiers scores on training dataset from CIFAR10

    incv3    => 7577
    vgg16    => 8515
    vgg19    => 8654
    Incv3    => 9061
    resnet50 => 9130

## Majority voting

In [66]:
import myutils
import numpy as np

In [67]:
_, data_testing = myutils.load_CIFAR_dataset(shuffle=False)

y_testing = np.array( data_testing )[:,1]
n_testing = y_testing.shape[0]

In [70]:
from collections import Counter

def majority_vote(i):
    votes = np.zeros(10);
    for name in model_names:
        y = y_predictions[name][i]
        votes[y] += 1
    return votes.argmax()

y_predictions = dict()
for name in model_names:
    y_predictions[name] = clf[name].predict( data[name]['features_testing'] )
    
y_ensembled = [ majority_vote(i) for i in range(n_testing) ]

In [71]:
np.sum(y_ensembled == y_testing)

9189

Simple majority voting increases our best result to **91.89%**

## Weighted voting

In [110]:
# Assume, we know how good are our models. We can give some weight to their votes.
classifier_weights = {
    'incv3-keras'    : 2,
    'vgg16-keras'    : 2,
    'vgg19-keras'    : 3,
    'Inception_v3'   : 4,
    'resnet50-keras' : 5
}

def weighted_vote(i):
    votes = np.zeros(10);
    for name in model_names:
        y = y_predictions[name][i]
        votes[y] = votes[y] + classifier_weights[name]
    return votes.argmax()

y_ensembled = [ weighted_vote(i) for i in range(n_testing) ]

In [111]:
np.sum( y_ensembled == y_testing )

9293

## Dynamic voting
1. Take the image to classify
2. Find its K nearest neighbors
3. Classify the neighbor using all classifiers
4. Use weighted voting

    todo

## Adaboost

In [112]:
#  base_clf = clf['resnet50-keras']

In [116]:
from sklearn import svm

base_clf = svm.SVC(probability=True,kernel='linear')

In [119]:
n_training = 1000 # TODO: try with bigger testing data

X_train = data['resnet50-keras']['features_training'][:n_training]
y_train = data['resnet50-keras']['labels_training'][:n_training]

base_clf.fit( X_train, y_train )

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [120]:
base_clf.score( data['resnet50-keras']['features_testing'], data['resnet50-keras']['labels_testing'] )

0.83030000000000004

In [122]:
from sklearn.ensemble import AdaBoostClassifier
boosted_model = AdaBoostClassifier(base_estimator = base_clf)

In [123]:
boosted_model.fit( X_train, y_train )

AdaBoostClassifier(algorithm='SAMME.R',
          base_estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=True, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          learning_rate=1.0, n_estimators=50, random_state=None)

In [124]:
boosted_model.score( data['resnet50-keras']['features_testing'], data['resnet50-keras']['labels_testing'] )

0.78369999999999995