# Maximising classification accuracy via Ensemble Weight optimization

## Add the imports

In [2]:
from models import wide_residual_net as WRN

import numpy as np
import sklearn.metrics as metrics
from scipy.optimize import minimize
from sklearn.metrics import log_loss

from keras.datasets import cifar100
from keras import backend as K
import keras.utils.np_utils as kutils

## Define the configuration variables used throughout the notebook

In [3]:
# Number of random restarts used by the weight optimisation further down
NUM_TESTS = 25

# Depth and widening factor that appear in the weight file names (WRN-16-4)
n = 2 * 6 + 4
k = 4

# One weights file per ensemble member: the "Best" checkpoint plus five numbered ones
models_filenames = [r"weights/WRN-CIFAR100-%d-%d-%s.h5" % (n, k, suffix)
                    for suffix in ("Best", "1", "2", "3", "4", "5")]

## Load up the CIFAR 100 dataset and prepare for testing

In [4]:
(trainX, trainY), (testX, testY) = cifar100.load_data()

# Number of distinct labels present in the test split
nb_classes = np.unique(testY).size

# Convert both image arrays to float32 and divide by 255
trainX, testX = (images.astype('float32') / 255.0 for images in (trainX, testX))

# One-hot encode the labels; the integer test labels stay available as testY
trainY = kutils.to_categorical(trainY)
testY_cat = kutils.to_categorical(testY)

## Create the Wide Residual Network (16-4)

In [5]:
# Decide dim ordering for Theano and Tensorflow backends
if K.image_dim_ordering() == "th":
    init = (3, 32, 32)
else:
    init = (32, 32, 3)
    
model = WRN.create_wide_residual_network(init, nb_classes=100, N=2, k=4, dropout=0.00)
model_prefix = 'WRN-CIFAR100-%d-%d' % (n, k)

Wide Residual Network-16-4 created.


## Obtain predictions from each of the Ensemble models

In [6]:
# preds[i] holds the test-set predictions of the model loaded from models_filenames[i]
preds = []
for fn in models_filenames:
    # Reuse the single network: swap in this checkpoint, then predict on the test images
    model.load_weights(fn)
    preds.append(model.predict(testX, batch_size=128))

    print("Obtained predictions from model with weights = %s" % (fn))

Obtained predictions from model with weights = weights/WRN-CIFAR100-16-4-Best.h5
Obtained predictions from model with weights = weights/WRN-CIFAR100-16-4-1.h5
Obtained predictions from model with weights = weights/WRN-CIFAR100-16-4-2.h5
Obtained predictions from model with weights = weights/WRN-CIFAR100-16-4-3.h5
Obtained predictions from model with weights = weights/WRN-CIFAR100-16-4-4.h5
Obtained predictions from model with weights = weights/WRN-CIFAR100-16-4-5.h5


## Define helper function to calculate accuracy and error

In [7]:
def calculate_weighted_accuracy(prediction_weights):
    """Print the test accuracy and error (in percent) of a weighted ensemble.

    Each array in the global `preds` is scaled by the matching entry of
    `prediction_weights` and the results are summed; the arg-max of that sum
    along axis 1 is compared with the integer labels in the global `testY`.

    Args:
        prediction_weights: iterable of scalars, paired in order with `preds`.

    Returns:
        None. The accuracy and error are only printed.
    """
    n_samples = testX.shape[0]
    ensemble_scores = np.zeros((n_samples, nb_classes), dtype='float32')

    # Accumulate in float32, one model at a time
    for model_weight, model_scores in zip(prediction_weights, preds):
        ensemble_scores += model_weight * model_scores

    predicted_labels = ensemble_scores.argmax(axis=1)
    accuracy = 100 * metrics.accuracy_score(testY, predicted_labels)

    print("Accuracy : ", accuracy)
    print("Error : ", 100 - accuracy)

## Consider a Single Best Model prediction. 

We can load the weights of the single best model and make predictions

In [8]:
# Load the weights of the best single model
model.load_weights(models_filenames[0])

# Get its predictions
yPreds = model.predict(testX, batch_size=128)
yPred = np.argmax(yPreds, axis=1)
yTrue = testY

# Calculate accuracy metric
accuracy = metrics.accuracy_score(yTrue, yPred) * 100
error = 100 - accuracy
print("Accuracy : ", accuracy)
print("Error : ", error)

Accuracy :  71.07
Error :  28.93


## Consider a non weighted ensemble prediction

Here, each model has the same weight for predictions. However, this may not lead to optimal results.

Notice that even this unweighted ensemble improves on the single best model for CIFAR-100 (71.78% vs. 71.07% accuracy in the outputs shown here).

In [9]:
# Uniform ensemble: every model receives the weight 1 / (number of models)
prediction_weights = [1. / len(models_filenames) for _ in models_filenames]

calculate_weighted_accuracy(prediction_weights)

Accuracy :  71.78
Error :  28.22


## Now we consider a weighted ensemble

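In a weighted ensemble, we optimize the weight given to each model's predictions so as to minimize the total log loss of the combined prediction. This in turn tends to improve the overall accuracy of the predictions, although an individual optimization run is not guaranteed to beat the unweighted ensemble.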

In [10]:
# Objective function handed to the optimiser
def log_loss_func(weights):
    ''' Log loss of the weighted sum of the per-model predictions.

    scipy minimize will pass the weights as a numpy array; each entry scales
    the matching array in the global `preds`, and the summed result is scored
    against the one-hot test labels in `testY_cat`.
    '''
    n_samples = testX.shape[0]
    blended = np.zeros((n_samples, nb_classes), dtype='float32')

    # Accumulate in float32, one model at a time
    for model_weight, model_scores in zip(weights, preds):
        blended += model_weight * model_scores

    return log_loss(testY_cat, blended)

In [12]:
best_acc = 0.0
best_weights = None

# Fixed seed so that "Restart & Run All" reproduces the same random starting points
SEED = 42
rng = np.random.RandomState(SEED)

# Parameters for optimization: every weight lies in [0, 1] and the weights sum to 1
constraints = ({'type': 'eq', 'fun': lambda w: 1 - np.sum(w)})
bounds = [(0, 1)] * len(preds)

# NOTE(review): log_loss_func fits the weights on the test labels and the best run
# below is also picked by test accuracy, so the reported numbers are optimistic.
# A held-out validation split would be needed for an unbiased estimate.

# Check for NUM_TESTS times
for iteration in range(NUM_TESTS):
    # Random initialization of weights, rescaled so the starting point already
    # satisfies the sum-to-one constraint
    prediction_weights = rng.random_sample(len(preds))
    prediction_weights /= prediction_weights.sum()

    # Minimise the loss
    result = minimize(log_loss_func, prediction_weights, method='SLSQP', bounds=bounds, constraints=constraints)
    if not result['success']:
        # Surface optimiser failures instead of silently treating the result as converged
        print("Iteration %d: optimiser did not converge: %s" % (iteration + 1, result['message']))
    print('Best Ensemble Weights: {weights}'.format(weights=result['x']))

    weights = result['x']
    weighted_predictions = np.zeros((testX.shape[0], nb_classes), dtype='float32')

    # Calculate weighted predictions
    for weight, prediction in zip(weights, preds):
        weighted_predictions += weight * prediction

    yPred = np.argmax(weighted_predictions, axis=1)
    yTrue = testY

    # Calculate weight prediction accuracy
    accuracy = metrics.accuracy_score(yTrue, yPred) * 100
    error = 100 - accuracy
    print("Iteration %d: Accuracy : " % (iteration + 1), accuracy)
    print("Iteration %d: Error : " % (iteration + 1), error)

    # Save current best weights
    if accuracy > best_acc:
        best_acc = accuracy
        best_weights = weights

    print()

Best Ensemble Weights: [ 0.          0.26561464  0.04944201  0.05112553  0.59963485  0.03418297]
Iteration 1: Accuracy :  71.39
Iteration 1: Error :  28.61

Best Ensemble Weights: [  4.34944411e-08   3.71529158e-01   5.97871080e-01   0.00000000e+00
   9.03772028e-12   3.05997187e-02]
Iteration 2: Accuracy :  71.42
Iteration 2: Error :  28.58

Best Ensemble Weights: [ 0.04306361  0.59534088  0.08286299  0.02587616  0.12175668  0.13109969]
Iteration 3: Accuracy :  72.05
Iteration 3: Error :  27.95

Best Ensemble Weights: [  1.18818912e-02   5.16909311e-01   1.16893620e-01   4.26161061e-06
   4.35623601e-02   3.10748556e-01]
Iteration 4: Accuracy :  72.22
Iteration 4: Error :  27.78

Best Ensemble Weights: [ 0.03822819  0.53179849  0.19198791  0.08540319  0.01221369  0.14036853]
Iteration 5: Accuracy :  72.1
Iteration 5: Error :  27.9

Best Ensemble Weights: [  9.90197453e-07   4.81835256e-01   1.47842985e-01   2.06942531e-01
   5.64521086e-05   1.63321785e-01]
Iteration 6: Accuracy :  72

## We can now report the ensemble weights that gave the best accuracy

In [23]:
# Report the highest accuracy seen across the optimisation runs and the weights
# that produced it (best_acc / best_weights are set in the optimisation loop).
print("Best Accuracy : ", best_acc)
print("Best Weights : ", best_weights)

Best Accuracy :  72.25
Best Weights :  [  3.82715088e-06   5.09113442e-01   8.92782124e-02   2.22301797e-01
   4.25546797e-02   1.36748041e-01]


In [24]:
# Re-evaluate the ensemble with the best weights found; prints accuracy and error.
# best_weights is still None if no optimisation run scored above 0% accuracy.
calculate_weighted_accuracy(best_weights)