In [1]:
from pySL import SL
from scipy import stats
from tflearn.datasets import cifar10
from scipy.optimize import minimize
import numpy as np
from numpy import loadtxt

In this demo, we use the CIFAR-10 dataset as an example. First, read the validation/testing prediction matrix for each model. The base models are three ResNet-32 models.

In [2]:
# Read the comma-separated prediction matrix saved for each of the three
# ResNet-32 runs; the files are loaded in order 1, 2, 3.
resnet32_1, resnet32_2, resnet32_3 = (
    loadtxt(prediction_path, delimiter=",")
    for prediction_path in (
        "prediction/result_resnet32_cifar10_1.txt",
        "prediction/result_resnet32_cifar10_2.txt",
        "prediction/result_resnet32_cifar10_3.txt",
    )
)

The first 5000 rows are the predictions for the validation set, while the last 10000 rows are the predictions for the testing set.

In [3]:
# Display the dimensions of the first model's prediction matrix.
np.shape(resnet32_1)

(15000, 10)

Read the labels for the validation/testing sets from the CIFAR-10 data using the interface provided by the tflearn package.

In [4]:
# load_data() returns two pairs that are unpacked here as
# (X, Y) and (testX, testY).
(X, Y), (testX, testY) = cifar10.load_data()

# Everything in Y from index 45000 onward is kept as the validation labels.
valY = Y[45000:]

In [5]:
# Stack the per-model prediction matrices along a new leading axis
# (np.stack's default axis is 0), giving one slice per model.
pred = np.stack([resnet32_1, resnet32_2, resnet32_3])

In [6]:
# Display the dimensions of the stacked prediction array.
np.shape(pred)

(3, 15000, 10)

In [7]:
# Join the validation labels and the test labels into one sequence,
# validation first, then test.
myY = np.hstack([valY, testY])

# Build the ensemble object from the stacked predictions and the labels.
# NOTE(review): how SL splits validation vs. test rows is defined in pySL,
# not in this notebook -- confirm against that package.
mySL = SL(pred, myY)
mySL.fit()

Show the performance of each ensemble method:

method = 'SL' means predicting using the SuperLearner weights (with or without constraints).

method = 'naive' means predicting by naively averaging all the models.

method = 'bayesian' means predicting using the Bayesian Optimal Classifier.

In [8]:
# Convert the test labels to an array once; every accuracy below is computed
# against this same array.
test_labels = np.array(testY)

# (text to print, keyword arguments for mySL.predict), in reporting order.
prediction_reports = [
    ('SL', dict(method='SL', constrained=False)),
    ('SL constrained', dict(method='SL', constrained=True)),
    ('Naive averaging (before softmax)', dict(method='naive', softmax=False)),
    ('Naive averaging (after softmax)', dict(method='naive', softmax=True)),
    ('Bayesian Optimal Classifier (before softmax)', dict(method='bayesian', softmax=False)),
    ('Bayesian Optimal Classifier (after softmax)', dict(method='bayesian', softmax=True)),
]

for report_label, predict_kwargs in prediction_reports:
    # The predicted class is the column holding the largest value in each row.
    predicted_class = np.argmax(mySL.predict(**predict_kwargs), axis=1)
    # Accuracy is the fraction of rows whose predicted class equals the label.
    # Printing one pre-formatted string gives the same text under Python 2
    # and Python 3 (a bare `print a, b` statement is a syntax error in 3).
    print('%s %s' % (report_label, np.mean(predicted_class == test_labels)))

# majority() is compared with the labels directly, with no argmax step.
print('%s %s' % ('Majority voting', np.mean(mySL.majority() == test_labels)))

SL 0.9367
SL constrained 0.929
Naive averaging (before softmax) 0.9366
Naive averaging (after softmax) 0.9365
Bayesian Optimal Classifier (before softmax) 0.9179
Bayesian Optimal Classifier (after softmax) 0.9179
Majority voting 0.9349
