# Model Ensembles

Based on [Course Notes: Neural Nets Notes 3](http://cs231n.github.io/neural-networks-3/#ensemble), we want to try *averaging* the results from several different models to see if we can improve our final accuracy!


In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Loading the other modules I have in parent directories
import sys
PYTHONPATH="../"
sys.path.append(PYTHONPATH)
from dataProcessing import getDataLoaders
from models import FCNet, CNN_3d, layer0_12x12, layer2_12x12, layer0_3x6, count_parameters
from train import check_accuracy, train
from plottingFcts import trainingMetrics, sigBkgEff, plotROC, plotConfusion

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

  from ._conv import register_converters as _register_converters


In [2]:
# Build the train / validation / test DataLoaders used in the rest of the notebook.
# NOTE(review): N presumably caps the number of events that are loaded --
# confirm against dataProcessing.getDataLoaders.
loader_train, loader_val, loader_test = getDataLoaders(batch_size=16, N=20000)

In [3]:
# Instantiate the fully connected network and restore its saved weights;
# the checkpoint file is named after the model's own `modelName`.
fc = FCNet()
print(fc.modelName)
fc_checkpoint = "../models/{}.pt".format(fc.modelName)
fc.load_state_dict(torch.load(fc_checkpoint))


fc_150_100_50_dpt_0.5


In [4]:
# Baseline: validation accuracy of the fully connected model on its own.
check_accuracy(loader_val,fc, verbose=True)

cpu
Got 4526 / 6000 correct (75.43)


In [5]:
# Rebuild the 3D CNN with the hyperparameters below, then restore its weights.
cnn_hparams = dict(
    spatialDim=(3, 6),
    preConvParams={'nF': 32},
    nFilters_1=32, filter_1=(3, 3, 3), stride_1=(1, 1, 1), padding_1=(1, 1, 1),
    nFilters_2=16, filter_2=(3, 3, 2), stride_2=(2, 2, 2), padding_2=(1, 1, 1),
    h1_dim=150, h2_dim=100,
)
cnn = CNN_3d(**cnn_hparams)

# The checkpoint file name is the model name minus its last 8 characters.
# NOTE(review): the reason for dropping that suffix is not visible here -- confirm.
cnn_checkpoint = "../models/{}.pt".format(cnn.modelName[:-8])
cnn.load_state_dict(torch.load(cnn_checkpoint))

Output size after the first conv: 32,3.0,3.0,6.0
Output size after the second conv: 16,2.0,2.0,4.0


In [6]:
# Baseline: validation accuracy of the 3D CNN on its own.
check_accuracy(loader_val,cnn,verbose=True)

cpu
Got 4666 / 6000 correct (77.77)


In [21]:
# Sanity check: print the ensemble output for a single validation batch.
# NOTE: `ensemble` is defined in a later cell of this notebook; that cell must
# be run first (or moved above this one) for a fresh-kernel run to succeed.
with torch.no_grad():
    # Only the first batch is needed; the labels `y` are not used here.
    l0, l1, l2, y = next(iter(loader_val))
    print(ensemble([fc, cnn], l0, l1, l2))
            

tensor([[ 0.5183,  0.0184,  0.4633],
        [ 0.3737,  0.0240,  0.6023],
        [ 0.3611,  0.0218,  0.6171],
        [ 0.0717,  0.8770,  0.0513],
        [ 0.4850,  0.0323,  0.4827],
        [ 0.3774,  0.0216,  0.6010],
        [ 0.0363,  0.9517,  0.0120],
        [ 0.4697,  0.0236,  0.5067],
        [ 0.4197,  0.0213,  0.5590],
        [ 0.3536,  0.0192,  0.6272],
        [ 0.9419,  0.0219,  0.0361],
        [ 0.7115,  0.0173,  0.2712],
        [ 0.6656,  0.0281,  0.3063],
        [ 0.0320,  0.9507,  0.0172],
        [ 0.3795,  0.0242,  0.5964],
        [ 0.1042,  0.8618,  0.0340]])


  del sys.path[0]


In [39]:
def ensemble(models, l0, l1, l2):
    '''
    Average the outputs of several models on the same batch.

    Inputs:
    - models: A non-empty list of callables; each one is invoked as
      m(l0, l1, l2) and all of them must return tensors of the same shape.
    - l0, l1, l2: The inputs, forwarded unchanged to every model.

    Returns:
    - A tensor with the same shape as a single model's output, holding the
      element-wise mean of the raw model outputs (no softmax is applied).

    Raises:
    - ValueError: if `models` is empty.
    '''
    if len(models) == 0:
        raise ValueError("ensemble() needs at least one model")

    # Stacking the outputs (instead of accumulating into a pre-allocated
    # zeros(batch_size, 3) buffer) keeps the result on the same device and
    # dtype as the model outputs and works for any number of output classes.
    outputs = [m(l0, l1, l2) for m in models]
    return torch.stack(outputs, dim=0).mean(dim=0)

In [40]:
device = torch.device('cpu')
dtype = torch.float32

def check_ensemble_accuracy(loader, models, returnAcc=False, verbose=True):

    '''
    Check the accuracy of an ensemble of models

    Inputs:
        loader: A DataLoader object, i.e, for the val or test set, yielding
                (l0, l1, l2, y) batches
        models: A list of Pytorch models; their averaged outputs (see
                `ensemble`) are used to make the predictions
        returnAcc: If true, the function will return the calculated accuracy
        verbose: If true, print the number of correct predictions, the number
                 of samples and the accuracy in percent

    Returns:
        The accuracy as a float in [0, 1] if returnAcc is true, otherwise None

    Raises:
        ValueError: if the loader yields no samples
    '''

    num_correct = 0
    num_samples = 0
    for m in models:
        m.eval()  # set model to evaluation mode
        # For an nn.Module, .to() moves the parameters in place, so the
        # result does not need to be rebound.
        m.to(device=device)

    with torch.no_grad():
        for l0, l1, l2, y in loader:
            l0 = l0.to(device=device, dtype=dtype)  # move to device, e.g. GPU
            l1 = l1.to(device=device, dtype=dtype)
            l2 = l2.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)

            scores = ensemble(models, l0, l1, l2)
            _, preds = scores.max(1)
            # .item() keeps the running count a plain Python int.
            num_correct += (preds == y).sum().item()
            num_samples += preds.size(0)

    if num_samples == 0:
        raise ValueError("check_ensemble_accuracy: the loader yielded no samples")

    acc = float(num_correct) / num_samples
    if verbose:
        print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

    # Previously `returnAcc` was accepted but ignored; honour it now.
    if returnAcc:
        return acc

In [41]:
# Validation accuracy of the fc + cnn ensemble.
check_ensemble_accuracy(loader_val, [fc,cnn])

Got 4621 / 6000 correct (77.02)


In [38]:
# Plain average of the two individual validation accuracies reported above,
# for comparison with the ensemble result.
cnn_val_acc = 77.77
fc_val_acc = 75.43
print('{:.3f}'.format(.5 * (cnn_val_acc + fc_val_acc)))

76.600
