# Ensemble

In [1]:
import numpy as np
import math

import torch

import import_ipynb
import dist

importing Jupyter notebook from dist.ipynb


In [2]:
class Ensemble:
    """Ensemble of models whose outputs are merged by a weighted aggregator.

    A call runs every member network on the same inputs, stacks the
    per-network outputs along a new leading dimension and passes that stack,
    together with the per-network reputations (weights), to ``mode``.
    """

    def __init__(
        self,
        nets: list,
        mode: callable = None,
        reputations=None
    ):
        """Create an ensemble model and its methods.

        Parameters
        ----------
        nets : list
            List of neural networks (models).
        mode : function, optional
            Aggregation function, called as ``mode(outputs, reputations)``
            where ``outputs`` is the stacked outputs of every model.
            Its return value is the aggregated result.
            Defaults to the module-level ``avg``.
        reputations : array-like, optional
            Sum of `reputations` SHOULD be 1.
            `len(reputations)` SHOULD be same as number of `nets`.
            Defaults to ``dist.uniform(len(nets))``.

        Raises
        ------
        AssertionError
            If `reputations` has the wrong length or does not sum to 1.
        """

        self.nets = nets
        self.num_nets = len(self.nets)

        mode = mode or avg
        if reputations is None:
            reputations = dist.uniform(self.num_nets)

        self.set_mode(mode)
        self.update_reputations(reputations)

    def set_mode(self, new_mode: callable):
        """Replace the aggregation function used by `__call__`."""
        self.mode = new_mode

    def update_reputations(self, new_reputations):
        """Validate and store new per-network reputations.

        Parameters
        ----------
        new_reputations : array-like
            One weight per network; the weights must sum to 1.
            Stored as an `np.ndarray` (an ndarray argument is stored as-is,
            anything else is converted).

        Raises
        ------
        AssertionError
            If the length differs from the number of networks or the sum is
            not (approximately) 1.
        """
        # Raised explicitly rather than via `assert`, so the checks still run
        # under `python -O`; the exception type is kept for existing callers.
        if len(new_reputations) != self.num_nets:
            raise AssertionError(
                "dim of `reputations` SHOULD be same as len(nets)"
            )
        if not math.isclose(sum(new_reputations), 1.):
            raise AssertionError("sum of `reputations` SHOULD be 1.")

        # list, tuple, et al. are converted; a plain ndarray passes through
        self.reputations = np.asarray(new_reputations)

    def __call__(self, inputs):
        """Calculate inference result of this (self) ensemble model.

        Every network is evaluated on `inputs`; the outputs are stacked into
        one tensor (dimension 0 indexes the networks) and aggregated with
        ``self.mode(outputs, self.reputations)``.
        """

        outputs = torch.stack([net(inputs) for net in self.nets])  # to Tensor

        return self.mode(outputs, self.reputations)

    def eval(self):
        """Call ``eval()`` on every member network.

        Returns
        -------
        Ensemble
            ``self``, so the call can be chained like ``nn.Module.eval()``.
        """
        for net in self.nets:
            net.eval()

        return self

In [3]:
def avg(outputs, reputations=None):
    """Calculate weighted average.

    Parameters
    ----------
    outputs : torch.Tensor
        Stacked outputs of every network; dimension 0 indexes the networks.
    reputations : array-like, optional
        One weight per network (list, np.ndarray or tensor).
        Defaults to ``dist.uniform(len(outputs))``.
        Entries beyond the number of networks are ignored.

    Returns
    -------
    torch.Tensor
        ``sum_i reputations[i] * outputs[i]``, with the shape of
        ``outputs[0]``.

    Raises
    ------
    ValueError
        If fewer reputations than networks are given (this used to sum
        uninitialised memory into the result).
    """
    if reputations is None:
        reputations = dist.uniform(len(outputs))

    num_nets = outputs.shape[0]

    # Keep the arithmetic in the outputs' own floating dtype and device.
    if outputs.is_floating_point():
        dtype = outputs.dtype
    else:
        dtype = torch.get_default_dtype()
    weights = torch.as_tensor(reputations, dtype=dtype).to(outputs.device)

    if weights.dim() != 1 or weights.shape[0] < num_nets:
        raise ValueError(
            "`reputations` SHOULD contain one weight per network"
        )

    # One weight per network, broadcast over all remaining dimensions.
    weights = weights[:num_nets].reshape(
        [num_nets] + [1] * (outputs.dim() - 1)
    )

    return torch.sum(outputs * weights, dim=0)

In [4]:
def med(outputs, reputations=None):
    """Calculate weighted median.

    See https://en.wikipedia.org/wiki/Weighted_median for weighted median.

    For every element position, the networks' values are sorted in ascending
    order and their reputations are accumulated in that order; the value at
    which the accumulated reputation first reaches 0.5 is selected.

    Parameters
    ----------
    outputs : torch.Tensor
        Stacked outputs of every network; dimension 0 indexes the networks.
        Any number of trailing dimensions is accepted.
    reputations : array-like, optional
        One weight per network (list, np.ndarray or tensor).
        Defaults to ``dist.uniform(len(outputs))``.

    Returns
    -------
    torch.Tensor
        Tensor with the shape of ``outputs[0]`` holding the selected values.
        If the accumulated reputation never reaches 0.5 (weights summing to
        less than 0.5), the largest value is returned for that position.
    """

    if reputations is None:
        reputations = dist.uniform(len(outputs))

    # float64 weights on the same device as the indices they are indexed with
    weights = torch.as_tensor(reputations, dtype=torch.float64)
    weights = weights.to(outputs.device)

    # calculates sorted `outputs`' indexes
    order = outputs.detach().sort(dim=0)[1]  # [0]: values, [1]: indexes
    # shape: same as `outputs`
    # value: which network (index of net) holds the k-th smallest value

    # reputations accumulated in ascending order of the values
    accumulated_repus = weights[order].cumsum(dim=0)

    # number of sorted positions passed before the threshold is first reached
    reached = (accumulated_repus >= 0.5).long()
    position = (reached.cumsum(dim=0) == 0).sum(dim=0)
    # threshold never reached: fall back to the last (largest) value
    position = position.clamp(max=order.shape[0] - 1)

    # translates the sorted position back to the index of the selected net,
    # then picks that net's value from `outputs`
    selector = order.gather(0, position.unsqueeze(0))

    return outputs.gather(0, selector).squeeze(0)

In [5]:
def max(outputs, reputations=None):
    """Calculate weighted max.

    For every element position, the network whose reputation-weighted output
    is largest is selected, and that network's original (unweighted) output
    is returned.

    NOTE: this name shadows the builtin `max` inside this module; it is kept
    because callers pass this function as an ensemble `mode`.

    Parameters
    ----------
    outputs : torch.Tensor
        Stacked outputs of every network; dimension 0 indexes the networks.
        Any number of trailing dimensions is accepted.
    reputations : array-like, optional
        One weight per network (list, np.ndarray or tensor).
        Defaults to ``dist.uniform(len(outputs))``.
        Entries beyond the number of networks are ignored.

    Returns
    -------
    torch.Tensor
        Tensor with the shape of ``outputs[0]`` holding the selected values.

    Raises
    ------
    ValueError
        If fewer reputations than networks are given (this used to compare
        against uninitialised memory).
    """

    if reputations is None:
        reputations = dist.uniform(len(outputs))

    num_nets = outputs.shape[0]

    # Keep the arithmetic in the outputs' own floating dtype and device.
    if outputs.is_floating_point():
        dtype = outputs.dtype
    else:
        dtype = torch.get_default_dtype()
    weights = torch.as_tensor(reputations, dtype=dtype).to(outputs.device)

    if weights.dim() != 1 or weights.shape[0] < num_nets:
        raise ValueError(
            "`reputations` SHOULD contain one weight per network"
        )

    # One weight per network, broadcast over all remaining dimensions.
    weights = weights[:num_nets].reshape(
        [num_nets] + [1] * (outputs.dim() - 1)
    )

    # calculates max indexes via reputations
    selectors = (outputs.detach() * weights).max(dim=0)[1]
    # [0]: values, [1]: indexes
    # shape: same as `outputs[0]`
    # value: which network (index of net)

    # selects the chosen networks' unweighted values
    return outputs.gather(0, selectors.unsqueeze(0)).squeeze(0)

# main

In [6]:
if __name__ == "__main__":
    # Demo: trains `numNets` ResNet-18 models on separate CIFAR-10 splits,
    # tests each of them, then tests the weighted-median ensemble of all of
    # them on every test split. Logs go to CSV files below `base_path`.
    import os
    from contextlib import ExitStack

    import torch.nn as nn
    import torch.optim as optim

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    from torch.utils.data import DataLoader  # TODO: DistributedDataParallel

    import import_ipynb
    from ml import train, test
    import nets

    # ---- Hyperparams ----
    numNets = 5
    numWorkers = 4
    # Use the GPU only when one is present, so the script also runs on
    # CPU-only machines instead of failing in `.cuda()`.
    cuda = torch.cuda.is_available()

    base_path = './ensemble_test'

    epochs = 2
    batchSz = 256

    # ---- Datasets ----
    # Hard-coded per-channel normalisation constants; presumably computed
    # once with `dist.get_norm` on the CIFAR-10 training set — TODO confirm.
    normMean = [0.49139968, 0.48215841, 0.44653091]
    normStd = [0.24703223, 0.24348513, 0.26158784]
    normTransform = transforms.Normalize(normMean, normStd)

    trainTransform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    testTransform = transforms.Compose([
        transforms.ToTensor(),
        normTransform
    ])

    trainset = dset.CIFAR10(root='cifar', train=True, download=True, transform=trainTransform)
    # Evaluation data must not be randomly cropped/flipped, hence
    # `testTransform` (not `trainTransform`).
    testset = dset.CIFAR10(root='cifar', train=False, download=True, transform=testTransform)

    # splits datasets
    splited_trainset = dist.random_split_by_dist(
        trainset,
        size=numNets,
        dist=dist.pareto,
        alpha=2.
    )
    splited_testset = dist.random_split_by_dist(
        testset,
        size=numNets,
        dist=dist.pareto,
        alpha=2.
    )

    # num_workers: number of worker processes used for data loading
    # pin_memory: page-locked host memory speeds up host-to-device transfer
    kwargs = {'num_workers': numWorkers, 'pin_memory': cuda}

    # loaders
    trainLoaders = [DataLoader(
        splited_trainset[i], batch_size=batchSz, shuffle=True, **kwargs
    ) for i in range(numNets)]
    testLoaders = [DataLoader(
        splited_testset[i], batch_size=batchSz, shuffle=True, **kwargs
    ) for i in range(numNets)]

    # ---- Nets ----
    num_classes = 10
    resnets = [nets.resnet18(num_classes=num_classes) for _ in range(numNets)]

    if cuda:
        # The wrapped modules are stored back into `resnets`; rebinding only
        # a loop variable would silently drop the DataParallel wrapper.
        multi_gpu = torch.cuda.device_count() > 1
        resnets = [
            (nn.DataParallel(net) if multi_gpu else net).cuda()
            for net in resnets
        ]

    criterions = [nn.CrossEntropyLoss() for _ in range(numNets)]
    # Optimizers are built after the models were moved to their device.
    optimizers = [optim.SGD(net.parameters(), lr=1e-1, momentum=0.9) for net in resnets]

    # Every log file is registered on the stack so that all of them are
    # flushed and closed, even when training raises.
    with ExitStack() as stack:
        trainFiles = []
        testFiles = []
        for i in range(numNets):
            path = os.path.join(base_path, str(i))
            os.makedirs(path, exist_ok=True)

            trainFiles.append(
                stack.enter_context(open(os.path.join(path, 'train.csv'), 'w'))
            )
            testFiles.append(
                stack.enter_context(open(os.path.join(path, 'test.csv'), 'w'))
            )

        # ---- Train & Test models ----
        for i in range(numNets):
            for epoch in range(epochs):

                train(
                    resnets[i], criterions[i], optimizers[i], trainLoaders[i],
                    epoch=epoch, cuda=cuda, log=True, log_file=trainFiles[i]
                )
                test(
                    resnets[i], criterions[i], testLoaders[i],
                    epoch=epoch, cuda=cuda, log=True, log_file=testFiles[i]
                )

        # ---- Test the ensemble model ----
        ensemble = Ensemble(resnets, mode=med, reputations=[0.05, 0.2, 0.3, 0.4, 0.05])

        testFile = stack.enter_context(
            open(os.path.join(base_path, 'test.csv'), 'w')
        )

        for i in range(numNets):
            test(
                ensemble, criterions[i], testLoaders[i],
                epoch=0, cuda=cuda, log=True, log_file=testFile
            )

importing Jupyter notebook from ml.ipynb
importing Jupyter notebook from nets.ipynb
Files already downloaded and verified
Files already downloaded and verified
