# Simulator

Simulates the `Ensemble Federated Learning` paradigm, which distills the ensemble model's knowledge into each local model.

The ensemble model is formed by concatenating the peers' models.

## Features

* Byzantine

## TODO

* Network topology

In [None]:
import import_ipynb
import nn.dist as dist
import nn.ensemble as ensemble
import nn.kd as kd
import nn.ml as ml
import nn.nets as nets

In [None]:
if __name__ == "__main__":
    import os
    from copy import deepcopy

    import torch
    import torch.nn as nn
    import torch.optim as optim

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    from torch.utils.data import DataLoader  # TODO: DistributedDataParallel

    """Hyperparams"""
    # Population: numNets peers in total. Peers with index < numByzs are the
    # byzantine ones (their weights are overwritten with noise every epoch).
    numNets, numByzs = 21, 10

    # Training schedule.
    epochs, batchSz = 3000, 128

    # Data-loading workers per DataLoader, and whether to run on the GPU.
    numWorkers, cuda = 4, True

    # Root directory of all CSV logs produced by this run.
    base_path = './simul_21_pareto_10_byzantine_ensemble_kd'

    # One sub-directory per peer (named after its index) holding that peer's
    # train/test logs. The handles stay open for the whole run.
    trainFiles, testFiles = [], []
    for peer in range(numNets):
        peer_dir = os.path.join(base_path, str(peer))
        os.makedirs(peer_dir, exist_ok=True)
        trainFiles.append(open(os.path.join(peer_dir, 'train.csv'), 'w'))
        testFiles.append(open(os.path.join(peer_dir, 'test.csv'), 'w'))

    # Log for the ensemble (teacher) evaluated on the full test set.
    testFile = open(os.path.join(base_path, 'test.csv'), 'w')

    """Datasets"""
    # Per-channel normalization constants for CIFAR-10. They were obtained
    # once with the snippet below (kept for reference) and are hard-coded here:
    #   transform = transforms.Compose([transforms.ToTensor()])
    #   dataset = dset.CIFAR10(root='cifar', train=True, download=True, transform=transform)
    #   normMean, normStd = dist.get_norm(dataset)
    normMean = [0.49139968, 0.48215841, 0.44653091]
    normStd = [0.24703223, 0.24348513, 0.26158784]
    normTransform = transforms.Normalize(normMean, normStd)

    # Training pipeline: random augmentation, then tensor conversion and
    # normalization.
    trainTransform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    # Evaluation pipeline: no augmentation, so test metrics are deterministic.
    testTransform = transforms.Compose([
        transforms.ToTensor(),
        normTransform
    ])

    trainset = dset.CIFAR10(root='cifar', train=True, download=True, transform=trainTransform)
    # The test split must use the evaluation pipeline; applying the random
    # crop/flip of trainTransform here would perturb every test image.
    testset = dset.CIFAR10(root='cifar', train=False, download=True, transform=testTransform)

    # Split both datasets into numNets shards, one per peer. Shard sizes are
    # drawn via dist.pareto with alpha=2 (see nn.dist for the exact semantics).
    splited_trainset = dist.random_split_by_dist(
        trainset,
        size=numNets,
        dist=dist.pareto,
        alpha=2.
    )
    splited_testset = dist.random_split_by_dist(
        testset,
        size=numNets,
        dist=dist.pareto,
        alpha=2.
    )

    # num_workers: number of worker processes used for data loading
    # pin_memory: load batches into page-locked host memory, which speeds up
    #             host-to-device transfers; only enabled when running on CUDA
    kwargs = {'num_workers': numWorkers, 'pin_memory': cuda}

    # Per-peer loaders over each peer's own shard, plus one loader over the
    # whole test set for evaluating the ensemble.
    trainLoaders = [DataLoader(
        splited_trainset[i], batch_size=batchSz, shuffle=True, **kwargs
    ) for i in range(numNets)]
    testLoaders = [DataLoader(
        splited_testset[i], batch_size=batchSz, shuffle=True, **kwargs
    ) for i in range(numNets)]
    global_testLoader = DataLoader(testset, batch_size=batchSz, shuffle=True, **kwargs)

    """Nets"""
    # One independent ResNet-18 per peer, each with its own loss and optimizer.
    num_classes = 10
    resnets = [nets.resnet18(num_classes=num_classes) for _ in range(numNets)]

    criterions = [nn.CrossEntropyLoss() for _ in range(numNets)]
    # Loss used when evaluating the ensemble on the full test set.
    global_criterion = nn.CrossEntropyLoss()
    optimizers = [optim.SGD(net.parameters(), lr=1e-1, momentum=0.9) for net in resnets]

    if cuda:
        multi_gpu = torch.cuda.device_count() > 1
        for idx, net in enumerate(resnets):
            # With several GPUs, wrap the net so batches are split across them.
            if multi_gpu:
                net = nn.DataParallel(net)

            # Store the (possibly wrapped) module back into the list. Rebinding
            # only the loop variable would silently drop the DataParallel
            # wrapper. The wrapper shares the same Parameter objects, so the
            # optimizers created above remain valid.
            resnets[idx] = net.cuda()

    """Train & Test models"""
    for epoch in range(epochs):

        # Teacher: an ensemble built from a deep copy of every peer's current
        # model, so later in-place updates to the peers do not affect it.
        # NOTE(review): ensemble.med presumably aggregates by median - confirm
        # in nn.ensemble.
        teacher = ensemble.Ensemble(deepcopy(resnets), mode=ensemble.med)

        # Evaluate the ensemble on the full test set.
        ml.test(
            teacher, global_criterion, global_testLoader,
            epoch=epoch, cuda=cuda, log=True, log_file=testFile
        )

        # Byzantine peers: replace every parameter with N(0, 1) noise.
        for b in range(numByzs):
            byz_net = resnets[b]
            params = dict(byz_net.named_parameters())

            # Overwrite each parameter tensor in place with fresh noise of the
            # same shape.
            for key in params:
                noise = torch.normal(mean=0., std=1., size=params[key].shape)
                params[key].data.copy_(noise.data)

            # Push the randomized parameters back through the state dict.
            merged_state = byz_net.state_dict()
            merged_state.update(params)
            byz_net.load_state_dict(merged_state)  # load

            # Log the byzantine peer's performance on its own test shard.
            ml.test(
                byz_net, criterions[b], testLoaders[b],
                epoch=epoch, cuda=cuda, log=True, log_file=testFiles[b]
            )

        # Honest peers (students): train against the teacher with the KD
        # criterion on their own shard, then evaluate on their own test shard.
        for i in range(numByzs, numNets):
            student = resnets[i]
            kd.train_KD(
                student, teacher, kd.criterion_KD, optimizers[i], trainLoaders[i],
                epoch=epoch, cuda=cuda, log=True, log_file=trainFiles[i]
                # alpha=0.9, temperature=4
            )
            ml.test(
                student, criterions[i], testLoaders[i],
                epoch=epoch, cuda=cuda, log=True, log_file=testFiles[i]
            )