# FL Simulator

Simulates the `Federated Learning` paradigm, in which each node averages its neighbors' model weights into its own local model.

## Features

* Byzantine

## TODO

* Network topology

In [None]:
import import_ipynb
import nn.dist as dist
import nn.ensemble as ensemble
import nn.ml as ml
import nn.nets as nets

In [None]:
if __name__ == "__main__":
    import os
    from copy import deepcopy

    import torch
    import torch.nn as nn
    import torch.nn.functional as F ### joonha
    import torch.optim as optim
    from torch.autograd import Variable

    import torchvision.datasets as dset
    import torchvision.transforms as transforms

    from torch.utils.data import DataLoader  # TODO: DistributedDataParallel

    """Hyperparams"""
    # Number of simulated nodes; one local model, data shard and log pair each.
    numNets = 5  # original note: 21
    # numByzs = 0

    # Worker processes used by each DataLoader.
    numWorkers = 4
    # Use the GPU only when one is actually present, so the script also runs on
    # CPU-only machines instead of failing when `.cuda()` is called.
    cuda = torch.cuda.is_available()

    # Root directory for all CSV logs of this run.
    # NOTE(review): the directory name says "21" while numNets is 5 — confirm
    # whether the name is meant to track numNets.
    base_path = './simul_21_uniform_0_byzantine_fedavg'

    # Per-node logs live in <base_path>/<node index>/{train,test}.csv.
    trainFiles = []
    testFiles = []
    for i in range(numNets):
        path = os.path.join(base_path, str(i))
        os.makedirs(path, exist_ok=True)
        trainFiles.append(open(os.path.join(path, 'train.csv'), 'w'))
        testFiles.append(open(os.path.join(path, 'test.csv'), 'w'))
    # Logs for the averaged (global) model and for the ensemble of local models.
    testFile_global = open(os.path.join(base_path, 'test_global.csv'), 'w')
    testFile_ensemble = open(os.path.join(base_path, 'test_ensemble.csv'), 'w')

    epochs = 3000
    batchSz = 64

    """Datasets"""
    # Kept for reference: how per-channel statistics can be computed, and a
    # three-channel set of values (presumably the CIFAR10 statistics — confirm).
    # # gets mean and std
    # transform = transforms.Compose([transforms.ToTensor()])
    # dataset = dset.CIFAR10(root='cifar', train=True, download=True, transform=transform)
    # normMean, normStd = dist.get_norm(dataset)
    # normMean = [0.49139968, 0.48215841, 0.44653091]
    # normStd = [0.24703223, 0.24348513, 0.26158784]

    # Mean 0 / std 1 makes the Normalize step an identity on the pixel values.
    normMean = [0]
    normStd = [1]
    normTransform = transforms.Normalize(normMean, normStd)

    # Training-time augmentation.
    # NOTE(review): horizontal flips alter the appearance of handwritten digits —
    # confirm this augmentation is intended for MNIST.
    trainTransform = transforms.Compose([
        transforms.RandomCrop(28, padding=4),  # 32 for CIFAR10, 28 for MNIST
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normTransform
    ])
    # Evaluation uses no random augmentation.
    testTransform = transforms.Compose([
        transforms.ToTensor(),
        normTransform
    ])

    trainset = dset.MNIST(root='MNIST', train=True, download=True, transform=trainTransform)
    # The test split must use the deterministic test transform; it was previously
    # built with `trainTransform`, which randomly cropped/flipped test images.
    testset = dset.MNIST(root='MNIST', train=False, download=True, transform=testTransform)

    # Shard the train and test sets into `numNets` parts using `dist.uniform`
    # (the optional `alpha=2.` argument is left disabled).
    splited_trainset = dist.random_split_by_dist(trainset, size=numNets, dist=dist.uniform)
    splited_testset = dist.random_split_by_dist(testset, size=numNets, dist=dist.uniform)

    # Options shared by every DataLoader:
    #   num_workers - number of CPU worker processes used for data loading
    #   pin_memory  - enabled together with CUDA to speed up host-to-device copies
    kwargs = dict(num_workers=numWorkers, pin_memory=cuda)

    # Per-node loaders: all train loaders are built first, then all test loaders.
    trainLoaders = []
    for i in range(numNets):
        trainLoaders.append(
            DataLoader(splited_trainset[i], batch_size=batchSz, shuffle=True, **kwargs)
        )

    testLoaders = []
    for i in range(numNets):
        testLoaders.append(
            DataLoader(splited_testset[i], batch_size=batchSz, shuffle=True, **kwargs)
        )

    # Loader over the complete (unsplit) test set, used for the global model
    # and the ensemble.
    global_testLoader = DataLoader(
        testset, batch_size=batchSz, shuffle=True, **kwargs
    )

    """Nets"""
    
    num_classes = 10
    # One local model per node, plus the model that receives the averaged weights.
    fcnn = [nets.FCNN() for _ in range(numNets)]
    global_model = nets.FCNN()
    #resnets = [nets.resnet18(num_classes=num_classes) for _ in range(numNets)]
    #global_model = nets.resnet18(num_classes=num_classes)

    if cuda:
        # NOTE(review): this loop used to wrap each net in nn.DataParallel when
        # several GPUs were visible, but the wrapper was only bound to the loop
        # variable and then discarded, so it never took effect. The no-op wrap
        # is removed; real multi-GPU support would have to store the wrapper
        # back into `fcnn` / `global_model` (which also prefixes state_dict keys
        # with "module.").
        for net in fcnn + [global_model]:
            net.cuda()

    criterions = [nn.CrossEntropyLoss() for _ in range(numNets)]
    global_criterion = nn.CrossEntropyLoss()
    ensemble_criterion = nn.CrossEntropyLoss()
    # Built after the models sit on their final device, as the torch.optim
    # documentation recommends.
    optimizers = [optim.SGD(net.parameters(), lr=1e-1, momentum=0.9) for net in fcnn]

    # Averaging weight 1/numNets: a one-element tensor, created as float32,
    # moved to the GPU when used, then converted to float64. The deprecated
    # `Variable` wrapper is not needed for this.
    s = torch.Tensor([1. / numNets])
    if cuda:
        s = s.cuda()
    s = s.double()

    """Train & Test models"""
    # Every log file opened for this run. They are closed in the `finally`
    # clause so buffered rows reach disk even if training is interrupted.
    log_files = trainFiles + testFiles + [testFile_global, testFile_ensemble]
    try:
        for epoch in range(epochs):

            # aggregation and averaging
            # The entries of state_dict() are written in place below; the script
            # relies on those writes reaching `global_model` itself, since it
            # never calls load_state_dict on it.
            global_state_dict = global_model.state_dict()
            # Resolve each local model's parameters once per epoch instead of
            # rebuilding the name -> parameter dict for every (name, model) pair.
            local_params = [dict(net.named_parameters()) for net in fcnn]

            for name, target in global_state_dict.items():
                # Only entries that are named parameters of the first local
                # model are averaged; any other state_dict entry is left as is.
                if name not in local_params[0]:
                    continue
                target.fill_(0.)
                for params in local_params:
                    v = params[name]
                    # Scale a detached copy by 1/numNets and accumulate it.
                    t = v.clone().detach()
                    t.mul_(s.expand(v.size()))
                    target.add_(t)

            # Evaluate the averaged model on the full test set.
            ml.test(
                global_model, global_criterion, global_testLoader,
                epoch=epoch, cuda=cuda, log=True, log_file=testFile_global
            )

            # ensemble
            # Built from copies of the local models as they are before this
            # epoch's training.
            avg_ensemble = ensemble.Ensemble(deepcopy(fcnn), mode=ensemble.avg)

            ml.test(
                avg_ensemble, ensemble_criterion, global_testLoader,
                epoch=epoch, cuda=cuda, log=True, log_file=testFile_ensemble
            )

            # # byzantines
            # # random normal distribution
            # for b in range(numByzs):
            #     # TODO
            #     # weights.WrapedWeights(resnets[b].named_parameters()).apply(resnets[b])
            #     ml.test(
            #         resnets[b], criterions[b], testLoaders[b],
            #         epoch=epoch, cuda=cuda, log=True, log_file=testFiles[b]
            #     )

            # students
            # Each node starts from the averaged weights, trains on its own
            # shard, then is evaluated on its own test shard.
            # for i in range(numByzs, numNets):
            for i in range(numNets):
                fcnn[i].load_state_dict(global_model.state_dict())
                ml.train(
                    fcnn[i], criterions[i], optimizers[i], trainLoaders[i],
                    epoch=epoch, cuda=cuda, log=True, log_file=trainFiles[i]
                    # alpha=0.9, temperature=4
                )
                ml.test(
                    fcnn[i], criterions[i], testLoaders[i],
                    epoch=epoch, cuda=cuda, log=True, log_file=testFiles[i]
                )
    finally:
        for log_file in log_files:
            log_file.close()