## Imports and Setup

In [None]:
!pip install wandb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.13.6-py2.py3-none-any.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 6.5 MB/s 
[?25hCollecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.11-py3-none-any.whl (10 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.29-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 58.2 MB/s 
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31 kB)
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.11.1-py2.py3-none-any.whl (168 kB)
[K     |████████████████████████████████| 168 kB 70.9 MB/s 
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.10-

In [None]:

import torch 
import torchvision 
from torchvision.datasets import MNIST, CIFAR10, CIFAR100 
import numpy as np
from numpy import random
from torch.nn import functional as F
from torchvision import transforms, datasets
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision.io import read_image
from tqdm.notebook import tqdm
from copy import deepcopy
import torch.nn as nn
import torch.optim as optim
import wandb
import numpy as np
from statistics import mean
import random

## Data Utils

In [None]:

#This is working.
def get_MNIST(path='./data'):
    """Fetch MNIST and return its train and test splits merged into one set.

    Args:
        path: Root directory passed to ``MNIST`` (data is downloaded there
            when missing).

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays: train samples followed by
        test samples, with the images reshaped to ``(-1, 1, 28, 28)``.
    """
    # Train split first, then test split -- the order fixes the row order below.
    splits = [MNIST(path, train=is_train, download=True) for is_train in (True, False)]
    images = np.vstack([split.data.numpy() for split in splits])
    labels = np.concatenate([split.targets.numpy() for split in splits])
    return images.reshape(-1, 1, 28, 28), labels

def get_CIFAR10(path='./data'):
    """Fetch CIFAR-10 and return its train and test splits merged into one set.

    Args:
        path: Root directory passed to ``CIFAR10`` (data is downloaded there
            when missing).

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays: train samples followed by
        test samples. Images keep whatever layout the dataset object stores;
        no reshape or transpose is applied here.
        NOTE(review): that layout is presumably channel-last -- confirm before
        feeding the array to a channel-first model.
    """
    splits = [CIFAR10(path, train=is_train, download=True) for is_train in (True, False)]
    images = np.vstack([split.data for split in splits])
    labels = np.concatenate([np.array(split.targets) for split in splits])
    return images, labels

def get_CIFAR100(path='./data'):
    """Fetch CIFAR-100 and return its train and test splits merged into one set.

    Args:
        path: Root directory passed to ``CIFAR100`` (data is downloaded there
            when missing).

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays: train samples followed by
        test samples. Images keep whatever layout the dataset object stores;
        no reshape or transpose is applied here.
        NOTE(review): that layout is presumably channel-last -- confirm before
        feeding the array to a channel-first model.
    """
    train_split = CIFAR100(path, train=True, download=True)
    test_split = CIFAR100(path, train=False, download=True)
    image_parts = [train_split.data, test_split.data]
    label_parts = [np.array(train_split.targets), np.array(test_split.targets)]
    return np.vstack(image_parts), np.concatenate(label_parts)

In [None]:
def federated_data_split(data, targets, num_clients, alpha, 
                         uniform=False):
    """Partition a labelled dataset across clients using per-class Dirichlet weights.

    For every class a weight vector over the clients is drawn from
    ``Dirichlet(alpha, ..., alpha)``. The samples of that class are then handed
    out as consecutive slices whose sizes follow those weights; leftovers from
    rounding down go to the clients with the largest remainders, so every
    sample is assigned to exactly one client.

    Args:
        data: ndarray of shape ``(data_length, ...)``.
        targets: integer ndarray of shape ``(data_length,)``; the classes are
            taken to be ``0 .. targets.max()``.
        num_clients: number of client shards to create.
        alpha: Dirichlet concentration used for every client.
        uniform: when true, ``alpha`` is replaced by a very large value so the
            drawn weights are practically equal.

    Returns:
        List of ``num_clients`` tuples ``(client_data, client_targets)``.
        A client that received no sample at all gets empty arrays
        (``client_data`` of shape ``(0, ...)``) instead of raising.
    """
    # A huge concentration makes every Dirichlet draw (almost) uniform.
    if uniform:
        alpha = 1_000_000

    num_classes = targets.max() + 1
    class_partitions = [data[targets == label] for label in range(num_classes)]
    proportions = np.random.dirichlet([alpha for _ in range(num_clients)],
                                      size=len(class_partitions))

    # clients[c] maps class label -> the slice of that class owned by client c.
    clients = [{} for _ in range(num_clients)]
    for class_idx, class_data in enumerate(class_partitions):
        class_size = class_data.shape[0]
        expected_samples = proportions[class_idx] * class_size
        samples_per_client = np.floor(expected_samples)
        # Largest-remainder rounding: give each leftover sample to the client
        # that is currently furthest below its expected share.
        for _ in range(int(class_size - np.sum(samples_per_client))):
            samples_per_client[np.argmax(expected_samples - samples_per_client)] += 1

        start = 0
        for client_idx, stop in enumerate(np.cumsum(samples_per_client)):
            stop = int(stop)
            chunk = class_data[start:stop]
            if chunk.shape[0] > 0:
                clients[client_idx][class_idx] = chunk
            start = stop

    numpy_clients = []
    for client in clients:
        if client:
            client_data = np.vstack(list(client.values()))
            client_targets = np.concatenate(
                [np.full(chunk.shape[0], label, dtype=int)
                 for label, chunk in client.items()])
        else:
            # np.vstack / np.concatenate reject empty lists, so build the
            # empty shard explicitly with the right trailing shape and dtype.
            client_data = np.empty((0,) + data.shape[1:], dtype=data.dtype)
            client_targets = np.empty((0,), dtype=int)
        numpy_clients.append((client_data, client_targets))
    return numpy_clients

def train_test_split(data, target, test_ratio):
    """Randomly split aligned NumPy arrays into a train part and a test part.

    Args:
        data: ndarray whose first axis indexes samples.
        target: ndarray aligned with ``data`` along the first axis.
        test_ratio: fraction of the samples that goes to the TEST part; the
            test size is ``round(len(data) * test_ratio)``.

    Returns:
        ``((data_train, target_train), (data_test, target_test))``.
    """
    data_len = data.shape[0]
    num_test = int(np.round(data_len * test_ratio))
    permutation = np.random.permutation(np.arange(data_len))
    # The first `num_test` shuffled indices form the test set and the rest the
    # train set. The previous code assigned them the other way round, so
    # test_ratio=0.1 produced a 10% train / 90% test split.
    test_idx, train_idx = permutation[:num_test], permutation[num_test:]
    return ((data[train_idx], target[train_idx]),
            (data[test_idx], target[test_idx]))


## Models


In [None]:
class CIFAR_net(nn.Module):
    """Two-convolution CNN for 3-channel images that outputs raw class scores.

    Layout: 5x5 conv (3 -> 64) + ReLU + 2x2 max-pool, 5x5 conv (64 -> 128) +
    ReLU + adaptive average pool to 3x3, then three linear layers
    (1152 -> 1000 -> 50 -> ``out_dim``) with ReLU between them. No softmax is
    applied to the output.

    Args:
        out_dim: number of output scores.
    """

    def __init__(self, out_dim=10):
        super().__init__()
        # Feature extractor.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5)
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(3, 3))
        # Classifier head.
        self.fc1 = nn.Linear(in_features=128 * 3 * 3, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=50)
        self.fc3 = nn.Linear(in_features=50, out_features=out_dim)

    def forward(self, x):
        """Map a channel-first image batch to ``(batch, out_dim)`` scores."""
        features = F.relu(self.conv1(x))
        features = self.pool(features)
        features = F.relu(self.conv2(features))
        features = self.avgpool(features)
        # Keep the batch axis, flatten everything else.
        flat = features.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


class MNIST_net(nn.Module):
    """Three-convolution CNN for 1-channel images that outputs 10 log-probabilities.

    Three 5x5 convolutions (1 -> 32 -> 32 -> 64), with 2x2 max-pooling after
    the second and third, feed two linear layers (576 -> 256 -> 10). Dropout
    with p=0.5 follows the second and third convolution stages and the first
    linear layer; it is only active while the module is in training mode.
    The flatten step assumes the feature map is 3x3 at that point, which is
    what 28x28 inputs produce.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5)
        self.fc1 = nn.Linear(in_features=64 * 3 * 3, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Return ``log_softmax`` scores of shape ``(batch, 10)``."""
        use_dropout = self.training
        hidden = F.relu(self.conv1(x))
        hidden = F.max_pool2d(self.conv2(hidden), kernel_size=2)
        hidden = F.dropout(F.relu(hidden), p=0.5, training=use_dropout)
        hidden = F.max_pool2d(self.conv3(hidden), kernel_size=2)
        hidden = F.dropout(F.relu(hidden), p=0.5, training=use_dropout)
        hidden = hidden.view(-1, 64 * 3 * 3)
        hidden = F.dropout(F.relu(self.fc1(hidden)), p=0.5, training=use_dropout)
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

##  Utils

In [None]:
def client_batcher(clients_train, clients_test,participate_rate):
    """Randomly pick a fraction of the clients and return their entries.

    Args:
        clients_train: sequence with one training entry per client.
        clients_test: sequence with one test entry per client, aligned with
            ``clients_train``.
        participate_rate: fraction of clients to select; the number selected
            is ``round(participate_rate * len(clients_train))``.

    Returns:
        ``(participate_list, selected_train, selected_test)`` where
        ``participate_list`` holds the chosen client indices in ascending
        order and the other two lists hold the matching entries.
    """
    clients_num = len(clients_train)
    participate_num = round(participate_rate*clients_num)
    # Exactly `participate_num` ones. The earlier `t <= participate_num`
    # selected one client too many whenever fewer than all were requested.
    selection_mask = [1 if t < participate_num else 0 for t in range(clients_num)]
    # shuffle() works in place and returns None, so its result is not kept.
    random.shuffle(selection_mask)
    participate_list = [idx for idx, flag in enumerate(selection_mask) if flag == 1]
    selected_train = [clients_train[p] for p in participate_list]
    selected_test = [clients_test[p] for p in participate_list]
    return participate_list, selected_train, selected_test

In [None]:
def get_accuracy_loss(model,loss_fn,  dataLoader , repeat=None):
    """Evaluate ``model`` on ``dataLoader`` and return ``(accuracy, mean_loss)``.

    The model is put into eval mode for the measurement and returned to the
    mode it was in afterwards, so a later training step is not silently run
    with dropout disabled.

    Args:
        model: ``nn.Module`` returning one score vector per sample.
        loss_fn: callable ``loss_fn(preds, targets)``; its result is averaged
            with ``.mean()`` and weighted by the batch size.
        dataLoader: iterable of ``(data, targets)`` batches.
        repeat: when given, only the first ``repeat`` batches are evaluated.

    Returns:
        Two Python floats: the fraction of correctly classified samples and
        the per-sample mean loss, both over the samples actually evaluated.
        ``(0.0, 0.0)`` is returned when no sample was evaluated.
    """
    # Evaluate on the device the model lives on; fall back to the old guess
    # only for models without parameters.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    was_training = model.training
    model.eval()
    correct = 0
    loss_sum = 0.0
    seen = 0
    try:
        with torch.no_grad():
            for i, (data, targets) in enumerate(dataLoader):
                if repeat is not None and i >= repeat:
                    break
                data = data.float().to(device)
                targets = targets.long().to(device)
                preds = model(data)
                batch_size = targets.shape[0]
                loss_sum += loss_fn(preds, targets).float().mean().item() * batch_size
                correct += (torch.argmax(preds, dim=1) == targets).sum().item()
                seen += batch_size
    finally:
        model.train(was_training)

    if seen == 0:
        return 0.0, 0.0
    # Normalise by the samples evaluated, which is also right when `repeat`
    # cut the pass short.
    return correct / seen, loss_sum / seen

def get_fed_accuracy_loss(state, loss_fn, loaders):
    """Average the server model's accuracy and loss over several loaders.

    Args:
        state: dict holding the model under the key ``'server_model'``.
        loss_fn: loss callable forwarded to ``get_accuracy_loss``.
        loaders: iterable of data loaders, one evaluation per loader.

    Returns:
        ``(mean_accuracy, mean_loss)``: unweighted means of the per-loader
        results (each loader counts equally, whatever its size).
    """
    server_model = state['server_model']
    per_loader = [get_accuracy_loss(server_model, loss_fn, loader) for loader in loaders]
    accuracies = [accuracy for accuracy, _ in per_loader]
    losses = [loss for _, loss in per_loader]
    return mean(accuracies), mean(losses)

def get_bgd(model, loaders,loss_fn):
    """Compare per-client gradients of ``model`` at its current parameters.

    For each loader the gradient of ``loss_fn`` with respect to the trainable
    parameters is computed per batch and averaged over the batches, giving one
    flat gradient vector ``g_i`` per client.

    Gradients are obtained with ``torch.autograd.grad``, so the ``.grad``
    fields of the model are neither read nor written. The previous
    ``backward()`` calls never cleared them, so every batch added onto the
    gradients of all earlier batches and clients.

    Args:
        model: ``nn.Module`` to differentiate.
        loaders: sequence of iterables yielding ``(data, targets)`` batches,
            one per client.
        loss_fn: callable ``loss_fn(preds, targets)`` returning a scalar.

    Returns:
        ``(mean_norm, norm_mean)`` as Python floats, where ``mean_norm`` is
        the mean over clients of ``||g_i||^2`` and ``norm_mean`` is
        ``||mean_i g_i||^2``.

    Raises:
        ValueError: if the model has no trainable parameter.
    """
    params = [p for p in model.parameters() if p.requires_grad]
    if not params:
        raise ValueError("get_bgd needs a model with at least one trainable parameter")
    device = params[0].device
    flat_dim = sum(p.numel() for p in params)
    # One row per client.
    client_grad_vecs = torch.zeros(len(loaders), flat_dim,
                                   device=device, dtype=params[0].dtype)

    for client_idx, loader in enumerate(loaders):
        num_batches = 0
        for data, targets in loader:
            data = data.float().to(device)
            targets = targets.long().to(device)
            loss = loss_fn(model(data), targets)
            grads = torch.autograd.grad(loss, params, allow_unused=True)
            # Parameters that did not take part in the loss contribute zeros.
            client_grad_vecs[client_idx] += torch.hstack([
                torch.zeros_like(p).flatten() if g is None else g.detach().flatten()
                for p, g in zip(params, grads)
            ])
            num_batches += 1
        # A loader without batches keeps its all-zero row.
        if num_batches:
            client_grad_vecs[client_idx] /= num_batches

    clients_norms = torch.norm(client_grad_vecs, dim=1) ** 2
    mean_norm = torch.mean(clients_norms).item()
    # Average ACROSS clients (dim=0) to get the mean gradient vector; dim=1
    # would average over the parameters of each client instead.
    norm_mean = (torch.norm(torch.mean(client_grad_vecs, dim=0)) ** 2).item()
    return mean_norm, norm_mean

In [None]:
def base_krum(agg_dicts, neighbour_val=3):
    
    vecs = []
    #greate vectors to simplify 
    for param_dict in agg_dicts:
        vec = torch.hstack([param_dict[name].flatten() for name in 
                            param_dict.keys()])
        vecs.append(vec)
    vec_num = len(vecs)
    sim_matrix = torch.zeros(size=(vec_num,vec_num))
    for i in range(vec_num):
        if i==0:
            continue
        for j in range(i):
            sim_matrix[i,j]=torch.norm(vec[i]-vec[j])**2
    sim_matrix += sim_matrix.T.clone()
    vec_score = torch.zeros(size=(vec_num,))
    for i in range(vec_num):
        vec_score[i] = torch.sum(torch.sort(sim_matrix[i])[0][:neighbour_val+1])
    chosen_vec = torch.argmin(vec_score)
    return agg_dicts[chosen_vec]

def bucket_krum(agg_dicts, neighbour_val=3,bag_size=2):
    """Krum applied to the means of random bags of client updates.

    The updates are shuffled and grouped into ``ceil(n / bag_size)`` bags, so
    no bag holds more than ``bag_size`` updates. Each bag is averaged with
    ``mean_agg`` and ``base_krum`` picks one of the bag means.

    The earlier code passed ``bag_size`` to ``np.array_split`` as the NUMBER
    of bags, which produced ``bag_size`` bags of roughly ``n / bag_size``
    updates each, and empty bags whenever ``bag_size`` exceeded ``n``.

    Args:
        agg_dicts: list of dicts mapping names to tensors, one per client.
        neighbour_val: forwarded to ``base_krum``.
        bag_size: maximum number of updates averaged into one bag (>= 1).

    Returns:
        The bag-mean dict chosen by ``base_krum``. This is a newly built
        average, not one of the input dicts.

    Raises:
        ValueError: if ``bag_size`` is smaller than 1.
    """
    if bag_size < 1:
        raise ValueError("bag_size must be at least 1")
    num_clients = len(agg_dicts)
    order = np.random.permutation(np.arange(num_clients)).astype(int)
    # Ceiling division without importing math; always at least one bag.
    num_bags = max(1, -(-num_clients // bag_size))
    bags = np.array_split(order, num_bags)
    bag_mean = [mean_agg([agg_dicts[j] for j in bag]) for bag in bags]
    return base_krum(bag_mean, neighbour_val=neighbour_val)


def get_krum_fn(hyper):
    """Build a one-argument aggregator around ``bucket_krum``.

    The federated algorithms call their aggregator with just the list of
    client updates; the returned closure supplies the Krum settings.

    Args:
        hyper: mapping providing ``'neighbour_val'`` and ``'bag_size'``. The
            values are read each time the returned function is called, not
            when it is created.

    Returns:
        Function ``krum_wrap(agg_list)`` returning ``bucket_krum``'s result.
    """
    def krum_wrap(agg_list):
        settings = {
            'neighbour_val': hyper['neighbour_val'],
            'bag_size': hyper['bag_size'],
        }
        return bucket_krum(agg_list, **settings)

    return krum_wrap

In [None]:
@torch.no_grad()
def mean_agg(agg_dicts, p_list=None, agg_state=None):
    """Average a list of tensor dicts entry by entry.

    Args:
        agg_dicts: non-empty list of dicts mapping names to tensors; the keys
            of the first dict decide which entries are averaged.
        p_list: unused; accepted so the signature matches other aggregators.
        agg_state: unused; accepted so the signature matches other aggregators.

    Returns:
        Dict with the first dict's keys, each mapped to the element-wise mean
        of that entry over all dicts.
    """
    names = list(agg_dicts[0].keys())
    return {
        name: torch.stack([entry[name] for entry in agg_dicts]).mean(dim=0)
        for name in names
    }

## FedAvg

In [None]:
def init_fedavg_state(model, hyper=None):
    """Create the initial FedAvg server state.

    Args:
        model: model to start from; it is deep-copied, so the caller's object
            is never modified through the state.
        hyper: unused; accepted so all state initialisers share a signature.

    Returns:
        Dict with the single key ``'server_model'`` holding the copy.
    """
    state = {}
    state["server_model"] = deepcopy(model)
    return state



def fedavg_client_run(model,train_dataloader,loss_fn,hyper):
    """Run the local SGD steps of one FedAvg client and return its model delta.

    A copy of ``model`` is trained for exactly ``hyper['local_steps']``
    mini-batch steps with plain SGD (step size ``hyper['client_lr']``),
    cycling through ``train_dataloader`` as often as needed. ``model`` itself
    is not modified.

    Fixes over the previous version:
      * the step counter was checked before the update, so only
        ``local_steps - 1`` steps were taken;
      * gradients were never cleared, so each step used the sum of all
        gradients so far, plus any gradient carried over by the model copy;
      * the result was built inside the loop and was unbound for
        ``local_steps <= 0``;
      * an empty loader made the loop spin forever.

    Args:
        model: server model to start from.
        train_dataloader: iterable of ``(data, targets)`` batches.
        loss_fn: callable ``loss_fn(preds, targets)`` returning a scalar.
        hyper: mapping with ``'local_steps'``, ``'client_lr'``, ``'device'``.

    Returns:
        Dict mapping each parameter name to ``trained_param - original_param``.

    Raises:
        ValueError: if steps are requested but the loader yields no batch.
    """
    client_model = deepcopy(model)
    local_steps = hyper["local_steps"]
    # Start from clean gradients: the copy may carry gradients from the source.
    for param in client_model.parameters():
        param.grad = None

    steps_done = 0
    while steps_done < local_steps:
        saw_batch = False
        for data, targets in train_dataloader:
            saw_batch = True
            data = data.float().to(hyper['device'])
            targets = targets.long().to(hyper['device'])
            loss = loss_fn(client_model(data), targets)
            loss.backward()
            with torch.no_grad():
                for param in client_model.parameters():
                    if param.grad is not None:
                        param -= hyper['client_lr'] * param.grad
                        # Clear it so the next backward() starts from zero.
                        param.grad = None
            steps_done += 1
            if steps_done >= local_steps:
                break
        if not saw_batch:
            raise ValueError("train_dataloader produced no batches")

    # The update is the difference between the trained copy and the original.
    og_state_dict = model.state_dict()
    with torch.no_grad():
        update_dict = {name: param - og_state_dict[name]
                       for name, param in client_model.named_parameters()}
    return update_dict

      
def fedavg(p_list, server_state, clients_train_loaders, loss_fn, hyper, agg_fun=None):
    """Perform one FedAvg round and return the new server state.

    Every participating client trains from the current server model, the
    client deltas are combined with ``agg_fun``, and the result is added to
    the server model in place, scaled by ``hyper['server_lr']``.

    Args:
        p_list: indices of the participating clients; only its length is used
            here, since the loaders are already restricted to participants.
        server_state: dict holding the model under ``'server_model'``.
        clients_train_loaders: one loader per participant, in ``p_list`` order.
        loss_fn: loss callable forwarded to the client run.
        hyper: hyperparameter mapping (``'server_lr'`` plus what the client
            run reads).
        agg_fun: callable turning the list of client deltas into one dict.

    Returns:
        ``{"server_model": server_model}`` with the updated model.
    """
    server_model = server_state['server_model']
    clients_update_dicts = [
        fedavg_client_run(server_model, clients_train_loaders[position], loss_fn, hyper)
        for position in range(len(p_list))
    ]
    update_dict = agg_fun(clients_update_dicts)
    with torch.no_grad():
        for name, param in server_model.named_parameters():
            step = hyper['server_lr'] * update_dict[name]
            param += step
    return {"server_model": server_model}

## SCAFFOLD

In [None]:
def init_scaffold_state(model, hyper=None):
    """Create the initial SCAFFOLD server state.

    Args:
        model: model to start from; it is deep-copied into the state.
        hyper: mapping that must provide ``'num_clients'``.

    Returns:
        Dict with
          * ``'server_model'``: deep copy of ``model``;
          * ``'server_control_variates'``: one zero tensor per named parameter;
          * ``'clients_control_variates'``: list of ``num_clients`` such dicts,
            each with its own zero tensors.
    """
    num_clients = hyper['num_clients']
    named_params = list(model.named_parameters())

    def zero_variates():
        # Fresh tensors on every call so that no two dicts share storage.
        return {name: torch.zeros_like(param) for name, param in named_params}

    server_control_variates = zero_variates()
    clients_control_variates = [zero_variates() for _ in range(num_clients)]
    return {
        "server_model": deepcopy(model),
        "server_control_variates": server_control_variates,
        "clients_control_variates": clients_control_variates,
    }
    
def scaffold_client_run(model, server_control_variates,
                        client_control_variates, train_dataloader,loss_fn,hyper):
    """Run the local steps of one SCAFFOLD client.

    A copy of ``model`` is trained for exactly ``hyper['local_steps']``
    mini-batch steps. Each step moves a parameter along
    ``grad - client_control_variate + server_control_variate`` with step size
    ``hyper['client_lr']``. ``model`` and both control-variate dicts are left
    unmodified.

    Fixes over the previous version:
      * only ``local_steps - 1`` steps were taken, although the control-variate
        formula divides by ``local_steps``;
      * gradients were never cleared between steps, and the model copy could
        start with gradients carried over from the source model;
      * the results were built inside the loop;
      * an empty loader made the loop spin forever.

    Args:
        model: server model to start from.
        server_control_variates: dict name -> tensor (server variate).
        client_control_variates: dict name -> tensor (this client's variate).
        train_dataloader: iterable of ``(data, targets)`` batches.
        loss_fn: callable ``loss_fn(preds, targets)`` returning a scalar.
        hyper: mapping with ``'local_steps'``, ``'client_lr'``, ``'device'``.

    Returns:
        ``(update_dict, update_client_control_variate)`` where, per parameter
        name, ``update_dict`` is ``trained_param - original_param`` and
        ``update_client_control_variate`` is the CHANGE to apply to the
        client's control variate:
        ``-server_variate - update / (local_steps * client_lr)``.

    Raises:
        ValueError: if ``local_steps < 1`` or the loader yields no batch.
    """
    client_model = deepcopy(model)
    local_steps = hyper["local_steps"]
    if local_steps < 1:
        raise ValueError("hyper['local_steps'] must be at least 1 for SCAFFOLD")
    # Start from clean gradients: the copy may carry gradients from the source.
    for param in client_model.parameters():
        param.grad = None

    steps_done = 0
    while steps_done < local_steps:
        saw_batch = False
        for data, targets in train_dataloader:
            saw_batch = True
            data = data.float().to(hyper['device'])
            targets = targets.long().to(hyper['device'])
            loss = loss_fn(client_model(data), targets)
            loss.backward()
            with torch.no_grad():
                for name, param in client_model.named_parameters():
                    if param.grad is not None:
                        corrected = (param.grad - client_control_variates[name]
                                     + server_control_variates[name])
                        param -= hyper['client_lr'] * corrected
                        # Clear it so the next backward() starts from zero.
                        param.grad = None
            steps_done += 1
            if steps_done >= local_steps:
                break
        if not saw_batch:
            raise ValueError("train_dataloader produced no batches")

    og_state_dict = model.state_dict()
    scale = 1 / (local_steps * hyper['client_lr'])
    update_dict = {}
    update_client_control_variate = {}
    with torch.no_grad():
        for name, param in client_model.named_parameters():
            update_dict[name] = param - og_state_dict[name]
            update_client_control_variate[name] = (
                -server_control_variates[name] - scale * update_dict[name])
    return update_dict, update_client_control_variate

      
def scaffold(p_list, server_state, clients_train_loaders, loss_fn, hyper, agg_fun=None):
    """Perform one SCAFFOLD round, updating ``server_state`` in place.

    Each participant trains from the server model with its own control
    variate. ``agg_fun`` combines the model deltas and, separately, the
    control-variate deltas. The server model moves by ``hyper['server_lr']``
    times the combined model delta; the server variate moves by the combined
    variate delta scaled by the participation fraction; each participant's
    stored variate moves by its own delta.

    Args:
        p_list: indices of the participating clients (into the stored client
            control variates).
        server_state: dict with ``'server_model'``,
            ``'server_control_variates'`` and ``'clients_control_variates'``.
        clients_train_loaders: one loader per participant, in ``p_list`` order.
        loss_fn: loss callable forwarded to the client run.
        hyper: mapping with ``'server_lr'`` and ``'num_clients'`` plus what
            the client run reads.
        agg_fun: callable turning a list of per-client dicts into one dict.

    Returns:
        The same ``server_state`` object, after the in-place updates.
    """
    server_model = server_state['server_model']

    client_results = [
        scaffold_client_run(server_model,
                            server_state['server_control_variates'],
                            server_state['clients_control_variates'][client_id],
                            clients_train_loaders[position],
                            loss_fn, hyper)
        for position, client_id in enumerate(p_list)
    ]
    clients_update_dicts = [param_update for param_update, _ in client_results]
    update_control_variates = [variate_update for _, variate_update in client_results]

    update_param_dict = agg_fun(clients_update_dicts)
    update_variates_dict = agg_fun(update_control_variates)

    with torch.no_grad():
        for name, param in server_model.named_parameters():
            # Move the server model.
            param += hyper['server_lr'] * update_param_dict[name]
            # Move the server control variate, weighted by the share of
            # clients that took part in this round.
            participation = len(p_list) / hyper['num_clients']
            server_state['server_control_variates'][name] += participation * update_variates_dict[name]
            # Move each participant's own control variate.
            for position, client_id in enumerate(p_list):
                client_variates = server_state['clients_control_variates'][client_id]
                client_variates[name] += update_control_variates[position][name]
    return server_state

## Main

In [None]:
def _to_channels_first(images):
    """Move a trailing 3-channel axis to position 1: (N, H, W, 3) -> (N, 3, H, W).

    Arrays that are not 4-D, or whose last axis does not have size 3, are
    returned unchanged.
    """
    if images.ndim == 4 and images.shape[-1] == 3:
        return images.transpose(0, 3, 1, 2)
    return images


def main(config=None):
    """Run one federated-learning experiment inside a Weights & Biases run.

    Steps: seed the RNGs, load the dataset and build the matching model, split
    the data across clients, optionally poison the labels of some clients,
    then run ``hyper['rounds']`` rounds of the chosen algorithm. Accuracy,
    loss and gradient-dissimilarity metrics are logged after every round.

    Hyperparameters read here from ``wandb.config``: ``random_state``,
    ``data``, ``device``, ``num_clients``, ``alpha``, ``test_ratio``,
    ``attack``, ``attack_ratio``, ``batch_size``, ``alg``, ``aggregation``,
    ``rounds``, ``participate_rate``. The algorithm and aggregation helpers
    read further keys from the same object.

    Args:
        config: optional configuration forwarded to ``wandb.init``.

    Raises:
        NotImplementedError: for an unknown dataset, algorithm or aggregation.
    """
    with wandb.init(config=config):
        hyper=wandb.config
        # Print the hyperparameters of this run.
        print(hyper)
        torch.manual_seed(hyper["random_state"])
        random.seed(hyper["random_state"])
        np.random.seed(hyper["random_state"])
        if hyper['data']=='MNIST':
            data, targets = get_MNIST()
            model = MNIST_net().to(hyper['device'])
            num_classes=10
        elif hyper['data'] == 'CIFAR10':
            data, targets = get_CIFAR10()
            # The CIFAR loaders return the images as stored by the dataset
            # (channel-last in torchvision); Conv2d needs channel-first input.
            data = _to_channels_first(data)
            model = CIFAR_net(out_dim=10).to(hyper['device'])
            num_classes = 10
        elif hyper['data'] == 'CIFAR100':
            data, targets = get_CIFAR100()
            data = _to_channels_first(data)
            num_classes = 100
            model = CIFAR_net(out_dim=100).to(hyper['device'])
        else:
            raise NotImplementedError("Dataset not recognized")
        loss_fn = nn.CrossEntropyLoss()
        
        ## Prepare the per-client datasets:
        clients_data = federated_data_split(data, targets, 
                                        hyper['num_clients'], hyper['alpha'])
        clients_data = [train_test_split(data, target, hyper["test_ratio"]) for 
                    data, target in clients_data]
        clients_train = [(torch.tensor(data), torch.tensor(targets)) for (data,targets), _ in clients_data]
        clients_test = [(torch.tensor(data), torch.tensor(targets)) for _, (data,targets) in clients_data]
        
        if hyper['attack']==True:
            # Label-shift attack on a random subset of clients: training labels
            # below 5 are moved up by one (then wrapped modulo num_classes).
            attack_list, _, _ = client_batcher(clients_train, clients_test, hyper['attack_ratio'])
            for attack_idx in attack_list:
                attack_data, attack_targets = clients_train[attack_idx]
                attack_targets[attack_targets<5]  += 1
                attack_targets = torch.remainder(attack_targets,num_classes)
                clients_train[attack_idx] = (attack_data, attack_targets)
        clients_train = [TensorDataset(data, targets) for data,targets in clients_train]
        clients_test = [TensorDataset(data, targets) for data,targets in clients_test]
        # Loaders over ALL clients; used for the metrics after each round.
        clients_train_loaders = [DataLoader(ds,batch_size=hyper["batch_size"]) for ds in clients_train]
        clients_test_loaders=[DataLoader(ds,batch_size=hyper["batch_size"]) for ds in clients_test]

        ## Start federated training:
        if hyper['alg']=='fedavg':
            server_alg = fedavg
            global_state = init_fedavg_state(model, hyper)
        elif hyper['alg'] == 'scaffold':
            server_alg = scaffold
            global_state = init_scaffold_state(model, hyper)
        else:
            raise NotImplementedError("The federated algorithm not implemented")

        if hyper['aggregation']=='mean':
            agg_fun = mean_agg
        elif hyper['aggregation']=='krum':
            agg_fun = get_krum_fn(hyper)
        else:
            raise NotImplementedError("The aggregation")
        
        for i in range(hyper['rounds']):           
            # Sample this round's participants and build loaders for them only.
            p_list, p_train, p_test = client_batcher(clients_train, clients_test, hyper['participate_rate'])
            p_train_loaders = [DataLoader(ds,batch_size=hyper["batch_size"]) for ds in p_train]
            p_test_loaders = [DataLoader(ds,batch_size=hyper["batch_size"]) for ds in p_test]
            global_state = server_alg(p_list, global_state, p_train_loaders, loss_fn, hyper, agg_fun)
            train_acc , train_loss = get_fed_accuracy_loss(global_state,loss_fn, clients_train_loaders)
            test_acc , test_loss = get_fed_accuracy_loss(global_state, loss_fn, clients_test_loaders)
            mean_norm, norm_mean = get_bgd(global_state['server_model'], clients_train_loaders, loss_fn)
            print("train acc {0}, train loss {1}".format(train_acc, train_loss))
            print("test acc {0}, test loss {1}".format(test_acc, test_loss))
            log_dict={
                "Train Accuracy": train_acc,
                "Train Loss": train_loss,
                "Test Accuracy": test_acc,
                "Test Loss": test_loss,
                "Mean Norm" : mean_norm,
                "Norm mean": norm_mean,
                # Undefined when the mean gradient vanishes; log NaN instead
                # of raising ZeroDivisionError in the middle of a run.
                "B^2": mean_norm/norm_mean if norm_mean > 0 else float("nan")
            }
            wandb.log(log_dict)
    

## Sweep Configuration

In [None]:
# Grid sweep over the aggregation rule; every other hyperparameter is fixed.
sweep_config = {
    # Display name of the sweep. It used to sit under the top-level key
    # "data", which is not a sweep-configuration key.
    "name": "Hell Sweep", 
    "method": "grid",
    "parameters":{
        'data' : {
            'value':'MNIST'},
        'alg' : {
            'values':['scaffold']},
        'aggregation' : {
            'values':['mean', 'krum']},
        'rounds' : {
            'values':[250]},
        'num_clients' : {
            'value':15},
        'server_lr' : {
            'value':1},
        'client_lr' : {
            'value':1e-4}, 
        'alpha':{
            'values':[10]
        },
        'test_ratio' : {
            'value':0.1},
        'participate_rate' : {
            'value':1},
        'batch_size' : {
            'value':64},
        'local_steps' : {
            'values':[3]},
        # Use the GPU when there is one, otherwise fall back to the CPU.
        'device' : {
            'value':'cuda' if torch.cuda.is_available() else 'cpu'},
        'neighbour_val' : {
            'value':10},
        'bag_size' : {
            'value':3},
        'attack' : {
            'value':True},
        'attack_ratio' : {
            'value':0.3},
        'random_state' : {
            'value':42}
    },
}

In [None]:
# Authenticate with Weights & Biases (the API key ends up in the netrc file).
wandb.login()
# Register the sweep described by `sweep_config` under the given project and
# entity; the returned id identifies the sweep on the W&B server.
sweep_id = wandb.sweep(sweep_config, project="Finicky", entity='waihegz')
# Start an agent in this kernel: it fetches one configuration after another
# from the sweep and calls main() for each until no job is left.
wandb.agent(sweep_id, function=main)

ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Create sweep with ID: map0t4yt
Sweep URL: https://wandb.ai/waihegz/Finicky/sweeps/map0t4yt


[34m[1mwandb[0m: Agent Starting Run: 9451175u with config:
[34m[1mwandb[0m: 	aggregation: mean
[34m[1mwandb[0m: 	alg: scaffold
[34m[1mwandb[0m: 	alpha: 10
[34m[1mwandb[0m: 	attack: True
[34m[1mwandb[0m: 	attack_ratio: 0.3
[34m[1mwandb[0m: 	bag_size: 3
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	client_lr: 0.0001
[34m[1mwandb[0m: 	data: MNIST
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	local_steps: 3
[34m[1mwandb[0m: 	neighbour_val: 10
[34m[1mwandb[0m: 	num_clients: 15
[34m[1mwandb[0m: 	participate_rate: 1
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	rounds: 250
[34m[1mwandb[0m: 	server_lr: 1
[34m[1mwandb[0m: 	test_ratio: 0.1
ERROR:wandb.jupyter:Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mwaihegz[0m. Use [1m`wandb login --relogin`[0m to force relogin


{'aggregation': 'mean', 'alg': 'scaffold', 'alpha': 10, 'attack': True, 'attack_ratio': 0.3, 'bag_size': 3, 'batch_size': 64, 'client_lr': 0.0001, 'data': 'MNIST', 'device': 'cuda', 'local_steps': 3, 'neighbour_val': 10, 'num_clients': 15, 'participate_rate': 1, 'random_state': 42, 'rounds': 250, 'server_lr': 1, 'test_ratio': 0.1}
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

train acc 0.1416934625402429, train loss 4.282535547428335
test acc 0.12248276271364636, test loss 4.506032019893039
train acc 0.1482785338421116, train loss 3.1328642735566077
test acc 0.13175353625920164, test loss 3.2632792061272693
train acc 0.13356954193494758, train loss 2.706188434626649
test acc 0.11513145439638128, test loss 2.8136362879245387
train acc 0.13667547886764275, train loss 2.5529554092666285
test acc 0.12037085434327023, test loss 2.6528029456885527
train acc 0.15248179622928587, train loss 2.468826885827777
test acc 0.1329792761568286, test loss 2.563050329123625
train acc 0.1653814541125997, train loss 2.4075719854574755
test acc 0.14788289882741978, test loss 2.4941661982171817
train acc 0.17896574553998468, train loss 2.353569559457259
test acc 0.1617769087265857, test loss 2.4318910790786816
train acc 0.1909348233376458, train loss 2.303152867001076
test acc 0.17613409553192683, test lo

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
B^2,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▅▅▆▇▇█████▇▇▇▆▆▅▅▄▄
Mean Norm,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
Norm mean,▂▆▇██▇▆▅▅▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Test Accuracy,▁▁▂▃▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇█████████████████████
Test Loss,█▆▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train Accuracy,▁▂▃▄▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█████████████████
Train Loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
B^2,465384914.07069
Mean Norm,21448390656.0
Norm mean,46.08742
Test Accuracy,0.76366
Test Loss,0.68593
Train Accuracy,0.71696
Train Loss,0.75733


[34m[1mwandb[0m: Agent Starting Run: 4fvunkwz with config:
[34m[1mwandb[0m: 	aggregation: krum
[34m[1mwandb[0m: 	alg: scaffold
[34m[1mwandb[0m: 	alpha: 10
[34m[1mwandb[0m: 	attack: True
[34m[1mwandb[0m: 	attack_ratio: 0.3
[34m[1mwandb[0m: 	bag_size: 3
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	client_lr: 0.0001
[34m[1mwandb[0m: 	data: MNIST
[34m[1mwandb[0m: 	device: cuda
[34m[1mwandb[0m: 	local_steps: 3
[34m[1mwandb[0m: 	neighbour_val: 10
[34m[1mwandb[0m: 	num_clients: 15
[34m[1mwandb[0m: 	participate_rate: 1
[34m[1mwandb[0m: 	random_state: 42
[34m[1mwandb[0m: 	rounds: 250
[34m[1mwandb[0m: 	server_lr: 1
[34m[1mwandb[0m: 	test_ratio: 0.1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


{'aggregation': 'krum', 'alg': 'scaffold', 'alpha': 10, 'attack': True, 'attack_ratio': 0.3, 'bag_size': 3, 'batch_size': 64, 'client_lr': 0.0001, 'data': 'MNIST', 'device': 'cuda', 'local_steps': 3, 'neighbour_val': 10, 'num_clients': 15, 'participate_rate': 1, 'random_state': 42, 'rounds': 250, 'server_lr': 1, 'test_ratio': 0.1}
train acc 0.12577378291363087, train loss 4.029889896451133
test acc 0.11141136927792517, test loss 4.207948070573114
train acc 0.14942636122355554, train loss 3.0958651247379896
test acc 0.1315101160696793, test loss 3.22340322167524
train acc 0.1306619522590541, train loss 2.727139497238023
test acc 0.11088463812067063, test loss 2.845862756721639
train acc 0.13706442030245652, train loss 2.5777969901763504
test acc 0.11560983991268997, test loss 2.6928650006466888
train acc 0.1498828783987372, train loss 2.4967110830230603
test acc 0.12912489928112683, test loss 2.6154176343349422
train acc 0.16268397282104155, train loss 2.4366545445636416
test acc 0.1431

0,1
B^2,▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▃▃▄▅▆▇███▇▇▇▇▆▅▄▃▃▂▂▂▂▂▂▂
Mean Norm,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▄▄▅▅▅▆▆▇▇███
Norm mean,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▄▅▆▇███
Test Accuracy,▁▂▃▄▄▅▆▆▇▇▇███████████▇▇▇▇▇█▇▇▇▅▅▆▆▆▅▅▆▅
Test Loss,█▅▅▄▃▃▂▂▂▂▁▁▁▁▂▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▇▅▄▄▅▇▆▃▇
Train Accuracy,▁▂▃▄▅▅▆▆▇▇▇███▇▇████████▇▇▇█▇▇▇▅▅▅▅▅▅▅▆▅
Train Loss,▇▅▄▃▃▂▂▂▁▁▁▁▁▂▂▂▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▆▅▅▄▆█▇▃█

0,1
B^2,3460711.77673
Mean Norm,285566337024.0
Norm mean,82516.64844
Test Accuracy,0.421
Test Loss,2.92926
Train Accuracy,0.39959
Train Loss,3.18152


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


## Dumps


test acc 0.7867021333070594, test loss 0.7816527218722072