In [None]:
import os
import copy
import time
import pickle
import numpy as np
import tqdm as tqdm
import math

import torch
import torch.nn as nn
from torch import Tensor
from typing import Type
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim


!pip install tensorboardX
import tensorboardX
from tensorboardX import SummaryWriter



In [None]:
import argparse
def args_parser():
    """Build the experiment's argument parser and parse ``sys.argv``.

    Every option has a default, so calling this with no command-line
    arguments yields a fully populated namespace.

    Returns:
        argparse.Namespace: the parsed federated, model and miscellaneous
        options (``epochs``, ``num_users``, ``frac``, ``local_ep``,
        ``local_bs``, ``lr``, ``momentum``, ``model``, ``dataset``, ...).
    """
    parser = argparse.ArgumentParser()
    # Accept (and otherwise ignore) a ``-f <value>`` option.
    # NOTE(review): presumably this swallows the ``-f <connection-file>``
    # argument that Jupyter passes to the kernel, so parse_args() does not
    # abort inside a notebook -- confirm.
    parser.add_argument('-f')

    # federated arguments (Notation for the arguments followed from paper)

    parser.add_argument('--epochs', type=int, default=10,
                        help="number of rounds of training")
    parser.add_argument('--num_users', type=int, default=100,
                        help="number of users: K")
    parser.add_argument('--frac', type=float, default=0.1,
                        help='the fraction of clients: C')
    parser.add_argument('--local_ep', type=int, default=2,
                        help="the number of local epochs: E")
    parser.add_argument('--local_bs', type=int, default=128,
                        help="local batch size: B")
    parser.add_argument('--lr', type=float, default=0.1,
                        help='learning rate')
    # The help text previously advertised a default of 0.5, which did not
    # match the actual default below.
    parser.add_argument('--momentum', type=float, default=0.99,
                        help='SGD momentum (default: 0.99)')


    # model arguments
    parser.add_argument('--model', type=str, default='resnet', help='model name')
    parser.add_argument('--kernel_num', type=int, default=9,
                        help='number of each kind of kernel')
    parser.add_argument('--kernel_sizes', type=str, default='3,4,5',
                        help='comma-separated kernel size to \
                        use for convolution')
    parser.add_argument('--num_channels', type=int, default=1, help="number \
                        of channels of imgs")
    parser.add_argument('--norm', type=str, default='batch_norm',
                        help="batch_norm, layer_norm, or None")
    parser.add_argument('--num_filters', type=int, default=32,
                        help="number of filters for conv nets -- 32 for \
                        mini-imagenet, 64 for omiglot.")
    # Note: parsed as a string ('True'/'False'), not as a bool.
    parser.add_argument('--max_pool', type=str, default='True',
                        help="Whether use max pooling rather than \
                        strided convolutions")


    # other arguments
    parser.add_argument('--dataset', type=str, default='cifar', help="name \
                        of dataset")
    parser.add_argument('--num_classes', type=int, default=10, help="number \
                        of classes")
    parser.add_argument('--gpu_id', default=None, help="To use cuda, set \
                        to a specific GPU ID. Default set to use CPU.")
    parser.add_argument('--optimizer', type=str, default='adam', help="type \
                        of optimizer")
    parser.add_argument('--iid', type=int, default=1,
                        help='Default set to IID. Set to 0 for non-IID.')
    parser.add_argument('--unequal', type=int, default=0,
                        help='whether to use unequal data splits for  \
                        non-i.i.d setting (use 0 for equal splits)')
    parser.add_argument('--stopping_rounds', type=int, default=10,
                        help='rounds of early stopping')
    parser.add_argument('--verbose', type=int, default=1, help='verbose')
    parser.add_argument('--seed', type=int, default=1, help='random seed')
    args = parser.parse_args()
    return args

In [None]:

from torch import nn
import torch.nn.functional as F


class MLP(nn.Module):
    """Single-hidden-layer perceptron that outputs class probabilities.

    Args:
        dim_in: number of features after the input is flattened.
        dim_hidden: width of the hidden layer.
        dim_out: number of output classes.
    """

    def __init__(self, dim_in, dim_hidden, dim_out):
        super().__init__()
        self.layer_input = nn.Linear(dim_in, dim_hidden)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout()
        self.layer_hidden = nn.Linear(dim_hidden, dim_out)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Flatten ``x``, apply linear -> dropout -> ReLU -> linear -> softmax.

        The flattened width is ``shape[1] * shape[-2] * shape[-1]``, i.e. the
        input is treated as (batch, channels, height, width).
        Returns softmax probabilities (not log-probabilities).
        """
        flat_width = x.shape[1] * x.shape[-2] * x.shape[-1]
        hidden = self.layer_input(x.view(-1, flat_width))
        # Dropout is applied before the non-linearity, as in the original layout.
        hidden = self.relu(self.dropout(hidden))
        return self.softmax(self.layer_hidden(hidden))


class CNNMnist(nn.Module):
    """Two-convolution CNN that returns log-probabilities.

    Args:
        args: options object; ``args.num_channels`` sets the input channel
            count and ``args.num_classes`` the number of outputs.

    The first fully connected layer expects 320 features (20 channels of
    4x4), which is what a 28x28 input produces after the two conv/pool stages.
    """

    def __init__(self, args):
        super().__init__()
        self.conv1 = nn.Conv2d(args.num_channels, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, args.num_classes)

    def forward(self, x):
        """Return ``log_softmax`` class scores for a batch of images."""
        stage1 = F.relu(F.max_pool2d(self.conv1(x), 2))
        stage2 = self.conv2_drop(self.conv2(stage1))
        stage2 = F.relu(F.max_pool2d(stage2, 2))
        # Collapse (channels, height, width) into one feature axis.
        flat = stage2.view(-1, stage2.shape[1] * stage2.shape[2] * stage2.shape[3])
        hidden = F.relu(self.fc1(flat))
        # Functional dropout must be told explicitly whether we are training.
        hidden = F.dropout(hidden, training=self.training)
        return F.log_softmax(self.fc2(hidden), dim=1)



class CNNFashion_Mnist(nn.Module):
    """Two conv/batch-norm/ReLU/pool stages followed by a 10-way linear layer.

    Args:
        args: accepted for a uniform constructor signature; not read here.

    The classifier expects ``7*7*32`` features, i.e. a single-channel 28x28
    input halved twice by the pooling layers. Returns raw scores (no softmax).
    """

    def __init__(self, args):
        super().__init__()
        self.layer1 = self._conv_stage(1, 16)
        self.layer2 = self._conv_stage(16, 32)
        self.fc = nn.Linear(7*7*32, 10)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one 5x5 conv (size-preserving) -> BN -> ReLU -> 2x max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2))

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10)."""
        features = self.layer2(self.layer1(x))
        # Keep the batch axis, flatten everything else.
        return self.fc(features.view(features.size(0), -1))



class CNNCifar(nn.Module):
    """LeNet-style CNN for 3-channel images that returns log-probabilities.

    Args:
        args: options object; ``args.num_classes`` sets the output width.

    ``fc1`` expects ``16 * 5 * 5`` features, which is what a 32x32 input
    yields after two (5x5 conv, 2x2 pool) stages.
    """

    def __init__(self, args):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, args.num_classes)

    def forward(self, x):
        """Return ``log_softmax`` class scores for a batch of images."""
        # Convolutional feature extractor; the same pooling layer is reused.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        features = x.view(-1, 16 * 5 * 5)
        # Fully connected head.
        for dense in (self.fc1, self.fc2):
            features = F.relu(dense(features))
        return F.log_softmax(self.fc3(features), dim=1)

class modelC(nn.Module):
    """All-convolutional classifier: eight conv layers plus a 1x1 class conv.

    Spatial resolution is reduced by the two stride-2 convolutions
    (``conv3`` and ``conv6``) rather than by pooling, and the per-class maps
    are averaged globally to produce one score per class.

    Args:
        input_size: number of input channels.
        n_classes: number of output classes.
        **kwargs: accepted and ignored.
    """

    def __init__(self, input_size, n_classes=10, **kwargs):
        # The original call named a non-existent class (AllConvNet) and
        # raised NameError as soon as the model was constructed.
        super().__init__()
        self.conv1 = nn.Conv2d(input_size, 96, 3, padding=1)
        self.conv2 = nn.Conv2d(96, 96, 3, padding=1)
        self.conv3 = nn.Conv2d(96, 96, 3, padding=1, stride=2)
        self.conv4 = nn.Conv2d(96, 192, 3, padding=1)
        self.conv5 = nn.Conv2d(192, 192, 3, padding=1)
        self.conv6 = nn.Conv2d(192, 192, 3, padding=1, stride=2)
        self.conv7 = nn.Conv2d(192, 192, 3, padding=1)
        self.conv8 = nn.Conv2d(192, 192, 1)

        self.class_conv = nn.Conv2d(192, n_classes, 1)


    def forward(self, x):
        """Return non-negative per-class scores of shape (batch, n_classes)."""
        # F.dropout defaults to training=True; pass the module's mode so that
        # dropout is disabled (and outputs are deterministic) under eval().
        x_drop = F.dropout(x, .2, training=self.training)
        conv1_out = F.relu(self.conv1(x_drop))
        conv2_out = F.relu(self.conv2(conv1_out))
        conv3_out = F.relu(self.conv3(conv2_out))
        conv3_out_drop = F.dropout(conv3_out, .5, training=self.training)
        conv4_out = F.relu(self.conv4(conv3_out_drop))
        conv5_out = F.relu(self.conv5(conv4_out))
        conv6_out = F.relu(self.conv6(conv5_out))
        conv6_out_drop = F.dropout(conv6_out, .5, training=self.training)
        conv7_out = F.relu(self.conv7(conv6_out_drop))
        conv8_out = F.relu(self.conv8(conv7_out))

        class_out = F.relu(self.class_conv(conv8_out))
        # Global average pooling leaves two trailing size-1 axes; drop them.
        pool_out = F.adaptive_avg_pool2d(class_out, 1)
        return pool_out.squeeze(-1).squeeze(-1)




# building a ResNet18 Architecture
#this function creates a 3x3 convolutional layer with specified input and output channels, stride, padding, and without bias
def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1.

    Args:
        in_planes: number of input channels.
        out_planes: number of output channels.
        stride: convolution stride (1 keeps the spatial size).
    """
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)



class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    Args:
        inplanes: channels of the incoming tensor.
        planes: channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input so that the skip
            connection matches the main branch; ``None`` means identity.
    """

    # Output channels are ``planes * expansion``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))

        # Identity shortcut unless a projection was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        main += shortcut
        return self.relu(main)

class ResNet18(nn.Module):
    """ResNet backbone assembled from a caller-supplied residual block type.

    Args:
        block: block class; must expose ``expansion`` and accept
            ``(inplanes, planes, stride, downsample)``.
        layers: sequence of four ints, the number of blocks in each stage.
        num_classes: width of the final linear layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        super().__init__()
        # Running channel count, advanced by _make_layer as stages are built.
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # (planes, stride) for layer1..layer4; only the first keeps resolution.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for number, ((planes, stride), depth) in enumerate(zip(stage_specs, layers), start=1):
            setattr(self, 'layer%d' % number,
                    self._make_layer(block, planes, depth, stride=stride))
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Conv weights ~ N(0, sqrt(2 / (kh * kw * out_channels)));
        # batch-norm starts as the identity transform.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``."""
        out_planes = planes * block.expansion
        projection = None
        # A 1x1 projection is needed whenever the skip path changes shape.
        if stride != 1 or self.inplanes != out_planes:
            projection = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, projection)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, num_classes)."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = self.avgpool(x)
        return self.fc(pooled.view(pooled.size(0), -1))

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import copy

# Prints a fixed tag whenever this cell is executed.
# NOTE(review): looks like a leftover marker used to confirm that the latest
# revision of the cell was run -- confirm and remove if no longer needed.
print("newest")
class DatasetSplit(Dataset):
    """View of ``dataset`` restricted to the sample positions in ``idxs``.

    Args:
        dataset: any indexable collection of ``(image, label)`` pairs.
        idxs: iterable of positions into ``dataset``; each entry is coerced
            with ``int()`` so float-typed index arrays are accepted.
    """

    def __init__(self, dataset, idxs):
        self.dataset = dataset
        self.idxs = [int(i) for i in idxs]

    def __len__(self):
        """Number of samples in this split."""
        return len(self.idxs)

    def __getitem__(self, item):
        """Return the ``item``-th sample of the split as ``(image, label)`` tensors."""
        image, label = self.dataset[self.idxs[item]]
        return self._as_new_tensor(image), self._as_new_tensor(label)

    @staticmethod
    def _as_new_tensor(value):
        """Return an independent tensor copy of ``value``.

        ``torch.tensor(existing_tensor)`` works but emits a UserWarning on
        every call; ``detach().clone()`` is the equivalent, warning-free form.
        """
        if isinstance(value, torch.Tensor):
            return value.detach().clone()
        return torch.tensor(value)



class LocalUpdate(object):
    """Per-client trainer: owns the client's data loaders and runs local training.

    Args:
        args: options object; ``local_bs``, ``lr``, ``local_ep`` and
            ``verbose`` are read by the methods below.
        dataset: the full training dataset.
        idxs: positions in ``dataset`` that belong to this client.
        logger: object with an ``add_scalar(tag, value)`` method.
        r: stored on the instance as ``self.r``; not read inside this class.
    """

    def __init__(self, args, dataset, idxs, logger, r):
        self.args = args
        self.logger = logger
        self.trainloader, self.validloader, self.testloader, self.sample_size = self.train_val_test(
            dataset, list(idxs))
        self.r = r
        # Everything runs on the CPU; GPU selection is currently disabled.
        # self.device = 'cuda' if args.gpu else 'cpu'
        self.device = 'cpu'
        # Default criterion set to NLL loss function
        self.criterion = nn.NLLLoss().to(self.device)

    def train_val_test(self, dataset, idxs):
        """Split ``idxs`` 80/10/10 (in order) into train/validation/test loaders.

        Returns:
            tuple: ``(trainloader, validloader, testloader, sample_size)``
            where ``sample_size`` is the number of training samples.
        """
        idxs_train = idxs[:int(0.8*len(idxs))]
        idxs_val = idxs[int(0.8*len(idxs)):int(0.9*len(idxs))]
        idxs_test = idxs[int(0.9*len(idxs)):]
        trainloader = DataLoader(DatasetSplit(dataset, idxs_train),
                                 batch_size=self.args.local_bs, shuffle=True)
        validloader = DataLoader(DatasetSplit(dataset, idxs_val),
                                 batch_size=self.args.local_bs, shuffle=False)
        testloader = DataLoader(DatasetSplit(dataset, idxs_test),
                                 batch_size=self.args.local_bs, shuffle=False)

        sample_size = len(trainloader.dataset)
        return trainloader, validloader, testloader, sample_size

    def calculate_gradients(self, model):
        """Run one Adam epoch on ``model`` and return its averaged gradients.

        The model is updated in place (one ``optimizer.step()`` per batch).

        Returns:
            list[Tensor]: one tensor per entry of ``model.parameters()``:
            the gradient accumulated over the epoch divided by the number of
            batches. Parameters that received no gradient yield zeros.
        """
        model.train()
        optimizer = torch.optim.Adam(model.parameters(), lr=self.args.lr, weight_decay=1e-4)

        # NOTE(review): gradients are cleared once and then accumulate across
        # batches while the optimizer still steps after every batch, so each
        # step uses the running sum. Kept as-is because the returned average
        # depends on the accumulation -- confirm this is the intended scheme.
        optimizer.zero_grad()
        for images, labels in self.trainloader:
            images, labels = images.to(self.device), labels.to(self.device)

            log_probs = model(images)
            loss = self.criterion(log_probs, labels)
            loss.backward()

            optimizer.step()

        # Guard against an empty loader so the division cannot produce NaN.
        total_batches = max(len(self.trainloader), 1)
        # p.grad is None for frozen/unused parameters; report zeros for those
        # instead of failing on ``None.clone()``.
        return [
            torch.zeros_like(p) if p.grad is None else p.grad.clone() / total_batches
            for p in model.parameters()
        ]

    def update_weights(self, model, global_round, p):
        """Train ``model`` locally for ``args.local_ep`` epochs with the SAM optimizer.

        Args:
            model: model to train in place.
            global_round: round number, used only in the progress message.
            p: weight coefficients forwarded to ``calculate_r``.

        Returns:
            dict: ``model.state_dict()`` after local training.
        """
        model.train()
        # NOTE(review): SAM is not defined in this part of the notebook; it is
        # assumed to accept ``step(r=..., closure=...)`` -- confirm.
        optimizer = SAM(model.parameters(), base_optimizer=torch.optim.SGD, rho=0.01, lr=self.args.lr, momentum=0.5)

        # The radii are computed once, before the local epochs start.
        local_grads = self.calculate_gradients(model)
        r_values = calculate_r(local_grads, p)

        for local_epoch in range(self.args.local_ep):
            for batch_idx, (images, labels) in enumerate(self.trainloader):
                images, labels = images.to(self.device), labels.to(self.device)

                # Clear gradients for every batch. Previously this happened
                # once per epoch, so the backward pass below accumulated onto
                # whatever the preceding optimizer step had left behind.
                optimizer.zero_grad()

                log_probs = model(images)
                loss = self.criterion(log_probs, labels)
                loss.backward()

                # Second forward/backward pass, invoked by the optimizer.
                def closure():
                    optimizer.zero_grad()
                    log_probs = model(images)
                    loss = self.criterion(log_probs, labels)
                    loss.backward()
                    return loss

                optimizer.step(r=r_values, closure=closure)

                if self.args.verbose and (batch_idx % 10 == 0):
                    print('| Global Round : {} | Local Epoch : {} | [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    global_round, local_epoch, batch_idx * len(images),
                    len(self.trainloader.dataset),
                    100. * batch_idx / len(self.trainloader), loss.item()))

                self.logger.add_scalar('loss', loss.item())

        return model.state_dict()

    def inference(self, model):
        """Evaluate ``model`` on this client's test split.

        Returns:
            tuple: ``(accuracy, loss)`` where ``loss`` is the sum of the
            per-batch losses. An empty split yields ``(0.0, 0.0)``.
        """
        model.eval()
        loss, total, correct = 0.0, 0.0, 0.0

        # No gradients are needed for evaluation; skip building the graph.
        with torch.no_grad():
            for images, labels in self.testloader:
                images, labels = images.to(self.device), labels.to(self.device)

                outputs = model(images)
                loss += self.criterion(outputs, labels).item()

                # Predicted class = index of the largest output.
                _, pred_labels = torch.max(outputs, 1)
                pred_labels = pred_labels.view(-1)
                correct += torch.sum(torch.eq(pred_labels, labels)).item()
                total += len(labels)

        accuracy = correct / total if total else 0.0
        return accuracy, loss


def test_inference(args, model, test_dataset):
    """ Returns the test accuracy and loss """

    model.eval()
    loss, total, correct = 0.0, 0.0, 0.0

    # device = 'cuda' if args.gpu else 'cpu'
    device = 'cpu'
    criterion = nn.NLLLoss().to(device)
    testloader = DataLoader(test_dataset, batch_size=128,
                            shuffle=False)

    for batch_idx, (images, labels) in enumerate(testloader):
        images, labels = images.to(device), labels.to(device)

        # Inference
        outputs = model(images)
        batch_loss = criterion(outputs, labels)
        loss += batch_loss.item()

        # Prediction
        _, pred_labels = torch.max(outputs, 1)
        pred_labels = pred_labels.view(-1)
        correct += torch.sum(torch.eq(pred_labels, labels)).item()
        total += len(labels)

    accuracy = correct/total
    return accuracy, loss



class calculate(object):
    """Holds one client's train/validation/test loaders and NLL criterion.

    Args:
        args: options object; ``local_bs`` is used as the batch size.
        dataset: the full training dataset.
        idxs: positions in ``dataset`` that belong to this client.
        logger: stored on the instance as ``self.logger``.
        p: accepted but not read by this class.
    """

    def __init__(self, args, dataset, idxs, logger, p):
        self.args, self.logger = args, logger
        loaders = self.train_val_test(dataset, list(idxs))
        self.trainloader, self.validloader, self.testloader, self.sample_size = loaders
        # Everything runs on the CPU; GPU selection is currently disabled.
        self.device = 'cpu'
        # Default criterion set to NLL loss function
        self.criterion = nn.NLLLoss().to(self.device)

    def train_val_test(self, dataset, idxs):
        """Split ``idxs`` 80/10/10 (in order) into train/validation/test loaders.

        Only the training loader shuffles. Returns
        ``(trainloader, validloader, testloader, sample_size)`` where
        ``sample_size`` is the number of training samples.
        """
        count = len(idxs)
        train_end, val_end = int(0.8*count), int(0.9*count)
        # (index slice, shuffle?) for train, validation and test, in that order.
        partitions = (
            (idxs[:train_end], True),
            (idxs[train_end:val_end], False),
            (idxs[val_end:], False),
        )
        trainloader, validloader, testloader = (
            DataLoader(DatasetSplit(dataset, part),
                       batch_size=self.args.local_bs, shuffle=shuffled)
            for part, shuffled in partitions
        )
        return trainloader, validloader, testloader, len(trainloader.dataset)

newest


In [None]:
import copy
import torch
from torchvision import datasets, transforms

import numpy as np

# Prints a fixed placeholder string whenever this cell is executed.
# NOTE(review): looks like a leftover "cell ran" marker -- confirm and remove
# if no longer needed.
print("...")

def get_dataset(args):
    """Load the requested dataset and partition the training set among users.

    Args:
        args: options object; reads ``dataset`` ('cifar', 'mnist' or
            'fmnist'), ``iid``, ``unequal`` and ``num_users``.

    Returns:
        tuple: ``(train_dataset, test_dataset, user_groups)`` where
        ``user_groups`` maps each user index to that user's sample indices.

    Raises:
        ValueError: if ``args.dataset`` is not one of the supported names.
        NotImplementedError: for CIFAR with a non-IID, unequal split.
    """

    if args.dataset == 'cifar':
        data_dir = '../data/cifar/'
        train_transform = transforms.Compose(
            [transforms.RandomHorizontalFlip(),
             transforms.RandomCrop(32, 4),
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] )])

        # No augmentation at test time, only tensor conversion + normalisation.
        test_transform =transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] )])

        train_dataset = datasets.CIFAR10(data_dir, train=True, download=True,
                                       transform=train_transform)

        test_dataset = datasets.CIFAR10(data_dir, train=False, download=True,
                                      transform=test_transform)

        # sample training data amongst users
        if args.iid:
            # Sample IID user data from CIFAR
            user_groups = cifar_iid(train_dataset, args.num_users)
        else:
            # Sample Non-IID user data from CIFAR
            if args.unequal:
                # Unequal splits are not available for CIFAR
                raise NotImplementedError()
            else:
                # Equal splits for every user
                user_groups = cifar_noniid(train_dataset, args.num_users)

    # The previous test (`== 'mnist' or 'fmnist'`) was always true, so every
    # unrecognised dataset name silently fell into this branch.
    elif args.dataset in ('mnist', 'fmnist'):
        if args.dataset == 'mnist':
            data_dir = '../data/mnist/'
            dataset_cls = datasets.MNIST
        else:
            data_dir = '../data/fmnist/'
            # 'fmnist' used to load plain MNIST into the fmnist directory.
            dataset_cls = datasets.FashionMNIST

        # NOTE(review): these normalisation constants are shared by both
        # datasets, as before -- confirm whether Fashion-MNIST should get
        # its own statistics.
        apply_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))])

        train_dataset = dataset_cls(data_dir, train=True, download=True,
                                    transform=apply_transform)

        test_dataset = dataset_cls(data_dir, train=False, download=True,
                                   transform=apply_transform)

        # sample training data amongst users
        if args.iid:
            # Sample IID user data from Mnist
            user_groups = mnist_iid(train_dataset, args.num_users)
        else:
            # Sample Non-IID user data from Mnist
            if args.unequal:
                # Unequal splits for every user
                user_groups = mnist_noniid_unequal(train_dataset, args.num_users)
            else:
                # Equal splits for every user
                user_groups = mnist_noniid(train_dataset, args.num_users)

    else:
        raise ValueError(
            "Unsupported dataset {!r}; expected 'cifar', 'mnist' or 'fmnist'".format(args.dataset))

    return train_dataset, test_dataset, user_groups


# def calculate_r(local_grads, p):
#     len_grads = len(local_grads)
#     r_k = np.zeros(len_grads)

#     for i in range(len_grads):
#         flattened_i = local_grads[i].flatten()  # Flatten the gradient tensor
#         cos_sims = [torch.cosine_similarity(flattened_i, local_grads[j].flatten(), dim=0) for j in range(len_grads)]
#         r_k[i] = sum(p[j] * cos_sims[j] for j in range(len_grads))
#         r_k[i] /= np.linalg.norm(flattened_i)

#     return r_k


# def calculate_r(local_grads, weight_coefficients):
#     num_clients = len(local_grads)
#     r_k = np.zeros(num_clients)
#     #weight_coefficients_tensor = [torch.tensor(p) for p in weight_coefficients]
#     for i in range(num_clients):
#         dot_product = sum(weight_coefficients[j] * torch.sum(local_grads[i] * local_grads[j]) for j in range(num_clients))

#         norm_i = torch.norm(local_grads[i]).item()
#         r_k[i] = dot_product / (norm_i * norm_i)

#     return r_k


# def calculate_r(local_grads, weight_coefficients):
#     num_clients = len(local_grads)
#     r_k = np.zeros(num_clients)
#     weight_coefficients_tensor = [torch.tensor(p) for p in weight_coefficients]
#     for i in range(num_clients):
#       flat_i = torch.cat([p.view(-1) for p in local_grads[i]])
#       dot_product = sum(weight_coefficients_tensor[j] * torch.sum(flat_i * flat_i) for j in range(num_clients))
#       sum_squared_i = torch.sum(flat_i * flat_i)
#       r_k[i] = dot_product / (sum_squared_i * sum_squared_i)


#     return r_k



# def calculate_r(local_grads, weight_coefficients):
#     num_clients = len(local_grads)
#     r_k = np.zeros(num_clients)
#     weight_coefficients_tensor = [torch.tensor(p) for p in weight_coefficients]
#     for i in range(num_clients):
#       flat_i = torch.cat([p.view(-1) for p in local_grads[i]])
#       dot_product = sum(weight_coefficients_tensor[j] * torch.sum(flat_i * flat_i) for j in range(num_clients))
#       sum_squared_i = torch.sum(flat_i * flat_i)
#       r_k[i] = dot_product / (sum_squared_i)


#     return r_k

def calculate_r(local_grads, weight_coefficients):
    """Compute one scalar radius per entry of ``local_grads``.

    For entry ``i`` with flattened gradient ``g_i`` the result is::

        r_i = sum_j w_j * <g_i, g_j> / <g_i, g_i>

    where the sum only includes entries ``j`` whose flattened length matches
    that of ``g_i``.

    Args:
        local_grads: sequence whose items are iterables of tensors; each
            item is flattened and concatenated into one vector.
        weight_coefficients: sequence of scalar weights ``w_j``, at least as
            long as ``local_grads``.

    Returns:
        numpy.ndarray: float array of length ``len(local_grads)``. An entry
        whose gradient is all zeros gets radius 0.0 (rather than NaN).
    """
    num_clients = len(local_grads)
    r_k = np.zeros(num_clients)
    # as_tensor avoids the copy-construct warning when weights are tensors.
    weights = [torch.as_tensor(w) for w in weight_coefficients]

    # Flatten every gradient once instead of once per (i, j) pair;
    # reshape (unlike view) also accepts non-contiguous tensors.
    flat_grads = [torch.cat([g.reshape(-1) for g in grads]) for grads in local_grads]

    for i, flat_i in enumerate(flat_grads):
        sum_squared_i = torch.dot(flat_i, flat_i)
        if sum_squared_i.item() == 0:
            # A zero gradient has no direction; leave the radius at 0.
            continue

        dot_product = 0.0
        for j, flat_j in enumerate(flat_grads):
            # Vectors of different length cannot be compared; skip them.
            if flat_i.size() == flat_j.size():
                dot_product += weights[j] * torch.dot(flat_i, flat_j)

        r_k[i] = float(dot_product / sum_squared_i)

    return r_k




# def calculate_r(local_grads, p):
#     len_grads = len(local_grads)
#     r_k = np.zeros(len_grads)


#     for i in range(len_grads):
#         r_k[i] = sum(p[j] * np.dot(local_grads[i], local_grads[j]) for j in range(len_grads))
#         r_k[i] = r_k[i] / (np.linalg.norm(local_grads[i]) * np.linalg.norm(local_grads[i]))

#     return r_k


def average_weights(local_weights):
    """Return the element-wise mean of several state dicts.

    This is the server-side aggregation step: every key of the first state
    dict is summed across all entries of ``local_weights`` and divided by
    their count. The inputs are left untouched (the first one is deep-copied).
    """
    num_models = len(local_weights)
    averaged = copy.deepcopy(local_weights[0])
    for name in averaged.keys():
        # Add the remaining clients' values onto the copy of the first one.
        for client_state in local_weights[1:]:
            averaged[name] += client_state[name]
        averaged[name] = torch.div(averaged[name], num_models)
    return averaged




def exp_details(args):
    """Print a summary of the experiment configuration held in ``args``.

    Reads ``model``, ``dataset``, ``optimizer``, ``lr``, ``epochs``, ``iid``,
    ``frac``, ``local_bs`` and ``local_ep``. Returns ``None``.
    """
    sampling = '    IID' if args.iid else '    Non-IID'
    report = (
        '\nExperimental details:',
        f'    Model     : {args.model}',
        f'    Dataset   : {args.dataset}',
        f'    Optimizer : {args.optimizer}',
        f'    Learning  : {args.lr}',
        f'    Global Rounds   : {args.epochs}\n',
        '    Federated parameters:',
        sampling,
        f'    Fraction of users  : {args.frac}',
        f'    Local Batch size   : {args.local_bs}',
        f'    Local Epochs       : {args.local_ep}\n',
    )
    for line in report:
        print(line)

...


In [None]:
import numpy as np
from torchvision import datasets, transforms

# Prints a fixed tag whenever this cell is executed.
# NOTE(review): looks like a leftover marker used to tell cell revisions
# apart -- confirm and remove if no longer needed.
print("new copy")


def mnist_iid(dataset, num_users):
    """Randomly deal the dataset's indices to ``num_users`` users, IID.

    Each user receives ``int(len(dataset) / num_users)`` distinct indices,
    drawn without replacement via ``np.random.choice``; no index is given to
    two users. Any remainder indices are left unassigned.

    Returns:
        dict: user index -> ``set`` of sample indices.
    """
    total = len(dataset)
    per_user = int(total / num_users)  # share size for every user
    remaining = list(range(total))     # indices not handed out yet
    assignment = {}

    for user in range(num_users):
        picked = set(np.random.choice(remaining, per_user, replace=False))
        assignment[user] = picked
        # Drop this user's picks so later users cannot draw them again.
        remaining = list(set(remaining) - picked)

    return assignment




def mnist_noniid(dataset, num_users, num_shards=200, num_imgs=300, shards_per_user=2):
    """Split a labelled dataset among users in a non-IID (label-sorted) way.

    The first ``num_shards * num_imgs`` samples are sorted by label and cut
    into ``num_shards`` contiguous shards of ``num_imgs`` samples; every user
    then receives ``shards_per_user`` randomly chosen shards, so each user
    sees only a few labels. The defaults (200 x 300, 2 shards per user)
    correspond to the 60,000 MNIST training images.

    Args:
        dataset: object exposing its labels as ``targets`` (or, on older
            torchvision versions, ``train_labels``).
        num_users: number of users to create.
        num_shards: number of shards the sorted data is cut into.
        num_imgs: number of samples per shard.
        shards_per_user: number of shards handed to every user.

    Returns:
        dict: user index -> 1-D integer array of sample indices.

    Raises:
        ValueError: if the dataset has fewer than ``num_shards * num_imgs``
            samples, or there are not enough shards for all users.
    """
    # Prefer ``targets``; ``train_labels`` is the deprecated spelling.
    labels = getattr(dataset, 'targets', None)
    if labels is None:
        labels = dataset.train_labels
    labels = np.asarray(labels)

    total = num_shards * num_imgs
    if total > len(labels):
        raise ValueError(
            "num_shards * num_imgs = {} exceeds the dataset size {}".format(total, len(labels)))
    if num_users * shards_per_user > num_shards:
        raise ValueError(
            "{} users x {} shards need more than the {} available shards".format(
                num_users, shards_per_user, num_shards))

    # Sample indices ordered by label; a stable sort keeps the original order
    # within each label, making the shards reproducible across platforms.
    idxs = np.argsort(labels[:total], kind='stable')

    idx_shard = list(range(num_shards))  # shards not handed out yet
    dict_users = {}

    for i in range(num_users):
        rand_set = set(np.random.choice(idx_shard, shards_per_user, replace=False))
        # Remove the chosen shards so no other user can receive them.
        idx_shard = list(set(idx_shard) - rand_set)
        dict_users[i] = np.concatenate(
            [idxs[rand*num_imgs:(rand+1)*num_imgs] for rand in rand_set], axis=0)

    return dict_users




def mnist_noniid_unequal(dataset, num_users):
    """
    Sample non-I.I.D client data from MNIST dataset where clients have an
    unequal amount of data.

    Sample indices are sorted by label and cut into 1,200 contiguous shards of
    50 images. Every user draws a random shard count in [1, 30]; the counts are
    rescaled so that they sum to roughly 1,200 and shards are then handed out
    at random (global ``np.random`` state) without replacement.

    :param dataset: dataset with 60,000 samples whose labels are exposed as
                    ``targets`` (current torchvision) or ``train_labels``
                    (older torchvision).
    :param num_users: number of users to split the data among.
    :return: dict mapping user id -> 1-D integer array of sample indices.
    """
    # 60,000 training imgs --> 1200 shards X 50 imgs/shard
    num_shards, num_imgs = 1200, 50
    # shards that have not been handed to a user yet
    idx_shard = [i for i in range(num_shards)]
    # integer dtype so the concatenated results stay usable as indices
    dict_users = {i: np.array([], dtype=np.int64) for i in range(num_users)}
    idxs = np.arange(num_shards*num_imgs)

    # `train_labels` is the deprecated name of `targets`; support both
    labels = getattr(dataset, 'targets', None)
    if labels is None:
        labels = dataset.train_labels
    if hasattr(labels, 'numpy'):
        labels = labels.numpy()
    labels = np.asarray(labels)

    # sort sample indices by label: row 0 = index, row 1 = label
    idxs_labels = np.vstack((idxs, labels))
    idxs_labels = idxs_labels[:, idxs_labels[1, :].argsort()]
    idxs = idxs_labels[0, :]

    # Minimum and maximum shards assigned per client:
    min_shard = 1
    max_shard = 30

    # Divide the shards into random chunks for every client such that the
    # sum of these chunks is approximately num_shards (rounding may overshoot
    # or undershoot, which the two branches below deal with)
    random_shard_size = np.random.randint(min_shard, max_shard+1,
                                          size=num_users)
    random_shard_size = np.around(random_shard_size /
                                  sum(random_shard_size) * num_shards)
    random_shard_size = random_shard_size.astype(int)

    def _assign_shards(user, count):
        """Move up to `count` randomly chosen free shards to `user`."""
        nonlocal idx_shard
        # never ask for more shards than are left; a non-positive request
        # (possible after the `- 1` adjustment below) is a no-op
        count = min(int(count), len(idx_shard))
        if count <= 0:
            return
        rand_set = set(np.random.choice(idx_shard, count, replace=False))
        idx_shard = list(set(idx_shard) - rand_set)
        for rand in rand_set:
            dict_users[user] = np.concatenate(
                (dict_users[user], idxs[rand*num_imgs:(rand+1)*num_imgs]),
                axis=0)

    if sum(random_shard_size) > num_shards:
        # Rounding overshot: first give every client one shard so that nobody
        # ends up empty, then hand out the remainder of each client's quota
        # until the shards run out
        for i in range(num_users):
            _assign_shards(i, 1)

        random_shard_size = random_shard_size-1

        # Next, randomly assign the remaining shards
        for i in range(num_users):
            _assign_shards(i, random_shard_size[i])
    else:
        # The quotas fit: assign them directly
        for i in range(num_users):
            _assign_shards(i, random_shard_size[i])

        if len(idx_shard) > 0:
            # Add the leftover shards to the client with minimum images
            k = min(dict_users, key=lambda x: len(dict_users.get(x)))
            _assign_shards(k, len(idx_shard))

    return dict_users


def cifar_iid(dataset, num_users):
    """
    Split a dataset into equally sized IID partitions, one per user.

    Each user receives ``int(len(dataset) / num_users)`` sample indices drawn
    uniformly at random (global ``np.random`` state) without replacement;
    indices given to one user are never offered to a later one.

    :param dataset: any object supporting ``len()``.
    :param num_users: number of partitions to create.
    :return: dict mapping user id -> set of sample indices.
    """
    total = len(dataset)
    per_user = int(total/num_users)
    remaining = [i for i in range(total)]  # indices not yet assigned
    partition = {}
    for user in range(num_users):
        picked = set(np.random.choice(remaining, per_user, replace=False))
        partition[user] = picked
        # drop the indices that were just handed out
        remaining = list(set(remaining) - picked)
    return partition


def cifar_noniid(dataset, num_users):
    """
    Create a non-IID data distribution among clients for the CIFAR-10 dataset.

    Sample indices are sorted by label, cut into 200 contiguous shards of 250
    images each, and every user receives 2 distinct shards drawn at random
    (global ``np.random`` state) without replacement.

    :param dataset: dataset with 50,000 samples whose labels are exposed as
                    ``targets`` (current torchvision, a plain list for CIFAR10)
                    or ``train_labels`` (older torchvision).
    :param num_users: number of users; at most 100, since every user consumes
                      2 of the 200 shards.
    :return: dict mapping user id -> 1-D integer array with that user's
             500 sample indices.
    :raises ValueError: if ``num_users`` needs more shards than exist.
    """
    # 50,000 training imgs --> 200 shards X 250 imgs/shard
    num_shards, num_imgs = 200, 250
    shards_per_user = 2
    if num_users * shards_per_user > num_shards:
        raise ValueError(
            'num_users={} needs {} shards but only {} are available'.format(
                num_users, num_users * shards_per_user, num_shards))

    # shards that have not been handed to a user yet
    idx_shard = [i for i in range(num_shards)]
    # integer dtype so the concatenated results stay usable as indices
    dict_users = {i: np.array([], dtype=np.int64) for i in range(num_users)}
    idxs = np.arange(num_shards*num_imgs)

    # Current torchvision CIFAR10 has no `train_labels`; its labels live in
    # `targets` as a Python list, so neither attribute is assumed to be a tensor
    labels = getattr(dataset, 'targets', None)
    if labels is None:
        labels = dataset.train_labels
    if hasattr(labels, 'numpy'):
        labels = labels.numpy()
    labels = np.asarray(labels)

    # sort sample indices by label: row 0 = index, row 1 = label
    idxs_labels = np.vstack((idxs, labels))
    idxs_labels = idxs_labels[:, idxs_labels[1, :].argsort()]
    idxs = idxs_labels[0, :]

    # divide and assign 2 shards/client
    for i in range(num_users):
        rand_set = set(np.random.choice(idx_shard, shards_per_user,
                                        replace=False))
        # remove the chosen shards so no other user can receive them
        idx_shard = list(set(idx_shard) - rand_set)
        for rand in rand_set:
            dict_users[i] = np.concatenate(
                (dict_users[i], idxs[rand*num_imgs:(rand+1)*num_imgs]), axis=0)

    return dict_users








new copy


In [None]:
import torch
import torch.optim as optim

# Define the SAM optimizer
class SAM(torch.optim.Optimizer):
    """
    Sharpness-Aware-Minimization style wrapper around a base optimizer.

    One update is two phases: ``first_step`` perturbs the parameters using the
    current gradients, the caller's closure recomputes the gradients at the
    perturbed point, and ``second_step`` restores the saved parameters and lets
    the wrapped base optimizer apply the update with those new gradients.

    :param params: parameters (or parameter groups) to optimize.
    :param base_optimizer: optimizer class (e.g. ``torch.optim.SGD``) that is
                           instantiated on the same parameter groups.
    :param rho: perturbation radius stored in every parameter group.
    :param kwargs: forwarded to ``base_optimizer`` and stored as defaults.
    """

    def __init__(self, params, base_optimizer, rho=0.03, **kwargs):
        sam_defaults = {"rho": rho}
        sam_defaults.update(kwargs)
        super().__init__(params, sam_defaults)

        # The base optimizer is built on SAM's own groups and the two objects
        # then share one param_groups list, so edits (e.g. lr) reach both.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups
        self.defaults.update(self.base_optimizer.defaults)

    @torch.no_grad()
    def first_step(self, r, zero_grad=False):
        """
        Save every parameter that has a gradient and perturb it in place.

        For each pair ``(r_k, g_k)`` obtained by zipping ``r`` with the slices
        of ``p.grad`` along its first dimension, ``g_k * r_k * rho / (norm + 1e-12)``
        is added to ``p`` (broadcast over ``p``), where ``norm`` is the global
        gradient norm from ``_grad_norm``.

        NOTE(review): this differs from the usual SAM perturbation
        ``rho * grad / norm`` -- it pairs entries of ``r`` with rows of the
        gradient and broadcasts each row over the whole parameter. Confirm that
        this is the intended formula.

        :param r: iterable of scale factors, consumed in order.
        :param zero_grad: clear the gradients afterwards when True.
        """
        norm = self._grad_norm()
        for group in self.param_groups:
            rho = group["rho"]
            for param in group["params"]:
                grad = param.grad
                if grad is None:
                    continue
                # remember "w" so that second_step can undo the perturbation
                self.state[param]["old_p"] = param.data.clone()
                for weight, grad_slice in zip(r, grad):
                    step_scale = weight * rho / (norm + 1e-12)
                    param.add_(grad_slice * step_scale.to(param))

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """
        Restore the parameters saved by ``first_step`` and run the base
        optimizer's step with the gradients currently stored on them.

        :param zero_grad: clear the gradients afterwards when True.
        """
        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is not None:
                    # get back to "w" from "w + e(w)"
                    param.data = self.state[param]["old_p"]

        self.base_optimizer.step()

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def step(self, r, closure):
        """
        Perform one complete two-phase update.

        Gradients must already be populated when this is called; they are
        cleared after the perturbation and recomputed by ``closure``.

        :param r: scale factors forwarded to ``first_step``.
        :param closure: callable doing a full forward-backward pass; required.
        """
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        # this method runs under no_grad, so the closure needs grad re-enabled
        grad_enabled_closure = torch.enable_grad()(closure)

        self.first_step(r, zero_grad=True)
        grad_enabled_closure()
        self.second_step()

    def _grad_norm(self):
        """
        Return the 2-norm over the per-parameter gradient 2-norms, or a
        ``1e-12`` tensor when no parameter has a gradient.
        """
        # gather everything on the first parameter's device before stacking
        device = self.param_groups[0]["params"][0].device
        per_param = []
        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is not None:
                    per_param.append(param.grad.norm(p=2).to(device))
        if len(per_param) == 0:
            return torch.tensor(1e-12).to(device)
        return torch.norm(torch.stack(per_param), p=2)

    def load_state_dict(self, state_dict):
        """Load optimizer state and re-share the groups with the base optimizer."""
        super().load_state_dict(state_dict)
        self.base_optimizer.param_groups = self.param_groups

In [None]:

# Wall-clock reference; the training cell subtracts it from time.time() to
# report the total run time.
start_time = time.time() #sets the start time to current time

# Paths: absolute path of the parent directory, and a tensorboardX writer
# logging to ../logs.
# NOTE(review): path_project is not referenced in the visible cells -- confirm it is still needed.
path_project= os.path.abspath('..')
logger = SummaryWriter('../logs')

# Parse the run configuration and print the experiment summary.
args = args_parser()
exp_details(args)



Experimental details:
    Model     : resnet
    Dataset   : cifar
    Optimizer : adam
    Learning  : 0.1
    Global Rounds   : 10

    Federated parameters:
    IID
    Fraction of users  : 0.1
    Local Batch size   : 128
    Local Epochs       : 2



In [None]:

device = 'cpu'

# load dataset and user groups
train_dataset, test_dataset, user_groups = get_dataset(args)

# BUILD MODEL
# Every branch either binds `global_model` or aborts, so a stale model left
# in the kernel by an earlier run can never be picked up silently.
if args.model == 'cnn':
    if args.dataset == 'mnist':
        global_model = CNNMnist(args=args)
    elif args.dataset == 'fmnist':
        global_model = CNNFashion_Mnist(args=args)
    elif args.dataset == 'cifar':
        global_model = CNNCifar(args=args)
    else:
        raise SystemExit('Error: unrecognized dataset')

elif args.model == 'mlp':
    # input width = product of the dimensions of one sample
    img_size = train_dataset[0][0].shape
    len_in = 1
    for x in img_size:
        len_in *= x
    # build the model once, after the full input size is known
    global_model = MLP(dim_in=len_in, dim_hidden=64,
                       dim_out=args.num_classes)

elif args.model == 'resnet':
    global_model = ResNet18(BasicBlock, [2, 2, 2, 2], num_classes=10)

else:
    # raise SystemExit directly: inside a Jupyter kernel the builtin `exit`
    # is replaced and does not behave like sys.exit(message)
    raise SystemExit('Error: unrecognized model')

print("global model")
print(global_model)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 102482604.00it/s]


Extracting ../data/cifar/cifar-10-python.tar.gz to ../data/cifar/
Files already downloaded and verified
global model
ResNet18(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn

In [None]:
# Move the global model to the specified device
global_model.to(device)

# Set the global model to training mode.
# The trailing ';' suppresses the cell's output: train() returns the model,
# whose full repr was already printed by the model-building cell.
global_model.train();


ResNet18(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)


In [None]:
# Training
train_loss, train_accuracy = [], []
val_acc_list, net_list = [], []
cv_loss, cv_acc = [], []

print_every = 2 #specifies how often to print the training progress during training.

#variables to check if the validation loss has stopped improving i.e. overfitting
val_loss_pre, counter = 0, 0

In [None]:
# Randomly shuffle the indices of users
users_pool = np.random.choice(range(args.num_users), args.num_users, replace=False)
users_pool.sort()

# Calculate weight coefficients for each client
weight_coefficient_p = []
for idx in users_pool:
    local_model = calculate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger, p=0)
    a, b, d, c = local_model.train_val_test(dataset=train_dataset, idxs=list(user_groups[idx]))
    weight_coefficient_p.append(c)

# Normalize weight coefficients
total_size = sum(weight_coefficient_p)
weight_coefficient_p = [number / total_size for number in weight_coefficient_p]
print(weight_coefficient_p)

[0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01]


In [None]:
# Federated training. Every global round:
#   1. decays the learning rate,
#   2. samples a fraction of the users,
#   3. collects their gradients on the current global model and derives `r`,
#   4. runs the local updates with that `r` and averages the resulting weights,
#   5. evaluates the new global model on every user's data.
for epoch in range(args.epochs):
    # multiplicative decay; note that this mutates `args` in place, so
    # re-running this cell continues from the already decayed value
    args.lr *= 0.99
    print(args.lr)
    local_gradients = []     # one gradient result per selected user
    local_weights_list = []  # one state dict per selected user
    print(f'\n | Global Training Round : {epoch+1} |\n')

    global_model.train()
    # Select a subset of users (at least one)
    m = max(int(args.frac * args.num_users), 1)

    idxs_users = np.random.choice(range(args.num_users), m, replace=False)
    print(idxs_users)

    # Pass 1: gradients of every selected user on a copy of the global model
    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger, r=1)
        local_grads = local_model.calculate_gradients(model=copy.deepcopy(global_model))
        local_gradients.append(local_grads)

    # r is computed once per round from all collected gradients, not per user
    r = calculate_r(local_gradients, weight_coefficient_p)
    print(r)
    r = r * 0.1
    print(r)

    # Pass 2: local training with the shared r, each user starting from its
    # own copy of the global model
    for idx in idxs_users:
        local_model = LocalUpdate(args=args, dataset=train_dataset, idxs=user_groups[idx], logger=logger, r=r)
        local_weights = local_model.update_weights(model=copy.deepcopy(global_model), global_round=epoch, p=weight_coefficient_p)
        local_weights_list.append(copy.deepcopy(local_weights))

    global_weights = average_weights(local_weights_list)
    print("Global Weights:")
    global_model.load_state_dict(global_weights)

    # Evaluate the updated global model on every user's training data
    list_acc, list_loss = [], []
    global_model.eval()
    for c in range(args.num_users):
        # index with the loop variable `c`; using the leftover `idx` here
        # would evaluate the last selected user num_users times
        local_model = LocalUpdate(args=args, dataset=train_dataset,
                                  idxs=user_groups[c], logger=logger, r=r)
        acc, loss = local_model.inference(model=global_model)
        list_acc.append(acc)
        list_loss.append(loss)
    train_accuracy.append(sum(list_acc)/len(list_acc))
    # record the round's mean loss so that the stats below do not average an
    # empty list (which prints nan and triggers a NumPy warning)
    # assumes inference() returns a scalar loss -- TODO confirm
    train_loss.append(float(sum(list_loss)) / len(list_loss))

    #print global training loss after every 'i' rounds
    if (epoch+1) % print_every == 0:
        print(f' \nAvg Training Stats after {epoch+1} global rounds:')
        print(f'Training Loss : {np.mean(np.array(train_loss))}')
        print('Train Accuracy: {:.2f}% \n'.format(100*train_accuracy[-1]))

#Test inference after completion of training
test_acc, test_loss = test_inference(args, global_model, test_dataset)

print(f' \n Results after {args.epochs} global rounds of training:')
print("|---- Avg Train Accuracy: {:.2f}%".format(100*train_accuracy[-1]))
print("|---- Test Accuracy: {:.2f}%".format(100*test_acc))

# Target path for persisting train_loss and train_accuracy.
# NOTE(review): the name is only built here; nothing in this cell writes the file.
file_name = 'save/objects/{}_{}_{}_C[{}]_iid[{}]_E[{}]_B[{}].pkl'.\
    format(args.dataset, args.model, args.epochs, args.frac, args.iid,
           args.local_ep, args.local_bs)

print('\n Total Run Time: {0:0.4f}'.format(time.time()-start_time))

0.099

 | Global Training Round : 1 |

[20 14  4 84 76 30 19 25 64 88]


  return torch.tensor(image), torch.tensor(label)


[0.057535   0.05765288 0.05674439 0.05348188 0.05342261 0.05760142
 0.05490991 0.05355322 0.0570736  0.0511042 ]
[0.0057535  0.00576529 0.00567444 0.00534819 0.00534226 0.00576014
 0.00549099 0.00535532 0.00570736 0.00511042]
Global Weights:
0.09801

 | Global Training Round : 2 |

[27 56 73 17 75 49 63  6 29 61]
[0.07590308 0.08000397 0.09026952 0.08296521 0.07413893 0.07313354
 0.08159728 0.08597334 0.08060515 0.07291178]
[0.00759031 0.0080004  0.00902695 0.00829652 0.00741389 0.00731335
 0.00815973 0.00859733 0.00806051 0.00729118]
Global Weights:
 
Avg Training Stats after 2 global rounds:
Training Loss : nan
Train Accuracy: 21.08% 

0.0970299

 | Global Training Round : 3 |

[45 22 71 72 81 55 99 27 87 97]


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


[0.08493072 0.07944954 0.08223518 0.06872048 0.07049341 0.05869118
 0.08668152 0.09053168 0.07446194 0.08281495]
[0.00849307 0.00794495 0.00822352 0.00687205 0.00704934 0.00586912
 0.00866815 0.00905317 0.00744619 0.00828149]
Global Weights:
0.096059601

 | Global Training Round : 4 |

[20 23 15 69 90 31 35 11 81 29]
[0.0557272  0.07508065 0.0875722  0.0730506  0.08162227 0.08840208
 0.08797956 0.06495033 0.08473684 0.05930161]
[0.00557272 0.00750806 0.00875722 0.00730506 0.00816223 0.00884021
 0.00879796 0.00649503 0.00847368 0.00593016]
Global Weights:
 
Avg Training Stats after 4 global rounds:
Training Loss : nan
Train Accuracy: 14.00% 

0.09509900499

 | Global Training Round : 5 |

[ 0 18 46 38 76 66 50  9 94 57]
[0.09512553 0.0779391  0.08633877 0.07858998 0.02931198 0.08977985
 0.09465256 0.07046313 0.09157463 0.02228688]
[0.00951255 0.00779391 0.00863388 0.007859   0.0029312  0.00897799
 0.00946526 0.00704631 0.00915746 0.00222869]
Global Weights:
0.0941480149401

 | Global Tr

In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Draw the per-round training accuracy on an explicit figure/axes pair and
# write it to disk as a PNG.
matplotlib.use('Agg')
plt.ion()
fig, ax = plt.subplots()
ax.plot(range(len(train_accuracy)), train_accuracy)
ax.set_title('Training accuracy vs communication rounds')
ax.set_ylabel('Training accuracy')
ax.set_xlabel('Communication round')
ax.grid(True)
fig.savefig("/content/train_accuracy.png")
plt.show()

In [None]:
from IPython.display import Image
# Render the saved training-accuracy PNG inline as this cell's output.
Image('/content/train_accuracy.png')