In [19]:
import os
import torch
import torchvision
import tarfile
from torchvision.datasets.utils import download_url
from torch.utils.data import random_split, DataLoader

from torchvision.datasets import CIFAR10
from torchvision import transforms
import matplotlib.pyplot as plt

%matplotlib inline
plt.rcParams['figure.figsize'] = [5, 5]

In [20]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

train_dataset = CIFAR10('data', train=True, download=True, transform=transform)
test_dataset = CIFAR10('data', train=False, download=True, transform=transform)

train_dataset, dev_dataset = random_split(train_dataset, [int(len(train_dataset) * 0.80), int(len(train_dataset) * 0.2)])

Files already downloaded and verified
Files already downloaded and verified


In [21]:
total_train_size = len(train_dataset)
total_test_size = len(test_dataset)
total_dev_size = len(dev_dataset)
total_train_size, total_dev_size, total_test_size

(40000, 10000, 10000)

In [22]:
classes = 10
input_dim = 3 * 32 * 32

num_clients = 8
rounds = 15
batch_size = 64
epochs_per_client = 5
learning_rate = 0.05

In [23]:
def get_device():
    return torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def to_device(data, device):
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

class DeviceDataLoader(DataLoader):
        def __init__(self, dl, device):
            self.dl = dl
            self.device = device

        def __iter__(self):
            for batch in self.dl:
                yield to_device(batch, self.device)

        def __len__(self):
            return len(self.dl)

device = get_device()
device

device(type='cuda')

In [24]:
import torch.nn as nn
import torch.nn.functional as F
import copy

class FederatedNet(torch.nn.Module):
    def __init__(self):
        super(FederatedNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)
        self.dropout = nn.Dropout(0.5)
        self.track_layers = {
        'conv1': self.conv1,
        'conv2': self.conv2,
        'conv3': self.conv3,
        'fc1': self.fc1,
        'fc2': self.fc2,
        'fc3': self.fc3
    }

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = x.view(-1, 128 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x

    def get_track_layers(self):
        return self.track_layers

    def apply_parameters(self, parameters_dict):
        with torch.no_grad():
            for layer_name in parameters_dict:
                self.track_layers[layer_name].weight.data *= 0
                self.track_layers[layer_name].bias.data *= 0
                self.track_layers[layer_name].weight.data += parameters_dict[layer_name]['weight']
                self.track_layers[layer_name].bias.data += parameters_dict[layer_name]['bias']

    def get_parameters(self, deep_copy = True):
        parameters_dict = dict()
        for layer_name in self.track_layers:
            parameters_dict[layer_name] = {
                'weight': self.track_layers[layer_name].weight.data,
                'bias': self.track_layers[layer_name].bias.data
            }
        if deep_copy:
            params_clone = copy.deepcopy(parameters_dict)
            return params_clone
        else:
            return parameters_dict

    def batch_accuracy(self, outputs, labels):
        with torch.no_grad():
            _, predictions = torch.max(outputs, dim=1)
            return torch.tensor(torch.sum(predictions == labels).item() / len(predictions))

    def _process_batch(self, batch):
        images, labels = batch
        outputs = self(images)
        loss = torch.nn.functional.cross_entropy(outputs, labels)
        accuracy = self.batch_accuracy(outputs, labels)
        return (loss, accuracy)

    def fit(self, dataset, epochs, lr, batch_size=128, opt=torch.optim.SGD):
        dataloader = DeviceDataLoader(DataLoader(dataset, batch_size, shuffle=True), device)
        optimizer = opt(self.parameters(), lr)
        history = []
        for epoch in range(epochs):
            losses = []
            accs = []
            for batch in dataloader:
                loss, acc = self._process_batch(batch)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                loss.detach()
                losses.append(loss)
                accs.append(acc)
            avg_loss = torch.stack(losses).mean().item()
            avg_acc = torch.stack(accs).mean().item()
            history.append((avg_loss, avg_acc))
        return history

    def evaluate(self, dataset, batch_size=128):
        dataloader = DeviceDataLoader(DataLoader(dataset, batch_size), device)
        losses = []
        accs = []
        with torch.no_grad():
            for batch in dataloader:
                loss, acc = self._process_batch(batch)
                losses.append(loss)
                accs.append(acc)
        avg_loss = torch.stack(losses).mean().item()
        avg_acc = torch.stack(accs).mean().item()
        return (avg_loss, avg_acc)

In [25]:
class Client:
    def __init__(self, client_id, dataset):
        self.client_id = client_id
        self.dataset = dataset

    def get_dataset_size(self):
        return len(self.dataset)

    def get_client_id(self):
        return self.client_id

    def __difference(self, params1 : dict, params2 : dict):
        diff = {}
        for layer in params1.keys():
            diff[layer] = {}
            for key in params1[layer].keys():
                diff[layer][key] = params1[layer][key] - params2[layer][key]
        return diff

    def train(self, parameters_dict):
        net = to_device(FederatedNet(), device)
        net.apply_parameters(parameters_dict)
        wt = net.get_parameters(deep_copy=True)
        train_history = net.fit(self.dataset, epochs_per_client, learning_rate, batch_size)
        loss = round(train_history[-1][0],3)
        accuracy = round(train_history[-1][1],3)
        print(f'{self.client_id}, Loss = {loss}, Accuracy = {accuracy}')
        wt_plus_1 = net.get_parameters(deep_copy=True)
        update = self.__difference(wt, wt_plus_1)
        return update

In [26]:
examples_per_client = total_train_size // num_clients
client_datasets = random_split(train_dataset, [min(i + examples_per_client,
           total_train_size) - i for i in range(0, total_train_size, examples_per_client)])
clients = [Client('client_' + str(i), client_datasets[i]) for i in range(num_clients)]

In [27]:
def diff(wt, wt_plus_1):
    for layer in wt.keys():
        for key in wt[layer].keys():
            print(wt[layer][key] - wt_plus_1[layer][key])

In [28]:
def reconstruct_wt_plus_1(global_params, client_updates):
    wt_plus_1 = {}
    for layer in global_params.keys():
        wt_plus_1[layer] = {}
        for key in global_params[layer].keys():
            wt_plus_1[layer][key] = global_params[layer][key] - client_updates[layer][key]
    return wt_plus_1

In [29]:
import numpy as np
def flatten_dict_to_vector(d):
    flat_list = []
    shapes = {}

    for k1, v1 in d.items():
        for k2, tensor in v1.items():
            shapes[f'{k1}_{k2}'] = tensor.shape
            # Check if tensor is on GPU and move to CPU if necessary
            if tensor.is_cuda:
                tensor = tensor.cpu()
            flat_list.extend(tensor.flatten().numpy())

    flat_vector = np.array(flat_list)
    return flat_vector, shapes

In [32]:
def restore_vector_to_dict(flat_vector, shapes):
    restored_dict = {}
    offset = 0

    # Check if CUDA is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for k, shape in shapes.items():
        size = np.prod(shape)
        tensor_flat = flat_vector[offset:offset + size]
        tensor = tensor_flat.clone().detach().reshape(shape).to(device)  # Send tensor to the appropriate device

        k1, k2 = k.split('_')
        if k1 not in restored_dict:
            restored_dict[k1] = {}
        restored_dict[k1][k2] = tensor

        offset += size

    return restored_dict

In [None]:
from time import time
start = time()
global_net = to_device(FederatedNet(), device)
history = []
for i in range(rounds):
    print('Start Round {} ...'.format(i + 1))
    curr_parameters = global_net.get_parameters()
    new_parameters = dict([(layer_name, {'weight': 0, 'bias': 0}) for layer_name in curr_parameters])
    updates = []

     # get client updates
    for client in clients:
        update = client.train(curr_parameters)
        updates.append(update)
    processed_updates = []

    # apply ga to updates
    for update in updates:
        flat_vector, shapes = flatten_dict_to_vector(update)
        d = flat_vector.size
        q = 1000
        random_seed = 42

        np.random.seed(random_seed)
        G_np = np.random.normal(0, 1, size=(d, int(q)))
        G = torch.tensor(G_np, dtype=torch.float32)
        w = torch.matmul(G.T, torch.tensor(flat_vector)) / q
        delta = torch.matmul(G, w)
        restored_wt = restore_vector_to_dict(delta, shapes)
        processed_updates.append(restored_wt)

    # send updates to the server
    for update in processed_updates:
        client_parameters = reconstruct_wt_plus_1(curr_parameters, update)
        fraction = client.get_dataset_size() / total_train_size
        for layer_name in client_parameters:
            new_parameters[layer_name]['weight'] += fraction * (client_parameters[layer_name]['weight'])
            new_parameters[layer_name]['bias'] += fraction * (client_parameters[layer_name]['bias'])
    global_net.apply_parameters(new_parameters)

    train_loss, train_acc = global_net.evaluate(train_dataset)
    dev_loss, dev_acc = global_net.evaluate(dev_dataset)
    print('After round {}, train_loss = {}, dev_loss = {}, dev_acc = {}\n'.format(i + 1, round(train_loss, 4),
            round(dev_loss, 4), round(dev_acc, 4)))
    history.append((train_loss, dev_loss, dev_acc))

Start Round 1 ...
client_0, Loss = 1.935, Accuracy = 0.296
client_1, Loss = 1.914, Accuracy = 0.289
client_2, Loss = 1.931, Accuracy = 0.275
client_3, Loss = 1.927, Accuracy = 0.278
client_4, Loss = 1.916, Accuracy = 0.295
client_5, Loss = 1.952, Accuracy = 0.278
client_6, Loss = 1.926, Accuracy = 0.29
client_7, Loss = 1.928, Accuracy = 0.291
After round 1, train_loss = 13.5183, dev_loss = 13.5761, dev_acc = 0.1128

Start Round 2 ...
client_0, Loss = 1.826, Accuracy = 0.321
client_1, Loss = 1.741, Accuracy = 0.35
client_2, Loss = 1.741, Accuracy = 0.34
client_3, Loss = 1.735, Accuracy = 0.349
client_4, Loss = 1.725, Accuracy = 0.365
client_5, Loss = 1.728, Accuracy = 0.359
client_6, Loss = 1.746, Accuracy = 0.357
client_7, Loss = 1.735, Accuracy = 0.35
After round 2, train_loss = 586.3163, dev_loss = 588.2312, dev_acc = 0.0942

Start Round 3 ...
client_0, Loss = nan, Accuracy = 0.102
client_1, Loss = nan, Accuracy = 0.1
client_2, Loss = nan, Accuracy = 0.111
client_3, Loss = nan, Accur

In [None]:
print("Execution lasted:", time.time()-start)

In [None]:
plt.plot([i + 1 for i in range(len(history))], [history[i][0] for i in range(len(history))], color='r', label='train loss')
plt.plot([i + 1 for i in range(len(history))], [history[i][1] for i in range(len(history))], color='b', label='dev loss')
plt.legend()
plt.title('Training history')
plt.show()

In [None]:
dev_accs = [history[i][2] for i in range(len(history))]
# Plot accuracies
plt.plot([i + 1 for i in range(len(history))], dev_accs, color='m', label='dev accuracy')
plt.legend()
plt.title('Accuracy history')
plt.show()