In [17]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

import matplotlib.pyplot as plt
import numpy as np
from admm.agents import FedConsensus
from admm.servers import EventADMM
from admm.models import FCNet, CNN
from admm.utils import average_params, split_dataset
import seaborn as sns
sns.set_theme()

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Both training sets are read from local disk only: with download=False the
# files are expected to already exist under the given root directories.
local_train_kwargs = dict(train=True, download=False)

# CIFAR-10: convert images to tensors, then normalize each of the three
# channels with mean 0.5 and std 0.5.
cifar_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,) * 3, (0.5,) * 3)]
)
cifar_trainset = datasets.CIFAR10(
    root='./data/cifar10', transform=cifar_transform, **local_train_kwargs
)

# MNIST: single-channel images, normalized with mean 0.1307 and std 0.3081
# (presumably the usual dataset-wide statistics -- confirm if it matters).
mnist_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)
mnist_trainset = datasets.MNIST(
    root='./data/mnist_data', transform=mnist_transform, **local_train_kwargs
)

In [27]:
# Report when the CIFAR-10 targets are not stored as a tensor.
# The previous test, `targets is not torch.Tensor`, compared the targets object
# with the Tensor *class* by identity, which is true for every instance and so
# never actually inspected the type; isinstance() performs the intended check.
if not isinstance(cifar_trainset.targets, torch.Tensor):
    print('list')

list


In [39]:
from datset_preperation import _partition_data

# Split the CIFAR-10 training set across 10 clients using the project's
# partitioning helper (non-IID, unbalanced, power-law options, fixed seed).
trainsets = _partition_data(
    trainset=cifar_trainset,
    num_clients=10,
    iid=False,
    balance=False,
    power_law=True,
    seed=42,
)

# Print a per-class sample histogram for every client dataset.
for client_idx, client_set in enumerate(trainsets):
    class_counts = np.zeros(10)
    # batch_size=1 so each target holds a single label that .item() can unpack.
    for _, target in DataLoader(client_set, batch_size=1):
        class_counts[target.item()] += 1
    total = class_counts.sum()
    print(f'Dataset {client_idx} distribution: {class_counts} - num_samples = {total}')

tensor([23659, 15003, 15008,  ..., 49997,     1,  4754])
Dataset 0 distribution: [4630. 3065.    0.    0.    0.    0.    0.    0.    0.    0.] - num_samples = 7695.0
Dataset 1 distribution: [   0. 1934.   73.    0.    0.    0.    0.    0.    0.    0.] - num_samples = 2007.0
Dataset 2 distribution: [   0.    0. 4926.   60.    0.    0.    0.    0.    0.    0.] - num_samples = 4986.0
Dataset 3 distribution: [   0.    0.    0. 4939. 4801.    0.    0.    0.    0.    0.] - num_samples = 9740.0
Dataset 4 distribution: [  0.   0.   0.   0. 198.  65.   0.   0.   0.   0.] - num_samples = 263.0
Dataset 5 distribution: [   0.    0.    0.    0.    0. 4934.  179.    0.    0.    0.] - num_samples = 5113.0
Dataset 6 distribution: [   0.    0.    0.    0.    0.    0. 4820.  540.    0.    0.] - num_samples = 5360.0
Dataset 7 distribution: [   0.    0.    0.    0.    0.    0.    0. 4459.  925.    0.] - num_samples = 5384.0
Dataset 8 distribution: [   0.    0.    0.    0.    0.    0.    0.    0. 4074. 494

Dataset 0 distribution: [ 776. 4921.    0.    0.    0.    0.    0.    0.    0.    0.]
Dataset 1 distribution: [   0. 1820. 4251.    0.    0.    0.    0.    0.    0.    0.]
Dataset 2 distribution: [   0.    0. 1706.   50.    0.    0.    0.    0.    0.    0.]
Dataset 3 distribution: [   0.    0.    0. 6080. 1833.    0.    0.    0.    0.    0.]
Dataset 4 distribution: [   0.    0.    0.    0. 4008. 5210.    0.    0.    0.    0.]
Dataset 5 distribution: [   0.    0.    0.    0.    0.  210. 5785.    0.    0.    0.]
Dataset 6 distribution: [   0.    0.    0.    0.    0.    0.  132. 3427.    0.    0.]
Dataset 7 distribution: [   0.    0.    0.    0.    0.    0.    0. 2837.  525.    0.]
Dataset 8 distribution: [   0.    0.    0.    0.    0.    0.    0.    0. 5325. 2869.]
Dataset 9 distribution: [5146.    0.    0.    0.    0.    0.    0.    0.    0. 3079.]


In [40]:
from flwr_datasets import FederatedDataset
from flwr_datasets.partitioner import ExponentialPartitioner, NaturalIdPartitioner, LinearPartitioner

# Number of training partitions (one per simulated node).
nodes = 3
fds = FederatedDataset(dataset='cifar10', partitioners={'train': nodes, 'test': 1})
partitions = [fds.load_partition(node_id=node, split='train') for node in range(nodes)]

# Deliberately NOT named `transforms`: that name is the torchvision.transforms
# module imported in the first cell, and rebinding it to a ToTensor instance
# breaks any later (re-)run of cells that call transforms.Compose(...).
to_tensor = ToTensor()


def apply_transforms(batch):
    """Convert every image of a batch to a tensor.

    Args:
        batch: Mapping with an "img" entry holding an iterable of images.

    Returns:
        The same ``batch`` object, with ``batch["img"]`` replaced by a list
        of the ``ToTensor``-converted images.
    """
    batch["img"] = [to_tensor(img) for img in batch["img"]]
    return batch


# Attach the conversion to each training partition via with_transform.
partitions_torch = [partition.with_transform(apply_transforms) for partition in partitions]

def count_labels(partition, num_classes=10):
    """Count the samples of each class in a partition.

    Args:
        partition: Object whose ``partition['label']`` yields one integer
            class id per sample.
        num_classes: Number of class ids to report (ids 0..num_classes-1).

    Returns:
        A list of ``num_classes`` ints where entry ``c`` is the number of
        samples labelled ``c``. Labels outside that range are ignored.
    """
    counts = [0] * num_classes
    # Single pass over the label column instead of one full scan per class.
    for label in partition['label']:
        if 0 <= label < num_classes:
            counts[label] += 1
    return counts


print('Training partitions')
for i, partition in enumerate(partitions):
    # The partition index is no longer overwritten by an inner class loop.
    print(f'\nPartition {i}')
    print(count_labels(partition))

print('\nTest partition\n')
partition = fds.load_partition(node_id=0, split='test')
print(count_labels(partition))

Training partitions

Partition 0
[1705, 1605, 1682, 1628, 1669, 1683, 1674, 1692, 1643, 1686]

Partition 1
[1599, 1723, 1659, 1729, 1670, 1647, 1679, 1644, 1653, 1664]

Partition 2
[1696, 1672, 1659, 1643, 1661, 1670, 1647, 1664, 1704, 1650]

Test partition

[1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000]

Validation partition



ValueError: The given split: 'val' is not present in the dataset's splits: '['train', 'test']'.