In [1]:
import os
import sys
# To Avoid Crashes with a lot of nodes
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.datasets import MNIST
import copy
import numpy as np
import lightning as pl
from torchmetrics.classification import MulticlassAccuracy, MulticlassRecall, MulticlassPrecision, MulticlassF1Score, MulticlassConfusionMatrix
from torchmetrics import MetricCollection
from torch import Tensor
import torch
import random
from datetime import datetime
import networkx as nx
from lightning.pytorch.loggers import CSVLogger

In [2]:
from torchvision.datasets import CIFAR10

In [3]:
from multiprocessing import Manager, Process

In [4]:
import logging
# Only let errors through from the "lightning.pytorch" logger hierarchy.
logging.getLogger("lightning.pytorch").setLevel(logging.ERROR)
# Route records of "lightning.pytorch.core" to core.log. The handler is added
# only if an equivalent one is not attached yet, so re-running this cell does
# not stack duplicate FileHandlers (each duplicate would write every record
# once more).
logger = logging.getLogger("lightning.pytorch.core")
_core_log_path = os.path.abspath("core.log")
if not any(
    isinstance(_handler, logging.FileHandler) and _handler.baseFilename == _core_log_path
    for _handler in logger.handlers
):
    logger.addHandler(logging.FileHandler("core.log"))

In [5]:
# Silence the "pytorch_lightning" logger namespace as well (WandbLogger is
# imported from that package further down): records below ERROR are dropped
# and nothing is propagated to ancestor loggers.
log = logging.getLogger("pytorch_lightning")
log.propagate = False
log.setLevel(logging.ERROR)

In [6]:
import torch, torch.nn as nn, torch.utils.data as data, torchvision as tv
from collections import OrderedDict
from skimage.util import random_noise
from torch.utils.data import Subset
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt

In [7]:
import wandb
from pytorch_lightning.loggers import WandbLogger

In [8]:
!wandb login

wandb: Currently logged in as: jifegi. Use `wandb login --relogin` to force relogin


In [9]:
# Tell wandb which notebook this kernel belongs to. os.path.join uses the
# separator of the running platform instead of a hard-coded Windows backslash.
cwd = os.getcwd()
os.environ['WANDB_NOTEBOOK_NAME'] = os.path.join(cwd, 'exps.ipynb')

In [10]:
# MNIST is downloaded (if missing) into a "data" folder under sys.path[0].
# NOTE(review): sys.path[0] depends on how the kernel was started - confirm it
# points where the data should live.
_mnist_root = f"{sys.path[0]}/data"

mnist_train = MNIST(_mnist_root, train=True, download=True, transform=transforms.ToTensor())

# Only the first 2000 samples of the official test split are kept for evaluation.
mnist_val = Subset(
    MNIST(_mnist_root, train=False, download=True, transform=transforms.ToTensor()),
    range(2000),
)

In [11]:
class MNISTModelMLP(pl.LightningModule):
    """
    Fully connected MNIST classifier packaged as a LightningModule.

    The network is a three-layer MLP (784 -> 256 -> 128 -> ``out_channels``)
    that emits log-probabilities. Classification metrics are tracked in three
    separate collections for the "Train", "Validation" and "Test" phases, and
    the metrics of every training batch are additionally appended to
    ``self.allmetrics``.
    """

    def __init__(
            self,
            in_channels=1,
            out_channels=10,
            learning_rate=1e-3,
            metrics=None,
            confusion_matrix=None,
            seed=None
    ):
        """
        Args:
            in_channels: Accepted for interface compatibility; the layers below
                are sized for flattened 28*28 inputs and do not read it.
            out_channels: Number of classes (size of the output layer and of
                the default metrics).
            learning_rate: Learning rate handed to Adam.
            metrics: Optional MetricCollection used as template for the three
                per-phase collections; defaults to accuracy / precision /
                recall / F1.
            confusion_matrix: Optional confusion-matrix metric; a
                MulticlassConfusionMatrix is created when omitted.
            seed: If given, seeds torch (CPU and CUDA) before the layers are
                created so the initial weights are reproducible.
        """
        super().__init__()
        # One dict per training batch: the batch metrics plus its (detached) loss.
        self.allmetrics = []
        if metrics is None:
            metrics = MetricCollection([
                MulticlassAccuracy(num_classes=out_channels),
                MulticlassPrecision(num_classes=out_channels),
                MulticlassRecall(num_classes=out_channels),
                MulticlassF1Score(num_classes=out_channels)
            ])

        # Independent metric state per phase.
        self.train_metrics = metrics.clone(prefix="Train/")
        self.val_metrics = metrics.clone(prefix="Validation/")
        self.test_metrics = metrics.clone(prefix="Test/")

        # Always define self.cm: previously a caller-supplied confusion matrix
        # left the attribute unset and the first metric update raised
        # AttributeError.
        if confusion_matrix is None:
            confusion_matrix = MulticlassConfusionMatrix(num_classes=out_channels)
        self.cm = confusion_matrix

        # Set seed for reproducibility initialization
        if seed is not None:
            torch.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)

        self.example_input_array = torch.zeros(1, 1, 28, 28)
        self.learning_rate = learning_rate

        self.criterion = torch.nn.CrossEntropyLoss()

        self.l1 = torch.nn.Linear(28 * 28, 256)
        self.l2 = torch.nn.Linear(256, 128)
        self.l3 = torch.nn.Linear(128, out_channels)

        # Number of finished epochs per phase; only used in the epoch-end printout.
        self.epoch_global_number = {"Train": 0, "Validation": 0, "Test": 0}

    def _metrics_for(self, phase):
        """Return the metric collection of ``phase`` or raise NotImplementedError."""
        if phase == "Train":
            return self.train_metrics
        if phase == "Validation":
            return self.val_metrics
        if phase == "Test":
            return self.test_metrics
        raise NotImplementedError

    def process_metrics(self, phase, y_pred, y, loss=None):
        """
        Calculate and log the batch metrics for the given phase.

        Args:
            phase (str): One of 'Train', 'Validation', or 'Test'
            y_pred (torch.Tensor): Model output; the class is taken as the
                argmax over dim 1.
            y (torch.Tensor): Ground truth labels
            loss (torch.Tensor, optional): Loss value; logged as
                "<phase>/Loss" when given.

        Raises:
            NotImplementedError: for any other phase name.
        """
        if loss is not None:
            self.log(f"{phase}/Loss", loss, prog_bar=True, logger=True)

        y_pred_classes = torch.argmax(y_pred, dim=1)
        # NOTE(review): `para` is forwarded to the metric collection as in the
        # original code. The default metrics are called with (preds, target);
        # presumably `para` exists for custom metrics - confirm it is still
        # needed, since a state dict is built on every batch.
        output = self._metrics_for(phase)(y_pred_classes, y, para=self.state_dict())

        if phase == "Train":
            # Keep a per-batch history. The record is a copy, so the extra
            # 'loss' entry is no longer logged a second time next to
            # "Train/Loss", and the loss is detached so the history does not
            # keep every batch's autograd graph alive.
            batch_record = dict(output)
            batch_record['loss'] = loss.detach() if isinstance(loss, torch.Tensor) else loss
            self.allmetrics.append(batch_record)

        # "Train/MulticlassAccuracy" -> "Train/Accuracy", etc.
        output = {f"{phase}/{key.replace('Multiclass', '').split('/')[-1]}": value for key, value in output.items()}
        self.log_dict(output, prog_bar=True, logger=True)

        if self.cm is not None:
            self.cm.update(y_pred_classes, y)

    def log_metrics_by_epoch(self, phase, print_cm=False, plot_cm=False):
        """
        Log the epoch-level metrics of ``phase`` and reset that phase's collection.

        Args:
            phase (str): One of 'Train', 'Validation', or 'Test'
            print_cm (bool): Currently unused.
            plot_cm (bool): Render the confusion matrix as a heatmap. The
                figure is closed right away and is not logged anywhere.

        Raises:
            NotImplementedError: for any other phase name (after the lookup of
                ``self.epoch_global_number[phase]``, which raises KeyError first
                for unknown phases).
        """
        print(f"Epoch end: {phase}, epoch number: {self.epoch_global_number[phase]}")
        collection = self._metrics_for(phase)
        output = collection.compute()
        collection.reset()

        output = {f"{phase}Epoch/{key.replace('Multiclass', '').split('/')[-1]}": value for key, value in output.items()}
        self.log_dict(output, prog_bar=True, logger=True)

        if self.cm is not None:
            # NOTE(review): self.cm is shared by all phases and never reset, so
            # this matrix accumulates train, validation and test batches over
            # the whole lifetime of the module.
            cm = self.cm.compute().cpu()
            if plot_cm:
                import seaborn as sns
                import matplotlib.pyplot as plt
                num_classes = cm.shape[0]
                fig = plt.figure(figsize=(10, 7))
                ax = sns.heatmap(cm.numpy(), annot=True, fmt="d", cmap="Blues")
                ax.set_xlabel("Predicted labels")
                ax.set_ylabel("True labels")
                ax.set_title("Confusion Matrix")
                ax.set_xticks(range(num_classes))
                ax.set_yticks(range(num_classes))
                ax.xaxis.set_ticklabels(list(range(num_classes)))
                ax.yaxis.set_ticklabels(list(range(num_classes)))
                plt.close(fig)

        self.epoch_global_number[phase] += 1

    def forward(self, x):
        """
        Map a batch of images to per-class log-probabilities.

        Args:
            x: 4-D tensor (unpacked as batch, channels, width, height); each
                sample is flattened before the first linear layer.
        """
        batch_size, channels, width, height = x.size()

        # (b, 1, 28, 28) -> (b, 1*28*28)
        x = x.view(batch_size, -1)
        x = torch.relu(self.l1(x))
        x = torch.relu(self.l2(x))
        x = self.l3(x)
        # Log-probabilities are returned; they are then fed to CrossEntropyLoss
        # in step().
        return torch.log_softmax(x, dim=1)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with ``self.learning_rate``."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    def step(self, batch, phase):
        """
        Shared train/validation/test step.

        Moves the batch to the module's device, computes the loss and hands
        predictions and loss to process_metrics (which also logs
        "<phase>/Loss", so it is not logged a second time here).

        Returns:
            The loss tensor of the batch.
        """
        images, labels = batch
        images = images.to(self.device)
        labels = labels.to(self.device)
        y_pred = self.forward(images)
        loss = self.criterion(y_pred, labels)

        self.process_metrics(phase, y_pred, labels, loss)

        return loss

    def training_step(self, batch, batch_id):
        """Run step() for the "Train" phase and return its loss."""
        return self.step(batch, "Train")

    def on_train_epoch_end(self):
        """Log and reset the epoch-level training metrics."""
        self.log_metrics_by_epoch("Train", print_cm=False, plot_cm=True)

    def validation_step(self, batch, batch_idx):
        """Run step() for the "Validation" phase and return its loss."""
        return self.step(batch, "Validation")

    def on_validation_epoch_end(self):
        """Log and reset the epoch-level validation metrics."""
        self.log_metrics_by_epoch("Validation", print_cm=False, plot_cm=True)

    def test_step(self, batch, batch_idx):
        """Run step() for the "Test" phase and return its loss."""
        return self.step(batch, "Test")

    def on_test_epoch_end(self):
        """Log and reset the epoch-level test metrics."""
        self.log_metrics_by_epoch("Test", print_cm=False, plot_cm=True)

In [12]:
def labelFlipping(dataset, indices, poisoned_percent=0, targeted=False, target_label=4, target_changed_label=7):
    """
    Return a deep copy of ``dataset`` with some labels of the given subset changed.

    Untargeted mode (``targeted=False``): ``int(poisoned_percent * len(indices))``
    randomly chosen positions from ``indices`` get a different label, drawn
    uniformly from the other labels present in the dataset.
    Targeted mode: every position in ``indices`` whose label equals
    ``target_label`` is relabelled to ``target_changed_label``.

    Args:
        dataset: Dataset exposing a ``targets`` attribute (tensor or anything
            ``torch.as_tensor`` accepts). It is not modified.
        indices: Positions (into ``dataset.targets``) that may be altered; any
            iterable of integers.
        poisoned_percent: Fraction of ``indices`` to flip in untargeted mode.
        targeted: Selects targeted mode.
        target_label: Label to replace in targeted mode.
        target_changed_label: Replacement label in targeted mode.

    Returns:
        The copied dataset. In untargeted mode the copy is returned unchanged
        when ``indices`` is empty, when more flips than indices are requested,
        or when the dataset holds fewer than two distinct labels.
    """
    new_dataset = copy.deepcopy(dataset)
    targets = torch.as_tensor(new_dataset.targets).detach().clone()
    # random.sample needs a real sequence; ndarray index sets are rejected.
    indices = list(indices)
    num_indices = len(indices)
    # Sorted list instead of a set: random.sample/choice on a set raises
    # TypeError on Python >= 3.11.
    class_list = sorted(set(targets.tolist()))

    if not targeted:
        num_flipped = int(poisoned_percent * num_indices)
        if num_indices == 0 or num_flipped > num_indices:
            return new_dataset
        if len(class_list) < 2:
            # No alternative label exists; the old rejection loop never ended here.
            return new_dataset

        for i in random.sample(indices, num_flipped):
            current = int(targets[i])
            alternatives = [label for label in class_list if label != current]
            targets[i] = random.choice(alternatives)
    else:
        for i in indices:
            if int(targets[i]) == int(target_label):
                targets[i] = target_changed_label

    new_dataset.targets = targets
    return new_dataset


In [13]:
def modelpoison(model: OrderedDict, poisoned_ratio, noise_type="gaussian"):
    """
    Return a copy of a state dict with random noise applied to every entry.

    Args:
        model: Mapping of layer name -> tensor. It is not modified.
        poisoned_ratio: Noise strength; forwarded as ``amount`` for "salt" and
            "s&p" and as ``var`` for "gaussian".
        noise_type: "salt", "gaussian" or "s&p" (modes of
            ``skimage.util.random_noise``). Any other value prints an error and
            the entries are copied without noise.

    Returns:
        OrderedDict with the same keys. Floating-point entries keep their
        original dtype, and every entry stays on its original device.
    """
    poisoned_model = OrderedDict()

    for layer, original in model.items():
        t = original.detach().clone()
        # random_noise needs at least one dimension; scalars are unwrapped below.
        single_point = len(t.shape) == 0
        if single_point:
            t = t.view(-1)

        # random_noise is a NumPy routine: give it a host-side array so tensors
        # living on an accelerator are handled too.
        values = t.cpu().numpy()
        if noise_type == "salt":
            # Replaces random pixels with 1.
            noisy = random_noise(values, mode=noise_type, amount=poisoned_ratio)
        elif noise_type == "gaussian":
            # Gaussian-distributed additive noise.
            noisy = random_noise(values, mode=noise_type, mean=0, var=poisoned_ratio, clip=True)
        elif noise_type == "s&p":
            # Replaces random pixels with either 1 or low_val, where low_val is 0 for unsigned images or -1 for signed images.
            noisy = random_noise(values, mode=noise_type, amount=poisoned_ratio)
        else:
            print("ERROR: @modelpoisoning: poison attack type not supported.")
            noisy = None

        if noisy is None:
            poisoned = t
        else:
            poisoned = torch.tensor(noisy)
            if t.is_floating_point():
                # Cast back to the parameter's own precision instead of
                # returning whatever dtype random_noise produced.
                poisoned = poisoned.to(dtype=t.dtype)
            poisoned = poisoned.to(t.device)

        if single_point:
            poisoned = poisoned[0]
        poisoned_model[layer] = poisoned

    return poisoned_model

In [14]:
def datapoison(dataset, indices, poisoned_percent, poisoned_ratio, targeted=False, target_label=3, noise_type="salt", backdoor_validation=False):
    """
    Return a deep copy of ``dataset`` whose images were tampered with.

    Untargeted: ``int(poisoned_percent * len(indices))`` randomly sampled
    positions from ``indices`` are replaced by a noisy version of the image
    (``random_noise`` with ``noise_type``, rescaled by 255 and stored as uint8).
    Targeted: an X mark is drawn (``add_x_to_image``) on every indexed image
    when ``backdoor_validation`` is set, otherwise only on indexed images whose
    label equals ``target_label``.

    The copy is returned untouched in untargeted mode when ``indices`` is empty
    or when more samples than indices are requested.
    """
    poisoned_dataset = copy.deepcopy(dataset)
    images = poisoned_dataset.data
    labels = poisoned_dataset.targets
    total = len(indices)

    if targeted:
        if backdoor_validation:
            # mark all instances for testing
            print("Datapoisoning: generating watermarked samples for testing (all classes)")
            marked = indices
        else:
            # only mark samples from specific target for training
            print("Datapoisoning: generating watermarked samples for training, target: " + str(target_label))
            marked = [idx for idx in indices if int(labels[idx]) == int(target_label)]
        for idx in marked:
            images[idx] = add_x_to_image(images[idx])
    else:
        count = int(poisoned_percent * total)
        if total == 0 or count > total:
            return poisoned_dataset

        # Keyword arguments for random_noise, per supported mode.
        if noise_type in ("salt", "s&p"):
            # "salt" replaces random pixels with 1; "s&p" with 1 or the low value.
            noise_kwargs = {"amount": poisoned_ratio}
        elif noise_type == "gaussian":
            # Gaussian-distributed additive noise.
            noise_kwargs = {"mean": 0, "var": poisoned_ratio, "clip": True}
        else:
            noise_kwargs = None

        for idx in random.sample(indices, count):
            sample = images[idx]
            if noise_kwargs is None:
                print("ERROR: @datapoisoning: poison attack type not supported.")
                replacement = sample
            else:
                noisy = random_noise(sample, mode=noise_type, **noise_kwargs)
                replacement = torch.tensor(np.array(255 * noisy, dtype='uint8'))
            images[idx] = replacement

    poisoned_dataset.data = images
    return poisoned_dataset


def add_x_to_image(img, size=10, value=255):
    """
    Draw an X into the top-left ``size`` x ``size`` corner of an image.

    Both diagonals of the corner square are set to ``value``. As before, the
    mark is written into ``img`` itself and a detached copy is returned.

    Args:
        img: Indexable image (``img[row][col]``) with at least ``size`` rows
            and columns.
        size: Edge length of the square that holds the X (default 10).
        value: Value written on the two diagonals (default 255).

    Returns:
        torch.Tensor: detached copy of the marked image.
    """
    for i in range(size):
        img[i][i] = value
        img[i][size - 1 - i] = value
    # as_tensor + clone copies without the "copy construct" UserWarning that
    # torch.tensor(<tensor>) emits.
    return torch.as_tensor(img).clone().detach()

In [15]:
class ChangeableSubset(Subset):
    """
    Subset that works on its own (optionally poisoned) copy of the dataset.

    The wrapped dataset is shallow-copied on construction. When requested it
    is then replaced by the result of ``labelFlipping`` and/or ``datapoison``
    applied to the subset's indices, in that order.
    """

    def __init__(self,
                 dataset,
                 indices,
                 label_flipping=False,
                 data_poisoning=False,
                 poisoned_percent=0,
                 poisoned_ratio=0,
                 targeted=False,
                 target_label=0,
                 target_changed_label=0,
                 noise_type="salt"):
        # Subset stores dataset and indices; hand it the copy straight away.
        super().__init__(copy.copy(dataset), indices)

        # Attack configuration, kept on the instance for later inspection.
        self.label_flipping = label_flipping
        self.data_poisoning = data_poisoning
        self.poisoned_percent = poisoned_percent
        self.poisoned_ratio = poisoned_ratio
        self.targeted = targeted
        self.target_label = target_label
        self.target_changed_label = target_changed_label
        self.noise_type = noise_type

        if label_flipping:
            self.dataset = labelFlipping(
                self.dataset, indices, poisoned_percent, targeted, target_label, target_changed_label
            )
        if data_poisoning:
            self.dataset = datapoison(
                self.dataset, indices, poisoned_percent, poisoned_ratio, targeted, target_label, noise_type
            )

    def __getitem__(self, idx):
        """Look up one position, or a list of positions, through ``self.indices``."""
        if not isinstance(idx, list):
            return self.dataset[self.indices[idx]]
        mapped = [self.indices[i] for i in idx]
        return self.dataset[mapped]

    def __len__(self):
        """Number of samples in the subset."""
        return len(self.indices)


In [16]:
# L0 norm, number of non zero items
def l0_norm(x):
    """
    Count the non-zero entries of ``x``.

    Args:
        x: Tensor, array or (nested) sequence of numbers; it is converted to
           float32 before counting.

    Returns:
        int: number of entries different from zero.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # as_tensor accepts tensors without the "copy construct" UserWarning that
    # torch.tensor(<tensor>) emits; detach keeps autograd out of the reduction.
    x = torch.as_tensor(x, dtype=torch.float32).detach().to(device)
    return torch.sum(x != 0).item()

# L1 norm, abs value of all items
def l1_norm(x):
    """
    Sum of the absolute values of all entries of ``x``.

    Args:
        x: Tensor, array or (nested) sequence of numbers; it is converted to
           float32 before the reduction.

    Returns:
        float: the L1 norm.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # as_tensor accepts tensors without the "copy construct" UserWarning that
    # torch.tensor(<tensor>) emits; detach keeps autograd out of the reduction.
    x = torch.as_tensor(x, dtype=torch.float32).detach().to(device)
    return torch.sum(torch.abs(x)).item()

# L2 norm, the square root of the sum of the squares of the items
def l2_norm(x):
    """
    Euclidean norm over all entries of ``x``.

    Args:
        x: Tensor, array or (nested) sequence of numbers; it is converted to
           float32 before the reduction.

    Returns:
        float: square root of the sum of squared entries.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # as_tensor accepts tensors without the "copy construct" UserWarning that
    # torch.tensor(<tensor>) emits; detach keeps autograd out of the reduction.
    x = torch.as_tensor(x, dtype=torch.float32).detach().to(device)
    return torch.sqrt(torch.sum(x**2)).item()

# L∞ norm, the maximum absolute value among the items
def l_inf_norm(x):
    """
    Largest absolute value among the entries of ``x``.

    Args:
        x: Tensor, array or (nested) sequence of numbers; it is converted to
           float32 before the reduction.

    Returns:
        float: the maximum absolute entry, or 0.0 for empty input (taking the
        maximum of an empty tensor would raise).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # as_tensor accepts tensors without the "copy construct" UserWarning that
    # torch.tensor(<tensor>) emits; detach keeps autograd out of the reduction.
    x = torch.as_tensor(x, dtype=torch.float32).detach().to(device)
    if x.numel() == 0:
        return 0.0
    return torch.max(torch.abs(x)).item()


In [17]:
# Labels of the MNIST training set as a NumPy array. This global is read by
# plot_class_distribution and passed to dirichlet_sampling_balanced by the
# experiment cell.
targets = np.array(mnist_train.targets)

# Define function for Dirichlet sampling and balanced data distribution
def dirichlet_sampling_balanced(targets, alpha, num_clients, seed=None):
    """
    Split sample indices among clients with a per-class Dirichlet distribution.

    For every distinct label the indices of that class are shuffled and cut
    into ``num_clients`` pieces whose relative sizes are drawn from
    Dirichlet(alpha, ..., alpha). Afterwards every client is truncated to the
    size of the smallest client so that all clients hold the same number of
    samples.

    Args:
        targets: 1-D array-like with the label of every sample.
        alpha: Dirichlet concentration parameter, shared by all clients.
        num_clients: Number of clients to create.
        seed: Optional seed for a private random generator. With the default
            (None) the global ``np.random`` state is used.

    Returns:
        list[list]: one list of sample indices per client, all of equal length.
    """
    targets = np.asarray(targets)
    rng = np.random if seed is None else np.random.RandomState(seed)
    data_per_client = [[] for _ in range(num_clients)]

    # Iterate over the labels that actually occur instead of assuming they
    # are exactly 0..K-1.
    for label in np.unique(targets):
        idx_k = np.where(targets == label)[0]
        rng.shuffle(idx_k)
        proportions = rng.dirichlet(np.repeat(alpha, num_clients))
        cut_points = (np.cumsum(proportions) * len(idx_k)).astype(int)[:-1]
        for bucket, part in zip(data_per_client, np.split(idx_k, cut_points)):
            bucket.extend(part)

    # Ensure each client has the same number of samples
    min_samples = min(len(data) for data in data_per_client)
    balanced_data_per_client = []
    for data in data_per_client:
        # Each list is ordered class by class, so cutting its tail directly
        # would discard the highest labels first; shuffle before truncating.
        rng.shuffle(data)
        balanced_data_per_client.append(data[:min_samples])

    return balanced_data_per_client

In [18]:
# Experiment settings.
# NOTE(review): max_round, max_epoch and alpha_list are not referenced by any
# other cell shown here (the experiment cell defines its own maxRound /
# maxEpoch / alpha), and num_clients is reassigned to 10 in the topology cell.
max_round = 5
max_epoch = 3
alpha_list = [100, 10, 1, 0.1, 0.01, 0.001]
num_clients = 2

In [19]:
def plot_class_distribution(client_indices, num_clients):
    """
    Draw one bar chart per client showing how many samples of each class it holds.

    Labels are looked up in the module-level ``mnist_train`` dataset.

    Args:
        client_indices: Per-client collections of sample indices into
            ``mnist_train``.
        num_clients: Number of clients (and sub-plots) to draw.
    """
    # squeeze=False always returns a 2-D array of Axes, so a single client no
    # longer breaks on ``.flatten()``.
    fig, axs = plt.subplots(1, num_clients, figsize=(8, 4), squeeze=False)
    axs = axs.flatten()

    for i in range(num_clients):
        labels = mnist_train.targets[client_indices[i]]
        unique, counts = np.unique(labels, return_counts=True)
        axs[i].bar(unique, counts, tick_label=unique)
        axs[i].set_title(f'Client {i+1}')
        axs[i].set_xlabel('Class')
        axs[i].set_ylabel('Frequency')

    plt.tight_layout()
    plt.show()



In [20]:
def fed_avg(models):
    """
    Uniform (unweighted) average of model state dicts.

    Every model contributes with the same weight; sample counts are not taken
    into account.

    Args:
        models: Sequence of state dicts (layer name -> tensor) sharing the
            same keys and shapes. The inputs are not modified.

    Returns:
        dict: layer name -> averaged tensor, or None when ``models`` is empty.
        Integer tensors are averaged with floor division so they keep their
        dtype.
    """
    if len(models) == 0:
        return None

    num_models = len(models)

    # Start from zeros shaped like the first model.
    accum = {layer: torch.zeros_like(param) for layer, param in models[0].items()}

    # Sum all models layer by layer.
    for model in models:
        for layer in accum:
            accum[layer] += model[layer]

    # Divide by the number of models.
    averaged = {}
    for layer, total in accum.items():
        if total.is_floating_point() or total.is_complex():
            averaged[layer] = total / num_models
        else:
            # In-place true division is not allowed on integer tensors.
            averaged[layer] = torch.div(total, num_models, rounding_mode="floor")

    return averaged

In [31]:
class local_node():
    """
    One participant of the decentralised training experiment.

    A node owns a local MNISTModelMLP, an 80/20 train/validation split of its
    share of the training set, the test set, and the list of neighbours whose
    models it averages with ``fed_avg`` after each round of local training.
    """

    def __init__(
            self,
            node_id: int,
            experimentsName=None,
            maxRound: int = 10,
            maxEpoch: int = 3,
            train_dataset=None,
            test_dataset=None,
            indices=None,
            neiList=None,
            experimentsName_path = None,
            logger=None
    ):
        """
        Args:
            node_id: Identifier of this node.
            experimentsName: Experiment name, used in checkpoint file names.
            maxRound: Stored on the node; not read by the methods of this class.
            maxEpoch: Number of epochs of every local training run.
            train_dataset: Full training dataset; the node uses ``indices`` of it.
            test_dataset: Dataset used by ``trainer.test``.
            indices: Sample indices belonging to this node (default: none).
            neiList: Ids of the nodes whose models are aggregated (default: none).
            experimentsName_path: Directory that receives the checkpoints.
            logger: Lightning logger handed to every Trainer.
        """
        # None instead of [] as default: a mutable default would be shared by
        # every node created without the argument.
        self.node_id = node_id
        self.indices = [] if indices is None else indices
        self.logger=logger
        self.model = MNISTModelMLP()
        self.neiList = [] if neiList is None else neiList
        self.maxRound = maxRound
        self.maxEpoch = maxEpoch
        self.experimentsName = experimentsName
        self.nei_models = {}
        self.dataset = train_dataset
        self.test_dataset = test_dataset

        tr_subset = ChangeableSubset(self.dataset, self.indices)
        num_train = int(len(tr_subset) * 0.8)
        self.data_train, self.data_val = random_split(
            tr_subset,
            [num_train, len(tr_subset) - num_train],
        )

        self.curren_round = 0
        self.aggregated_model = MNISTModelMLP()

        self.local_model_record = {}
        self.local_model_record[0] = self.model

        self.aggregated_model_record = {}
        self.aggregated_model_record[0] = self.aggregated_model

        # round -> {neighbour id -> snapshot of that neighbour's weights}
        self.nei_model_record = {}
        self.experimentsName_path = experimentsName_path

    @staticmethod
    def _accelerator():
        """Use the GPU when one is available, otherwise fall back to the CPU."""
        return "cuda" if torch.cuda.is_available() else "cpu"

    @staticmethod
    def _as_state_dict(model):
        """Return ``model.state_dict()`` for modules; state dicts pass through."""
        if isinstance(model, torch.nn.Module):
            return model.state_dict()
        return model

    def get_model(self):
        """Return the node's current local model."""
        return self.model

    def next_round(self):
        """Advance the node's round counter by one."""
        self.curren_round += 1

    def get_current_round(self):
        """Return the node's round counter."""
        return self.curren_round

    def set_model(self, round, model):
        """Record ``model`` as the local model of ``round``."""
        # The original wrote to a non-existent ``model_record`` attribute.
        self.local_model_record[round] = model

    def set_current_model(self, model):
        """Replace the current local model."""
        self.model = model

    def set_current_aggregated_model(self, model):
        """Replace the current aggregated model."""
        self.aggregated_model = model

    def replace_local_aggregated_model(self):
        """Make the aggregated model the local model (both names then refer to the same object)."""
        self.model = self.aggregated_model

    def get_neiList(self):
        """Return the list of neighbour ids."""
        return self.neiList

    def set_neiList(self, new_neiList):
        """Replace the list of neighbour ids."""
        self.neiList = new_neiList

    def add_nei_model(self, round, nei_id, nei_model):
        """
        Store the weights received from neighbour ``nei_id`` for ``round``.

        A deep copy of the state dict is kept rather than a reference to the
        sender's module: the sender later overwrites its own weights in place
        during aggregation, which would otherwise change what this node
        averages.
        """
        snapshot = copy.deepcopy(self._as_state_dict(nei_model))
        self.nei_model_record.setdefault(round, {})[nei_id] = snapshot

    def local_training(self):
        """Train the local model for ``maxEpoch`` epochs, test it and save a checkpoint."""
        trainer = pl.Trainer(logger=self.logger,
                             max_epochs=self.maxEpoch,
                             devices=1,
                             accelerator=self._accelerator(),
                             enable_progress_bar=False,
                            )
        trainer.fit(self.model, train_dataloaders=DataLoader(self.data_train, batch_size=64,shuffle=True), val_dataloaders=DataLoader(self.data_val, batch_size=64,shuffle=False))

        print(f"Performance of Node {self.node_id} before aggregation at round {self.curren_round}")
        trainer.test(self.model, DataLoader(self.test_dataset, batch_size=64,shuffle=False))

        trainer.save_checkpoint(f"{self.experimentsName_path}/checkpoint_{self.experimentsName}_node_{self.node_id}_round_{self.curren_round}.ckpt")

    def aggregation(self):
        """
        Average the models received for the current round and adopt the result.

        Only models from ids in ``neiList`` are used; the node's own weights
        are added when the node is not part of its own neighbour list. The
        averaged weights are loaded into ``aggregated_model``, which becomes
        the local model and is evaluated on the test set.
        """
        current_round_nei_models = self.nei_model_record[self.curren_round]
        nei_models_list = [
            self._as_state_dict(received)
            for nei, received in current_round_nei_models.items()
            if nei in self.neiList
        ]
        if self.node_id not in self.neiList:
            nei_models_list.append(self.model.state_dict())

        print(f"Node {self.node_id} aggregate model with {self.neiList}")
        aggregated_model_para = fed_avg(nei_models_list)
        self.aggregated_model.load_state_dict(aggregated_model_para)
        self.replace_local_aggregated_model()

        trainer = pl.Trainer(logger=self.logger,
                             devices=1,
                             accelerator=self._accelerator(),)
        print(f"Performance of Node {self.node_id} after aggregation at round {self.curren_round}")
        trainer.test(self.model, DataLoader(self.test_dataset, batch_size=64,shuffle=False))


In [22]:
def generate_logger_config(project, group, dataset, dist_alpha, node_id, epoch, round):
    """
    Assemble the keyword dictionary describing one node's logger run.

    The run is named ``node_<node_id>``; dataset, Dirichlet alpha, round and
    epoch are nested under the ``config`` key.
    """
    run_settings = dict(dataset=dataset, dist_alpha=dist_alpha, round=round, epoch=epoch)
    return dict(
        project=project,
        group=group,
        name=f"node_{node_id}",
        config=run_settings,
    )


In [23]:
def adjacency_matrix_to_nei_list(adjacency_matrix):
    """
    Convert an adjacency matrix into per-node neighbour lists.

    Args:
        adjacency_matrix: Square 0/1 matrix given as nested lists, ndarray or
            ``np.matrix``.

    Returns:
        dict: node index -> list that starts with the node itself, followed by
        every other node whose matrix entry equals 1. A 1 on the diagonal does
        not add the node a second time.
    """
    # np.asarray turns np.matrix rows into plain 1-D rows; iterating an
    # np.matrix directly yields 1xN matrices, whose comparison cannot be used
    # in an ``if``.
    matrix = np.asarray(adjacency_matrix)

    nei_list = {}
    for node, row in enumerate(matrix):
        neighbours = [node]
        for other, entry in enumerate(row):
            if entry == 1 and other != node:
                neighbours.append(other)
        nei_list[node] = neighbours
    return nei_list

In [24]:
# Communication topology: a complete graph over the clients.
# Note that this reassigns num_clients (set to 2 in the settings cell above).
num_clients = 10
G = nx.complete_graph(num_clients)
adj_matrix = nx.adjacency_matrix(G).todense()
# node id -> [node itself, then every node marked with 1 in its matrix row]
nei_list = adjacency_matrix_to_nei_list(adj_matrix)

In [46]:
# --- Experiment settings -------------------------------------------------
alpha = 100
node_list = {}
maxRound = 2
maxEpoch = 3
train_dataset = mnist_train
test_dataset = mnist_val

# Timestamp (dd_mm_YYYY_HH_MM_SS) that makes every run's name unique.
now = datetime.now()
dt_string = now.strftime("%d_%m_%Y_%H_%M_%S")

experimentsName = f'{num_clients}_clients_alpha_{alpha}_MNIST_fully'+dt_string
client_indices = dirichlet_sampling_balanced(targets, alpha, num_clients)
cwd = os.getcwd()

logger_list = {}
experimentsName_path = os.path.join(cwd, 'experiments', experimentsName)
# makedirs also creates the parent "experiments" folder on a fresh checkout
# and does not fail when the directory already exists.
os.makedirs(experimentsName_path, exist_ok=True)

# --- Build one node per client, each with its own CSV logger -------------
for client in range(num_clients):
    indices = client_indices[client]
    node_id = client

    neiList = nei_list[client]
    csvlogger = CSVLogger(save_dir=experimentsName_path, name=f"node_{client}")
    logger_list[node_id] = csvlogger

    node = local_node(node_id, experimentsName, maxRound, maxEpoch, train_dataset, test_dataset, indices, neiList, experimentsName_path, logger_list[node_id])
    node_list[node_id] = node

# --- Training rounds -----------------------------------------------------
# The loop variable is not called `round`, so the builtin round() stays
# usable in the rest of the kernel session.
for round_idx in range(maxRound):
    # Phase 1: every node trains locally and sends its model to each node in
    # its neighbour list (recorded under the 1-based round number).
    for node_id, node in node_list.items():
        node.next_round()
        node.local_training()
        for nei in node.neiList:
            node_list[nei].add_nei_model(round_idx + 1, node_id, node.model)

    # Phase 2: once all models are exchanged, every node aggregates.
    for node_id, node in node_list.items():
        node.aggregation()


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
d:\git\beidou\venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:430: The dataloader, val_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 16 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.
d:\git\beidou\venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:430: The dataloader, train_dataloader, does

Epoch end: Validation, epoch number: 0


[34m[1mwandb[0m: Currently logged in as: [33mjifegi[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 0 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
d:\git\beidou\venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:430: The dataloader, test_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 16 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0

d:\git\beidou\venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:430: The dataloader, val_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 16 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.
d:\git\beidou\venv\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:430: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 16 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.





d:\git\beidou\venv\Lib\site-packages\pytorch_lightning\loggers\wandb.py:396: There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse this run. If this is not desired, call `wandb.finish()` before instantiating `WandbLogger`.


Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 1 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 2 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 3 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 4 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1
Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2


`Trainer.fit` stopped: `max_epochs=3` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 5 before aggregation at round 1
Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 6 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 7 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 8 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 9 before aggregation at round 1


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 0


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Node 0 aggregate model with [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Performance of Node 0 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 59.29it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 51.28it/s]


Node 1 aggregate model with [1, 0, 2, 3, 4, 5, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 1 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 64.28it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 54.59it/s]


Node 2 aggregate model with [2, 0, 1, 3, 4, 5, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 2 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 57.75it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 50.42it/s]


Node 3 aggregate model with [3, 0, 1, 2, 4, 5, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 3 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 60.14it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 51.87it/s]


Node 4 aggregate model with [4, 0, 1, 2, 3, 5, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 4 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 51.33it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 44.84it/s]


Node 5 aggregate model with [5, 0, 1, 2, 3, 4, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 5 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 62.60it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 53.23it/s]


Node 6 aggregate model with [6, 0, 1, 2, 3, 4, 5, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 6 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 63.18it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 54.51it/s]


Node 7 aggregate model with [7, 0, 1, 2, 3, 4, 5, 6, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 7 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 54.35it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 47.90it/s]


Node 8 aggregate model with [8, 0, 1, 2, 3, 4, 5, 6, 7, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 8 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 56.06it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 49.10it/s]


Node 9 aggregate model with [9, 0, 1, 2, 3, 4, 5, 6, 7, 8]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 9 after aggregation at round 1
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 57.55it/s]Epoch end: Test, epoch number: 0
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 50.03it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0


d:\git\beidou\venv\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:612: Checkpoint directory .\local_test\10_clients_alpha_100_MNIST_fully27_05_2024_19_00_17_node_0\checkpoints exists and is not empty.


Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 0 before aggregation at round 2


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 1 before aggregation at round 2


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1
Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2


`Trainer.fit` stopped: `max_epochs=3` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 2 before aggregation at round 2
Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1
Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2


`Trainer.fit` stopped: `max_epochs=3` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 3 before aggregation at round 2
Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1
Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2


`Trainer.fit` stopped: `max_epochs=3` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 4 before aggregation at round 2
Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1
Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2


`Trainer.fit` stopped: `max_epochs=3` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 5 before aggregation at round 2
Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 6 before aggregation at round 2


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 7 before aggregation at round 2


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1


`Trainer.fit` stopped: `max_epochs=3` reached.


Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2
Performance of Node 8 before aggregation at round 2


You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Epoch end: Validation, epoch number: 0
Epoch end: Validation, epoch number: 1
Epoch end: Train, epoch number: 0
Epoch end: Validation, epoch number: 2
Epoch end: Train, epoch number: 1
Epoch end: Validation, epoch number: 3
Epoch end: Train, epoch number: 2


`Trainer.fit` stopped: `max_epochs=3` reached.
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 9 before aggregation at round 2
Epoch end: Test, epoch number: 1


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Node 0 aggregate model with [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Performance of Node 0 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 59.70it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 51.12it/s]


Node 1 aggregate model with [1, 0, 2, 3, 4, 5, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 1 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 59.46it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 51.71it/s]


Node 2 aggregate model with [2, 0, 1, 3, 4, 5, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 2 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 56.82it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:01<00:00, 22.02it/s]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Node 3 aggregate model with [3, 0, 1, 2, 4, 5, 6, 7, 8, 9]
Performance of Node 3 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 58.76it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 50.70it/s]


Node 4 aggregate model with [4, 0, 1, 2, 3, 5, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 4 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 58.35it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 50.92it/s]


Node 5 aggregate model with [5, 0, 1, 2, 3, 4, 6, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 5 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 51.42it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 44.99it/s]


Node 6 aggregate model with [6, 0, 1, 2, 3, 4, 5, 7, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 6 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 58.43it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 50.92it/s]


Node 7 aggregate model with [7, 0, 1, 2, 3, 4, 5, 6, 8, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 7 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 58.66it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 50.78it/s]


Node 8 aggregate model with [8, 0, 1, 2, 3, 4, 5, 6, 7, 9]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 8 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 58.01it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 50.52it/s]


Node 9 aggregate model with [9, 0, 1, 2, 3, 4, 5, 6, 7, 8]


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Ti') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Performance of Node 9 after aggregation at round 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 59.12it/s]Epoch end: Test, epoch number: 2
Testing DataLoader 0: 100%|██████████| 32/32 [00:00<00:00, 51.58it/s]


0,1
Test/Accuracy,██████████▁▁▁▁▁▁▁▁▁▁████████████████████
Test/F1Score,██████████▁▁▁▁▁▁▁▁▁▁████████████████████
Test/Loss,▁▁▁▁▁▁▁▁▁▁██████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Test/Precision,██████████▁▁▁▁▁▁▁▁▁▁████████████████████
Test/Recall,██████████▁▁▁▁▁▁▁▁▁▁████████████████████
TestEpoch/Accuracy,██████████▁▁▁▁▁▁▁▁▁▁████████████████████
TestEpoch/F1Score,██████████▁▁▁▁▁▁▁▁▁▁████████████████████
TestEpoch/Precision,██████████▁▁▁▁▁▁▁▁▁▁████████████████████
TestEpoch/Recall,██████████▁▁▁▁▁▁▁▁▁▁████████████████████
Train/Accuracy,▆▅▅▇▃▅▁▇▅▆▄▆▅▆▅▄▃█▁▆▃▅▂▆▃▄▅▅▃▇▃▄▃▄▄▅▆▅▂▅

0,1
Test/Accuracy,0.8804
Test/F1Score,0.86911
Test/Loss,0.37805
Test/Precision,0.88175
Test/Recall,0.8804
TestEpoch/Accuracy,0.88361
TestEpoch/F1Score,0.88228
TestEpoch/Precision,0.88358
TestEpoch/Recall,0.88361
Train/Accuracy,0.87306


In [None]:
# For every alpha in alpha_list, partition the training targets with
# dirichlet_sampling_balanced, plot the partition, and register one freshly
# constructed MNISTModelMLP per client together with its train/validation split.
client_models = {}
for alpha in alpha_list:
    client_indices = dirichlet_sampling_balanced(targets, alpha, num_clients)
    plot_class_distribution(client_indices, num_clients)
    for client in range(num_clients):
        indices = client_indices[client]
        tr_subset = ChangeableSubset(mnist_train, indices)
        # 80% of the client's samples are used for training, the rest for validation.
        n_train = int(len(tr_subset) * 0.8)
        data_train, data_val = random_split(tr_subset, [n_train, len(tr_subset) - n_train])
        model = MNISTModelMLP()
        # The four norm lists start empty; the training cells append to them.
        client_models[f'client_{client}_alpha_{alpha}'] = dict(
            client=client,
            indices=indices,
            data_train=data_train,
            data_val=data_val,
            model=model,
            l0_norm=[],
            l1_norm=[],
            l2_norm=[],
            linf_norm=[],
        )

In [None]:
# models = [client_models[i]['model'].state_dict() for i in range(num_clients)]

In [None]:
def flatten_params(model_state_dict):
    """Concatenate every tensor of a state dict into a single 1-D tensor.

    Args:
        model_state_dict: Mapping from entry name to ``torch.Tensor``. Entries
            are concatenated in the mapping's iteration order.

    Returns:
        A 1-D tensor containing all elements of all entries. An empty mapping
        yields an empty 1-D tensor instead of raising.
    """
    # reshape(-1) also handles non-contiguous tensors, for which view(-1) raises.
    flat_parts = [param.reshape(-1) for param in model_state_dict.values()]
    if not flat_parts:
        # torch.cat refuses an empty list; return an explicit empty vector.
        return torch.empty(0)
    return torch.cat(flat_parts, 0)

In [None]:
# List the registered entries; keys follow the pattern 'client_{client}_alpha_{alpha}'.
client_models.keys()

In [None]:
# Select one registered client entry and pull out its model and training split.
model_name = 'client_0_alpha_100'
selected_client = client_models[model_name]
test_model, test_data = selected_client['model'], selected_client['data_train']

In [None]:
def get_changes(current, previous):
    """Mean per-entry norm of the difference between two state dicts.

    Args:
        current: Mapping from entry name to tensor.
        previous: Mapping providing a tensor of matching shape for every key
            of ``current``.

    Returns:
        The mean of ``torch.norm(current[k] - previous[k])`` over the keys of
        ``current`` as a NumPy float (``nan`` for an empty mapping, as
        ``np.mean`` of an empty list gives).
    """
    # Turn each norm into a plain Python float before averaging: handing torch
    # tensors to np.mean fails for CUDA tensors and for tensors that require
    # grad. Casting to float keeps integer entries from breaking torch.norm.
    per_layer = [
        torch.norm(current[layer].detach().float() - previous[layer].detach().float()).item()
        for layer in current
    ]
    changes = np.mean(per_layer)
    return changes


Data poisoning

In [None]:
# Wrap `indices` (not assigned in this cell; it holds whatever an earlier cell
# left behind) in a ChangeableSubset with data poisoning switched on.
tr_subset = ChangeableSubset(
    mnist_train,
    indices,
    label_flipping=False,
    targeted=False,
    data_poisoning=True,
    poisoned_percent=1,
    poisoned_ratio=0.5,
)

In [None]:
# Fetch the sample at position 1 of the poisoned subset.
a = tr_subset[1]

In [None]:
# Shape of the sample's first element (presumably the image tensor — confirm against ChangeableSubset).
a[0].shape

In [None]:
# Render a[0][0] in grayscale (presumably the single image channel of the sample — TODO confirm).
plt.imshow(a[0][0], cmap='gray')

In [None]:
# Register one freshly constructed model per poisoning percentage. Each entry
# uses the first partition returned by dirichlet_sampling_balanced, which is
# re-drawn on every iteration.
# NOTE(review): `client` is not assigned in this cell. The dictionary key and
# the 'client' field take whatever value an earlier cell left in `client`,
# while the data always comes from client_indices[0] — confirm this is intended.
datapoisoing_client_models = {}
for poisoned_percent in (0.2, 0.4, 0.6, 0.8, 1.0):
    client_indices = dirichlet_sampling_balanced(targets, 100, num_clients)
    indices = client_indices[0]
    tr_subset = ChangeableSubset(
        mnist_train,
        indices,
        label_flipping=False,
        targeted=False,
        data_poisoning=True,
        poisoned_percent=poisoned_percent,
        poisoned_ratio=0.5,
    )

    # 80/20 train/validation split of the poisoned subset.
    n_train = int(len(tr_subset) * 0.8)
    data_train, data_val = random_split(tr_subset, [n_train, len(tr_subset) - n_train])

    model = MNISTModelMLP()
    datapoisoing_client_models[f'datapoisoing_client_{client}_pp_{poisoned_percent}'] = dict(
        client=client,
        indices=indices,
        data_train=data_train,
        data_val=data_val,
        model=model,
        l0_norm=[],
        l1_norm=[],
        l2_norm=[],
        linf_norm=[],
    )


In [None]:
# Train every data-poisoned model for `max_round` rounds. Round 0 trains the
# registered model; each later round resumes from the previous round's
# checkpoint and records the L0/L1/L2/Linf norms of the weight change made
# during that round.
for model_name in datapoisoing_client_models.keys():
    # `round_idx` instead of `round`: the loop variable stays in the notebook
    # namespace and would otherwise shadow the builtin round().
    for round_idx in range(max_round):
        entry = datapoisoing_client_models[model_name]
        test_data = entry['data_train']
        if round_idx == 0:
            test_model = entry['model']
            trainer = pl.Trainer(max_epochs=max_epoch, accelerator='cuda', devices=1)
            trainer.fit(test_model,
                        train_dataloaders=data.DataLoader(test_data, batch_size=64, shuffle=True))
            trainer.test(test_model, data.DataLoader(mnist_val, batch_size=64, shuffle=True))
            trainer.save_checkpoint(f"checkpoint_{model_name}_{round_idx}.ckpt")
        else:
            # Same epoch budget as round 0, without depending on the round-0 trainer object.
            new_trainer = pl.Trainer(max_epochs=max_epoch, accelerator='cuda', devices=1)
            # load_from_checkpoint is a classmethod: call it on the class. Recent
            # Lightning releases raise when it is invoked on an instance.
            test_model = MNISTModelMLP.load_from_checkpoint(f"checkpoint_{model_name}_{round_idx-1}.ckpt")
            entry['model'] = test_model
            # Snapshot the just-loaded weights before training instead of reading
            # the same checkpoint from disk a second time. Both vectors are moved
            # to CPU so the subtraction does not depend on which device the
            # loader or the trainer left the weights on.
            old_flatten = flatten_params(test_model.state_dict()).detach().cpu().clone()
            new_trainer.fit(test_model,
                            train_dataloaders=data.DataLoader(test_data, batch_size=64, shuffle=True))
            new_trainer.test(test_model, data.DataLoader(mnist_val, batch_size=64, shuffle=True))
            new_trainer.save_checkpoint(f"checkpoint_{model_name}_{round_idx}.ckpt")
            current_flatten = flatten_params(test_model.state_dict()).detach().cpu()
            delta = current_flatten - old_flatten
            entry['l0_norm'].append(l0_norm(delta))
            entry['l1_norm'].append(l1_norm(delta))
            entry['l2_norm'].append(l2_norm(delta))
            entry['linf_norm'].append(l_inf_norm(delta))

Label Flipping

In [None]:
# Register one freshly constructed model per poisoning percentage with
# untargeted label flipping enabled. Each entry uses the first partition
# returned by dirichlet_sampling_balanced, which is re-drawn on every iteration.
# NOTE(review): `client` is not assigned in this cell. The dictionary key and
# the 'client' field take whatever value an earlier cell left in `client`,
# while the data always comes from client_indices[0] — confirm this is intended.
labelFlipping_client_models = {}

for poisoned_percent in (0.2, 0.4, 0.6, 0.8, 1.0):
    client_indices = dirichlet_sampling_balanced(targets, 100, num_clients)
    indices = client_indices[0]
    tr_subset = ChangeableSubset(
        mnist_train,
        indices,
        label_flipping=True,
        targeted=False,
        poisoned_percent=poisoned_percent,
    )

    # 80/20 train/validation split of the label-flipped subset.
    n_train = int(len(tr_subset) * 0.8)
    data_train, data_val = random_split(tr_subset, [n_train, len(tr_subset) - n_train])

    model = MNISTModelMLP()
    labelFlipping_client_models[f'labelFlipping_client_{client}_pp_{poisoned_percent}'] = dict(
        client=client,
        indices=indices,
        data_train=data_train,
        data_val=data_val,
        model=model,
        l0_norm=[],
        l1_norm=[],
        l2_norm=[],
        linf_norm=[],
    )


In [None]:
# L2 norms of the per-round weight change recorded for the 0.4 data-poisoning entry.
# NOTE(review): the 'client_1' part of the key exists only if `client` was 1 when the dictionary was built.
datapoisoing_client_models['datapoisoing_client_1_pp_0.4']['l2_norm']

In [None]:
# L2 norms recorded for the 0.2 label-flipping entry.
# NOTE(review): the cell that fills this list comes later in the notebook, so on a
# top-to-bottom run the list is still empty at this point.
labelFlipping_client_models['labelFlipping_client_1_pp_0.2']['l2_norm']

In [None]:
# L2 norms recorded for client 1 at alpha = 0.001 (requires 0.001 in alpha_list — verify).
# NOTE(review): the benign training cell that fills this list comes later in the notebook.
client_models['client_1_alpha_0.001']['l2_norm']

In [None]:
# L2 norms recorded for client 1 at alpha = 100 (requires 100 in alpha_list — verify).
# NOTE(review): the benign training cell that fills this list comes later in the notebook.
client_models['client_1_alpha_100']['l2_norm']

In [None]:
# Train every label-flipping model for `max_round` rounds, round by round
# across all models. Round 0 trains the registered model; each later round
# resumes from the previous round's checkpoint and records the L0/L1/L2/Linf
# norms of the weight change made during that round.
# `round_idx` instead of `round`: the loop variable stays in the notebook
# namespace and would otherwise shadow the builtin round().
for round_idx in range(max_round):
    for model_name in labelFlipping_client_models.keys():
        entry = labelFlipping_client_models[model_name]
        test_data = entry['data_train']
        if round_idx == 0:
            test_model = entry['model']
            trainer = pl.Trainer(max_epochs=max_epoch, accelerator='cuda', devices=1)
            trainer.fit(test_model,
                        train_dataloaders=data.DataLoader(test_data, batch_size=64, shuffle=True))
            trainer.test(test_model, data.DataLoader(mnist_val, batch_size=64, shuffle=True))
            trainer.save_checkpoint(f"checkpoint_{model_name}_{round_idx}.ckpt")
        else:
            # Same epoch budget as round 0, without depending on the round-0 trainer object.
            new_trainer = pl.Trainer(max_epochs=max_epoch, accelerator='cuda', devices=1)
            # load_from_checkpoint is a classmethod: call it on the class. Recent
            # Lightning releases raise when it is invoked on an instance.
            test_model = MNISTModelMLP.load_from_checkpoint(f"checkpoint_{model_name}_{round_idx-1}.ckpt")
            entry['model'] = test_model
            # Snapshot the just-loaded weights before training instead of reading
            # the same checkpoint from disk a second time. Both vectors are moved
            # to CPU so the subtraction does not depend on which device the
            # loader or the trainer left the weights on.
            old_flatten = flatten_params(test_model.state_dict()).detach().cpu().clone()
            new_trainer.fit(test_model,
                            train_dataloaders=data.DataLoader(test_data, batch_size=64, shuffle=True))
            new_trainer.test(test_model, data.DataLoader(mnist_val, batch_size=64, shuffle=True))
            new_trainer.save_checkpoint(f"checkpoint_{model_name}_{round_idx}.ckpt")
            current_flatten = flatten_params(test_model.state_dict()).detach().cpu()
            delta = current_flatten - old_flatten
            entry['l0_norm'].append(l0_norm(delta))
            entry['l1_norm'].append(l1_norm(delta))
            entry['l2_norm'].append(l2_norm(delta))
            entry['linf_norm'].append(l_inf_norm(delta))

Benign models with non-IID data

In [None]:
# Train every benign client model for `max_round` rounds. Round 0 trains the
# registered model; each later round resumes from the previous round's
# checkpoint and records the L0/L1/L2/Linf norms of the weight change made
# during that round.
for model_name in client_models.keys():
    # `round_idx` instead of `round`: the loop variable stays in the notebook
    # namespace and would otherwise shadow the builtin round().
    for round_idx in range(max_round):
        entry = client_models[model_name]
        test_data = entry['data_train']
        if round_idx == 0:
            test_model = entry['model']
            trainer = pl.Trainer(max_epochs=max_epoch, accelerator='cuda', devices=1)
            trainer.fit(test_model,
                        train_dataloaders=data.DataLoader(test_data, batch_size=64, shuffle=True))
            trainer.test(test_model, data.DataLoader(mnist_val, batch_size=64, shuffle=True))
            trainer.save_checkpoint(f"checkpoint_{model_name}_{round_idx}.ckpt")
        else:
            # Same epoch budget as round 0, without depending on the round-0 trainer object.
            new_trainer = pl.Trainer(max_epochs=max_epoch, accelerator='cuda', devices=1)
            # load_from_checkpoint is a classmethod: call it on the class. Recent
            # Lightning releases raise when it is invoked on an instance.
            test_model = MNISTModelMLP.load_from_checkpoint(f"checkpoint_{model_name}_{round_idx-1}.ckpt")
            entry['model'] = test_model
            # Snapshot the just-loaded weights before training instead of reading
            # the same checkpoint from disk a second time. Both vectors are moved
            # to CPU so the subtraction does not depend on which device the
            # loader or the trainer left the weights on.
            old_flatten = flatten_params(test_model.state_dict()).detach().cpu().clone()
            new_trainer.fit(test_model,
                            train_dataloaders=data.DataLoader(test_data, batch_size=64, shuffle=True))
            new_trainer.test(test_model, data.DataLoader(mnist_val, batch_size=64, shuffle=True))
            new_trainer.save_checkpoint(f"checkpoint_{model_name}_{round_idx}.ckpt")
            current_flatten = flatten_params(test_model.state_dict()).detach().cpu()
            delta = current_flatten - old_flatten
            entry['l0_norm'].append(l0_norm(delta))
            entry['l1_norm'].append(l1_norm(delta))
            entry['l2_norm'].append(l2_norm(delta))
            entry['linf_norm'].append(l_inf_norm(delta))

In [None]:
# A notebook cell displays only its last expression, so the three series are
# gathered into one dict to show them together instead of discarding the
# first two lookups.
{
    'data_poisoning': datapoisoing_client_models['datapoisoing_client_1_pp_1.0']['l2_norm'],
    'label_flipping': labelFlipping_client_models['labelFlipping_client_1_pp_1.0']['l2_norm'],
    'benign': client_models['client_0_alpha_100']['l2_norm'],
}

In [None]:
# List the registered entries; keys follow the pattern 'labelFlipping_client_{client}_pp_{poisoned_percent}'.
labelFlipping_client_models.keys()

In [None]:
# L2 norms of the per-round weight change for the fully (1.0) label-flipped entry.
labelFlipping_client_models['labelFlipping_client_1_pp_1.0']['l2_norm']

In [None]:
# L2 norms of the per-round weight change for benign client 0 at alpha = 100.
client_models['client_0_alpha_100']['l2_norm']