In [1]:
# Copyright (C) 2022-2024 TU Darmstadt
# SPDX-License-Identifier: Apache-2.0

# -----------------------------------------------------------
# Primary author: Phillip Rieger <phillip.rieger@trust.tu-darmstadt.de>
# Co-authored-by: Torsten Krauss <torsten.krauss@uni-wuerzburg.de>
# ------------------------------------------------------------

import argparse
import os
import pickle
import time
import warnings
from copy import deepcopy
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
import torch.optim as optim
from torchvision import transforms, datasets
from sklearn.cluster import AgglomerativeClustering, DBSCAN

from CrowdGuardClientValidation import CrowdGuardClientValidation
from openfl.experimental.interface import Aggregator, Collaborator, FLSpec
from openfl.experimental.placement import aggregator, collaborator
from openfl.experimental.runtime import LocalRuntime
from cifar10_crowdguard import MEAN, STD_DEV, poison_data, seed_random_generators
from cifar10_crowdguard import BATCH_SIZE_TRAIN, BATCH_SIZE_TEST, Net, test, default_optimizer
from cifar10_crowdguard import FederatedFlow
from cifar10_crowdguard import PRETRAINED_MODEL_FILE, download_pretrained_model
# Install a blanket "ignore" filter: no category or module restriction is given,
# so every Python warning raised after this point is suppressed.
# NOTE(review): this also hides deprecation warnings from torch/openfl — consider
# narrowing the filter if warnings are needed while debugging.
warnings.filterwarnings("ignore")

Aggregator step "start" registered
Collaborator step "train" registered
Aggregator step "fed_avg_aggregation" registered
Aggregator step "collect_models" registered
Collaborator step "local_validation" registered
Aggregator step "defend" registered
Aggregator step "end" registered


In [2]:
# Federation size and attacker share.
TOTAL_CLIENT_NUMBER = 4
PMR = 0.25  # fraction of the clients that are malicious

# Any positive PMR yields at least one attacker, even when the product
# TOTAL_CLIENT_NUMBER * PMR truncates to zero; PMR <= 0 disables the attack.
if PMR > 0:
    NUMBER_OF_MALICIOUS_CLIENTS = max(1, int(TOTAL_CLIENT_NUMBER * PMR))
else:
    NUMBER_OF_MALICIOUS_CLIENTS = 0
NUMBER_OF_BENIGN_CLIENTS = TOTAL_CLIENT_NUMBER - NUMBER_OF_MALICIOUS_CLIENTS

NUMBER_OF_ROUNDS = 10  # forwarded to the flow through args.comm_round

In [3]:
class CommandLineArgumentSimulator:
    """Plain attribute holder that mimics parsed command-line arguments.

    The notebook reads the settings below from the module-level ``args``
    instance instead of parsing ``sys.argv``.
    """

    def __init__(self):
        # Share of the combined sample pool used for the test / train split.
        self.test_dataset_ratio = 0.4
        self.train_dataset_ratio = 0.4

        # Not read by any cell visible in this notebook section.
        self.log_dir = 'test_debug'

        # Number of communication rounds handed to the federated flow.
        self.comm_round = NUMBER_OF_ROUNDS

        # Not read by any cell visible in this notebook section.
        self.flow_internal_loop_test = False

        # Optimizer family forwarded to default_optimizer().
        self.optimizer_type = 'SGD'


# Shared settings object used by the following cells.
args = CommandLineArgumentSimulator()

In [4]:
# Run the checkpoint download helper imported from cifar10_crowdguard.
# NOTE(review): presumably this stores the weights at PRETRAINED_MODEL_FILE, which a
# later cell loads with torch.load — confirm, and confirm whether it skips the
# download when the file already exists (it needs network access otherwise).
download_pretrained_model()

In [5]:
# The aggregator starts with an empty private-attribute dictionary.
aggregator_object = Aggregator()
aggregator_object.private_attributes = {}

# Benign clients are listed first and malicious ones last; the role is encoded in the
# name prefix, which the data-setup loop later checks via `'malicious' in collab.name`.
benign_names = [f'benign_{i:02d}' for i in range(NUMBER_OF_BENIGN_CLIENTS)]
malicious_names = [f'malicious_{i:02d}' for i in range(NUMBER_OF_MALICIOUS_CLIENTS)]
collaborator_names = benign_names + malicious_names
collaborators = [Collaborator(name=client_name) for client_name in collaborator_names]
# Pick the compute device used for the pretrained-model check and for the flow.
if torch.cuda.is_available():
    # Keep the original preference for the second GPU (index 1), but fall back to the
    # first one on single-GPU hosts: there "cuda:1" is an invalid device ordinal and
    # the first tensor/model transfer to it would fail.
    # NOTE(review): the original comment said choosing a CUDA device lets the Ray
    # library reserve the available GPU(s) for the task — confirm this still applies.
    preferred_gpu_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{preferred_gpu_index}")
else:
    device = torch.device("cpu")

# Convert images to tensors and normalise them with the statistics exported by
# cifar10_crowdguard.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD_DEV),
])

# Materialise both official CIFAR-10 splits as in-memory lists of (image, label)
# pairs; the transform is applied once here while iterating the datasets.
cifar_train = list(datasets.CIFAR10(root="./data", train=True, download=True, transform=transform))
cifar_test = list(datasets.CIFAR10(root="./data", train=False, download=True, transform=transform))

# Merge the two splits (train samples first, then test samples) into one pool of
# images X and labels Y; the notebook re-splits this pool itself in the next cell.
all_samples = cifar_train + cifar_test
X = torch.stack([image for image, _ in all_samples])
Y = torch.LongTensor(np.array([label for _, label in all_samples]))

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Seed the random generators before drawing the permutation so the split is repeatable.
# NOTE(review): assumes seed_random_generators() seeds NumPy's global RNG, which
# np.random.shuffle below relies on — confirm in cifar10_crowdguard.
seed_random_generators(0)
shuffled_indices = np.arange(X.shape[0])
np.random.shuffle(shuffled_indices)

# Split sizes are fractions of the combined (train + test) CIFAR-10 pool.
N_total_samples = len(cifar_test) + len(cifar_train)
train_dataset_size = int(N_total_samples * args.train_dataset_ratio)
test_dataset_size = int(N_total_samples * args.test_dataset_ratio)

# X and Y are rebound to their shuffled versions, so re-running this cell without
# re-running the loading cell permutes already-shuffled data.
X = X[shuffled_indices]
Y = Y[shuffled_indices]

# The first train_dataset_size samples form the training pool ...
train_dataset_data = X[:train_dataset_size]
train_dataset_targets = Y[:train_dataset_size]

# ... and the next test_dataset_size samples form the disjoint test pool.
test_dataset_data = X[train_dataset_size:train_dataset_size + test_dataset_size]
test_dataset_targets = Y[train_dataset_size:train_dataset_size + test_dataset_size]

# Fix: report the size of the training targets for "train" (the original printed the
# test size twice, which only looked right because both ratios are equal).
print(f"Dataset info (total {N_total_samples}): train - {train_dataset_targets.shape[0]}, "
      f"test - {test_dataset_targets.shape[0]}, ")


Dataset info (total 60000): train - 24000, test - 24000, 


In [7]:
def _make_loader(data, targets, batch_size, shuffle):
    """Wrap a (data, targets) tensor pair in a DataLoader with the given batching."""
    return torch.utils.data.DataLoader(
        TensorDataset(data, targets), batch_size=batch_size, shuffle=shuffle
    )


num_collaborators = len(collaborators)
for idx, collab in enumerate(collaborators):
    # Strided shard: collaborator `idx` receives samples idx, idx + n, idx + 2n, ...
    # so the shards of different collaborators do not overlap.
    shard = slice(idx, None, num_collaborators)

    benign_training_X = train_dataset_data[shard]
    benign_training_Y = train_dataset_targets[shard]

    # Only collaborators whose name contains 'malicious' train on poisoned data.
    is_malicious = 'malicious' in collab.name
    local_train_data, local_train_targets = (
        poison_data(benign_training_X, benign_training_Y)
        if is_malicious
        else (benign_training_X, benign_training_Y)
    )

    local_test_data = test_dataset_data[shard]
    local_test_targets = test_dataset_targets[shard]

    # Every collaborator also gets a backdoored copy of its test shard, produced by
    # poison_data with pdr=1.0, to evaluate the model on backdoor samples.
    poison_test_data, poison_test_targets = poison_data(
        local_test_data, local_test_targets, pdr=1.0
    )

    # Training batches are shuffled; both evaluation loaders keep a fixed order.
    collab.private_attributes = {
        "train_loader": _make_loader(
            local_train_data, local_train_targets, BATCH_SIZE_TRAIN, True
        ),
        "test_loader": _make_loader(
            local_test_data, local_test_targets, BATCH_SIZE_TEST, False
        ),
        "backdoor_test_loader": _make_loader(
            poison_test_data, poison_test_targets, BATCH_SIZE_TEST, False
        ),
    }

In [9]:
# Load the pretrained checkpoint onto the selected device and evaluate it once before
# federated training starts.
# NOTE(review): torch.load unpickles the file; if the installed torch version supports
# it, consider passing weights_only=True since the checkpoint is downloaded.
pretrained_weights = torch.load(PRETRAINED_MODEL_FILE, map_location=device)
test_model = Net().to(device)
test_model.load_state_dict(pretrained_weights)

# Evaluate on the loaders of the LAST collaborator, selected explicitly instead of
# relying on a loop variable left over from an earlier cell. Malicious names are
# appended last to the collaborator list, so with at least one malicious client the
# 'train_loader' used here contains poisoned training data.
reference_collaborator = collaborators[-1]
reference_loaders = reference_collaborator.private_attributes
test(test_model, reference_loaders['train_loader'], device, test_train='Train')
test(test_model, reference_loaders['test_loader'], device)
# Kept as the final bare expression so the notebook still displays its return value.
test(test_model, reference_loaders['backdoor_test_loader'], device, mode='Backdoor')

Benign Train set: Avg. loss: 3.3837500759895813, Accuracy: 2083/6000 (34.717%)
Benign Test set: Avg. loss: 0.9973345994949341, Accuracy: 3768/6000 (62.800%)
Backdoor Test set: Avg. loss: 5.72957197825114, Accuracy: 325/6000 (5.417%)


0.05416666716337204

In [8]:
# Bind the aggregator and all collaborators to a LocalRuntime instance.
local_runtime = LocalRuntime(aggregator=aggregator_object, collaborators=collaborators)
print(f"Local runtime collaborators = {local_runtime.collaborators}")

# The global model starts from the pretrained checkpoint loaded earlier.
model = Net()
model.load_state_dict(pretrained_weights)

# One optimizer per collaborator, keyed by collaborator name; each is created by
# default_optimizer() from the same `model` instance.
optimizers = {}
for participant in collaborators:
    optimizers[participant.name] = default_optimizer(model, optimizer_type=args.optimizer_type)

top_model_accuracy = 0
malicious_fraction = NUMBER_OF_MALICIOUS_CLIENTS / TOTAL_CLIENT_NUMBER

# Arguments are positional and must follow FederatedFlow's signature in
# cifar10_crowdguard; their meaning is inferred from the variable names only.
flflow = FederatedFlow(
    model,
    optimizers,
    device,
    args.comm_round,
    top_model_accuracy,
    malicious_fraction,
    'CrowdGuard',
)

# Attach the runtime, then execute the flow.
flflow.runtime = local_runtime
flflow.run()

Local runtime collaborators = ['benign_00', 'benign_01', 'benign_02', 'malicious_00']
####################
Round 0...
####################

Calling start
Performing initialization for model
Sending state from aggregator to collaborators

Calling train
####################
Performing model training for collaborator benign_00 in round 0
Benign Train set: Avg. loss: 0.9885769543495584, Accuracy: 3790/6000 (63.16666603088379%)
Benign Test set: Avg. loss: 1.001497248808543, Accuracy: 3761/6000 (62.68333196640015%)
Backdoor Test set: Avg. loss: 5.7991689046223955, Accuracy: 330/6000 (5.499999970197678%)
Benign Train set: Avg. loss: 1.0561292523399313, Accuracy: 3597/6000 (59.950000047683716%)
Benign Test set: Avg. loss: 1.197381854057312, Accuracy: 3327/6000 (55.44999837875366%)
Backdoor Test set: Avg. loss: 5.618184248606364, Accuracy: 302/6000 (5.0333332270383835%)
Should transfer from train to collect_models

Calling train
####################
Performing model training for collaborator be

Backdoor Test set: Avg. loss: 5.079943021138509, Accuracy: 371/6000 (6.183333322405815%)
Benign Train set: Avg. loss: 0.5856330990791321, Accuracy: 4701/6000 (78.35000157356262%)
Benign Test set: Avg. loss: 1.2663490772247314, Accuracy: 3113/6000 (51.883333921432495%)
Backdoor Test set: Avg. loss: 0.023603687683741253, Accuracy: 5993/6000 (99.88332986831665%)
Scale Model by 4.0
Should transfer from train to collect_models

Calling collect_models
Sending state from aggregator to collaborators

Calling local_validation
Performing model validation for collaborator benign_00 in round 1
Distance: cosine, use y 2: [4.425570297924237, 0.3427598443292488]
Distance: cosine, use x 2: [16.344858266211432, 0.34275984432924833]
Distance: cosine, use y 1: [10.95445115010334]
Distance: cosine, use x 2: [10.954451150103328, 0.0]
Distance: euclid, use y 2: [0.19457190016820558, 0.019510296358126844]
Distance: euclid, use x 2: [20.585892915372213, 0.019510296358126844]
Distance: euclid, use y 1: [10.954

Distance: cosine, use y 2: [8.731105394391083, 0.12413702567453089]
Distance: cosine, use x 2: [13.670805247683036, 0.12413702567453089]
Distance: cosine, use y 1: [10.954451150103324]
Distance: cosine, use x 2: [10.954451150103342, 0.0]
Distance: euclid, use y 2: [2.465981367017415, 1.0990384519078722]
Distance: euclid, use x 2: [17.190112051993644, 1.0990384519078722]
Distance: euclid, use y 1: [10.954451150103347]
Distance: euclid, use x 2: [10.954451150103319, 0.0]
Suspicious Models detected by 3: [1]
Should transfer from local_validation to defend

Calling defend
Agglomerative Clustering: {0: array([0, 1, 2]), 1: array([3])}
DBScan Input: [0 1 2]
DBScan Clustering: [0 1 2]
Negatives: [0, 1, 2]
Finished round 3/10
Sending state from aggregator to collaborators

Calling train
####################
Performing model training for collaborator benign_00 in round 3
Benign Train set: Avg. loss: 0.8797781603767517, Accuracy: 4098/6000 (68.29999685287476%)
Benign Test set: Avg. loss: 0.98808

Benign Test set: Avg. loss: 1.0738922754923503, Accuracy: 3626/6000 (60.43333411216736%)
Backdoor Test set: Avg. loss: 5.036426067352295, Accuracy: 421/6000 (7.016666233539581%)
Should transfer from train to collect_models

Calling train
####################
Performing model training for collaborator malicious_00 in round 4
Benign Train set: Avg. loss: 3.241051955425993, Accuracy: 2149/6000 (35.81666648387909%)
Benign Test set: Avg. loss: 0.9790957868099213, Accuracy: 3860/6000 (64.33333158493042%)
Backdoor Test set: Avg. loss: 5.473701159159343, Accuracy: 386/6000 (6.433333456516266%)
Benign Train set: Avg. loss: 0.5482086730288699, Accuracy: 4775/6000 (79.58333492279053%)
Benign Test set: Avg. loss: 1.1979321042696636, Accuracy: 3244/6000 (54.06666398048401%)
Backdoor Test set: Avg. loss: 0.012110005132853985, Accuracy: 5992/6000 (99.86666440963745%)
Scale Model by 4.0
Should transfer from train to collect_models

Calling collect_models
Sending state from aggregator to collaborators


Suspicious Models detected by 2: [3]
Should transfer from local_validation to defend

Calling local_validation
Performing model validation for collaborator malicious_00 in round 5
Distance: cosine, use y 2: [2.2935255570229356, 3.2583348201762328]
Distance: cosine, use x 2: [14.272313280144168, 2.2935255570229356]
Distance: cosine, use y 1: [10.954451150103312]
Distance: cosine, use x 2: [10.954451150103326, 0.0]
Distance: euclid, use y 2: [3.7317955424528026, 0.441976840267575]
Distance: euclid, use x 2: [17.495995482338625, 0.441976840267575]
Distance: euclid, use y 1: [10.954451150103061]
Distance: euclid, use x 2: [10.954451150103594, 0.0]
Suspicious Models detected by 3: [0, 2]
Should transfer from local_validation to defend

Calling defend
Agglomerative Clustering: {0: array([0, 1, 2]), 1: array([3])}
DBScan Input: [0 1 2]
DBScan Clustering: [0 1 2]
Negatives: [0, 1, 2]
Finished round 6/10
Sending state from aggregator to collaborators

Calling train
####################
Performi

Backdoor Test set: Avg. loss: 6.598117272059123, Accuracy: 265/6000 (4.416666552424431%)
Benign Train set: Avg. loss: 0.8541606298469483, Accuracy: 4111/6000 (68.51666569709778%)
Benign Test set: Avg. loss: 1.1488163471221924, Accuracy: 3502/6000 (58.36666822433472%)
Backdoor Test set: Avg. loss: 7.346678733825684, Accuracy: 62/6000 (1.0333333164453506%)
Should transfer from train to collect_models

Calling train
####################
Performing model training for collaborator malicious_00 in round 7
Benign Train set: Avg. loss: 3.7933548435251763, Accuracy: 2059/6000 (34.316664934158325%)
Benign Test set: Avg. loss: 0.9883201122283936, Accuracy: 3818/6000 (63.63333463668823%)
Backdoor Test set: Avg. loss: 6.588667392730713, Accuracy: 276/6000 (4.600000008940697%)
Benign Train set: Avg. loss: 0.5654542832932574, Accuracy: 4768/6000 (79.46666479110718%)
Benign Test set: Avg. loss: 1.2047673066457112, Accuracy: 3285/6000 (54.750001430511475%)
Backdoor Test set: Avg. loss: 0.03584048183013

Distance: cosine, use y 2: [0.24431579576776663, 5.2569501477100475]
Distance: cosine, use x 2: [16.76755929301521, 0.2443157957677662]
Distance: cosine, use y 1: [10.95445115010334]
Distance: cosine, use x 2: [10.954451150103328, 0.0]
Distance: euclid, use y 2: [0.07282139662990428, 0.0038479150123968964]
Distance: euclid, use x 2: [20.63099133657362, 0.0038479150123968964]
Distance: euclid, use y 1: [10.954451150103338]
Distance: euclid, use x 2: [10.954451150103328, 0.0]
Suspicious Models detected by 2: [3]
Should transfer from local_validation to defend

Calling local_validation
Performing model validation for collaborator malicious_00 in round 8
Distance: cosine, use y 2: [0.8265963692700686, 7.309482023131439]
Distance: cosine, use x 2: [12.158018104786466, 0.8265963692700686]
Distance: cosine, use y 1: [10.954451150103342]
Distance: cosine, use x 2: [10.95445115010333, 0.0]
Distance: euclid, use y 2: [1.7396525584425433, 4.420115231286226]
Distance: euclid, use x 2: [14.21144557