In [1]:
import time
import json
import os
import pandas as pd
import numpy as np
import flwr as fl
import networkx as nx

from sklearn.model_selection import train_test_split

from hydra import initialize, compose
from omegaconf import OmegaConf, DictConfig

from logging import INFO, DEBUG
from flwr.common.logger import log


from src.models.evaluation_metrics import custom_acc_mc, custom_acc_binary

from src.data.dataset_info import datasets

with initialize(version_base=None, config_path="conf/"):
    cfg = compose(config_name='config.yaml')
    print(OmegaConf.to_yaml(cfg))

# choosing the dataset
dataset = datasets[0]
print("dataset: {}".format(dataset.name))
folder_path = "./fl_from_2_datasets_pca/"

learning_rate = 0.001
LAMBD_1 = 0.0001
LAMBD_2 = 0.001

multi_class: false
with_network_features: false
n_clients: 5
n_rounds: 20
config_fit:
  lr: 0.01
  momentum: 0.9
  local_epochs: 1
  batch_size: 256

dataset: cic_ton_iot


In [2]:
dtime = time.strftime("%Y%m%d-%H%M%S")
dtime

'20240720-163506'

In [3]:
clients_paths = [
    #folder_path + "client_0_pca.parquet",
    #folder_path + "client_1_pca.parquet",
    folder_path + "client_2_pca.parquet",
    folder_path + "client_3_pca.parquet",
    folder_path + "client_4_pca.parquet",
    folder_path + "client_5_pca.parquet",
    #folder_path + "client_6_pca.parquet",
    #folder_path + "client_7_pca.parquet"
]

cn_2 = [
    "dst_global_betweenness",
    "src_global_degree",
    "dst_global_degree",
    "src_mv",
    "src_global_pagerank",
    "dst_global_pagerank",
    "src_global_betweenness",
    "dst_mv"
]

cn_1 = [
    "dst_local_pagerank",
    "src_local_betweenness",
    "src_Comm",
    "src_local_degree",
    "dst_local_betweenness",
    "dst_Comm",
    "dst_local_degree",
    "src_local_pagerank"
]

for i, client_path in enumerate(clients_paths):
    # Determine which set of columns to drop based on the client index
    if i < 5:
        drop_columns = cn_2
    else:
        drop_columns = cn_1
    df = pd.read_parquet(client_path)
    df.drop(columns=drop_columns, errors='ignore', inplace=True)
test = pd.read_parquet(folder_path + "test.parquet")
df.drop(columns=cn_2, errors='ignore', inplace=True)


In [4]:
client_columns = []

# Read each file and store the columns as a set
for client_path in clients_paths:
    df = pd.read_parquet(client_path)
    client_columns.append(set(df.columns))

# Find the intersection of columns across all clients
common_columns = set.intersection(*client_columns)

# Find the difference of columns for each client compared to the intersection
differences = [columns - common_columns for columns in client_columns]

# Display the columns of each client, the intersection, and the differences
#for idx, columns in enumerate(client_columns):
 #   print(f"Client {idx} columns: {columns}")

#print(f"\nIntersection of columns across all clients: {common_columns}")

for idx, diff in enumerate(differences):
    print(f"Difference in columns for client {idx}: {diff}")

Difference in columns for client 0: set()
Difference in columns for client 1: set()
Difference in columns for client 2: set()
Difference in columns for client 3: set()


In [5]:

clients_pathss = [
    folder_path + "client_0_pca.parquet",
    folder_path + "client_1_pca.parquet",
    folder_path + "client_2_pca.parquet",
    folder_path + "client_3_pca.parquet",
    folder_path + "client_4_pca.parquet",
    folder_path + "client_5_pca.parquet",
    folder_path + "client_6_pca.parquet",
    folder_path + "client_7_pca.parquet",
    folder_path + "test.parquet"
]

columns_per_client = {}
for path in clients_pathss:
    client_name = path.split('/')[-1].split('.')[0]  
    df = pd.read_parquet(path)
    columns_per_client[client_name] = set(df.columns)

all_features = set().union(*columns_per_client.values())
common_features = set.intersection(*columns_per_client.values())
unique_features = {client: columns - common_features for client, columns in columns_per_client.items()}

common_features, unique_features


({'ACK Flag Cnt',
  'Active Max',
  'Active Mean',
  'Active Min',
  'Active Std',
  'Attack',
  'Bwd Blk Rate Avg',
  'Bwd Byts/b Avg',
  'Bwd Header Len',
  'Bwd IAT Max',
  'Bwd IAT Mean',
  'Bwd IAT Min',
  'Bwd IAT Std',
  'Bwd IAT Tot',
  'Bwd PSH Flags',
  'Bwd Pkt Len Max',
  'Bwd Pkt Len Mean',
  'Bwd Pkt Len Min',
  'Bwd Pkt Len Std',
  'Bwd Pkts/b Avg',
  'Bwd Pkts/s',
  'Bwd Seg Size Avg',
  'Bwd URG Flags',
  'CWE Flag Count',
  'Class',
  'Down/Up Ratio',
  'Dst IP',
  'Dst Port',
  'ECE Flag Cnt',
  'FIN Flag Cnt',
  'Flow Byts/s',
  'Flow Duration',
  'Flow IAT Max',
  'Flow IAT Mean',
  'Flow IAT Min',
  'Flow IAT Std',
  'Flow ID',
  'Flow Pkts/s',
  'Fwd Act Data Pkts',
  'Fwd Blk Rate Avg',
  'Fwd Byts/b Avg',
  'Fwd Header Len',
  'Fwd IAT Max',
  'Fwd IAT Mean',
  'Fwd IAT Min',
  'Fwd IAT Tot',
  'Fwd PSH Flags',
  'Fwd Pkt Len Max',
  'Fwd Pkt Len Mean',
  'Fwd Pkt Len Min',
  'Fwd Pkt Len Std',
  'Fwd Pkts/b Avg',
  'Fwd Pkts/s',
  'Fwd Seg Size Avg',
  'Fwd Se

# Data Loading and Preprocessing

In [6]:
import pickle
# the input dimension of the training set
# input_dim = df.shape[1] - len(drop_columns) - len(weak_columns) - 1  # for the label_column
  
# specifying the number of classes, since it is different from one dataset to another and also if binary or multi-class classification
classes_set = {"benign", "attack"}
labels_names = {0: "benign", 1: "attack"}
num_classes = 2
if cfg.multi_class:
    with open(folder_path + "labels_names.pkl", 'rb') as f:
        labels_names, classes_set = pickle.load(f)
    num_classes = len(classes_set)
    
labels_names = {int(k): v for k, v in labels_names.items()}

print(f"==>> classes_set: {classes_set}")
print(f"==>> num_classes: {num_classes}")
print(f"==>> labels_names: {labels_names}")

==>> classes_set: {'benign', 'attack'}
==>> num_classes: 2
==>> labels_names: {0: 'benign', 1: 'attack'}


In [7]:
test = pd.read_parquet(folder_path + "test.parquet")
print(test.columns)
if cfg.multi_class:
    test[dataset.label_col] = test[dataset.class_num_col]
    
#test.drop(["src_degree", "dst_degree", "src_betweenness", "dst_betweenness", "src_pagerank", "dst_pagerank"], axis=1, inplace=True)
#test.drop(["src_multidigraph_degree", "dst_multidigraph_degree", "src_multidigraph_betweenness", "dst_multidigraph_betweenness", "src_multidigraph_pagerank", "dst_multidigraph_pagerank"], axis=1, inplace=True)

if not cfg.multi_class:
    test_by_class = {}
    classes = test[dataset.class_col].unique()
    for class_value in classes:
        test_class = test[test[dataset.class_col] == class_value].copy()
        test_class.drop(dataset.drop_columns, axis=1, inplace=True, errors='ignore')
        test_class.drop(dataset.weak_columns, axis=1, inplace=True, errors='ignore')
        test_class.reset_index(drop=True, inplace=True)

        test_class_labels = test_class[dataset.label_col].to_numpy()
        test_class = test_class.drop([dataset.label_col], axis=1).to_numpy()

        test_by_class[class_value] = (test_class, test_class_labels)
    
    
test.drop(dataset.drop_columns, axis=1, inplace=True,errors='ignore')
test.drop(dataset.weak_columns, axis=1, inplace=True,errors='ignore')
test.reset_index(drop=True, inplace=True)
  
test_labels = test[dataset.label_col].to_numpy()
test = test.drop([dataset.label_col], axis=1).to_numpy()
input_dim = test.shape[1]
client_data = []
for client_path in clients_paths:
    client_data.append(pd.read_parquet(client_path))
    
for i in range(len(client_data)):
    
    cdata = client_data[i]

    if cfg.multi_class:
        cdata[dataset.label_col] = cdata[dataset.class_num_col]
        
    #cdata.drop(["src_degree", "dst_degree", "src_betweenness", "dst_betweenness", "src_pagerank", "dst_pagerank"], axis=1, inplace=True)
    #if i==0:
    cdata.drop(["src_multidigraph_degree", "dst_multidigraph_degree", "src_multidigraph_betweenness", "dst_multidigraph_betweenness", "src_multidigraph_pagerank", "dst_multidigraph_pagerank"], axis=1, inplace=True, errors='ignore')

    cdata.drop(dataset.drop_columns, axis=1, inplace=True, errors='ignore')
    cdata.drop(dataset.weak_columns, axis=1, inplace=True, errors='ignore')
    cdata.reset_index(drop=True, inplace=True)
    c_train, c_test = train_test_split(cdata, test_size=0.1)

    y_train = c_train[dataset.label_col].to_numpy()
    x_train = c_train.drop([dataset.label_col], axis=1).to_numpy()
    y_test = c_test[dataset.label_col].to_numpy()
    x_test = c_test.drop([dataset.label_col], axis=1).to_numpy()

    client_data[i] = (x_train, y_train, x_test, y_test)

Index(['Flow ID', 'Src IP', 'Src Port', 'Dst IP', 'Dst Port', 'Protocol',
       'Timestamp', 'Flow Duration', 'Tot Fwd Pkts', 'Tot Bwd Pkts',
       ...
       'src_closeness', 'dst_closeness', 'src_pagerank', 'dst_pagerank',
       'src_k_core', 'dst_k_core', 'src_k_truss', 'dst_k_truss', 'pca_1',
       'pca_2'],
      dtype='object', length=101)


# Model

In [8]:
from keras import layers, models, Input, regularizers, callbacks, metrics, optimizers, initializers
# from src.models.evaluation_metrics import f1_m

def create_keras_model(input_shape, alpha = learning_rate):
    model = models.Sequential()
    
    model.add(layers.Conv1D(80, kernel_size=5,
                activation="relu", input_shape=(input_shape, 1), kernel_regularizer=regularizers.L1L2(l1=LAMBD_1, l2=LAMBD_2)))
    model.add(layers.MaxPooling1D())
    model.add(layers.LayerNormalization(axis=1))
    
    model.add(layers.Conv1D(80, 5, activation='relu', kernel_regularizer=regularizers.L1L2(l1=LAMBD_1, l2=LAMBD_2)))
    model.add(layers.MaxPooling1D())
    model.add(layers.LayerNormalization(axis=1))
    
    model.add(layers.LSTM(units=80,
                            kernel_regularizer=regularizers.L1L2(l1=LAMBD_1, l2=LAMBD_2),
                            recurrent_regularizer=regularizers.L1L2(l1=LAMBD_1, l2=LAMBD_2),
                            bias_regularizer=regularizers.L1L2(l1=LAMBD_1, l2=LAMBD_2),
                            return_sequences=False,
                            ))

    model.add(layers.LayerNormalization(axis=1))
    model.add(layers.Dense(500,activation='relu', kernel_regularizer=regularizers.L1L2(l1=LAMBD_1, l2=LAMBD_2)))
    model.add(layers.LayerNormalization(axis=1))
    model.add(layers.Dense(200,activation='relu', kernel_regularizer=regularizers.L1L2(l1=LAMBD_1, l2=LAMBD_2)))
    model.add(layers.LayerNormalization(axis=1))
    model.add(layers.Dense(80,activation='relu', kernel_regularizer=regularizers.L1L2(l1=LAMBD_1, l2=LAMBD_2)))
    model.add(layers.LayerNormalization(axis=1))

    if cfg.multi_class:
        model.add(layers.Dense(num_classes, activation='softmax'))
        model.compile(optimizer=optimizers.Adam(learning_rate=alpha),
                        loss='sparse_categorical_crossentropy',
                        metrics=['accuracy'])
    else:
        model.add(layers.Dense(1, activation='sigmoid'))
        model.compile(optimizer=optimizers.Adam(learning_rate=alpha),
                        loss='binary_crossentropy',
                        metrics=['accuracy'])
    
    
    return model


In [9]:
model = create_keras_model(input_dim)
model.summary()




  super().__init__(


# FL

## FL settings

In [10]:
results_final = {}
results_final["baseline"] = {}
results_final["baseline"]["accuracy"] = {}
results_final["baseline"]["f1s"] = {}

results_final["centralities - DiGraph"] = {}
results_final["centralities - DiGraph"]["accuracy"] = {}
results_final["centralities - DiGraph"]["f1s"] = {}

results_final["centralities - MultiDiGraph"] = {}
results_final["centralities - MultiDiGraph"]["accuracy"] = {}
results_final["centralities - MultiDiGraph"]["f1s"] = {}

In [11]:
results = {}  # a dictionary that will contain all the options and results of models
# add all options to the results dictionary, to know what options selected for obtained results
results["configuration"] = "2dt - baseline"
results["dtime"] = dtime
results["multi_class"] = cfg.multi_class
results["learning_rate"] = learning_rate
results["dataset_name"] = dataset.name
results["num_classes"] = num_classes
results["labels_names"] = labels_names
results["input_dim"] = input_dim

results["scores"] = {}
results["scores"]["server"] = {}
results["scores"]["clients"] = {}
results["scores"]["accuracy"] = {}
results["scores"]["f1s"] = {}

if not cfg.multi_class:
    results["scores"]["test_by_class"] = {}
    results["scores"]["test_by_class"]["accuracy"] = {}
    results["scores"]["test_by_class"]["f1s"] = {}
    for k in test_by_class.keys():
        results["scores"]["test_by_class"]["length"] = len(test_by_class[k][0])
        results["scores"]["test_by_class"]["accuracy"][k] = {}   
        results["scores"]["test_by_class"]["f1s"][k] = {}    
        
results

{'configuration': '2dt - baseline',
 'dtime': '20240720-163506',
 'multi_class': False,
 'learning_rate': 0.001,
 'dataset_name': 'cic_ton_iot',
 'num_classes': 2,
 'labels_names': {0: 'benign', 1: 'attack'},
 'input_dim': 39,
 'scores': {'server': {},
  'clients': {},
  'accuracy': {},
  'f1s': {},
  'test_by_class': {'accuracy': {'Benign': {},
    'DoS Hulk': {},
    'PortScan': {},
    'ddos': {},
    'DoS slowloris': {},
    'DoS Slowhttptest': {},
    'FTP-Patator': {},
    'DoS GoldenEye': {},
    'SSH-Patator': {},
    'Bot': {},
    'xss': {},
    'bruteforce': {}},
   'f1s': {'Benign': {},
    'DoS Hulk': {},
    'PortScan': {},
    'ddos': {},
    'DoS slowloris': {},
    'DoS Slowhttptest': {},
    'FTP-Patator': {},
    'DoS GoldenEye': {},
    'SSH-Patator': {},
    'Bot': {},
    'xss': {},
    'bruteforce': {}},
   'length': 446}}}

In [12]:

class FLClient(fl.client.NumPyClient):
    def __init__(self, cid, x_train, y_train, x_test, y_test):
        self.cid = cid
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.model = create_keras_model(input_shape=input_dim)

    def get_parameters(self, config):
        return self.model.get_weights()

    def set_parameters(self, parameters, config):
        self.model.set_weights(parameters)

    def fit(self, parameters, config):
        
        lr=float(config["lr"])
        self.model = create_keras_model(input_shape=input_dim, alpha=lr)
        self.set_parameters(parameters, config)

        
        logdir = "logs/scalars/{}/baseline/client_{}".format(dtime, self.cid)
        tensorboard_callback = callbacks.TensorBoard(log_dir=logdir)

        history = self.model.fit(self.x_train, self.y_train,
                                 epochs=config["local_epochs"],
                                 batch_size=config["batch_size"],
                                 validation_data=(self.x_test, self.y_test),
                                 verbose=0,
                                 callbacks=[tensorboard_callback])

        return self.get_parameters(config), len(self.x_train), {k: v[-1] for k, v in history.history.items()}


    def evaluate(self, parameters, config):
        self.set_parameters(parameters, config)
        loss, accuracy = self.model.evaluate(self.x_test, self.y_test, cfg.config_fit.batch_size, verbose=0)
        return loss, len(self.x_test), {"accuracy": accuracy}


In [13]:
def generate_client_fn():
    def client_fn(cid: str):
        i = int(cid)
        return FLClient(cid, client_data[i][0], client_data[i][1], client_data[i][2], client_data[i][3]).to_client()

    return client_fn

In [14]:
def get_on_fit_config(config: DictConfig):

    def fit_config_fn(server_round: int):
        alpha = learning_rate
        if server_round > 5:
            alpha = alpha / (1 + 0.5 * server_round)


        return {
            "lr": alpha,
            "local_epochs": config.local_epochs,
            "batch_size": config.batch_size,
        }

    return fit_config_fn


def get_evaluate_fn(x_test_sever, y_test_server):

    def evaluate_fn(server_round: int, parameters, config):
        # eval_model = model
        eval_model = create_keras_model(input_shape=input_dim)
        eval_model.set_weights(parameters)

        
        logdir = "logs/scalars/{}/baseline/server".format(dtime) 
        # logdir = "logs/scalars/client{}_".format(config["cid"]) + datetime.now().strftime("%Y%m%d-%H%M%S")
        tensorboard_callback = callbacks.TensorBoard(log_dir=logdir)

        test_loss, test_acc = eval_model.evaluate(x_test_sever, y_test_server,
                                                  batch_size = cfg.config_fit.batch_size,
                                                  callbacks=[tensorboard_callback])
        
        
        y_pred = eval_model.predict(x_test_sever, batch_size = cfg.config_fit.batch_size)
        
        if cfg.multi_class:
            y_pred = np.argmax(y_pred, axis=1)
            scores = custom_acc_mc(y_test_server, y_pred)
        else:
            y_pred = np.transpose(y_pred)[0]
            y_pred = list(
                map(lambda x: 0 if x < 0.5 else 1, y_pred))
            scores = custom_acc_binary(y_test_server, y_pred)
        
        
        results["scores"]["accuracy"][server_round] = test_acc
        results["scores"]["f1s"][server_round] = scores["f1s"]
        results["scores"]["server"][server_round] = scores
        
        
        results["scores"]["accuracy"][server_round] = test_acc
        results["scores"]["f1s"][server_round] = scores["f1s"]
        results["scores"]["server"][server_round] = scores
        
        results_final["baseline"]["accuracy"][server_round] = scores["accuracy"]
        results_final["baseline"]["f1s"][server_round] = scores["f1s"]
        
        if not cfg.multi_class:
            for k in test_by_class.keys():
                y_pred_class = eval_model.predict(test_by_class[k][0], batch_size = cfg.config_fit.batch_size, verbose = 0)
                y_pred_class = np.transpose(y_pred_class)[0]
                y_pred_class = list(map(lambda x: 0 if x < 0.5 else 1, y_pred_class))
                scores_class = custom_acc_binary(test_by_class[k][1], y_pred_class)
                results["scores"]["test_by_class"]["accuracy"][k][server_round] = scores_class["accuracy"]
                results["scores"]["test_by_class"]["f1s"][k][server_round] = scores_class["f1s"]
                
        log(INFO, f"==>> scores: {scores}")
        
        
        return test_loss, {"accuracy": test_acc, "f1s": scores["f1s"], "FPR": scores["FPR"], "FNR": scores["FNR"]}

    return evaluate_fn


In [15]:
def weighted_average(metrics):
    print(f"==>> weighted_average: {metrics}")

    total_examples = 0
    federated_metrics = {k: 0 for k in metrics[0][1].keys()}
    for num_examples, m in metrics:
        for k, v in m.items():
            federated_metrics[k] += num_examples * v
        total_examples += num_examples
    return {k: v / total_examples for k, v in federated_metrics.items()}

strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,  # in simulation, since all clients are available at all times, we can just use `min_fit_clients` to control exactly how many clients we want to involve during fit
    min_fit_clients=len(client_data),  # number of clients to sample for fit()
    fraction_evaluate=0.0,  # similar to fraction_fit, we don't need to use this argument.
    min_evaluate_clients=0,  # number of clients to sample for evaluate()
    min_available_clients=len(client_data),  # total clients in the simulation
    fit_metrics_aggregation_fn = weighted_average,
    # evaluate_metrics_aggregation_fn = weighted_average,
    on_fit_config_fn=get_on_fit_config(
        cfg.config_fit
    ),  # a function to execute to obtain the configuration to send to the clients during fit()
    evaluate_fn=get_evaluate_fn(test, test_labels),
)  # a function to run on the server side to evaluate the global model.


## FL Simulation

In [16]:
import multiprocessing
from math import floor
history = fl.simulation.start_simulation(
    client_fn=generate_client_fn(),  # a function that spawns a particular client
    # num_clients=cfg.n_clients,  # total number of clients
    num_clients=len(client_data),  # total number of clients
    config=fl.server.ServerConfig(
        num_rounds=cfg.n_rounds
        # num_rounds=5
    ),  # minimal config for the server loop telling the number of rounds in FL
    strategy=strategy,  # our strategy of choice
    client_resources={
        "num_cpus": floor(multiprocessing.cpu_count() / len(client_data)),
        "num_gpus": 0.0,
    },
)

INFO flwr 2024-07-20 16:35:33,610 | app.py:178 | Starting Flower simulation, config: ServerConfig(num_rounds=20, round_timeout=None)
2024-07-20 16:35:40,347	INFO worker.py:1621 -- Started a local Ray instance.
INFO flwr 2024-07-20 16:35:43,647 | app.py:213 | Flower VCE: Ray initialized with resources: {'CPU': 32.0, 'node:__internal_head__': 1.0, 'node:127.0.0.1': 1.0, 'memory': 34389929166.0, 'object_store_memory': 17194964582.0}
INFO flwr 2024-07-20 16:35:43,649 | app.py:219 | Optimize your simulation with Flower VCE: https://flower.dev/docs/framework/how-to-run-simulations.html
INFO flwr 2024-07-20 16:35:43,650 | app.py:242 | Flower VCE: Resources for each Virtual Client: {'num_cpus': 8, 'num_gpus': 0.0}
INFO flwr 2024-07-20 16:35:43,668 | app.py:288 | Flower VCE: Creating VirtualClientEngineActorPool with 4 actors
INFO flwr 2024-07-20 16:35:43,670 | server.py:89 | Initializing global parameters
INFO flwr 2024-07-20 16:35:43,671 | server.py:276 | Requesting initial parameters from on

[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 28ms/step - accuracy: 0.4223 - loss: 3.1230
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 26ms/step


INFO flwr 2024-07-20 16:38:41,218 | 589012468.py:68 | ==>> scores: {'accuracy': 0.42202847652193215, 'recall': 0.42202847652193215, 'precision': 0.5485151163593502, 'f1s': 0.416052114953504, 'FPR': 0.7070546773561963, 'FNR': 0.3121115281761456, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.66      0.29      0.41    339647\n           1       0.32      0.69      0.44    164909\n\n    accuracy                           0.42    504556\n   macro avg       0.49      0.49      0.42    504556\nweighted avg       0.55      0.42      0.42    504556\n'}
INFO flwr 2024-07-20 16:38:41,227 | server.py:94 | initial parameters (loss, other metrics): 3.124509572982788, {'accuracy': 0.42202848196029663, 'f1s': 0.416052114953504, 'FPR': 0.7070546773561963, 'FNR': 0.3121115281761456}
INFO flwr 2024-07-20 16:38:41,228 | server.py:104 | FL starting
DEBUG flwr 2024-07-20 16:38:41,230 | server.py:222 | fit_round 1: strategy sampled 4 clients (out of 4)
[2m[3

==>> weighted_average: [(489061, {'accuracy': 0.6253105401992798, 'loss': 1.1626622676849365, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.7399616241455078}), (489061, {'accuracy': 0.6272183060646057, 'loss': 1.2003849744796753, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.7423774600028992}), (489061, {'accuracy': 0.626463770866394, 'loss': 1.193687915802002, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.740676999092102}), (489061, {'accuracy': 0.6273736953735352, 'loss': 1.2229899168014526, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.7415419816970825})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 27ms/step - accuracy: 0.4956 - loss: 0.8002
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 26ms/step


INFO flwr 2024-07-20 16:42:48,687 | 589012468.py:68 | ==>> scores: {'accuracy': 0.4963274641466953, 'recall': 0.4963274641466953, 'precision': 0.5467694306287098, 'f1s': 0.5116518589793133, 'FPR': 0.48182377586140906, 'FNR': 0.548672298055291, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.66      0.52      0.58    339647\n           1       0.31      0.45      0.37    164909\n\n    accuracy                           0.50    504556\n   macro avg       0.49      0.48      0.48    504556\nweighted avg       0.55      0.50      0.51    504556\n'}
INFO flwr 2024-07-20 16:42:48,697 | server.py:125 | fit progress: (1, 0.8000199794769287, {'accuracy': 0.4963274598121643, 'f1s': 0.5116518589793133, 'FPR': 0.48182377586140906, 'FNR': 0.548672298055291}, 247.4662854000926)
INFO flwr 2024-07-20 16:42:48,699 | server.py:171 | evaluate_round 1: no clients selected, cancel
DEBUG flwr 2024-07-20 16:42:48,701 | server.py:222 | fit_round 2: strategy sampl

==>> weighted_average: [(489061, {'accuracy': 0.6197120547294617, 'loss': 0.6807188987731934, 'val_accuracy': 0.5818811058998108, 'val_loss': 0.7166900634765625}), (489061, {'accuracy': 0.608533501625061, 'loss': 0.695033073425293, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6701779365539551}), (489061, {'accuracy': 0.6250508427619934, 'loss': 0.6840630769729614, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6736971139907837}), (489061, {'accuracy': 0.6177143454551697, 'loss': 0.6834655404090881, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6824718117713928})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 27ms/step - accuracy: 0.3263 - loss: 0.8277
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 27ms/step


INFO flwr 2024-07-20 16:47:00,382 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3268398354196561, 'recall': 0.3268398354196561, 'precision': 0.10682427801714788, 'f1s': 0.16102060725869183, 'FPR': 1.0, 'FNR': 0.0, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.00      0.00      0.00    339647\n           1       0.33      1.00      0.49    164909\n\n    accuracy                           0.33    504556\n   macro avg       0.16      0.50      0.25    504556\nweighted avg       0.11      0.33      0.16    504556\n'}
INFO flwr 2024-07-20 16:47:00,389 | server.py:125 | fit progress: (2, 0.8274058103561401, {'accuracy': 0.3268398344516754, 'f1s': 0.16102060725869183, 'FPR': 1.0, 'FNR': 0.0}, 499.15684810001403)
INFO flwr 2024-07-20 16:47:00,391 | server.py:171 | evaluate_round 2: no clients selected, cancel
DEBUG flwr 2024-07-20 16:47:00,393 | server.py:222 | fit_round 3: strategy sampled 4 clients (out of 4)
DEBUG flwr 2024-07-20 16:48:35,2

==>> weighted_average: [(489061, {'accuracy': 0.6255661249160767, 'loss': 0.6755192875862122, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6708500981330872}), (489061, {'accuracy': 0.6202661991119385, 'loss': 0.6723750829696655, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6625652313232422}), (489061, {'accuracy': 0.6276885867118835, 'loss': 0.6678911447525024, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.685636579990387}), (489061, {'accuracy': 0.6264228820800781, 'loss': 0.6707073450088501, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6639322638511658})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 28ms/step - accuracy: 0.3272 - loss: 0.7541
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 25ms/step


INFO flwr 2024-07-20 16:51:23,671 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3276663046321915, 'recall': 0.3276663046321915, 'precision': 0.45535449610669826, 'f1s': 0.179989161923902, 'FPR': 0.98354762444538, 'FNR': 0.03135668762772196, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.52      0.02      0.03    339647\n           1       0.32      0.97      0.49    164909\n\n    accuracy                           0.33    504556\n   macro avg       0.42      0.49      0.26    504556\nweighted avg       0.46      0.33      0.18    504556\n'}
INFO flwr 2024-07-20 16:51:23,680 | server.py:125 | fit progress: (3, 0.7539950609207153, {'accuracy': 0.327666312456131, 'f1s': 0.179989161923902, 'FPR': 0.98354762444538, 'FNR': 0.03135668762772196}, 762.4462385997176)
INFO flwr 2024-07-20 16:51:23,682 | server.py:171 | evaluate_round 3: no clients selected, cancel
DEBUG flwr 2024-07-20 16:51:23,683 | server.py:222 | fit_round 4: strategy sampled 4

==>> weighted_average: [(489061, {'accuracy': 0.6174240112304688, 'loss': 0.6830087304115295, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6761804819107056}), (489061, {'accuracy': 0.611882746219635, 'loss': 0.6780111789703369, 'val_accuracy': 0.5823411345481873, 'val_loss': 0.6874815225601196}), (489061, {'accuracy': 0.6264167428016663, 'loss': 0.665158212184906, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6653954386711121}), (489061, {'accuracy': 0.5996266603469849, 'loss': 0.6868315935134888, 'val_accuracy': 0.5818811058998108, 'val_loss': 0.7199954986572266})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 28ms/step - accuracy: 0.3535 - loss: 0.8617
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 25ms/step


INFO flwr 2024-07-20 16:55:41,861 | 589012468.py:68 | ==>> scores: {'accuracy': 0.35416484988782215, 'recall': 0.35416484988782215, 'precision': 0.6273121571133472, 'f1s': 0.233832939350527, 'FPR': 0.9422135334626833, 'FNR': 0.035413470459465524, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.77      0.06      0.11    339647\n           1       0.33      0.96      0.49    164909\n\n    accuracy                           0.35    504556\n   macro avg       0.55      0.51      0.30    504556\nweighted avg       0.63      0.35      0.23    504556\n'}
INFO flwr 2024-07-20 16:55:41,871 | server.py:125 | fit progress: (4, 0.8613170981407166, {'accuracy': 0.35416483879089355, 'f1s': 0.233832939350527, 'FPR': 0.9422135334626833, 'FNR': 0.035413470459465524}, 1020.637093199417)
INFO flwr 2024-07-20 16:55:41,873 | server.py:171 | evaluate_round 4: no clients selected, cancel
DEBUG flwr 2024-07-20 16:55:41,874 | server.py:222 | fit_round 5: strategy 

==>> weighted_average: [(489061, {'accuracy': 0.6219510436058044, 'loss': 0.6752099394798279, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6608679890632629}), (489061, {'accuracy': 0.6251940131187439, 'loss': 0.6746762990951538, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6672102808952332}), (489061, {'accuracy': 0.6180844306945801, 'loss': 0.6759049892425537, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6630114316940308}), (489061, {'accuracy': 0.5838678479194641, 'loss': 0.6865887641906738, 'val_accuracy': 0.583684504032135, 'val_loss': 0.6838566660881042})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 33ms/step - accuracy: 0.3264 - loss: 0.8380
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 31ms/step


INFO flwr 2024-07-20 17:00:29,290 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3269508240908839, 'recall': 0.3269508240908839, 'precision': 0.7205984691584876, 'f1s': 0.16127681668486085, 'FPR': 0.9998174575367955, 'FNR': 3.638370252684814e-05, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.91      0.00      0.00    339647\n           1       0.33      1.00      0.49    164909\n\n    accuracy                           0.33    504556\n   macro avg       0.62      0.50      0.25    504556\nweighted avg       0.72      0.33      0.16    504556\n'}
INFO flwr 2024-07-20 17:00:29,296 | server.py:125 | fit progress: (5, 0.8377032279968262, {'accuracy': 0.3269508183002472, 'f1s': 0.16127681668486085, 'FPR': 0.9998174575367955, 'FNR': 3.638370252684814e-05}, 1308.0608057994395)
INFO flwr 2024-07-20 17:00:29,297 | server.py:171 | evaluate_round 5: no clients selected, cancel
DEBUG flwr 2024-07-20 17:00:29,299 | server.py:222 | fit_round 6: strat

==>> weighted_average: [(489061, {'accuracy': 0.6280443668365479, 'loss': 0.658311665058136, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.660862386226654}), (489061, {'accuracy': 0.6267848014831543, 'loss': 0.6598171591758728, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6582053899765015}), (489061, {'accuracy': 0.6265864372253418, 'loss': 0.6591244339942932, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6578533053398132}), (489061, {'accuracy': 0.628232479095459, 'loss': 0.6578821539878845, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6575909852981567})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 32ms/step - accuracy: 0.3291 - loss: 0.8065
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 30ms/step


INFO flwr 2024-07-20 17:05:35,955 | 589012468.py:68 | ==>> scores: {'accuracy': 0.32964229936815737, 'recall': 0.32964229936815737, 'precision': 0.48259058533582505, 'f1s': 0.18417565826665444, 'FPR': 0.9804385141043496, 'FNR': 0.031714460702569294, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.56      0.02      0.04    339647\n           1       0.32      0.97      0.49    164909\n\n    accuracy                           0.33    504556\n   macro avg       0.44      0.49      0.26    504556\nweighted avg       0.48      0.33      0.18    504556\n'}
INFO flwr 2024-07-20 17:05:35,965 | server.py:125 | fit progress: (6, 0.8063144087791443, {'accuracy': 0.32964229583740234, 'f1s': 0.18417565826665444, 'FPR': 0.9804385141043496, 'FNR': 0.031714460702569294}, 1614.7280775001273)
INFO flwr 2024-07-20 17:05:35,966 | server.py:171 | evaluate_round 6: no clients selected, cancel
DEBUG flwr 2024-07-20 17:05:35,968 | server.py:222 | fit_round 7: str

==>> weighted_average: [(489061, {'accuracy': 0.6282222270965576, 'loss': 0.6568542122840881, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6573461294174194}), (489061, {'accuracy': 0.6282549500465393, 'loss': 0.6576483845710754, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6608986258506775}), (489061, {'accuracy': 0.6256887912750244, 'loss': 0.6604032516479492, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6571058034896851}), (489061, {'accuracy': 0.6269177198410034, 'loss': 0.6574656367301941, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6573666334152222})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 32ms/step - accuracy: 0.3371 - loss: 0.7924
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 30ms/step


INFO flwr 2024-07-20 17:10:38,446 | 589012468.py:68 | ==>> scores: {'accuracy': 0.33750069367919516, 'recall': 0.33750069367919516, 'precision': 0.5479426668170104, 'f1s': 0.20206495640520417, 'FPR': 0.9666359484994715, 'FNR': 0.036098696857054496, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.66      0.03      0.06    339647\n           1       0.33      0.96      0.49    164909\n\n    accuracy                           0.34    504556\n   macro avg       0.49      0.50      0.28    504556\nweighted avg       0.55      0.34      0.20    504556\n'}
INFO flwr 2024-07-20 17:10:38,452 | server.py:125 | fit progress: (7, 0.792189359664917, {'accuracy': 0.3375006914138794, 'f1s': 0.20206495640520417, 'FPR': 0.9666359484994715, 'FNR': 0.036098696857054496}, 1917.2135907001793)
INFO flwr 2024-07-20 17:10:38,453 | server.py:171 | evaluate_round 7: no clients selected, cancel
DEBUG flwr 2024-07-20 17:10:38,455 | server.py:222 | fit_round 8: strate

==>> weighted_average: [(489061, {'accuracy': 0.6285187602043152, 'loss': 0.6565228700637817, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6588783860206604}), (489061, {'accuracy': 0.6282263398170471, 'loss': 0.6569130420684814, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6575798988342285}), (489061, {'accuracy': 0.6269177198410034, 'loss': 0.6571718454360962, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6571597456932068}), (489061, {'accuracy': 0.626310408115387, 'loss': 0.6580455303192139, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6693305969238281})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 30ms/step - accuracy: 0.3708 - loss: 0.7626
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 28ms/step


INFO flwr 2024-07-20 17:15:13,043 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3714989020049311, 'recall': 0.3714989020049311, 'precision': 0.6373076757092239, 'f1s': 0.2727507036070743, 'FPR': 0.9083195199722064, 'FNR': 0.052186357324342514, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.78      0.09      0.16    339647\n           1       0.34      0.95      0.50    164909\n\n    accuracy                           0.37    504556\n   macro avg       0.56      0.52      0.33    504556\nweighted avg       0.64      0.37      0.27    504556\n'}
INFO flwr 2024-07-20 17:15:13,051 | server.py:125 | fit progress: (8, 0.7624481916427612, {'accuracy': 0.3714989125728607, 'f1s': 0.2727507036070743, 'FPR': 0.9083195199722064, 'FNR': 0.052186357324342514}, 2191.8107431996614)
INFO flwr 2024-07-20 17:15:13,053 | server.py:171 | evaluate_round 8: no clients selected, cancel
DEBUG flwr 2024-07-20 17:15:13,054 | server.py:222 | fit_round 9: strategy 

==>> weighted_average: [(489061, {'accuracy': 0.6285392045974731, 'loss': 0.6565057039260864, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6586950421333313}), (489061, {'accuracy': 0.6274268627166748, 'loss': 0.657608687877655, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6592440009117126}), (489061, {'accuracy': 0.6281057000160217, 'loss': 0.656963586807251, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6576849222183228}), (489061, {'accuracy': 0.6262919902801514, 'loss': 0.6590524911880493, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6590906381607056})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 34ms/step - accuracy: 0.3277 - loss: 0.8098
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 33ms/step


INFO flwr 2024-07-20 17:20:22,846 | 589012468.py:68 | ==>> scores: {'accuracy': 0.32804683721925815, 'recall': 0.32804683721925815, 'precision': 0.4602698742206142, 'f1s': 0.18148668593360143, 'FPR': 0.9823139907021113, 'FNR': 0.03273320437332104, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.53      0.02      0.03    339647\n           1       0.32      0.97      0.48    164909\n\n    accuracy                           0.33    504556\n   macro avg       0.43      0.49      0.26    504556\nweighted avg       0.46      0.33      0.18    504556\n'}
INFO flwr 2024-07-20 17:20:22,857 | server.py:125 | fit progress: (9, 0.8096414804458618, {'accuracy': 0.3280468285083771, 'f1s': 0.18148668593360143, 'FPR': 0.9823139907021113, 'FNR': 0.03273320437332104}, 2501.614414599724)
INFO flwr 2024-07-20 17:20:22,859 | server.py:171 | evaluate_round 9: no clients selected, cancel
DEBUG flwr 2024-07-20 17:20:22,861 | server.py:222 | fit_round 10: strateg

==>> weighted_average: [(489061, {'accuracy': 0.6283735632896423, 'loss': 0.6568342447280884, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6586505770683289}), (489061, {'accuracy': 0.6266457438468933, 'loss': 0.6574130058288574, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6561108827590942}), (489061, {'accuracy': 0.627670168876648, 'loss': 0.6568827629089355, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6578521132469177}), (489061, {'accuracy': 0.6268298029899597, 'loss': 0.6572994589805603, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6577903032302856})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 34ms/step - accuracy: 0.3326 - loss: 0.8001
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 33ms/step


INFO flwr 2024-07-20 17:25:17,400 | 589012468.py:68 | ==>> scores: {'accuracy': 0.33313051474960165, 'recall': 0.33313051474960165, 'precision': 0.5157741585511868, 'f1s': 0.19273565038869747, 'FPR': 0.9737845468972197, 'FNR': 0.03474643591313997, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.61      0.03      0.05    339647\n           1       0.32      0.97      0.49    164909\n\n    accuracy                           0.33    504556\n   macro avg       0.47      0.50      0.27    504556\nweighted avg       0.52      0.33      0.19    504556\n'}
INFO flwr 2024-07-20 17:25:17,407 | server.py:125 | fit progress: (10, 0.799905002117157, {'accuracy': 0.33313050866127014, 'f1s': 0.19273565038869747, 'FPR': 0.9737845468972197, 'FNR': 0.03474643591313997}, 2796.1622517993674)
INFO flwr 2024-07-20 17:25:17,408 | server.py:171 | evaluate_round 10: no clients selected, cancel
DEBUG flwr 2024-07-20 17:25:17,409 | server.py:222 | fit_round 11: stra

==>> weighted_average: [(489061, {'accuracy': 0.6314365863800049, 'loss': 0.6540824770927429, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6589295268058777}), (489061, {'accuracy': 0.6285228133201599, 'loss': 0.6561875343322754, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6589432954788208}), (489061, {'accuracy': 0.6266109943389893, 'loss': 0.6571946144104004, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6583927273750305}), (489061, {'accuracy': 0.6266171336174011, 'loss': 0.6571184992790222, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.65781170129776})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 34ms/step - accuracy: 0.3400 - loss: 0.7945
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 32ms/step


INFO flwr 2024-07-20 17:30:13,751 | 589012468.py:68 | ==>> scores: {'accuracy': 0.34040621853669367, 'recall': 0.34040621853669367, 'precision': 0.5699268360279601, 'f1s': 0.20698726770463285, 'FPR': 0.9630793147002623, 'FNR': 0.034534197648400024, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.69      0.04      0.07    339647\n           1       0.33      0.97      0.49    164909\n\n    accuracy                           0.34    504556\n   macro avg       0.51      0.50      0.28    504556\nweighted avg       0.57      0.34      0.21    504556\n'}
INFO flwr 2024-07-20 17:30:13,759 | server.py:125 | fit progress: (11, 0.7944086790084839, {'accuracy': 0.340406209230423, 'f1s': 0.20698726770463285, 'FPR': 0.9630793147002623, 'FNR': 0.034534197648400024}, 3092.513742899522)
INFO flwr 2024-07-20 17:30:13,760 | server.py:171 | evaluate_round 11: no clients selected, cancel
DEBUG flwr 2024-07-20 17:30:13,763 | server.py:222 | fit_round 12: stra

==>> weighted_average: [(489061, {'accuracy': 0.6268113851547241, 'loss': 0.6570237278938293, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6558727025985718}), (489061, {'accuracy': 0.6282529234886169, 'loss': 0.656402051448822, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6568969488143921}), (489061, {'accuracy': 0.6284042000770569, 'loss': 0.6571499705314636, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6595873832702637}), (489061, {'accuracy': 0.6268625259399414, 'loss': 0.6570959091186523, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6568119525909424})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 34ms/step - accuracy: 0.3435 - loss: 0.7948
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 32ms/step


INFO flwr 2024-07-20 17:35:11,924 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3439697476593282, 'recall': 0.3439697476593282, 'precision': 0.5899181532082431, 'f1s': 0.2137803050347138, 'FPR': 0.9579298506979305, 'FNR': 0.0342370640777641, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.72      0.04      0.08    339647\n           1       0.33      0.97      0.49    164909\n\n    accuracy                           0.34    504556\n   macro avg       0.52      0.50      0.28    504556\nweighted avg       0.59      0.34      0.21    504556\n'}
INFO flwr 2024-07-20 17:35:11,934 | server.py:125 | fit progress: (12, 0.7945657968521118, {'accuracy': 0.34396976232528687, 'f1s': 0.2137803050347138, 'FPR': 0.9579298506979305, 'FNR': 0.0342370640777641}, 3390.6860026000068)
INFO flwr 2024-07-20 17:35:11,935 | server.py:171 | evaluate_round 12: no clients selected, cancel
DEBUG flwr 2024-07-20 17:35:11,937 | server.py:222 | fit_round 13: strategy 

==>> weighted_average: [(489061, {'accuracy': 0.6275781393051147, 'loss': 0.6572146415710449, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6590206623077393}), (489061, {'accuracy': 0.6282529234886169, 'loss': 0.6563974022865295, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6571382880210876}), (489061, {'accuracy': 0.6267275214195251, 'loss': 0.6573198437690735, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.656208872795105}), (489061, {'accuracy': 0.6256744861602783, 'loss': 0.6575882434844971, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6568317413330078})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 32ms/step - accuracy: 0.3466 - loss: 0.7922
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 31ms/step


INFO flwr 2024-07-20 17:40:05,403 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3473112994395072, 'recall': 0.3473112994395072, 'precision': 0.604933935906323, 'f1s': 0.2202741476433371, 'FPR': 0.9529217098929182, 'FNR': 0.03432802333408122, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.74      0.05      0.09    339647\n           1       0.33      0.97      0.49    164909\n\n    accuracy                           0.35    504556\n   macro avg       0.53      0.51      0.29    504556\nweighted avg       0.60      0.35      0.22    504556\n'}
INFO flwr 2024-07-20 17:40:05,410 | server.py:125 | fit progress: (13, 0.7919764518737793, {'accuracy': 0.3473112881183624, 'f1s': 0.2202741476433371, 'FPR': 0.9529217098929182, 'FNR': 0.03432802333408122}, 3684.1616011997685)
INFO flwr 2024-07-20 17:40:05,412 | server.py:171 | evaluate_round 13: no clients selected, cancel
DEBUG flwr 2024-07-20 17:40:05,414 | server.py:222 | fit_round 14: strategy 

==>> weighted_average: [(489061, {'accuracy': 0.6269177198410034, 'loss': 0.6568255424499512, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6567130088806152}), (489061, {'accuracy': 0.6284164786338806, 'loss': 0.6561604142189026, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6585681438446045}), (489061, {'accuracy': 0.626797080039978, 'loss': 0.656894326210022, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6558775305747986}), (489061, {'accuracy': 0.6272203326225281, 'loss': 0.6572998762130737, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6569864153862})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 32ms/step - accuracy: 0.3423 - loss: 0.7982
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 30ms/step


INFO flwr 2024-07-20 17:44:52,709 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3427686916813991, 'recall': 0.3427686916813991, 'precision': 0.5800381984111169, 'f1s': 0.21222330302730139, 'FPR': 0.9589397227121099, 'FNR': 0.035831883038524276, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.70      0.04      0.08    339647\n           1       0.33      0.96      0.49    164909\n\n    accuracy                           0.34    504556\n   macro avg       0.52      0.50      0.28    504556\nweighted avg       0.58      0.34      0.21    504556\n'}
INFO flwr 2024-07-20 17:44:52,716 | server.py:125 | fit progress: (14, 0.7979128360748291, {'accuracy': 0.34276869893074036, 'f1s': 0.21222330302730139, 'FPR': 0.9589397227121099, 'FNR': 0.035831883038524276}, 3971.465792399831)
INFO flwr 2024-07-20 17:44:52,718 | server.py:171 | evaluate_round 14: no clients selected, cancel
DEBUG flwr 2024-07-20 17:44:52,720 | server.py:222 | fit_round 15: stra

==>> weighted_average: [(489061, {'accuracy': 0.6285759806632996, 'loss': 0.6560025215148926, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6586147546768188}), (489061, {'accuracy': 0.6267745494842529, 'loss': 0.6568876504898071, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6558942198753357}), (489061, {'accuracy': 0.6279094219207764, 'loss': 0.6562786102294922, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6575412154197693}), (489061, {'accuracy': 0.6282079219818115, 'loss': 0.656191349029541, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6569558382034302})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 30ms/step - accuracy: 0.6383 - loss: 0.6421
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 27ms/step


INFO flwr 2024-07-20 17:49:30,427 | 589012468.py:68 | ==>> scores: {'accuracy': 0.637620006500765, 'recall': 0.637620006500765, 'precision': 0.4570716821081682, 'f1s': 0.5264088611457926, 'FPR': 0.05483634479327066, 'FNR': 0.995797682358149, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.66      0.95      0.78    339647\n           1       0.04      0.00      0.01    164909\n\n    accuracy                           0.64    504556\n   macro avg       0.35      0.47      0.39    504556\nweighted avg       0.46      0.64      0.53    504556\n'}
INFO flwr 2024-07-20 17:49:30,433 | server.py:125 | fit progress: (15, 0.6429798603057861, {'accuracy': 0.6376200318336487, 'f1s': 0.5264088611457926, 'FPR': 0.05483634479327066, 'FNR': 0.995797682358149}, 4249.182703599334)
INFO flwr 2024-07-20 17:49:30,435 | server.py:171 | evaluate_round 15: no clients selected, cancel
DEBUG flwr 2024-07-20 17:49:30,437 | server.py:222 | fit_round 16: strategy samp

==>> weighted_average: [(489061, {'accuracy': 0.6282222270965576, 'loss': 0.656237781047821, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6570570468902588}), (489061, {'accuracy': 0.635654866695404, 'loss': 0.6494711637496948, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6582909822463989}), (489061, {'accuracy': 0.6287273168563843, 'loss': 0.6559593081474304, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6583269834518433}), (489061, {'accuracy': 0.6269729137420654, 'loss': 0.6567373871803284, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6568344235420227})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 30ms/step - accuracy: 0.4050 - loss: 0.7521
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 28ms/step


INFO flwr 2024-07-20 17:54:16,070 | 589012468.py:68 | ==>> scores: {'accuracy': 0.4050551375863135, 'recall': 0.4050551375863135, 'precision': 0.6536741876164732, 'f1s': 0.3384255693947583, 'FPR': 0.8460430976867159, 'FNR': 0.07778229205198019, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.80      0.15      0.26    339647\n           1       0.35      0.92      0.50    164909\n\n    accuracy                           0.41    504556\n   macro avg       0.57      0.54      0.38    504556\nweighted avg       0.65      0.41      0.34    504556\n'}
INFO flwr 2024-07-20 17:54:16,079 | server.py:125 | fit progress: (16, 0.7521717548370361, {'accuracy': 0.40505513548851013, 'f1s': 0.3384255693947583, 'FPR': 0.8460430976867159, 'FNR': 0.07778229205198019}, 4534.826159299351)
INFO flwr 2024-07-20 17:54:16,080 | server.py:171 | evaluate_round 16: no clients selected, cancel
DEBUG flwr 2024-07-20 17:54:16,082 | server.py:222 | fit_round 17: strategy

==>> weighted_average: [(489061, {'accuracy': 0.6285392045974731, 'loss': 0.6559134721755981, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6582068204879761}), (489061, {'accuracy': 0.6268113851547241, 'loss': 0.6568102836608887, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6559257507324219}), (489061, {'accuracy': 0.6268808841705322, 'loss': 0.6566444635391235, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6582539677619934}), (489061, {'accuracy': 0.628038227558136, 'loss': 0.6564841866493225, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6570178866386414})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 31ms/step - accuracy: 0.3355 - loss: 0.8052
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 30ms/step


INFO flwr 2024-07-20 17:58:54,251 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3359646897470251, 'recall': 0.3359646897470251, 'precision': 0.5422775772114186, 'f1s': 0.19769645134963973, 'FPR': 0.9702131919316231, 'FNR': 0.0334305586717523, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.65      0.03      0.06    339647\n           1       0.33      0.97      0.49    164909\n\n    accuracy                           0.34    504556\n   macro avg       0.49      0.50      0.27    504556\nweighted avg       0.54      0.34      0.20    504556\n'}
INFO flwr 2024-07-20 17:58:54,262 | server.py:125 | fit progress: (17, 0.8049675822257996, {'accuracy': 0.3359646797180176, 'f1s': 0.19769645134963973, 'FPR': 0.9702131919316231, 'FNR': 0.0334305586717523}, 4813.007705499418)
INFO flwr 2024-07-20 17:58:54,263 | server.py:171 | evaluate_round 17: no clients selected, cancel
DEBUG flwr 2024-07-20 17:58:54,267 | server.py:222 | fit_round 18: strategy 

==>> weighted_average: [(489061, {'accuracy': 0.6285392045974731, 'loss': 0.6558826565742493, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6581002473831177}), (489061, {'accuracy': 0.6268113851547241, 'loss': 0.6567288637161255, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6558791399002075}), (489061, {'accuracy': 0.6271446943283081, 'loss': 0.6567303538322449, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6567727327346802}), (489061, {'accuracy': 0.6282529234886169, 'loss': 0.6561551690101624, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6569808721542358})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 31ms/step - accuracy: 0.3455 - loss: 0.7890
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 29ms/step


INFO flwr 2024-07-20 18:03:32,112 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3460844782343288, 'recall': 0.3460844782343288, 'precision': 0.5948963990895255, 'f1s': 0.21885042787018127, 'FPR': 0.9537902587097781, 'FNR': 0.036292743270531024, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.72      0.05      0.09    339647\n           1       0.33      0.96      0.49    164909\n\n    accuracy                           0.35    504556\n   macro avg       0.53      0.50      0.29    504556\nweighted avg       0.59      0.35      0.22    504556\n'}
INFO flwr 2024-07-20 18:03:32,122 | server.py:125 | fit progress: (18, 0.7887288331985474, {'accuracy': 0.34608447551727295, 'f1s': 0.21885042787018127, 'FPR': 0.9537902587097781, 'FNR': 0.036292743270531024}, 5090.865696299821)
INFO flwr 2024-07-20 18:03:32,124 | server.py:171 | evaluate_round 18: no clients selected, cancel
DEBUG flwr 2024-07-20 18:03:32,125 | server.py:222 | fit_round 19: stra

==>> weighted_average: [(489061, {'accuracy': 0.628177285194397, 'loss': 0.6561570167541504, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.6572343707084656}), (489061, {'accuracy': 0.6280218362808228, 'loss': 0.6565576791763306, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6592389941215515}), (489061, {'accuracy': 0.6268236637115479, 'loss': 0.6566319465637207, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.6573600172996521}), (489061, {'accuracy': 0.6267704963684082, 'loss': 0.6568250060081482, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6560644507408142})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 30ms/step - accuracy: 0.3411 - loss: 0.7879
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 30ms/step


INFO flwr 2024-07-20 18:08:16,151 | 589012468.py:68 | ==>> scores: {'accuracy': 0.3415616898817971, 'recall': 0.3415616898817971, 'precision': 0.566462492079808, 'f1s': 0.2114868326542728, 'FPR': 0.9591252094085919, 'FNR': 0.03914279996846746, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.68      0.04      0.08    339647\n           1       0.33      0.96      0.49    164909\n\n    accuracy                           0.34    504556\n   macro avg       0.50      0.50      0.28    504556\nweighted avg       0.57      0.34      0.21    504556\n'}
INFO flwr 2024-07-20 18:08:16,159 | server.py:125 | fit progress: (19, 0.7876049280166626, {'accuracy': 0.3415616750717163, 'f1s': 0.2114868326542728, 'FPR': 0.9591252094085919, 'FNR': 0.03914279996846746}, 5374.901230399497)
INFO flwr 2024-07-20 18:08:16,161 | server.py:171 | evaluate_round 19: no clients selected, cancel
DEBUG flwr 2024-07-20 18:08:16,162 | server.py:222 | fit_round 20: strategy s

==>> weighted_average: [(489061, {'accuracy': 0.6288745403289795, 'loss': 0.6560693979263306, 'val_accuracy': 0.6256049871444702, 'val_loss': 0.6581478118896484}), (489061, {'accuracy': 0.6282570362091064, 'loss': 0.6561583280563354, 'val_accuracy': 0.6265802979469299, 'val_loss': 0.656743586063385}), (489061, {'accuracy': 0.6268113851547241, 'loss': 0.6568168997764587, 'val_accuracy': 0.6279052495956421, 'val_loss': 0.6557520031929016}), (489061, {'accuracy': 0.6273328065872192, 'loss': 0.6565094590187073, 'val_accuracy': 0.6266723275184631, 'val_loss': 0.656732976436615})]
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 31ms/step - accuracy: 0.3464 - loss: 0.7957
[1m1971/1971[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 30ms/step


INFO flwr 2024-07-20 18:13:14,579 | 589012468.py:68 | ==>> scores: {'accuracy': 0.346994188950285, 'recall': 0.346994188950285, 'precision': 0.5997366725397691, 'f1s': 0.22041970252878684, 'FPR': 0.9526213980986141, 'FNR': 0.03591677834442025, 'class_report': '              precision    recall  f1-score   support\n\n           0       0.73      0.05      0.09    339647\n           1       0.33      0.96      0.49    164909\n\n    accuracy                           0.35    504556\n   macro avg       0.53      0.51      0.29    504556\nweighted avg       0.60      0.35      0.22    504556\n'}
INFO flwr 2024-07-20 18:13:14,586 | server.py:125 | fit progress: (20, 0.7953880429267883, {'accuracy': 0.34699419140815735, 'f1s': 0.22041970252878684, 'FPR': 0.9526213980986141, 'FNR': 0.03591677834442025}, 5673.3258905000985)
INFO flwr 2024-07-20 18:13:14,588 | server.py:171 | evaluate_round 20: no clients selected, cancel
INFO flwr 2024-07-20 18:13:14,589 | server.py:153 | FL finished in 5673.32

In [17]:
print(f"==>> history: {history}")
print(f"==>> end of history")

==>> history: History (loss, centralized):
	round 0: 3.124509572982788
	round 1: 0.8000199794769287
	round 2: 0.8274058103561401
	round 3: 0.7539950609207153
	round 4: 0.8613170981407166
	round 5: 0.8377032279968262
	round 6: 0.8063144087791443
	round 7: 0.792189359664917
	round 8: 0.7624481916427612
	round 9: 0.8096414804458618
	round 10: 0.799905002117157
	round 11: 0.7944086790084839
	round 12: 0.7945657968521118
	round 13: 0.7919764518737793
	round 14: 0.7979128360748291
	round 15: 0.6429798603057861
	round 16: 0.7521717548370361
	round 17: 0.8049675822257996
	round 18: 0.7887288331985474
	round 19: 0.7876049280166626
	round 20: 0.7953880429267883
History (metrics, distributed, fit):
{'accuracy': [(1, 0.6265915781259537), (2, 0.6177526861429214), (3, 0.6249859482049942), (4, 0.6138375401496887), (5, 0.6122743338346481), (6, 0.6274120211601257), (7, 0.6272709220647812), (8, 0.6274933069944382), (9, 0.6275909394025803), (10, 0.6273798197507858), (11, 0.6282968819141388), (12, 0.62758

In [18]:
# creating the directories if they don't exist
if not os.path.isdir('./results'):
    os.mkdir('./results')

# creating the directories if they don't exist
if not os.path.isdir('./results/{}'.format(dtime)):
    os.mkdir('./results/{}'.format(dtime))

# if not os.path.isdir('./results/{}'.format(dataset_name)):
#     os.mkdir('./results/{}'.format(dataset_name))

class NumpyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NumpyEncoder, self).default(obj)

filename = ('./results/{}/baseline.json'.format(dtime))
outfile = open(filename, 'w')
outfile.writelines(json.dumps(results, cls=NumpyEncoder))
outfile.close()

# Centralities - DiGraph

In [20]:
test

Unnamed: 0,Flow ID,Src IP,Src Port,Dst IP,Dst Port,Protocol,Timestamp,Flow Duration,Tot Fwd Pkts,Tot Bwd Pkts,...,src_closeness,dst_closeness,src_pagerank,dst_pagerank,src_k_core,dst_k_core,src_k_truss,dst_k_truss,pca_1,pca_2
990077,192.168.10.3-192.168.10.15-53-49562-17,192.168.10.15,49562.0,192.168.10.3,53.0,17.0,6/7/2017 9:14,287.0,2.0,2.0,...,0.269812,0.330023,0.036407,0.000414,1.000000,1.000000,0.998033,0.019672,0.804361,1.035832
1524372,172.16.0.1-192.168.10.50-41262-80-6,172.16.0.1,41262.0,192.168.10.50,80.0,6.0,5/7/2017 10:52,98685922.0,9.0,5.0,...,0.242793,0.330141,0.000052,0.000454,0.565217,1.000000,0.006557,0.035410,-0.474735,-1.353992
775503,192.168.10.15-199.102.234.32-50685-443-6,199.102.234.32,443.0,192.168.10.15,50685.0,6.0,7/7/2017 9:44,3.0,2.0,0.0,...,0.245913,0.269812,0.000071,0.036407,0.434783,1.000000,0.003934,0.998033,-3.171998,2.284859
83047,192.168.10.5-69.16.175.42-51022-80-6,192.168.10.5,51022.0,69.16.175.42,80.0,6.0,5/7/2017 10:51,82.0,2.0,0.0,...,0.276961,0.310900,0.044805,0.000102,1.000000,0.695652,1.000000,0.006557,2.696235,1.922592
845324,192.168.10.3-192.168.10.15-53-54495-17,192.168.10.15,54495.0,192.168.10.3,53.0,17.0,03/07/2017 11:32:17,30679.0,4.0,2.0,...,0.269812,0.330023,0.036407,0.000414,1.000000,1.000000,0.998033,0.019672,0.804361,1.035832
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1210570,172.16.0.1-192.168.10.50-58284-80-6,172.16.0.1,58284.0,192.168.10.50,80.0,6.0,5/7/2017 11:01,31606378.0,2.0,2.0,...,0.242793,0.330141,0.000052,0.000454,0.565217,1.000000,0.006557,0.035410,-0.474735,-1.353992
578917,192.168.10.8-192.168.10.14-33265-6668-6,192.168.10.8,33265.0,192.168.10.14,6668.0,6.0,6/7/2017 3:19,48.0,2.0,0.0,...,0.355021,0.270038,0.033772,0.036593,1.000000,1.000000,1.000000,0.025574,-2.095889,3.381614
98330,192.168.10.3-192.168.10.16-53-10134-17,192.168.10.16,10134.0,192.168.10.3,53.0,17.0,4/7/2017 10:52,64770.0,2.0,2.0,...,0.279784,0.330023,0.036047,0.000414,1.000000,1.000000,0.489836,0.019672,0.699324,0.893982
1178177,172.16.0.1-192.168.10.50-38556-80-6,172.16.0.1,38556.0,192.168.10.50,80.0,6.0,7/7/2017 4:13,1286154.0,5.0,0.0,...,0.242793,0.330141,0.000052,0.000454,0.565217,1.000000,0.006557,0.035410,-0.474735,-1.353992


In [19]:
test = pd.read_parquet(folder_path + "test.parquet")

if cfg.multi_class:
    test[dataset.label_col] = test[dataset.class_num_col]
    
# test.drop(["src_degree", "dst_degree", "src_betweenness", "dst_betweenness", "src_pagerank", "dst_pagerank"], axis=1, inplace=True)
test.drop(["src_multidigraph_degree", "dst_multidigraph_degree", "src_multidigraph_betweenness", "dst_multidigraph_betweenness", "src_multidigraph_pagerank", "dst_multidigraph_pagerank"], axis=1, inplace=True)

if not cfg.multi_class:
    test_by_class = {}
    classes = test[dataset.class_col].unique()
    for class_value in classes:
        test_class = test[test[dataset.class_col] == class_value].copy()
        test_class.drop(dataset.drop_columns, axis=1, inplace=True)
        test_class.drop(dataset.weak_columns, axis=1, inplace=True)
        test_class.reset_index(drop=True, inplace=True)

        test_class_labels = test_class[dataset.label_col].to_numpy()
        test_class = test_class.drop([dataset.label_col], axis=1).to_numpy()

        test_by_class[class_value] = (test_class, test_class_labels)
    
    
test.drop(dataset.drop_columns, axis=1, inplace=True)
test.drop(dataset.weak_columns, axis=1, inplace=True)
test.reset_index(drop=True, inplace=True)
    
test_labels = test[dataset.label_col].to_numpy()
test = test.drop([dataset.label_col], axis=1).to_numpy()
input_dim = test.shape[1]

client_data = []
for client_path in clients_paths:
    client_data.append(pd.read_parquet(client_path))
    
for i in range(len(client_data)):
    
    cdata = client_data[i]
    
    if cfg.multi_class:
        cdata[dataset.label_col] = cdata[dataset.class_num_col]
        
    # cdata.drop(["src_degree", "dst_degree", "src_betweenness", "dst_betweenness", "src_pagerank", "dst_pagerank"], axis=1, inplace=True)
    cdata.drop(["src_multidigraph_degree", "dst_multidigraph_degree", "src_multidigraph_betweenness", "dst_multidigraph_betweenness", "src_multidigraph_pagerank", "dst_multidigraph_pagerank"], axis=1, inplace=True)

    cdata.drop(dataset.drop_columns, axis=1, inplace=True)
    cdata.drop(dataset.weak_columns, axis=1, inplace=True)
    cdata.reset_index(drop=True, inplace=True)
    
    c_train, c_test = train_test_split(cdata, test_size=0.1)

    y_train = c_train[dataset.label_col].to_numpy()
    x_train = c_train.drop([dataset.label_col], axis=1).to_numpy()
    y_test = c_test[dataset.label_col].to_numpy()
    x_test = c_test.drop([dataset.label_col], axis=1).to_numpy()
    
    client_data[i] = (x_train, y_train, x_test, y_test)

KeyError: "['src_multidigraph_degree', 'dst_multidigraph_degree', 'src_multidigraph_betweenness', 'dst_multidigraph_betweenness', 'src_multidigraph_pagerank', 'dst_multidigraph_pagerank'] not found in axis"

In [None]:
results = {}  # a dictionary that will contain all the options and results of models
# add all options to the results dictionary, to know what options selected for obtained results
results["configuration"] = "2dt - Centralities - DiGraph"
results["dtime"] = time.strftime("%Y%m%d-%H%M%S")
results["multi_class"] = cfg.multi_class
results["learning_rate"] = learning_rate
results["dataset_name"] = dataset.name
results["num_classes"] = num_classes
results["labels_names"] = labels_names
results["input_dim"] = input_dim

results["scores"] = {}
results["scores"]["server"] = {}
results["scores"]["clients"] = {}
results["scores"]["accuracy"] = {}
results["scores"]["f1s"] = {}

if not cfg.multi_class:
    results["scores"]["test_by_class"] = {}
    results["scores"]["test_by_class"]["accuracy"] = {}
    results["scores"]["test_by_class"]["f1s"] = {}
    for k in test_by_class.keys():
        results["scores"]["test_by_class"]["length"] = len(test_by_class[k][0])
        results["scores"]["test_by_class"]["accuracy"][k] = {}   
        results["scores"]["test_by_class"]["f1s"][k] = {}    
        
results

In [None]:
model = create_keras_model(input_dim)
model.summary()

In [None]:

class FLClient(fl.client.NumPyClient):
    def __init__(self, cid, x_train, y_train, x_test, y_test):
        self.cid = cid
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.model = create_keras_model(input_shape=input_dim)

    def get_parameters(self, config):
        return self.model.get_weights()

    def set_parameters(self, parameters, config):
        self.model.set_weights(parameters)

    def fit(self, parameters, config):
        
        lr=float(config["lr"])
        # self.model = create_keras_model(input_shape= self.x_train.shape[1], alpha=lr)
        self.model = create_keras_model(input_shape=input_dim, alpha=lr)
        # log(INFO, f"==>> config: {config}")
        # log(INFO, f"==>> float(config[lr]): {lr}")
        self.set_parameters(parameters, config)

        
        logdir = "logs/scalars/{}/digraph/client_{}".format(dtime, self.cid)
        tensorboard_callback = callbacks.TensorBoard(log_dir=logdir)

        history = self.model.fit(self.x_train, self.y_train,
                                 epochs=config["local_epochs"],
                                 batch_size=config["batch_size"],
                                 validation_data=(self.x_test, self.y_test),
                                 verbose=0,
                                 callbacks=[tensorboard_callback])

        return self.get_parameters(config), len(self.x_train), {k: v[-1] for k, v in history.history.items()}


    def evaluate(self, parameters, config):
        self.set_parameters(parameters, config)
        loss, accuracy = self.model.evaluate(self.x_test, self.y_test, cfg.config_fit.batch_size, verbose=0)
        return loss, len(self.x_test), {"accuracy": accuracy}


In [None]:
def generate_client_fn():
    def client_fn(cid: str):
        i = int(cid)
        return FLClient(cid, client_data[i][0], client_data[i][1], client_data[i][2], client_data[i][3]).to_client()

    return client_fn

In [None]:
def get_on_fit_config(config: DictConfig):

    def fit_config_fn(server_round: int):
        alpha = learning_rate
        if server_round > 5:
            alpha = alpha / (1 + 0.5 * server_round)


        return {
            "lr": alpha,
            "local_epochs": config.local_epochs,
            "batch_size": config.batch_size,
        }

    return fit_config_fn


def get_evaluate_fn(x_test_sever, y_test_server):

    def evaluate_fn(server_round: int, parameters, config):
        # eval_model = model
        eval_model = create_keras_model(input_shape=input_dim)
        eval_model.set_weights(parameters)

        
        logdir = "logs/scalars/{}/digraph/server".format(dtime) 
        # logdir = "logs/scalars/client{}_".format(config["cid"]) + datetime.now().strftime("%Y%m%d-%H%M%S")
        tensorboard_callback = callbacks.TensorBoard(log_dir=logdir)

        test_loss, test_acc = eval_model.evaluate(x_test_sever, y_test_server,
                                                  batch_size = cfg.config_fit.batch_size,
                                                  callbacks=[tensorboard_callback])
        
        
        y_pred = eval_model.predict(x_test_sever, batch_size = cfg.config_fit.batch_size)
        
        if cfg.multi_class:
            y_pred = np.argmax(y_pred, axis=1)
            scores = custom_acc_mc(y_test_server, y_pred)
        else:
            y_pred = np.transpose(y_pred)[0]
            y_pred = list(
                map(lambda x: 0 if x < 0.5 else 1, y_pred))
            scores = custom_acc_binary(y_test_server, y_pred)
        
        
        results["scores"]["accuracy"][server_round] = test_acc
        results["scores"]["f1s"][server_round] = scores["f1s"]
        results["scores"]["server"][server_round] = scores
        
        
        results["scores"]["accuracy"][server_round] = test_acc
        results["scores"]["f1s"][server_round] = scores["f1s"]
        results["scores"]["server"][server_round] = scores
        
        results_final["centralities - DiGraph"]["accuracy"][server_round] = scores["accuracy"]
        results_final["centralities - DiGraph"]["f1s"][server_round] = scores["f1s"]
        
        if not cfg.multi_class:
            for k in test_by_class.keys():
                y_pred_class = eval_model.predict(test_by_class[k][0], batch_size = cfg.config_fit.batch_size, verbose = 0)
                y_pred_class = np.transpose(y_pred_class)[0]
                y_pred_class = list(map(lambda x: 0 if x < 0.5 else 1, y_pred_class))
                scores_class = custom_acc_binary(test_by_class[k][1], y_pred_class)
                results["scores"]["test_by_class"]["accuracy"][k][server_round] = scores_class["accuracy"]
                results["scores"]["test_by_class"]["f1s"][k][server_round] = scores_class["f1s"]
                
        log(INFO, f"==>> scores: {scores}")
        
        
        return test_loss, {"accuracy": test_acc, "f1s": scores["f1s"], "FPR": scores["FPR"], "FNR": scores["FNR"]}

    return evaluate_fn


In [None]:
def weighted_average(metrics):
    print(f"==>> weighted_average: {metrics}")

    total_examples = 0
    federated_metrics = {k: 0 for k in metrics[0][1].keys()}
    for num_examples, m in metrics:
        for k, v in m.items():
            federated_metrics[k] += num_examples * v
        total_examples += num_examples
    return {k: v / total_examples for k, v in federated_metrics.items()}

strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,  # in simulation, since all clients are available at all times, we can just use `min_fit_clients` to control exactly how many clients we want to involve during fit
    min_fit_clients=len(client_data),  # number of clients to sample for fit()
    fraction_evaluate=0.0,  # similar to fraction_fit, we don't need to use this argument.
    min_evaluate_clients=0,  # number of clients to sample for evaluate()
    min_available_clients=len(client_data),  # total clients in the simulation
    fit_metrics_aggregation_fn = weighted_average,
    # evaluate_metrics_aggregation_fn = weighted_average,
    on_fit_config_fn=get_on_fit_config(
        cfg.config_fit
    ),  # a function to execute to obtain the configuration to send to the clients during fit()
    evaluate_fn=get_evaluate_fn(test, test_labels),
)  # a function to run on the server side to evaluate the global model.


In [None]:
import multiprocessing
from math import floor
history = fl.simulation.start_simulation(
    client_fn=generate_client_fn(),  # a function that spawns a particular client
    # num_clients=cfg.n_clients,  # total number of clients
    num_clients=len(client_data),  # total number of clients
    config=fl.server.ServerConfig(
        num_rounds=cfg.n_rounds
        # num_rounds=5
    ),  # minimal config for the server loop telling the number of rounds in FL
    strategy=strategy,  # our strategy of choice
    client_resources={
        "num_cpus": floor(multiprocessing.cpu_count() / len(client_data)),
        "num_gpus": 0.0,
    },
)

In [None]:
print(f"==>> history: {history}")
print(f"==>> end of history")

In [None]:
filename = ('./results/{}/digraph.json'.format(dtime))
outfile = open(filename, 'w')
outfile.writelines(json.dumps(results, cls=NumpyEncoder))
outfile.close()

# Centralities - MultiDiGraph

In [None]:
test = pd.read_parquet(folder_path + "test.parquet")

if cfg.multi_class:
    test[dataset.label_col] = test[dataset.class_num_col]
    
test.drop(["src_degree", "dst_degree", "src_betweenness", "dst_betweenness", "src_pagerank", "dst_pagerank"], axis=1, inplace=True)
# test.drop(["src_multidigraph_degree", "dst_multidigraph_degree", "src_multidigraph_betweenness", "dst_multidigraph_betweenness", "src_multidigraph_pagerank", "dst_multidigraph_pagerank"], axis=1, inplace=True)

if not cfg.multi_class:
    test_by_class = {}
    classes = test[dataset.class_col].unique()
    for class_value in classes:
        test_class = test[test[dataset.class_col] == class_value].copy()
        test_class.drop(dataset.drop_columns, axis=1, inplace=True)
        test_class.drop(dataset.weak_columns, axis=1, inplace=True)
        test_class.reset_index(drop=True, inplace=True)

        test_class_labels = test_class[dataset.label_col].to_numpy()
        test_class = test_class.drop([dataset.label_col], axis=1).to_numpy()

        test_by_class[class_value] = (test_class, test_class_labels)
    
    
test.drop(dataset.drop_columns, axis=1, inplace=True)
test.drop(dataset.weak_columns, axis=1, inplace=True)
test.reset_index(drop=True, inplace=True)
    
test_labels = test[dataset.label_col].to_numpy()
test = test.drop([dataset.label_col], axis=1).to_numpy()
input_dim = test.shape[1]

client_data = []
for client_path in clients_paths:
    client_data.append(pd.read_parquet(client_path))
    
for i in range(len(client_data)):
    
    cdata = client_data[i]
    
    if cfg.multi_class:
        cdata[dataset.label_col] = cdata[dataset.class_num_col]
        
    cdata.drop(["src_degree", "dst_degree", "src_betweenness", "dst_betweenness", "src_pagerank", "dst_pagerank"], axis=1, inplace=True)
    # cdata.drop(["src_multidigraph_degree", "dst_multidigraph_degree", "src_multidigraph_betweenness", "dst_multidigraph_betweenness", "src_multidigraph_pagerank", "dst_multidigraph_pagerank"], axis=1, inplace=True)

    cdata.drop(dataset.drop_columns, axis=1, inplace=True)
    cdata.drop(dataset.weak_columns, axis=1, inplace=True)
    cdata.reset_index(drop=True, inplace=True)
    
    c_train, c_test = train_test_split(cdata, test_size=0.1)

    y_train = c_train[dataset.label_col].to_numpy()
    x_train = c_train.drop([dataset.label_col], axis=1).to_numpy()
    y_test = c_test[dataset.label_col].to_numpy()
    x_test = c_test.drop([dataset.label_col], axis=1).to_numpy()
    
    client_data[i] = (x_train, y_train, x_test, y_test)

In [None]:
results = {}  # a dictionary that will contain all the options and results of models
# add all options to the results dictionary, to know what options selected for obtained results
results["configuration"] = "2dt - Centralities - MultiDiGraph"
results["dtime"] = time.strftime("%Y%m%d-%H%M%S")
results["multi_class"] = cfg.multi_class
results["learning_rate"] = learning_rate
results["dataset_name"] = dataset.name
results["num_classes"] = num_classes
results["labels_names"] = labels_names
results["input_dim"] = input_dim

results["scores"] = {}
results["scores"]["server"] = {}
results["scores"]["clients"] = {}
results["scores"]["accuracy"] = {}
results["scores"]["f1s"] = {}

if not cfg.multi_class:
    results["scores"]["test_by_class"] = {}
    results["scores"]["test_by_class"]["accuracy"] = {}
    results["scores"]["test_by_class"]["f1s"] = {}
    for k in test_by_class.keys():
        results["scores"]["test_by_class"]["length"] = len(test_by_class[k][0])
        results["scores"]["test_by_class"]["accuracy"][k] = {}   
        results["scores"]["test_by_class"]["f1s"][k] = {}    
        
results

In [None]:
model = create_keras_model(input_dim)
model.summary()

In [None]:

class FLClient(fl.client.NumPyClient):
    def __init__(self, cid, x_train, y_train, x_test, y_test):
        self.cid = cid
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.model = create_keras_model(input_shape=input_dim)

    def get_parameters(self, config):
        return self.model.get_weights()

    def set_parameters(self, parameters, config):
        self.model.set_weights(parameters)

    def fit(self, parameters, config):
        
        lr=float(config["lr"])
        # self.model = create_keras_model(input_shape= self.x_train.shape[1], alpha=lr)
        self.model = create_keras_model(input_shape=input_dim, alpha=lr)
        # log(INFO, f"==>> config: {config}")
        # log(INFO, f"==>> float(config[lr]): {lr}")
        self.set_parameters(parameters, config)

        
        logdir = "logs/scalars/{}/multidigraph/client_{}".format(dtime, self.cid)
        tensorboard_callback = callbacks.TensorBoard(log_dir=logdir)

        history = self.model.fit(self.x_train, self.y_train,
                                 epochs=config["local_epochs"],
                                 batch_size=config["batch_size"],
                                 validation_data=(self.x_test, self.y_test),
                                 verbose=0,
                                 callbacks=[tensorboard_callback])

        return self.get_parameters(config), len(self.x_train), {k: v[-1] for k, v in history.history.items()}


    def evaluate(self, parameters, config):
        self.set_parameters(parameters, config)
        loss, accuracy = self.model.evaluate(self.x_test, self.y_test, cfg.config_fit.batch_size, verbose=0)
        return loss, len(self.x_test), {"accuracy": accuracy}


In [None]:
def generate_client_fn():
    def client_fn(cid: str):
        i = int(cid)
        return FLClient(cid, client_data[i][0], client_data[i][1], client_data[i][2], client_data[i][3]).to_client()

    return client_fn

In [None]:
def get_on_fit_config(config: DictConfig):

    def fit_config_fn(server_round: int):
        alpha = learning_rate
        if server_round > 5:
            alpha = alpha / (1 + 0.5 * server_round)


        return {
            "lr": alpha,
            "local_epochs": config.local_epochs,
            "batch_size": config.batch_size,
        }

    return fit_config_fn


def get_evaluate_fn(x_test_sever, y_test_server):

    def evaluate_fn(server_round: int, parameters, config):
        # eval_model = model
        eval_model = create_keras_model(input_shape=input_dim)
        eval_model.set_weights(parameters)

        
        logdir = "logs/scalars/{}/multidigraph/server".format(dtime) 
        # logdir = "logs/scalars/client{}_".format(config["cid"]) + datetime.now().strftime("%Y%m%d-%H%M%S")
        tensorboard_callback = callbacks.TensorBoard(log_dir=logdir)

        test_loss, test_acc = eval_model.evaluate(x_test_sever, y_test_server,
                                                  batch_size = cfg.config_fit.batch_size,
                                                  callbacks=[tensorboard_callback])
        
        
        y_pred = eval_model.predict(x_test_sever, batch_size = cfg.config_fit.batch_size)
        
        if cfg.multi_class:
            y_pred = np.argmax(y_pred, axis=1)
            scores = custom_acc_mc(y_test_server, y_pred)
        else:
            y_pred = np.transpose(y_pred)[0]
            y_pred = list(
                map(lambda x: 0 if x < 0.5 else 1, y_pred))
            scores = custom_acc_binary(y_test_server, y_pred)
        
        
        results["scores"]["accuracy"][server_round] = test_acc
        results["scores"]["f1s"][server_round] = scores["f1s"]
        results["scores"]["server"][server_round] = scores
        
        
        results["scores"]["accuracy"][server_round] = test_acc
        results["scores"]["f1s"][server_round] = scores["f1s"]
        results["scores"]["server"][server_round] = scores
        
        results_final["centralities - MultiDiGraph"]["accuracy"][server_round] = scores["accuracy"]
        results_final["centralities - MultiDiGraph"]["f1s"][server_round] = scores["f1s"]
        
        if not cfg.multi_class:
            for k in test_by_class.keys():
                y_pred_class = eval_model.predict(test_by_class[k][0], batch_size = cfg.config_fit.batch_size, verbose = 0)
                y_pred_class = np.transpose(y_pred_class)[0]
                y_pred_class = list(map(lambda x: 0 if x < 0.5 else 1, y_pred_class))
                scores_class = custom_acc_binary(test_by_class[k][1], y_pred_class)
                results["scores"]["test_by_class"]["accuracy"][k][server_round] = scores_class["accuracy"]
                results["scores"]["test_by_class"]["f1s"][k][server_round] = scores_class["f1s"]
                
        log(INFO, f"==>> scores: {scores}")
        
        
        return test_loss, {"accuracy": test_acc, "f1s": scores["f1s"], "FPR": scores["FPR"], "FNR": scores["FNR"]}

    return evaluate_fn


In [None]:
def weighted_average(metrics):
    print(f"==>> weighted_average: {metrics}")

    total_examples = 0
    federated_metrics = {k: 0 for k in metrics[0][1].keys()}
    for num_examples, m in metrics:
        for k, v in m.items():
            federated_metrics[k] += num_examples * v
        total_examples += num_examples
    return {k: v / total_examples for k, v in federated_metrics.items()}

strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,  # in simulation, since all clients are available at all times, we can just use `min_fit_clients` to control exactly how many clients we want to involve during fit
    min_fit_clients=len(client_data),  # number of clients to sample for fit()
    fraction_evaluate=0.0,  # similar to fraction_fit, we don't need to use this argument.
    min_evaluate_clients=0,  # number of clients to sample for evaluate()
    min_available_clients=len(client_data),  # total clients in the simulation
    fit_metrics_aggregation_fn = weighted_average,
    # evaluate_metrics_aggregation_fn = weighted_average,
    on_fit_config_fn=get_on_fit_config(
        cfg.config_fit
    ),  # a function to execute to obtain the configuration to send to the clients during fit()
    evaluate_fn=get_evaluate_fn(test, test_labels),
)  # a function to run on the server side to evaluate the global model.


In [None]:
import multiprocessing
from math import floor
history = fl.simulation.start_simulation(
    client_fn=generate_client_fn(),  # a function that spawns a particular client
    # num_clients=cfg.n_clients,  # total number of clients
    num_clients=len(client_data),  # total number of clients
    config=fl.server.ServerConfig(
        num_rounds=cfg.n_rounds
        # num_rounds=5
    ),  # minimal config for the server loop telling the number of rounds in FL
    strategy=strategy,  # our strategy of choice
    client_resources={
        "num_cpus": floor(multiprocessing.cpu_count() / len(client_data)),
        "num_gpus": 0.0,
    },
)

In [None]:
print(f"==>> history: {history}")
print(f"==>> end of history")

In [None]:
filename = ('./results/{}/multidigraph.json'.format(dtime))
outfile = open(filename, 'w')
outfile.writelines(json.dumps(results, cls=NumpyEncoder))
outfile.close()

In [None]:
filename = ('./results/{}/results_final.json'.format(dtime))
outfile = open(filename, 'w')
outfile.writelines(json.dumps(results_final, cls=NumpyEncoder))
outfile.close()