In [None]:
import glob
import os
import sys
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from multiprocessing import Process
import gc

import flwr as fl
from flwr.server.strategy import FedAvg
import tensorflow as tf
import sklearn
import time

import numpy as np
import pandas as pd
from pandas import DataFrame

#!export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/
# demonstration of calculating metrics for a neural network model using sklearn
from sklearn.datasets import make_circles
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix

from flwr.server.client_manager import SimpleClientManager
from flwr.server.criterion import Criterion
from flwr.common.logger import log
from flwr.server.client_manager import ClientManager
from flwr.server.client_proxy import ClientProxy
from flwr.common import (
    EvaluateIns,
    EvaluateRes,
    FitIns,
    FitRes,
    MetricsAggregationFn,
    NDArrays,
    Parameters,
    Scalar,
    ndarrays_to_parameters,
    parameters_to_ndarrays,
)

# Make TensorFlow log less verbose
# NOTE(review): this assignment runs after `import tensorflow` earlier in this cell;
# TensorFlow may only honor the variable when it is set before the import — confirm
# that it has the intended effect here.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

In [None]:
# Command-line arguments
def parse_iterations(argv):
    """Return the first entry of ``argv`` that parses as an integer, or 0 if none does.

    Every inspected entry is echoed; entries that are not integers print "no".

    Args:
        argv: sequence of argument strings (normally ``sys.argv``).

    Returns:
        The first integer found, or 0 when no entry is an integer.
    """
    for value in argv:
        print("arg:", value)
        try:
            return int(value)
        except ValueError:
            # Only a failed int conversion is expected here; anything else should surface.
            print("no")
    return 0


n = len(sys.argv)
print("Total arguments passed:", n)
finalIterations = 0
iteracoes = parse_iterations(sys.argv)
# Report the value that was actually parsed (previously this always printed 0).
print("iteracoes:", iteracoes)

In [None]:

# General configuration
NUM_EPOCHS = 80
NUMBER_OF_ITERATIONS_FINAL = 1
execution = 12

BATCH_SIZE = 16
VERBOSE = 0

# Client index to leave out of the aggregation; a negative value disables the exclusion.
clientId_TO_be_removed = -1

# The folder name is built from the default round count, before any command-line override.
outputFolder = f"FL_{execution}_ep_{NUM_EPOCHS}_rd_{NUMBER_OF_ITERATIONS_FINAL}"

# train file name modifier
fileSufixTrain = ""  # _smote for smote

fl.common.logger.configure(identifier="myFlowerExperiment", filename=f"log_{outputFolder}.txt")

# Used for checkpoints: a positive iteration count from the command line replaces the default.
NUMBER_OF_ITERATIONS_FINAL = iteracoes if iteracoes > 0 else NUMBER_OF_ITERATIONS_FINAL

NUMBER_OF_ITERATIONS = NUMBER_OF_ITERATIONS_FINAL

In [None]:
print("Checking whether the folder exists or not")
isExist = os.path.exists(outputFolder)
if isExist:
    print("The directory exists!")
else:
    # The results folder is missing, so create it (including any parent folders).
    os.makedirs(outputFolder)
    print("The new directory is created!")

In [None]:
# Columns fed to the model, and the label column used when scoring predictions
inputFeatures = [
    "battery_potential",
    "consumption",
    "ignition",
    "motor_water_temp",
    "oil_pressure",
]
outputClasses = ["normal_label"]
#outputClasses = ["class"]

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Load the complete dataset plus the pre-computed train / validation / test splits.
df_complete = pd.read_csv('Complete.csv', index_col=0)
X_test = pd.read_csv('Test.csv', index_col=0)
X_val = pd.read_csv('Validation.csv', index_col=0)
X_train = pd.read_csv('Train.csv', index_col=0)

# Training and validation rows together select the data handed to the clients.
X_merge = pd.concat([X_train, X_val], axis=0)
print(len(X_merge))

index_to_train = X_merge.index

# .copy() so the scaling below writes into an independent frame rather than a slice of df_complete.
x_complete_filtered = df_complete.loc[index_to_train].copy()

# Fit the scaler on the training rows only and apply those same statistics to the test rows.
# Re-fitting on X_test (as fit_transform does) would standardise the test set with its own
# mean and variance, so train and test features would no longer share one scale.
scaler = StandardScaler()
scaler.fit(x_complete_filtered[inputFeatures])
x_complete_filtered[inputFeatures] = scaler.transform(x_complete_filtered[inputFeatures])
X_test[inputFeatures] = scaler.transform(X_test[inputFeatures])

# One DataFrame per device_id; client ids index into this list.
clientList = [group for _, group in x_complete_filtered.groupby('device_id')]

for idx, df in enumerate(clientList, start=1):
    print(f"DataFrame {idx} (device_id={df['device_id'].iloc[0]}):\n{df}\n")




In [None]:
from collections import Counter

# Print, for every client frame, how many rows it holds per device_id.
# (The stray `next` expression that used to sit in the loop body did nothing and was removed.)
for df in clientList:
    print(Counter(df['device_id']))

In [None]:
# Display the input-feature columns of the test set as this cell's output.
X_test[inputFeatures]

In [None]:
# Display the number of client DataFrames held in clientList.
len(clientList)

In [None]:
def generateMetrics(y_test,yhat_probs):
    """Compute binary-classification metrics for one set of predictions.

    Args:
        y_test: true binary labels.
        yhat_probs: predicted scores or labels. They are rounded to obtain crisp
            classes and passed unrounded to ``roc_auc_score``.

    Returns:
        A ``(results, array)`` pair. ``results`` is a dict keyed by metric name
        ('accuracy', 'precision', 'recall', 'f1_score', 'cohen_kappa_score',
        'roc_auc_score', 'matrix', 'TP', 'FP', 'FN', 'TN'). ``array`` is a list
        holding the same values in that order.
    """
    # predict crisp classes for test set deprecated
    #yhat_classes = model.predict_classes(X_test, verbose=0)
    #yhat_classes = np.argmax(yhat_probs,axis=1)
    yhat_classes = yhat_probs.round()
    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_test, yhat_classes)
    # precision tp / (tp + fp)
    precision = precision_score(y_test, yhat_classes)
    # recall: tp / (tp + fn)
    recall = recall_score(y_test, yhat_classes)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_test, yhat_classes)
    # kappa
    kappa = cohen_kappa_score(y_test, yhat_classes)
    # ROC AUC
    auc = roc_auc_score(y_test, yhat_probs)
    # confusion matrix
    matrix = confusion_matrix(y_test, yhat_classes)

    # scikit-learn puts true classes on rows and predicted classes on columns, with labels
    # sorted ascending, so for labels (0, 1) the layout is [[TN, FP], [FN, TP]].
    # The positive class is 1, matching the precision/recall/F1 defaults used above.
    tn, fp = matrix[0][0], matrix[0][1]
    fn, tp = matrix[1][0], matrix[1][1]
    # The printable form keeps the raw matrix layout.
    matrix_text = "[[ " + str(tn) + " " + str(fp) + "][ " + str(fn) + " " + str(tp) + "]]"

    results = dict()
    results['accuracy'] = accuracy
    results['precision'] = precision
    results['recall'] = recall
    results['f1_score'] = f1
    results['cohen_kappa_score'] = kappa
    results['roc_auc_score'] = auc
    results['matrix'] = matrix_text
    results['TP'] = tp
    results['FP'] = fp
    results['FN'] = fn
    results['TN'] = tn

    # Same values as a flat list, in the column order callers write to CSV.
    array = [accuracy, precision, recall, f1, kappa, auc, matrix_text, tp, fp, fn, tn]

    return results, array

# y_test     = Array with real values
# yhat_probs = Array with predicted values
def printMetrics(y_test,yhat_probs):
    """Compute the metrics with generateMetrics, print them, and return them unchanged.

    Returns:
        The ``(results, array)`` pair produced by ``generateMetrics``.
    """
    results, array = generateMetrics(y_test, yhat_probs)

    # (printf-style template, key in `results`), in display order
    report_lines = (
        ('Accuracy: %f', 'accuracy'),
        ('Precision: %f', 'precision'),
        ('Recall: %f', 'recall'),
        ('F1 score: %f', 'f1_score'),
        ('Cohens kappa: %f', 'cohen_kappa_score'),
        ('ROC AUC: %f', 'roc_auc_score'),
    )
    for template, key in report_lines:
        print(template % results[key])

    # The confusion matrix is stored pre-formatted as text.
    print("Confusion Matrix")
    print(results['matrix'])

    return results, array

def generateGlobalMetrics(metrics):
    """Average six metrics over a list of per-evaluation metric dicts.

    Args:
        metrics: list of dicts, each holding 'accuracy', 'precision', 'recall',
            'f1_score', 'cohen_kappa_score' and 'roc_auc_score'.

    Returns:
        A list with the mean of each metric, in the order listed above.
        The number of dicts is printed before averaging.
    """
    metric_keys = ('accuracy', 'precision', 'recall', 'f1_score',
                   'cohen_kappa_score', 'roc_auc_score')

    # Running sums, accumulated dict by dict.
    totals = dict.fromkeys(metric_keys, 0)
    for entry in metrics:
        for key in metric_keys:
            totals[key] = totals[key] + entry[key]

    # mean
    size = len(metrics)
    print(size)
    return [totals[key] / size for key in metric_keys]

def showGlobalMetrics(metrics):
    """Print the averaged metrics computed by generateGlobalMetrics and return them.

    Returns:
        The list returned by ``generateGlobalMetrics``.
    """
    res = generateGlobalMetrics(metrics)

    # Labels in the same order as the values in `res`.
    labels = (
        "accuracy: ",
        "precision: ",
        "recall: ",
        "f1_score: ",
        "cohen_kappa_score: ",
        "roc_auc_score: ",
    )
    for position, label in enumerate(labels):
        print(label, res[position])

    return res

In [None]:
print("Preparing X_train data")
# load client data
#clientList = loadDataFromFoldersOnList(trainFolders,inputFolderPath,fileSufixTrain)

# One simulated client per entry of clientList.
NUMBER_OF_CLIENTS = len(clientList)
print("Total", NUMBER_OF_CLIENTS)

In [None]:
print("creating model")

def create_keras_model():
    """Build and compile the autoencoder-style dense network.

    The network takes ``len(inputFeatures)`` inputs and stacks Dense layers of
    128, 2, 128 and ``len(inputFeatures)`` units, all with tanh activation.
    It is compiled with the Adam optimizer and mean-squared-error loss.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    input_dim = len(inputFeatures)  # number of features (columns)
    encoding_dim = 128

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(input_dim,)))
    # Widths of the dense stack: expand, 2-unit bottleneck, expand, back to the input width.
    for units in (encoding_dim, 2, encoding_dim, input_dim):
        model.add(tf.keras.layers.Dense(units, activation='tanh'))

    model.compile(optimizer='adam', loss='mean_squared_error')

    return model


keras_model = create_keras_model()
keras_model.summary()

In [None]:
def transform_data_type(dataframe):
    """Cast every input-feature and output-class column of ``dataframe`` to float32.

    The frame is modified in place and also returned.
    """
    # Inputs first, then outputs.
    for column in [*inputFeatures, *outputClasses]:
        dataframe[column] = dataframe[column].astype('float32')

    return dataframe

# transforms the data
X_test = transform_data_type(X_test)

X_test.info()

In [None]:
print("Prepering the test dataset")
# Split the test frame into model inputs and ground-truth labels.
X_test_data, y_test_label = (X_test[cols] for cols in (inputFeatures, outputClasses))




In [None]:
def evaluate_and_save_results(keras_model,X_test_data_, y_test_label_, current_round_index, 
                              clientId, prefix_string = "Results", lossValue = -1):
    """Score a model's reconstructions as normal/anomalous and write the metrics to CSV.

    The model's output for ``X_test_data_`` is compared with the input to get one
    squared-error value per row. Percentiles 50..100 of that error are tried as
    cut-offs and the one with the highest F1 against ``y_test_label_`` is kept.
    Metrics for the resulting predictions are computed with ``generateMetrics``
    and saved as a single CSV row.

    Args:
        keras_model: model providing ``predict`` and ``get_weights``.
        X_test_data_: feature frame passed to the model.
        y_test_label_: frame holding the 'normal_label' column to score against.
        current_round_index: value written to the 'round' column and file name.
        clientId: when >= 0, a per-client, per-round file is written; otherwise the
            row is appended to ``<outputFolder>/<prefix_string>.csv``.
        prefix_string: file-name prefix.
        lossValue: value written to the 'loss' column.

    Returns:
        None. The result is the CSV file written under ``outputFolder``.
    """
    # predict values
    

    yhat_probs = keras_model.predict(X_test_data_,verbose=VERBOSE)
    new_parameters = keras_model.get_weights()
    # Total number of weight elements, divided by 1024.
    # NOTE(review): this counts elements, not bytes, despite the `_kb` suffix — confirm the intended unit.
    size_after_training = sum(np.prod(w.shape) for w in new_parameters)
    size_after_training_kb = size_after_training / 1024
    

    # Per-row mean of the squared difference between the inputs and the model output.
    mse = np.mean(np.power(X_test_data_ - yhat_probs, 2), axis=1)
    best_f1 = 0
    best_threshold = 0
    #loss = log_loss(y_test, model.predict_proba(X_test))
    
    # Try the 50th..100th percentile of the error as the cut-off; rows at or below the
    # cut-off are predicted True (normal).
    # NOTE(review): the cut-off is selected with the same labels the metrics are then
    # reported on — confirm this is intended.
    for threshold_value in range(50, 101):
        threshold = np.percentile(mse, threshold_value)
        anomaly_labels = (mse > threshold)
        y_pred_bool = ~anomaly_labels
        # y_test = X_test_data_["normal_label"]
    
        f1 = f1_score(y_test_label_, y_pred_bool)
        
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold
            # Recorded but not read again in this function.
            best_threshold_value=threshold_value

    
    # Final predictions with the best cut-off: True means normal.
    yhat_probs_=(mse > best_threshold)
    yhat_probs_rounded=~yhat_probs_

    y_predicted_df = pd.DataFrame(data=yhat_probs_rounded,columns=['normal_label']) 

    
    roundData = []

    columns = ['client','round','loss', 'size', 'class','accuracy','precision','recall', 
               'f1_score','cohen_kappa_score','roc_auc_score','confusion_matrix',
               'TP','FP','FN','TN', ]
    
    # Instantiate the list that will contain the results
    listOfMetrics = list()
    
    # print('awake')    
    # res,resA = printMetrics(y_test_label['awake'],y_predicted_df['awake'])
    res,resA = generateMetrics(y_test_label_['normal_label'],y_predicted_df['normal_label'])
    listOfMetrics.append(res)
    
    # One CSV row: identifying fields followed by the metric values.
    # NOTE(review): the row mixes strings and numbers in one NumPy array, which presumably
    # stores every value as a string — confirm that is acceptable for the CSV consumers.
    classData = np.concatenate(([clientId,current_round_index,lossValue,size_after_training_kb,'normal_label'], resA))
    roundData.append(classData)

    dataMetrics = pd.DataFrame(data=roundData,columns=columns) 
    # write file
    if(clientId >= 0):
        # Per-client, per-round file; an existing file with the same name is overwritten.
        outputMetricFile = outputFolder+"/"+prefix_string+"_MLP_client_" + str(clientId) + "_round_" + str(current_round_index) + ".csv"
    else:
        #outputMetricFile = outputFolder+"/global_model_MLP_metrics.csv"
        outputMetricFile = outputFolder+"/"+ prefix_string+ ".csv"
        # print global model results
        # Append by reading the existing rows back and rewriting the whole file below.
        if(os.path.isfile(outputMetricFile)):
            dataset = pd.read_csv(outputMetricFile)
            dataMetrics = pd.concat([dataset, dataMetrics], axis=0)
        # Perform garbage collection
        gc.collect()
        
    dataMetrics.to_csv(outputMetricFile, sep=',', encoding='utf-8', index=False)
    

In [None]:
class SaveModelStrategy(fl.server.strategy.FedAvg):
    """FedAvg subclass whose ``aggregate_fit`` delegates straight to the base class."""

    def aggregate_fit(
        self,
        server_round: int,
        results: List[Tuple[fl.server.client_proxy.ClientProxy, fl.common.FitRes]],
        failures: List[Union[Tuple[ClientProxy, FitRes], BaseException]],
    ) -> Tuple[Optional[Parameters], Dict[str, Scalar]]:
        """Aggregate the clients' fit results with FedAvg and return them unchanged.

        Returns:
            The ``(aggregated_parameters, aggregated_metrics)`` pair from ``FedAvg.aggregate_fit``.
        """
        # No extra processing is done on top of the base-class aggregation.
        return super().aggregate_fit(server_round, results, failures)

In [None]:
print("Declarating client function")

# Define a Flower client
class FlowerISABELASleepClient(fl.client.NumPyClient):
    """Flower NumPy client that trains the model to reconstruct its own inputs.

    Metrics are written through ``evaluate_and_save_results`` before local training,
    after local training, and on the shared test set.
    """

    def __init__(self, clientId, model, X_train_data, y_train_label,round_index=0):
        self.clientId = clientId
        self.round_index = round_index
        self.model = model
        self.X_train_data = X_train_data
        self.y_train_label = y_train_label

    def get_parameters(self, config):
        """Return current weights."""
        return self.model.get_weights()

    def _report(self, features, labels, prefix):
        """Reconstruct ``features``, take the mean squared error, and save metrics under ``prefix``."""
        reconstruction = self.model.predict(features, verbose=VERBOSE)
        loss = np.mean(tf.keras.losses.mean_squared_error(features, reconstruction))
        evaluate_and_save_results(self.model, features, labels, self.round_index, self.clientId, prefix, loss)

    def fit(self, parameters, config):
        """Fit model and return new weights as well as number of training examples."""
        self.model.set_weights(parameters)

        # Snapshot of the received global weights on the local data.
        self._report(self.X_train_data, self.y_train_label, "Local_before")

        # The inputs are also the targets: the model learns to reconstruct them.
        self.model.fit(self.X_train_data, self.X_train_data, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE,verbose=VERBOSE)

        # Locally trained weights on the local data, then on the shared test set.
        self._report(self.X_train_data, self.y_train_label, "Local_after")
        self._report(X_test_data, y_test_label, "Test_after")

        return self.model.get_weights(), len(self.X_train_data), {}
    

In [None]:
def get_evaluate_fn( model):
    """Build the server-side evaluation callback for the global model.

    Args:
        model: Keras model that receives the aggregated weights on every call.

    Returns:
        A function ``evaluate(server_round, parameters, config)`` that loads
        ``parameters`` into ``model``, scores it on the shared test set, saves the
        metrics through ``evaluate_and_save_results`` and returns
        ``(loss, {'accuracy': accuracy})`` as plain Python floats.
    """

    def evaluate(
        server_round: int, parameters: NDArrays, config: Dict[str, Scalar]
    ) -> Optional[Tuple[float, Dict[str, Scalar]]]:

        current_round = server_round

        print("Evaluating global model round",current_round)

        model.set_weights(parameters)

        # Reconstruct the shared test set with the global weights.
        # (The full test frames are no longer printed on every round.)
        y_pred=model.predict(X_test_data,verbose=VERBOSE)

        loss=np.mean(tf.keras.losses.mean_squared_error(X_test_data, y_pred))
        # Fraction of elements reconstructed exactly. Comparing plain arrays keeps the
        # result a single number; averaging a DataFrame can give one value per column
        # depending on the pandas version.
        accuracy=float(np.mean(np.equal(np.asarray(X_test_data), y_pred)))
        print(loss)
        print(accuracy)

        evaluate_and_save_results(model,X_test_data, y_test_label, current_round, -1,"global_model_MLP_metrics",loss)

        # Also evaluate on the dataset of the client that was excluded from aggregation.
        if(clientId_TO_be_removed >= 0):
            data   = clientList[clientId_TO_be_removed][inputFeatures]
            labels = clientList[clientId_TO_be_removed][outputClasses]

            y_pred_New = model.predict(data, verbose=VERBOSE)
            lossNew=np.mean(tf.keras.losses.mean_squared_error(data, y_pred_New))
            # Saved to its own file, named after the excluded client.
            file_name = "excluded_model_MLP_metrics_from_client_"+str(clientId_TO_be_removed)
            evaluate_and_save_results(model,data, labels, current_round, -1,file_name,lossNew)

        # Return built-in floats so the values match the declared Scalar/float types.
        return float(loss), { 'accuracy': accuracy }
    return evaluate

In [None]:
import ray
import math
# Create an instance of the model and get the parameters

# Specify client resources if you need GPU (defaults to 1 CPU and 0 GPU)
#if DEVICE.type == "cuda":
client_resources = dict(num_cpus=1)

# Re-compile the shared model with the same optimizer and loss it was created with.
#keras_model = create_keras_model()
keras_model.compile(loss='mean_squared_error', optimizer='adam')


def client_fn(cid) -> FlowerISABELASleepClient:
    """Create the Flower client for client id ``cid``.

    Args:
        cid: client id; converted to ``int`` and used as an index into ``clientList``.

    Returns:
        A ``FlowerISABELASleepClient`` holding that client's features and labels, a
        freshly built model, and the round index derived from the global metrics file.
    """
    print("starting client: "+str(cid),type(cid))
    #convert client ID to int
    clientId = int(cid)
    print("starting client: ", type(clientId))

    client_frame = clientList[clientId]
    data   = client_frame[inputFeatures]
    labels = client_frame[outputClasses]

    print("Creating client model to client UID: "+str(cid))
    print(Counter(client_frame['device_id']))
    print("Data X: "+str(len(data)))
    print("Data Y: "+str(len(labels)))
    # DataFrame.info() prints its report itself and returns None, so it is called
    # directly instead of being concatenated into the label (which printed "...None").
    print("Infor X")
    data.info()
    print("Infor Y")
    labels.info()

    file_global_model = outputFolder+"/global_model_MLP_metrics.csv"
    index_round = 0

    # Continue from the round after the last one recorded in the global metrics file.
    if(os.path.isfile(file_global_model)):
        dataset = pd.read_csv(file_global_model)
        index_round = dataset["round"].max() + 1
        del dataset

    print("Creating client model to client: "+str(cid),"round",index_round)
    # Build and compile a fresh Keras model for this client.
    model = create_keras_model()

    return FlowerISABELASleepClient(clientId,model,data,labels,index_round)


In [None]:
# https://github.com/adap/flower/blob/main/src/py/flwr/server/criterion_test.py
print("Configuring criterion selection")
class TestCriterion(Criterion):
    """Criterion that rejects the client whose cid equals ``str(clientId_TO_be_removed)``."""

    def select(self, client: ClientProxy) -> bool:
        """Return False for the excluded client and True for every other client."""
        print("Testing Criterion to cid: ",client.cid," ",(type(client.cid))) # ok, acessa

        # cids are compared as strings.
        is_selected = client.cid != str(clientId_TO_be_removed)
        print("Return true" if is_selected else "Return false")
        return is_selected

customCriterion = TestCriterion()

# Number of clients that must be available each round: every client except the excluded one.
# When no client is excluded (clientId_TO_be_removed < 0) all clients are required;
# subtracting one unconditionally would under-count the clients expected per round.
if clientId_TO_be_removed >= 0:
    min_num_clients_meus = NUMBER_OF_CLIENTS - 1
else:
    min_num_clients_meus = NUMBER_OF_CLIENTS
print("Configuring strategy")
strategy = SaveModelStrategy(
    min_available_clients=min_num_clients_meus, # excludes the client ignored during selection, if any
    evaluate_fn=get_evaluate_fn(keras_model)
) # (same arguments as FedAvg here)

print("Min num of clients: ",min_num_clients_meus)

import random

# https://flower.dev/docs/framework/_modules/flwr/server/client_manager.html#ClientManager.sample 
print("Configuring client manager selection")
class CustomSimpleClientManager(SimpleClientManager):
    """Client manager that filters clients through ``TestCriterion`` by default."""

    # configurate the client selection
    def sample(self,num_clients: int, min_num_clients: Optional[int] = None, criterion: Optional[Criterion] = None) -> List[ClientProxy]:
        """Sample a number of Flower ClientProxy instances.

        Args:
            num_clients: number of clients requested.
            min_num_clients: number of clients to wait for, and the minimum number of
                eligible clients needed; defaults to ``num_clients``.
            criterion: selection criterion; a ``TestCriterion`` is used when omitted.

        Returns:
            Up to ``num_clients`` randomly chosen eligible clients, or an empty list
            when fewer than ``min_num_clients`` clients pass the criterion.
        """
        print("Instantiate custom criterion selection")
        print("Params: ",num_clients," ", min_num_clients)

        if min_num_clients is None:
            min_num_clients = num_clients

        print("Params: ",num_clients," ", min_num_clients)

        print("criterion parameter: ",(criterion is not None))

        # Fall back to the custom criterion when the caller does not supply one.
        selectedCriterion = criterion if criterion is not None else TestCriterion()

        # Block until at least min_num_clients are connected.
        self.wait_for(min_num_clients)
        # Keep only the clients which meet the criterion.
        available_cids = [
            cid for cid in list(self.clients) if selectedCriterion.select(self.clients[cid])
        ]

        print("Number of selected clients to aggregation: ", len(available_cids))

        if min_num_clients > len(available_cids):
            print(
                "Sampling failed: number of available clients",len(available_cids),
                " is less than number of requested clients.", min_num_clients,
            )
            return []

        # Draw as many clients as requested, capped by how many are eligible. Drawing
        # min_num_clients instead would silently leave out eligible clients whenever the
        # minimum is lower than the number of eligible clients.
        sample_size = min(num_clients, len(available_cids))
        sampled_cids = random.sample(available_cids, sample_size)
        return [self.clients[cid] for cid in sampled_cids]

In [None]:

# Start simulation
# Runs NUMBER_OF_CLIENTS simulated clients for NUMBER_OF_ITERATIONS rounds, using the
# strategy configured in this notebook and a CustomSimpleClientManager for client selection.
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUMBER_OF_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=NUMBER_OF_ITERATIONS),  # round count is set by NUMBER_OF_ITERATIONS
    client_resources=client_resources,
    strategy = strategy,
    client_manager = CustomSimpleClientManager()
)