In [64]:
import warnings
import flwr as fl
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.linear_model import SGDOneClassSVM
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from typing import Tuple, Union, List, Dict
import numpy as np
import pandas as pd
from random import randint
from sklearn import metrics
import ast
from sklearn.model_selection import train_test_split

In [65]:
XY = Tuple[np.ndarray, np.ndarray]
Dataset = Tuple[XY, XY, XY]
ModelParams = Union[XY, Tuple[np.ndarray]]
XYList = List[XY]
Model = SGDOneClassSVM

In [66]:
def set_model_params(
    model: Model, params: ModelParams
) -> Model:
    """Sets the parameters of a sklean LogisticRegression model."""
    model.coef_ = params[0]
    # if model.fit_intercept:
    #     model.intercept_ = params[1]
    return model

In [67]:
def load_dataset():
    """Loads the MNIST dataset from local csv.
    """
    tw = 60
    feature = 'system calls tfidf_5gram-pcas'
    dataPath = 'f:/temp/'
    devices =  ['pi3', 'pi4_2G', 'pi4_4G']
    x_t=[]
    y_t=[]
    x_v = []
    y_v = []
    for device in devices:
        tsv_name = dataPath + 'encoded_bow{}_{}_{}.csv'.format(device, tw, feature)
        encoded_trace_df = pd.read_csv(tsv_name, sep='\t')
        ft = [ast.literal_eval(i) for i in encoded_trace_df[feature]]
        encoded_trace_df[feature] = ft
        normal = encoded_trace_df[encoded_trace_df.maltype=='normal']
        abnoraml = encoded_trace_df[encoded_trace_df.maltype!='normal']
        X = normal[feature].tolist()
        y = [1 for i in range(0,len(X))]
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=.3, random_state=42)
        x_v += X_val
        y_v += y_val
        X_test = abnoraml[feature].tolist()
        x_t += X_test
        y_test = [-1 for i in range(0,len(X_test))]
        y_t += y_test

    return (x_v, y_v), (x_t, y_t)

In [68]:
def set_initial_params(model: Model, n_features=20):
    """Sets initial parameters as zeros Required since model params are
    uninitialized until model.fit is called.
    But server asks for initial parameters from clients at launch. Refer
    to sklearn.linear_model.LogisticRegression documentation for more
    information.
    """
    n_classes = 1  # MNIST has 10 classes
    model.classes_ = np.array([i for i in range(10)])

    model.coef_ = np.zeros((n_classes, n_features))
    model.offset_ = np.zeros(1)
    # if model.fit_intercept:
    #     model.intercept_ = np.zeros((n_classes,))

In [69]:
def fit_round(rnd: int) -> Dict:
    """Send round number to client."""
    return {"rnd": rnd}


def get_eval_fn(model: SGDOneClassSVM):
    """Return an evaluation function for server-side evaluation."""
    
    # Load test data here to avoid the overhead of doing it in `evaluate` itself
    (X_val, y_val), (X_test, y_test) = load_dataset()

    # The `evaluate` function will be called after every round
    def evaluate(parameters: fl.common.Weights):

        set_model_params(model, parameters)

        y_pred_val = model.predict(X_val)
        sco_val = model.score_samples(X_val)
        loss_val = metrics.accuracy_score(y_val, y_pred_val)
        accuracy_val = metrics.accuracy_score(y_val, y_pred_val)

        y_pred_test = model.predict(X_test)
        sco_test = model.score_samples(X_test)
        loss_test = metrics.accuracy_score(y_test, y_pred_test)
        accuracy_test = metrics.accuracy_score(y_test, y_pred_test)
        # accuracy = model.score(X_test, y_test)
        print(f"validation accuracy is {accuracy_val}")
        return loss_test,  {"accuracy_validation": accuracy_val, "test_validation": accuracy_test}
    return evaluate

In [70]:
# Start Flower server for five rounds of federated learning
if __name__ == "__main__":
    model = SGDOneClassSVM()
    set_initial_params(model)
    strategy = fl.server.strategy.FedAvg(
        min_available_clients=3,
        eval_fn=get_eval_fn(model),
        on_fit_config_fn=fit_round,
    )
    fl.server.start_server("192.168.1.105:8080", strategy=strategy, config={"num_rounds": 1})

INFO flower 2021-11-18 23:58:40,661 | app.py:77 | Flower server running (insecure, 1 rounds)
INFO flower 2021-11-18 23:58:40,661 | server.py:118 | Initializing global parameters
INFO flower 2021-11-18 23:58:40,662 | server.py:304 | Requesting initial parameters from one random client
INFO flower 2021-11-18 23:58:46,710 | server.py:307 | Received initial parameters from one random client
INFO flower 2021-11-18 23:58:46,710 | server.py:120 | Evaluating initial parameters
INFO flower 2021-11-18 23:58:46,822 | server.py:123 | initial parameters (loss, other metrics): 0.0, {'accuracy_validation': 1.0, 'test_validation': 0.0}
INFO flower 2021-11-18 23:58:46,823 | server.py:133 | FL starting


validation accuracy is 1.0


DEBUG flower 2021-11-18 23:58:56,598 | server.py:251 | fit_round: strategy sampled 2 clients (out of 3)
DEBUG flower 2021-11-18 23:58:56,649 | server.py:260 | fit_round received 2 results and 0 failures
INFO flower 2021-11-18 23:58:56,766 | server.py:148 | fit progress: (1, 0.16537997587454764, {'accuracy_validation': 0.9964028776978417, 'test_validation': 0.16537997587454764}, 9.942148899999893)
INFO flower 2021-11-18 23:58:56,767 | server.py:199 | evaluate_round: no clients selected, cancel
INFO flower 2021-11-18 23:58:56,767 | server.py:172 | FL finished in 9.943065700000034
INFO flower 2021-11-18 23:58:56,767 | app.py:119 | app_fit: losses_distributed []
INFO flower 2021-11-18 23:58:56,768 | app.py:120 | app_fit: metrics_distributed {}
INFO flower 2021-11-18 23:58:56,768 | app.py:121 | app_fit: losses_centralized [(0, 0.0), (1, 0.16537997587454764)]
INFO flower 2021-11-18 23:58:56,768 | app.py:122 | app_fit: metrics_centralized {'accuracy_validation': [(0, 1.0), (1, 0.9964028776978

validation accuracy is 0.9964028776978417
