In [1]:
import flwr as fl
import LR_Utils as utils
from flwr.common import NDArrays, Scalar
from sklearn.metrics import log_loss
from sklearn import metrics 
import pandas as pd
from sklearn.linear_model import LogisticRegression
from typing import Tuple, Dict, Optional

In [2]:
def fit_round(server_round: int) -> Dict:
    """Send round number to client."""
    return {"server_round": server_round}

In [3]:
def get_evaluate_fn(model: LogisticRegression):
    """Return an evaluation function for server-side evaluation."""

    _, (X_test, y_test) = utils.load_mnist()

    def evaluate(
        server_round: int, parameters: NDArrays, config: Dict[str, Scalar]
    ) -> Optional[Tuple[float, Dict[str, Scalar]]]:
        utils.set_model_params(model, parameters)
        loss = log_loss(y_test, model.predict_proba(X_test))
        accuracy = model.score(X_test, y_test)
        return ({"Sever Loss":loss}, {"Server Accuracy": accuracy})
        
    return evaluate


In [4]:
if __name__ == "__main__":
    model = LogisticRegression()
    utils.set_initial_params(model)
    strategy = fl.server.strategy.FedAvg(
        min_available_clients=10,
        evaluate_fn=get_evaluate_fn(model),
        on_fit_config_fn=fit_round,

    )

  mnist_openml = openml.datasets.get_dataset(554)
  Xy, _, _, _ = mnist_openml.get_data(dataset_format="array")


In [5]:
fl.server.start_server(
    server_address="127.0.0.1:8080",
    strategy=strategy,
    config=fl.server.ServerConfig(num_rounds=250)
)

INFO flwr 2024-03-03 14:29:01,235 | app.py:163 | Starting Flower server, config: ServerConfig(num_rounds=5, round_timeout=None)
INFO flwr 2024-03-03 14:29:01,288 | app.py:176 | Flower ECE: gRPC server running (5 rounds), SSL is disabled
INFO flwr 2024-03-03 14:29:01,288 | server.py:89 | Initializing global parameters
INFO flwr 2024-03-03 14:29:01,289 | server.py:276 | Requesting initial parameters from one random client
INFO flwr 2024-03-03 14:29:12,898 | server.py:280 | Received initial parameters from one random client
INFO flwr 2024-03-03 14:29:12,900 | server.py:91 | Evaluating initial parameters
INFO flwr 2024-03-03 14:29:13,063 | server.py:94 | initial parameters (loss, other metrics): {'Sever Loss': 2.3025850929940455}, {'Server Accuracy': 0.098}
INFO flwr 2024-03-03 14:29:13,065 | server.py:104 | FL starting
DEBUG flwr 2024-03-03 14:29:13,858 | server.py:222 | fit_round 1: strategy sampled 5 clients (out of 10)
DEBUG flwr 2024-03-03 14:29:14,238 | server.py:236 | fit_round 1 re

History (loss, distributed):
	round 1: 1.3357131481170654
	round 2: 0.6836938858032227
	round 3: 0.5311362147331238
	round 4: 0.5115243196487427
	round 5: 0.4945966899394989
History (loss, centralized):
	round 0: {'Sever Loss': 2.3025850929940455}
	round 1: {'Sever Loss': 1.3357131838579812}
	round 2: {'Sever Loss': 0.683693900926164}
	round 3: {'Sever Loss': 0.5311362355868994}
	round 4: {'Sever Loss': 0.5115243217837753}
	round 5: {'Sever Loss': 0.4945966798826883}
History (metrics, centralized):
{'Server Accuracy': [(0, 0.098), (1, 0.6701), (2, 0.7842), (3, 0.8382), (4, 0.8424), (5, 0.8531)]}

In [2]:
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
trainDF = pd.read_csv(r"C:\HomeLab\ML Dissertation\Datasets\UNSW-NB15\UNSW_NB15_training-set.csv")
testDF = pd.read_csv(r"C:\HomeLab\ML Dissertation\Datasets\UNSW-NB15\UNSW_NB15_testing-set.csv")

In [4]:
trainDF.shape

testDF.shape

(82332, 45)

In [5]:
trainDF = trainDF.dropna()
trainDF = trainDF.drop_duplicates()

testDF = testDF.dropna()
testDF = testDF.drop_duplicates()

In [14]:
#Removing Redundant Columns
trainDF.drop(labels="id", axis=1, inplace=True)
trainDF.drop(labels="label", axis=1, inplace=True)

testDF.drop(labels="id", axis=1, inplace=True)
testDF.drop(labels="label", axis=1, inplace=True)


In [15]:
trainDF

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat
0,0.121478,tcp,-,FIN,6,4,258,172,74.087490,252,...,1,1,1,0,0,0,1,1,0,Normal
1,0.649902,tcp,-,FIN,14,38,734,42014,78.473372,62,...,1,1,2,0,0,0,1,6,0,Normal
2,1.623129,tcp,-,FIN,8,16,364,13186,14.170161,62,...,1,1,3,0,0,0,2,6,0,Normal
3,1.681642,tcp,ftp,FIN,12,12,628,770,13.677108,62,...,1,1,3,1,1,0,2,1,0,Normal
4,0.449454,tcp,-,FIN,10,6,534,268,33.373826,254,...,2,1,40,0,0,0,2,39,0,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175336,0.000009,udp,dns,INT,2,0,114,0,111111.107200,254,...,24,13,24,0,0,0,24,24,0,Generic
175337,0.505762,tcp,-,FIN,10,8,620,354,33.612649,254,...,1,1,2,0,0,0,1,1,0,Shellcode
175338,0.000009,udp,dns,INT,2,0,114,0,111111.107200,254,...,3,3,13,0,0,0,3,12,0,Generic
175339,0.000009,udp,dns,INT,2,0,114,0,111111.107200,254,...,30,14,30,0,0,0,30,30,0,Generic


In [13]:
#Import one hot encoder from sklearn

from sklearn.preprocessing import OneHotEncoder

#Create one hot encoder

ohe = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

#Apply one hot encoder to train and test data

trn_Encoder = ohe.fit_transform(trainDF[['proto','service','state','attack_cat']])

tst_Encoder = ohe.fit_transform(testDF[['proto','service','state','attack_cat']])

#Assigning new encoded data to 'features' variables
trn_Features = pd.DataFrame(trn_Encoder, columns= ohe.get_feature_names_out())

tst_Features = pd.DataFrame(tst_Encoder, columns= ohe.get_feature_names_out())

#Drop old column names
trn_Features.drop(labels="proto", axis=1, inplace=True)
trn_Features.drop(labels="service", axis=1, inplace=True)
trn_Features.drop(labels="state", axis=1, inplace=True)
trn_Features.drop(labels="attack_cat", axis=1, inplace=True)

tst_Features.drop(labels="proto", axis=1, inplace=True)
tst_Features.drop(labels="service", axis=1, inplace=True)
tst_Features.drop(labels="state", axis=1, inplace=True)
tst_Features.drop(labels="attack_cat", axis=1, inplace=True)

#Joining new features to dataset

trainDF = pd.concat([trainDF,trn_Features], axis=1)

testDF = pd.concat([testDF,tst_Features], axis=1)

trainDF




ValueError: Shape of passed values is (175341, 165), indices imply (175341, 161)

In [8]:
#Imports minmaxscaler to normalise (Scale) data
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

scaler.fit(trainDF)

trainDF = scaler.transform(trainDF)
testDF = scaler.transform(testDF)

ValueError: could not convert string to float: 'tcp'