In [1]:
import flwr as fl
import LR_Utils as utils
from flwr.common import NDArrays, Scalar
from sklearn.metrics import log_loss
from sklearn import metrics 
import pandas as pd
from sklearn.linear_model import LogisticRegression
from typing import Tuple, Dict, Optional

In [2]:
def fit_round(server_round: int) -> Dict:
    """Send round number to client."""
    return {"server_round": server_round}

In [3]:
def get_evaluate_fn(model: LogisticRegression):
    """Return an evaluation function for server-side evaluation."""

    _, (X_test, y_test) = utils.load_Data()

    def evaluate(
        server_round: int, parameters: NDArrays, config: Dict[str, Scalar]
    ) -> Optional[Tuple[float, Dict[str, Scalar]]]:
        utils.set_model_params(model, parameters)
        loss = log_loss(y_test, model.predict_proba(X_test))
        accuracy = model.score(X_test, y_test)
        return ({"Sever Loss":loss}, {"Server Accuracy": accuracy})
        
    return evaluate


In [4]:
if __name__ == "__main__":
    model = LogisticRegression()
    utils.set_initial_params(model)
    strategy = fl.server.strategy.FedAvg(
        min_available_clients=2,
        evaluate_fn=get_evaluate_fn(model),
        on_fit_config_fn=fit_round,

    )

In [5]:
fl.server.start_server(
    server_address="127.0.0.1:8080",
    strategy=strategy,
    config=fl.server.ServerConfig(num_rounds=300)
)

INFO flwr 2024-03-09 16:58:56,336 | app.py:163 | Starting Flower server, config: ServerConfig(num_rounds=300, round_timeout=None)
INFO flwr 2024-03-09 16:58:56,373 | app.py:176 | Flower ECE: gRPC server running (300 rounds), SSL is disabled
INFO flwr 2024-03-09 16:58:56,374 | server.py:89 | Initializing global parameters
INFO flwr 2024-03-09 16:58:56,374 | server.py:276 | Requesting initial parameters from one random client
INFO flwr 2024-03-09 16:59:04,310 | server.py:280 | Received initial parameters from one random client
INFO flwr 2024-03-09 16:59:04,311 | server.py:91 | Evaluating initial parameters
INFO flwr 2024-03-09 16:59:04,504 | server.py:94 | initial parameters (loss, other metrics): {'Sever Loss': 2.302585092994046}, {'Server Accuracy': 0.008222805227615022}
INFO flwr 2024-03-09 16:59:04,505 | server.py:104 | FL starting
DEBUG flwr 2024-03-09 16:59:07,094 | server.py:222 | fit_round 1: strategy sampled 2 clients (out of 2)
DEBUG flwr 2024-03-09 16:59:07,143 | server.py:236

In [26]:
import matplotlib.pyplot as plt
import pandas as pd

In [27]:
trainDF = pd.read_csv(r"C:\Users\adamc\Work\HomeLab\ML Dissertation\Datasets\UNSW-NB15\UNSW_NB15_training-set.csv")
testDF = pd.read_csv(r"C:\Users\adamc\Work\HomeLab\ML Dissertation\Datasets\UNSW-NB15\UNSW_NB15_testing-set.csv")


In [28]:
trainDF.shape

testDF.shape

(82332, 45)

In [29]:
trainDF = trainDF.dropna()
trainDF = trainDF.drop_duplicates()

testDF = testDF.dropna()
testDF = testDF.drop_duplicates()

In [30]:
#Removing Redundant Columns
trainDF = trainDF.drop(columns='id')
trainDF = trainDF.drop(columns='label')

testDF = testDF.drop(columns='id')
testDF = testDF.drop(columns='label')


In [31]:
trainDF

Unnamed: 0,dur,proto,service,state,spkts,dpkts,sbytes,dbytes,rate,sttl,...,ct_src_dport_ltm,ct_dst_sport_ltm,ct_dst_src_ltm,is_ftp_login,ct_ftp_cmd,ct_flw_http_mthd,ct_src_ltm,ct_srv_dst,is_sm_ips_ports,attack_cat
0,0.121478,tcp,-,FIN,6,4,258,172,74.087490,252,...,1,1,1,0,0,0,1,1,0,Normal
1,0.649902,tcp,-,FIN,14,38,734,42014,78.473372,62,...,1,1,2,0,0,0,1,6,0,Normal
2,1.623129,tcp,-,FIN,8,16,364,13186,14.170161,62,...,1,1,3,0,0,0,2,6,0,Normal
3,1.681642,tcp,ftp,FIN,12,12,628,770,13.677108,62,...,1,1,3,1,1,0,2,1,0,Normal
4,0.449454,tcp,-,FIN,10,6,534,268,33.373826,254,...,2,1,40,0,0,0,2,39,0,Normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175336,0.000009,udp,dns,INT,2,0,114,0,111111.107200,254,...,24,13,24,0,0,0,24,24,0,Generic
175337,0.505762,tcp,-,FIN,10,8,620,354,33.612649,254,...,1,1,2,0,0,0,1,1,0,Shellcode
175338,0.000009,udp,dns,INT,2,0,114,0,111111.107200,254,...,3,3,13,0,0,0,3,12,0,Generic
175339,0.000009,udp,dns,INT,2,0,114,0,111111.107200,254,...,30,14,30,0,0,0,30,30,0,Generic


In [32]:
#Import one hot encoder from sklearn

from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
enc.fit(trainDF[['proto','service','state','attack_cat']])  # Fit encoder on training data

train_encoded = enc.transform(trainDF[['proto','service','state','attack_cat']])
test_encoded = enc.transform(testDF[['proto','service','state','attack_cat']])

# Replace 'proto','service','state','attack_cat' column with encoded data
trainDF = pd.concat([trainDF.drop(['proto','service','state','attack_cat'], axis=1), pd.DataFrame(train_encoded, columns=enc.get_feature_names_out(['proto','service','state','attack_cat']))], axis=1)
testDF = pd.concat([testDF.drop(['proto','service','state','attack_cat'], axis=1), pd.DataFrame(test_encoded, columns=enc.get_feature_names_out(['proto','service','state','attack_cat']))], axis=1)
    


In [33]:
trainDF.shape

(175341, 204)

In [34]:
#Create classes variable listing all outcomes of dataset
class_name = ['attack_cat_Analysis',
 'attack_cat_Backdoor',
 'attack_cat_DoS',
 'attack_cat_Exploits',
 'attack_cat_Fuzzers',
 'attack_cat_Generic',
 'attack_cat_Normal',
 'attack_cat_Reconnaissance',
 'attack_cat_Shellcode',
 'attack_cat_Worms']
# Select everything other than classes
x_train = trainDF.drop(columns=class_name)
x_test = testDF.drop(columns=class_name)
# Select only classes
y_test = testDF[class_name]
y_train = trainDF[class_name]


In [35]:
trainDF.shape

(175341, 204)

In [36]:
y_train.shape

(175341, 10)

In [37]:
import numpy as np
y_test = np.argmax(y_test,axis=1)
y_train = np.argmax(y_train,axis=1)

In [38]:
y_test.shape

(82332,)

In [39]:
unique, counts = np.unique(y_test, return_counts=True)
print("Unique values:", unique)
print("Counts:", counts)

Unique values: [0 1 2 3 4 5 6 7 8 9]
Counts: [  677   583  4089 11132  6062 18871 37000  3496   378    44]


In [40]:
trainDF.shape

(175341, 204)

In [41]:
testDF.shape

(82332, 204)

In [42]:
x_test.shape

(82332, 194)

In [43]:
x_train.shape

(175341, 194)

In [44]:
y_test.shape

(82332,)

In [45]:
y_train.shape

(175341,)

In [46]:
target_columns = [col for col in trainDF.columns if col.startswith('attack_cat')]
target_columns

['attack_cat_Analysis',
 'attack_cat_Backdoor',
 'attack_cat_DoS',
 'attack_cat_Exploits',
 'attack_cat_Fuzzers',
 'attack_cat_Generic',
 'attack_cat_Normal',
 'attack_cat_Reconnaissance',
 'attack_cat_Shellcode',
 'attack_cat_Worms']

In [47]:
#Imports minmaxscaler to normalise (Scale) data
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

scaler.fit(trainDF)

trainDF = scaler.transform(trainDF)
testDF = scaler.transform(testDF)

In [48]:
trainDF

array([[2.02463370e-03, 5.20020801e-04, 3.64497904e-04, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.08317020e-02, 1.35205408e-03, 3.46273009e-03, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [2.70521550e-02, 7.28029121e-04, 1.45799162e-03, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       ...,
       [1.50000028e-07, 1.04004160e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.50000028e-07, 1.04004160e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
       [1.50000028e-07, 1.04004160e-04, 0.00000000e+00, ...,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00]])