In [12]:
# NOTE(review): this cell carries execution count 12 but sits above cell 1, so it
# was run out of order; its saved output also exposes an absolute local path.
# Looks like a leftover working-directory check - consider removing before sharing.
%pwd

'D:\\Linh TInh\\Sang RD\\Code\\federated_learning_web_attack_detection'

In [1]:
import fed_learn
import model_lib
import json
import numpy as np
import data_lib
import math
# Load the experiment configuration. The encoding is given explicitly so the
# file is decoded the same way on every platform (JSON is UTF-8 by convention;
# the default text encoding on Windows is locale dependent).
with open('config.json', 'r', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Split the configuration into the sections used throughout the notebook.
# A missing section raises KeyError here, right next to the cause.
global_config = config['global_config']
data_config = config['data_config']
fed_config = config['fed_config']
dp_config = config['dp_config']

# ----------------------------------------------------------------------------
# PREPROCESSING DATA
# ----------------------------------------------------------------------------
print('-' * 100)
print('[INFO] DATA INFORMATION')

# Length value handed to every loader; also read by model_fn in a later cell,
# so it is defined once up front instead of separately in each branch.
max_len = 500

# Placeholders so every name exists regardless of which dataset is selected.
num_classes = None
model_name = None
(x_train, y_train), (x_test, y_test) = (None, None), (None, None)
(x_csic2010_train, y_csic2010_train), (x_csic2010_test, y_csic2010_test) = (None, None), (None, None)
(x_fwaf_train, y_fwaf_train), (x_fwaf_test, y_fwaf_test) = (None, None), (None, None)
(x_httpparams_train, y_httpparams_train), (x_httpparams_test, y_httpparams_test) = (None, None), (None, None)

dataset_name = data_config['dataset_name']

# Loaders for the single-dataset modes, keyed by the config value.
# NOTE(review): the first loader argument (0.2) is presumably the test-split
# fraction - confirm against data_lib.
single_dataset_loaders = {
    'csic2010': data_lib.csic2010_load_data,
    'fwaf': data_lib.fwaf_load_data,
    'httpparams': data_lib.httpparams_load_data,
}

if dataset_name in single_dataset_loaders:
    print('Using {} dataset ...'.format(dataset_name))
    (x_train, y_train), (x_test, y_test) = single_dataset_loaders[dataset_name](0.2, max_len)
elif dataset_name == 'fusion':
    print('Using three datasets: csic2010, fwaf, httpparams ...')
    (x_csic2010_train, y_csic2010_train), (x_csic2010_test, y_csic2010_test) = data_lib.csic2010_load_data(0.2, max_len)
    (x_fwaf_train, y_fwaf_train), (x_fwaf_test, y_fwaf_test) = data_lib.fwaf_load_data(0.2, max_len)
    (x_httpparams_train, y_httpparams_train), (x_httpparams_test, y_httpparams_test) = data_lib.httpparams_load_data(0.2, max_len)

    # In fusion mode the global test set is the union of the three test splits;
    # x_train / y_train stay None because each dataset is handed to its own
    # group of clients later on.
    x_test = np.concatenate((x_csic2010_test, x_fwaf_test, x_httpparams_test), axis=0)
    y_test = np.concatenate((y_csic2010_test, y_fwaf_test, y_httpparams_test), axis=0)
else:
    # Fail early instead of carrying None datasets into the federated setup.
    raise ValueError(
        "Unknown dataset_name {!r}; expected one of: csic2010, fwaf, httpparams, fusion".format(dataset_name)
    )

# Previously only the csic2010 branch filled these in. model_fn builds a
# two-class model for every dataset, and the name follows the
# 'model_<dataset>' pattern that csic2010 already used.
num_classes = 2
model_name = 'model_' + dataset_name

def model_fn():
    """Build a new CNN instance for the server and its clients.

    The input length is taken from the notebook-level ``max_len`` (the same
    value handed to the data loaders) rather than repeating the literal 500,
    so the model and the loaded data cannot drift apart.

    Returns:
        The object produced by ``model_lib.CNN``.
    """
    return model_lib.CNN(vocab_size=70,
                         embed_dim=128,
                         input_length=max_len,
                         num_class=2)

# Training settings handed to the server: the DP switch comes from the global
# section, the remaining values are copied from the federated section.
training_config = {'dp_mode': global_config['dp_mode']}
training_config.update(
    {key: fed_config[key] for key in ('batch_size', 'global_epochs', 'local_epochs')}
)

# Build the server with a FedAvg weight summarizer, then let it create its clients.
weight_summarizer = fed_learn.FedAvg()
server = fed_learn.Server(
    model_fn,
    weight_summarizer,
    training_config,
    fed_config,
    dp_config,
)
server.create_clients()

# ----------------------------------------------------------------------------
# DISTRIBUTING DATA FOR CLIENTS
# ----------------------------------------------------------------------------
sampling_technique = data_config['data_sampling_technique']

if data_config['dataset_name'] == 'fusion':
    # One (x_train, y_train, x_test, y_test) tuple per source dataset; each
    # dataset is served by its own contiguous group of clients.
    fusion_datasets = [
        ('csic2010', (x_csic2010_train, y_csic2010_train, x_csic2010_test, y_csic2010_test)),
        ('fwaf', (x_fwaf_train, y_fwaf_train, x_fwaf_test, y_fwaf_test)),
        ('httpparams', (x_httpparams_train, y_httpparams_train, x_httpparams_test, y_httpparams_test)),
    ]

    nb_clients = len(server.clients)
    # Kept under its original name in case later cells read it: this is the
    # size of the largest client group.
    nb_clients_each_datasets = math.ceil(nb_clients / len(fusion_datasets))

    # Balanced split: the first `extra` groups get one more client. Slicing by
    # ceil(n / 3) could leave the last dataset without any client
    # (e.g. 4 clients -> 2 / 2 / 0); this yields 2 / 1 / 1 instead.
    base, extra = divmod(nb_clients, len(fusion_datasets))
    start = 0
    for position, (source_name, splits) in enumerate(fusion_datasets):
        stop = start + base + (1 if position < extra else 0)
        client_group = server.clients[start:stop]
        start = stop

        if len(client_group) == 0:
            # Fewer clients than datasets: nothing to assign for this source.
            print('[WARN] No client available for the {} dataset; it will not be used for training.'.format(source_name))
            continue

        data_handler = data_lib.DataHandler(*splits)
        data_handler.assign_data_to_clients(client_group, sampling_technique)
        # Drop the handler right away so its reference to the arrays is released.
        del data_handler
else:
    # Single-dataset mode: every client draws from the same dataset.
    data_handler = data_lib.DataHandler(x_train, y_train, x_test, y_test)
    data_handler.assign_data_to_clients(server.clients, sampling_technique)
    del data_handler





# Hand the differential-privacy settings to the server only when DP mode is
# switched on in the global configuration.
if global_config['dp_mode']:
    server.update_dp_config(dp_config)


# Pass the training settings to the server, then run its setup step.
# NOTE(review): the order of these calls is kept exactly as written; whether
# setup() depends on the two updates above is not visible here - confirm in fed_learn.
server.update_training_config(training_config)
server.setup()



----------------------------------------------------------------------------------------------------
[INFO] DATA INFORMATION
Using csic2010 dataset ...
+++ csic2010 dataset: +++
	Number of normal requests:  18640
	Number of anomalous requests:  15873
	Number of total requests:  34513
Under 400 iterations and the sample_rate = 0.011494252873563218, the sigma of client 0 is 0.30825285613536835




Under 400 iterations and the sample_rate = 0.011494252873563218, the sigma of client 1 is 0.30825285613536835
Under 400 iterations and the sample_rate = 0.011494252873563218, the sigma of client 2 is 0.30825285613536835
Under 400 iterations and the sample_rate = 0.011494252873563218, the sigma of client 3 is 0.30825285613536835
Under 400 iterations and the sample_rate = 0.011494252873563218, the sigma of client 4 is 0.30825285613536835


In [4]:
from opacus.validators import ModuleValidator

def model_fn():
    """Build a CNN and run it through the Opacus module validator.

    The validation errors are printed; if there are any, the model is passed
    through ``ModuleValidator.fix`` and the fixed model is returned instead.

    Note: this definition shadows the ``model_fn`` from the first cell. The
    server built in that cell was given the earlier definition, so it is not
    affected by this one.

    Returns:
        The (possibly fixed) model object.
    """
    model = model_lib.CNN(vocab_size=70, embed_dim=128, input_length=max_len, num_class=2)

    # strict=False returns the list of problems instead of raising on the first.
    errors = ModuleValidator.validate(model, strict=False)
    print(errors)

    # A non-empty error list is exactly what ModuleValidator.is_valid() checks,
    # so reuse the result instead of validating the model a second time.
    if errors:
        model = ModuleValidator.fix(model)
        print('[INFO] Model failed Opacus validation; applied ModuleValidator.fix()')

    return model


# Build one model instance with the model_fn defined just above; the call also
# prints the list of validation errors (an empty list in the saved output).
model = model_fn()

[]
