In [None]:
import ast

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import trange

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score
from sklearnex import patch_sklearn

In [None]:
# Load the UNSW-NB15 train and test splits and report their shapes.
train_raw = pd.read_csv('../Data/UNSW-NB15/train.csv')
print(train_raw.shape)
test_raw = pd.read_csv('../Data/UNSW-NB15/test.csv')
print(test_raw.shape)

# Separate the binary target ('label') from the predictors. 'id' and
# 'attack_cat' are dropped with it, and only numeric columns are kept.
train_X = train_raw.drop(['id', 'attack_cat', 'label'], axis=1).select_dtypes(include='number')
train_Y = train_raw['label']
test_X = test_raw.drop(['id', 'attack_cat', 'label'], axis=1).select_dtypes(include='number')
test_Y = test_raw['label']

# Min-max scale both splits using statistics of the training split only.
train_min = train_X.min(axis=0)
train_span = train_X.max(axis=0) - train_min
# A column that is constant in the training split has a zero span; dividing by
# it would produce NaN/inf, so such columns are divided by 1 instead.
train_span = train_span.replace(0, 1)

train_X1 = (train_X - train_min) / train_span
# Test values that fall outside the training range are clamped into [0, 1].
test_X1 = ((test_X - train_min) / train_span).clip(lower=0, upper=1)

# NOTE(review): the model-fitting cells visible in this notebook index
# train_X / test_X, not the scaled train_X1 / test_X1 -- confirm that fitting
# on the unscaled features is intended.

In [None]:
# Classifier instances shared by the evaluation cells below; each one is
# re-fitted on every feature subset. Both are seeded with random_state=0.
model_gb = GradientBoostingClassifier(random_state=0)
model_lr = LogisticRegression(
    max_iter=10000,
    random_state=0,
    n_jobs=-1,
)

In [None]:
# Result tables read from ../Results.
# Individual_feature_sets: one row per selection method; columns from index 1
# onwards are used as feature names by the evaluation loops.
# stopping_points is loaded here but not referenced by the cells visible in
# this notebook.
stopping_points = pd.read_csv('../Results/stopping_points.csv')
Individual_feature_sets = pd.read_csv('../Results/Individual_Feature_sets.csv')

In [None]:
# Logistic regression: for each of the 7 rows of Individual_feature_sets, fit
# on every growing prefix of its features (columns 1 .. stop-1; column 0 is
# skipped) and record the F1 score on the test split.
n_columns = Individual_feature_sets.shape[1]

f1_all = []
for row in range(7):
    row_scores = []
    for stop in trange(2, n_columns + 1):
        subset = Individual_feature_sets.iloc[row, 1:stop]
        model_lr.fit(train_X[subset], train_Y)
        row_scores.append(f1_score(test_Y, model_lr.predict(test_X[subset])))
    f1_all.append(row_scores)

In [None]:
# Save the scores held in f1_all as CSV, labelling its 7 rows.
method_labels = ['chi2', 'ANOVA', 'mutualinfo', 'sfs(rf)', 'sfs(lr)', 'im(rf)', 'im(lr)']
pd.DataFrame(f1_all, index=method_labels).to_csv('../Results/Individual_F1_LR_Test.csv')

In [None]:
# Gradient boosting: test-set F1 for every growing feature prefix
# (columns 1 .. upto-1) of each of the 7 rows of Individual_feature_sets.
f1_all = []
for method_idx in range(7):
    scores = []
    for upto in trange(2, Individual_feature_sets.shape[1] + 1):
        chosen = Individual_feature_sets.iloc[method_idx, 1:upto]
        model_gb.fit(train_X[chosen], train_Y)
        predicted = model_gb.predict(test_X[chosen])
        scores.append(f1_score(test_Y, predicted))
    f1_all.append(scores)

In [None]:
# Save the scores held in f1_all as CSV, labelling its 7 rows.
gb_scores = pd.DataFrame(
    f1_all,
    index=['chi2', 'ANOVA', 'mutualinfo', 'sfs(rf)', 'sfs(lr)', 'im(rf)', 'im(lr)'],
)
gb_scores.to_csv('../Results/Individual_F1_GB_Test.csv')

In [None]:
from tensorflow.python.keras import Sequential, layers, optimizers, losses, metrics, callbacks, backend

In [None]:
def ModelCreate(input_shape, hidden_units=50, n_hidden_layers=4, dropout_rate=0.2):
    """Build and compile a fully connected binary classifier.

    The network is a stack of ``n_hidden_layers`` ReLU ``Dense`` layers, each
    followed by a ``Dropout`` layer, and ends in a single sigmoid unit. With
    the default arguments this is four Dense(50)/Dropout(0.2) pairs.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, e.g. ``(n_features,)``; passed to the first
        ``Dense`` layer.
    hidden_units : int, default 50
        Width of every hidden ``Dense`` layer.
    n_hidden_layers : int, default 4
        Number of Dense + Dropout pairs; must be at least 1.
    dropout_rate : float, default 0.2
        Rate passed to every ``Dropout`` layer.

    Returns
    -------
    The compiled ``Sequential`` model (optimizer 'adam', binary cross-entropy
    loss, binary accuracy metric).

    Raises
    ------
    ValueError
        If ``n_hidden_layers`` is smaller than 1.
    """
    if n_hidden_layers < 1:
        raise ValueError('n_hidden_layers must be at least 1')

    model = Sequential()
    for depth in range(n_hidden_layers):
        # Only the first layer is told the input shape; later layers take
        # their input from the layer before them.
        first_layer_args = {'input_shape': input_shape} if depth == 0 else {}
        model.add(layers.Dense(hidden_units, activation='relu', **first_layer_args))
        model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss=losses.binary_crossentropy, metrics=[metrics.binary_accuracy])
    return model

In [None]:
# Early stopping on the training loss: stop after 3 epochs without an
# improvement of at least 0.01 and restore the best weights seen.
callback = callbacks.EarlyStopping(monitor='loss', mode='min', patience=3, min_delta=0.01, restore_best_weights=True)

# Neural network: test-set F1 for every growing feature prefix (columns
# 1 .. k-1) of each of the 7 rows of Individual_feature_sets.
f1_all = []
for i in range(7):
    f1s = []
    for k in trange(2, Individual_feature_sets.shape[1]+1):
        # A new network is built on every iteration; release the global Keras
        # state left by the previous one so memory does not keep growing.
        backend.clear_session()
        features = Individual_feature_sets.iloc[i, 1:k]
        # The prefix 1:k holds k-1 feature names, hence the input width k-1.
        model_nn = ModelCreate((k-1,))
        model_nn.fit(train_X[features], train_Y, epochs=30, callbacks=[callback], use_multiprocessing=True, verbose=0)
        predict = model_nn.predict(test_X[features], use_multiprocessing=True)
        # Threshold the sigmoid output at 0.5 to obtain 0/1 class labels.
        predict = np.where(predict < 0.5, 0, 1)
        f1s.append(f1_score(test_Y, predict))
    f1_all.append(f1s)

In [None]:
# Save the scores held in f1_all as CSV, labelling its 7 rows.
dnn_row_labels = ['chi2', 'ANOVA', 'mutualinfo', 'sfs(rf)', 'sfs(lr)', 'im(rf)', 'im(lr)']
dnn_scores = pd.DataFrame(f1_all, index=dnn_row_labels)
dnn_scores.to_csv('../Results/Individual_F1_DNN_Test.csv')

In [None]:
# Combined feature sets read from ../Results.
# NOTE(review): unlike the other result files read in this notebook, this path
# has no '.csv' suffix -- confirm the file name on disk.
set_feature_sets = pd.read_csv(
    '../Results/Set_Feature_sets',
)

In [None]:
# Logistic regression on the combined feature sets.
# The scores are saved with exactly these three row labels, so one row of
# set_feature_sets is evaluated per label (looping over 7 rows cannot match
# a 3-label index).
set_labels = ['union', 'intersection', 'quorum']

f1_all = []
for i in range(len(set_labels)):
    f1s = []
    # Column 0 is skipped; every remaining column holds one feature set.
    for k in trange(1, set_feature_sets.shape[1]):
        cell = set_feature_sets.iloc[i, k]
        # read_csv yields a scalar per cell (a str, or NaN when blank), never
        # a list, so the cell has to be parsed before it can select columns.
        # NOTE(review): assumes a non-blank cell is the repr of a Python list
        # of column names, e.g. "['a', 'b']" -- confirm against the code that
        # wrote the file.
        features = list(ast.literal_eval(cell)) if isinstance(cell, str) else []
        if len(features) > 0:
            model_lr.fit(train_X[features], train_Y)
            predict = model_lr.predict(test_X[features])
            f1s.append(f1_score(test_Y, predict))
        else:
            # Empty feature set: record an F1 of 0 instead of fitting.
            f1s.append(0)
    f1_all.append(f1s)

In [None]:
# Save the scores held in f1_all as CSV; the index supplies 3 row labels, so
# f1_all must hold exactly 3 rows.
set_row_labels = ['union', 'intersection', 'quorum']
pd.DataFrame(f1_all, index=set_row_labels).to_csv('../Results/Set_F1_LR_Test.csv')

In [None]:
# Gradient boosting on the combined feature sets. These scores are written to
# Set_F1_GB_Test.csv, so the gradient-boosting model (not the logistic
# regression one) has to be fitted here.
# The scores are saved with exactly these three row labels, so one row of
# set_feature_sets is evaluated per label.
set_labels = ['union', 'intersection', 'quorum']

f1_all = []
for i in range(len(set_labels)):
    f1s = []
    # Column 0 is skipped; every remaining column holds one feature set.
    for k in trange(1, set_feature_sets.shape[1]):
        cell = set_feature_sets.iloc[i, k]
        # read_csv yields a scalar per cell (a str, or NaN when blank), never
        # a list, so the cell has to be parsed before it can select columns.
        # NOTE(review): assumes a non-blank cell is the repr of a Python list
        # of column names, e.g. "['a', 'b']" -- confirm against the code that
        # wrote the file.
        features = list(ast.literal_eval(cell)) if isinstance(cell, str) else []
        if len(features) > 0:
            model_gb.fit(train_X[features], train_Y)
            predict = model_gb.predict(test_X[features])
            f1s.append(f1_score(test_Y, predict))
        else:
            # Empty feature set: record an F1 of 0 instead of fitting.
            f1s.append(0)
    f1_all.append(f1s)

In [None]:
# Save the scores held in f1_all as CSV; the index supplies 3 row labels, so
# f1_all must hold exactly 3 rows.
set_gb_scores = pd.DataFrame(
    f1_all,
    index=['union', 'intersection', 'quorum'],
)
set_gb_scores.to_csv('../Results/Set_F1_GB_Test.csv')

In [None]:
# Neural network on the combined feature sets.
# The scores are saved with exactly these three row labels, so one row of
# set_feature_sets is evaluated per label.
set_labels = ['union', 'intersection', 'quorum']

f1_all = []
for i in range(len(set_labels)):
    f1s = []
    # Same column range as the LR/GB set cells: column 0 is skipped and the
    # last valid column index is shape[1]-1 (iloc[i, shape[1]] is out of bounds).
    for k in trange(1, set_feature_sets.shape[1]):
        cell = set_feature_sets.iloc[i, k]
        # read_csv yields a scalar per cell (a str, or NaN when blank), never
        # a list, so the cell has to be parsed before it can select columns.
        # NOTE(review): assumes a non-blank cell is the repr of a Python list
        # of column names, e.g. "['a', 'b']" -- confirm against the code that
        # wrote the file.
        features = list(ast.literal_eval(cell)) if isinstance(cell, str) else []
        if len(features) > 0:
            # A new network is built on every iteration; release the global
            # Keras state left by the previous one so memory does not grow.
            backend.clear_session()
            model_nn = ModelCreate((len(features),))
            model_nn.fit(train_X[features], train_Y, epochs=30, callbacks=[callback], use_multiprocessing=True, verbose=0)
            predict = model_nn.predict(test_X[features], use_multiprocessing=True)
            # Threshold the sigmoid output at 0.5 to obtain 0/1 class labels.
            predict = np.where(predict < 0.5, 0, 1)
            f1s.append(f1_score(test_Y, predict))
        else:
            # Empty feature set: record an F1 of 0, as the LR/GB set cells do.
            f1s.append(0)
    f1_all.append(f1s)

In [None]:
# Save the scores held in f1_all as CSV; the index supplies 3 row labels, so
# f1_all must hold exactly 3 rows.
set_dnn_labels = ['union', 'intersection', 'quorum']
set_dnn_scores = pd.DataFrame(f1_all, index=set_dnn_labels)
set_dnn_scores.to_csv('../Results/Set_F1_DNN_Test.csv')

In [None]:
# Greedy feature ranking read from ../Results; the evaluation cells use row 0
# and take feature names from column 1 onwards.
greedy_feature_sets = pd.read_csv(
    '../Results/Greedy_Feature_sets.csv',
)

In [None]:
# Logistic regression: test-set F1 for every growing prefix of the greedy
# ranking (row 0 of greedy_feature_sets, columns 1 .. stop-1).
n_greedy_columns = greedy_feature_sets.shape[1]

f1_all = []
for stop in trange(2, n_greedy_columns + 1):
    subset = greedy_feature_sets.iloc[0, 1:stop]
    model_lr.fit(train_X[subset], train_Y)
    f1_all.append(f1_score(test_Y, model_lr.predict(test_X[subset])))

In [None]:
# f1_all is a flat list here, so it is wrapped to form a single row
# labelled 'greedy' before being written to CSV.
greedy_lr_scores = pd.DataFrame([f1_all], index=['greedy'])
greedy_lr_scores.to_csv('../Results/Greedy_F1_LR_Test.csv')

In [None]:
# Gradient boosting: test-set F1 for every growing prefix of the greedy
# ranking (row 0 of greedy_feature_sets, columns 1 .. upto-1).
f1_all = []
for upto in trange(2, greedy_feature_sets.shape[1] + 1):
    chosen = greedy_feature_sets.iloc[0, 1:upto]
    model_gb.fit(train_X[chosen], train_Y)
    predicted = model_gb.predict(test_X[chosen])
    f1_all.append(f1_score(test_Y, predicted))

In [None]:
# f1_all is a flat list here, so it is wrapped to form a single row
# labelled 'greedy' before being written to CSV.
pd.DataFrame(
    [f1_all],
    index=['greedy'],
).to_csv('../Results/Greedy_F1_GB_Test.csv')

In [None]:
# Neural network: test-set F1 for every growing prefix of the greedy ranking
# (row 0 of greedy_feature_sets, columns 1 .. k-1).
f1_all = []
for k in trange(2, greedy_feature_sets.shape[1]+1):
    # A new network is built on every iteration; release the global Keras
    # state left by the previous one so memory does not keep growing.
    backend.clear_session()
    features = greedy_feature_sets.iloc[0, 1:k]
    model_nn = ModelCreate((len(features),))
    model_nn.fit(train_X[features], train_Y, epochs=30, callbacks=[callback], use_multiprocessing=True, verbose=0)
    predict = model_nn.predict(test_X[features], use_multiprocessing=True)
    # Threshold the sigmoid output at 0.5 to obtain 0/1 class labels.
    predict = np.where(predict < 0.5, 0, 1)
    f1_all.append(f1_score(test_Y, predict))

In [None]:
# f1_all is a flat list here, so it is wrapped to form a single row
# labelled 'greedy' before being written to CSV.
greedy_dnn_scores = pd.DataFrame([f1_all], index=['greedy'])
greedy_dnn_scores.to_csv('../Results/Greedy_F1_DNN_Test.csv')