In [1]:
import pandas as pd
import joblib
import numpy as np
from sklearn.impute import SimpleImputer
from tabulate import tabulate

# Load the merged test set into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the cell only runs
# where this exact file location exists.
test_csv_path = 'C:/Users/katsa/OneDrive/Jupyter_files/cic_nb15_hybrid/merged_test_dataset.csv'
merged_dataset = pd.read_csv(test_csv_path)

In [2]:
# Evaluate on a 10% random subsample of the test set.
# A fixed random_state makes the subsample, and every metric computed from it,
# reproducible across kernel restarts.
# NOTE: this rebinds `merged_dataset`, so running the cell twice subsamples twice;
# re-run the loading cell first when re-executing.
merged_dataset = (
    merged_dataset
    .sample(frac=0.1, random_state=42)
    .reset_index(drop=True)  # 0..n-1 index so positional and label lookups agree
)
merged_dataset.head()

Unnamed: 0,dsport,dur,Spkts,Dpkts,sbytes,dbytes,smeansz,dmeansz,flow_bytes/s,flow_packets/s,fwd_packets/s,bwd_packets/s,Label
0,16095.0,198.892,66,68,3926,55486,59,816,298714900.0,673732.5,331838.4,341894.1,BENIGN
1,53.0,0.008,2,0,264,0,132,0,33000000000.0,250000000.0,250000000.0,0.0,BENIGN
2,5190.0,6.235,12,12,1064,2260,89,188,533119500.0,3849238.0,1924619.0,1924619.0,BENIGN
3,53.0,1.053,2,2,130,162,65,81,277302900.0,3798670.0,1899335.0,1899335.0,BENIGN
4,53.0,1.049,2,2,130,162,65,81,278360300.0,3813155.0,1906578.0,1906578.0,BENIGN


In [3]:
# Define the discriminator model
import torch
from torch import nn

class Discriminator(nn.Module):
    """Fully connected binary classifier: ``input_dim -> 8 -> 4 -> 1``.

    Hidden layers use LeakyReLU(0.2); the final Sigmoid maps the output to
    the range [0, 1].

    Args:
        input_dim: Number of input features. When omitted, the module-level
            name ``data_dim`` is used, so the existing ``Discriminator()``
            call keeps working as long as ``data_dim`` is assigned first.

    Raises:
        NameError: If ``input_dim`` is omitted and ``data_dim`` is undefined.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback on the notebook-level global.
            input_dim = data_dim
        # Keep the attribute name and layer order unchanged: saved state dicts
        # are keyed as "model.<index>.weight" / "model.<index>.bias".
        self.model = nn.Sequential(
            nn.Linear(input_dim, 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(8, 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(4, 1),
            nn.Sigmoid()
        )

    def forward(self, data):
        """Return the sigmoid output of the network for ``data``.

        ``data`` must have ``input_dim`` as its last dimension; the result has
        the same leading dimensions and a last dimension of size 1.
        """
        return self.model(data)
    
   

In [4]:
# Collect the paths of all saved discriminator checkpoints under ./models
# (one .pth file per training configuration / epoch).
import glob

discr_model_pattern = './models/discr_model*.pth'
discr_models_lst = glob.glob(discr_model_pattern)
print(discr_models_lst)

['./models\\discr_model-lr=0.0002-batch=64-epochs=0.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=1.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=10.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=11.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=12.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=13.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=14.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=15.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=16.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=17.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=18.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=19.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=2.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=20.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=21.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=22.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=23.pth', './models\\discr

In [5]:
# Preview the name fragment used for the per-model output files: drop the
# first 9 characters (the './models' directory prefix plus separator) and the
# trailing 'pth', keeping the final dot.
first_model_path = discr_models_lst[0]
first_model_path[9:-3]

'discr_model-lr=0.0002-batch=64-epochs=0.'

In [6]:
# Width of the discriminator's input layer. Discriminator.__init__ reads this
# module-level name, so it must be assigned before the model is constructed.
# 12 matches the number of non-label columns in merged_dataset.
data_dim = 12
discriminator = Discriminator()

In [7]:
# selected_features_nb15 = ['dsport', 'dur', 'Spkts', 'Dpkts', 'sbytes', 'dbytes', 'smeansz', 
#                           'dmeansz', 'flow_bytes/s', 'flow_packets/s', 'fwd_packets/s', 'bwd_packets/s']

In [8]:
# Show the column names: 'Label' (last) is the target, the rest are the features.
print(merged_dataset.columns)

Index(['dsport', 'dur', 'Spkts', 'Dpkts', 'sbytes', 'dbytes', 'smeansz',
       'dmeansz', 'flow_bytes/s', 'flow_packets/s', 'fwd_packets/s',
       'bwd_packets/s', 'Label'],
      dtype='object')


In [9]:
# Load the previously saved scaler object from the file 'minmaxscaler' in the
# working directory; it is applied to the imputed features further down.
# NOTE(review): presumably the MinMaxScaler fitted on the training data — confirm.
# joblib.load unpickles the file, so only load it from a trusted source.
minmaxscaler = joblib.load('minmaxscaler')

In [10]:
# Split the labels from the features, then impute and scale the features so
# they can be fed to the discriminator.
simp = SimpleImputer(keep_empty_features=True)

merged_dataset_labels = merged_dataset['Label']

# merged_dataset = merged_dataset[selected_features_nb15]
# Drop the target by name (not by position) and turn +/-inf into NaN so the
# imputer can fill them. The chain builds a new frame instead of mutating a
# slice in place, which avoids pandas' SettingWithCopy hazard.
# NOTE: this rebinds `merged_dataset` to the features-only frame, so the cell
# cannot be re-run without re-running the loading cells first.
merged_dataset = (
    merged_dataset
    .drop(columns=['Label'])
    .replace([np.inf, -np.inf], np.nan)
)

# NOTE(review): the imputer is fitted on this evaluation sample, so the fill
# values come from test data — confirm this matches the training pipeline.
merged_dataset_features = simp.fit_transform(merged_dataset)

# Apply the already-fitted scaler (transform only, no re-fit).
merged_dataset_features = minmaxscaler.transform(merged_dataset_features)


In [12]:
# Evaluate every saved discriminator checkpoint on the prepared test features.
# For each checkpoint: print accuracy plus per-class precision/recall/F1, and
# write the same metrics to ./test_eval2/output-<checkpoint name>.txt.
import os


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is 0.

    Keeps the loop running when a class is never predicted or is absent from
    the sample, instead of raising ZeroDivisionError.
    """
    return numerator / denominator if denominator else 0.0


OUTPUT_DIR = './test_eval2/'
os.makedirs(OUTPUT_DIR, exist_ok=True)  # do not assume the folder already exists

num_samples = len(merged_dataset_features)
label_counts = merged_dataset_labels.value_counts()
# .get avoids an AttributeError when a class is missing from the sample.
total_benign_conn = label_counts.get('BENIGN', 0)
total_attack_conn = label_counts.get('ATTACK', 0)
best_accuracy = -1
best_pair = {}

# Loop-invariant inputs: boolean masks per class and one float32 tensor with
# all rows, so each checkpoint needs a single batched forward pass.
labels_array = merged_dataset_labels.to_numpy()
is_benign = labels_array == 'BENIGN'
is_attack = labels_array == 'ATTACK'
features_tensor = torch.as_tensor(np.asarray(merged_dataset_features), dtype=torch.float32)

for PATH_discr in discr_models_lst:
    # NOTE: torch.load unpickles the file — only load checkpoints you trust.
    # map_location='cpu' lets checkpoints saved on a GPU load on a CPU-only machine.
    discriminator.load_state_dict(torch.load(PATH_discr, map_location='cpu'))
    discriminator.eval()

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        scores = discriminator(features_tensor).reshape(-1).numpy()

    # A discriminator output below 0.5 is treated as an attack.
    predicted_attack = scores < 0.5

    # Plain Python ints keep the metrics plain floats, so str(data) written to
    # the output file has the same format as before.
    true_positive = int(np.count_nonzero(is_attack & predicted_attack))
    true_negative = int(np.count_nonzero(is_benign & ~predicted_attack))
    false_positive = int(np.count_nonzero(is_benign & predicted_attack))
    false_negative = int(np.count_nonzero(is_attack & ~predicted_attack))

    benign_recall = _safe_ratio(true_negative, true_negative + false_positive)
    attack_recall = _safe_ratio(true_positive, true_positive + false_negative)

    benign_precision = _safe_ratio(true_negative, true_negative + false_negative)
    attack_precision = _safe_ratio(true_positive, true_positive + false_positive)

    benign_f1 = _safe_ratio(2 * (benign_recall * benign_precision), benign_recall + benign_precision)
    attack_f1 = _safe_ratio(2 * (attack_recall * attack_precision), attack_recall + attack_precision)

    accuracy = _safe_ratio(
        true_positive + true_negative,
        true_positive + true_negative + false_positive + false_negative,
    )
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_pair = {PATH_discr: accuracy}

    data = {
        "BENIGN": {"Precision": benign_precision, "Recall": benign_recall, "F1-Score": benign_f1},
        "ATTACK": {"Precision": attack_precision, "Recall": attack_recall, "F1-Score": attack_f1}
    }

    # One table row per class: [class name, precision, recall, f1].
    table = [[row_name] + list(row_data.values()) for row_name, row_data in data.items()]
    column_names = ["Precision", "Recall", "F1-Score"]

    str1 = 'Results of ' + str(PATH_discr) + ' ...'
    str2 = "Overall Accuracy: " + str(accuracy)
    str3 = tabulate(table, headers=column_names, tablefmt="fancy_grid")
    output = str1 + "\n" + str2 + "\n" + str3 + "\n" + "###################################################"
    print(output)

    output2 = str2 + "\n" + str(data)
    # Derive the file name from the checkpoint's base name rather than from
    # fixed character offsets, so it works for any directory or separator.
    model_name = os.path.splitext(os.path.basename(PATH_discr))[0]
    file_name = os.path.join(OUTPUT_DIR, 'output-' + model_name + '.txt')
    # The context manager closes the file even if the write fails.
    with open(file_name, 'w') as file:
        file.write(output2)

print("Best model overall is: " + str(best_pair))

Results of ./models\discr_model-lr=0.0002-batch=64-epochs=0.pth ...
Overall Accuracy: 0.868011811023622
╒════════╤═════════════╤══════════╤════════════╕
│        │   Precision │   Recall │   F1-Score │
╞════════╪═════════════╪══════════╪════════════╡
│ BENIGN │    0.888143 │ 0.97087  │   0.927666 │
├────────┼─────────────┼──────────┼────────────┤
│ ATTACK │    0.460251 │ 0.168841 │   0.247052 │
╘════════╧═════════════╧══════════╧════════════╛
###################################################
Results of ./models\discr_model-lr=0.0002-batch=64-epochs=1.pth ...
Overall Accuracy: 0.8768700787401574
╒════════╤═════════════╤══════════╤════════════╕
│        │   Precision │   Recall │   F1-Score │
╞════════╪═════════════╪══════════╪════════════╡
│ BENIGN │    0.951448 │ 0.904934 │   0.927608 │
├────────┼─────────────┼──────────┼────────────┤
│ ATTACK │    0.514977 │ 0.686109 │   0.588351 │
╘════════╧═════════════╧══════════╧════════════╛
###################################################
R