In [1]:
import pandas as pd
import joblib
import numpy as np
from sklearn.impute import SimpleImputer
from tabulate import tabulate

# Location of the NB15 test split. NOTE: this is an absolute, machine-specific
# path; adjust it when running the notebook on another machine.
NB15_TEST_CSV_PATH = "C:/Users/katsa/OneDrive/Jupyter_files/shallow_models_cic_nb15/nb_all_feat_test_dataset.csv"

# Load the whole test split into memory as a DataFrame.
nb15_dataset = pd.read_csv(NB15_TEST_CSV_PATH)

In [2]:
# Evaluate on a 10% random subsample of the test split.
# A fixed seed makes the subsample (and therefore every metric and the
# "best model" ranking computed from it) reproducible across kernel restarts.
# NOTE: the cell rebinds `nb15_dataset`, so re-running it without re-loading the
# CSV samples 10% of the already-sampled frame.
SAMPLE_SEED = 42
nb15_dataset = (
    nb15_dataset
    .sample(frac=0.1, random_state=SAMPLE_SEED)
    .reset_index(drop=True)  # positional 0..n-1 index; later cells index labels by row number
)
nb15_dataset.head()

Unnamed: 0,sport,dsport,dur,sbytes,dbytes,sttl,dttl,sloss,dloss,Sload,...,service_ftp-data,service_http,service_irc,service_pop3,service_radius,service_smtp,service_snmp,service_ssh,service_ssl,Label
0,1043.0,53.0,9e-06,264,0,60,0,0,0,117333300.0,...,0,0,0,0,0,0,0,0,0,BENIGN
1,47439.0,53.0,9e-06,114,0,254,0,0,0,50666660.0,...,0,0,0,0,0,0,0,0,0,ATTACK
2,22799.0,21.0,0.925519,2934,3740,31,29,11,15,24876.85,...,0,0,0,0,0,0,0,0,0,BENIGN
3,47439.0,53.0,6e-06,264,0,60,0,0,0,176000000.0,...,0,0,0,0,0,0,0,0,0,BENIGN
4,57208.0,111.0,0.124177,568,304,31,29,0,0,27444.7,...,0,0,0,0,0,0,0,0,0,BENIGN


In [3]:
# Define the discriminator model
import torch
from torch import nn

class Discriminator(nn.Module):
    """Fully connected discriminator mapping a feature vector to a score in (0, 1).

    Architecture: Linear(input_dim, 8) -> LeakyReLU(0.2) -> Linear(8, 4)
    -> LeakyReLU(0.2) -> Linear(4, 1) -> Sigmoid. The layers live in
    ``self.model`` so the state-dict keys stay ``model.<idx>.weight/bias``.

    Args:
        input_dim: Number of input features. When omitted (``None``), the
            module-level ``data_dim`` variable is read at construction time,
            so ``Discriminator()`` keeps working as long as ``data_dim`` is
            defined before instantiation.

    Raises:
        NameError: If ``input_dim`` is omitted and ``data_dim`` is not defined.
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback to the notebook-level global.
            input_dim = data_dim
        self.model = nn.Sequential(
            nn.Linear(input_dim, 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(8, 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(4, 1),
            nn.Sigmoid()
        )

    def forward(self, data):
        """Return the sigmoid score for ``data`` (last dimension must equal the input width)."""
        validity = self.model(data)
        return validity
    
   

In [4]:
# Collect the paths of all saved discriminator checkpoints (one per training epoch).
import glob

# glob.glob() returns matches in arbitrary, filesystem-dependent order; sorting
# makes the evaluation order -- and the tie-break when two checkpoints reach the
# same accuracy -- deterministic across machines.
discr_models_lst = sorted(glob.glob('./models/discr_model*.pth'))
print(discr_models_lst)

['./models\\discr_model-lr=0.0002-batch=64-epochs=0.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=1.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=10.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=11.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=12.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=13.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=14.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=15.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=16.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=17.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=18.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=19.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=2.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=20.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=21.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=22.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=23.pth', './models\\discr

In [5]:
# Preview the text used to name result files: the first checkpoint path with the
# 9-character directory prefix and the trailing 'pth' removed (the dot is kept).
discr_models_lst[0][len('./models/'):-len('pth')]

'discr_model-lr=0.0002-batch=64-epochs=0.'

In [6]:
# Width of the discriminator's input layer. Discriminator.__init__ reads the
# module-level `data_dim`, so it must be defined before the instantiation below.
# NOTE(review): 202 presumably equals the number of feature columns (all columns
# except Label) and must match the saved checkpoints -- confirm.
data_dim = 202
# Single model instance; its weights are overwritten for every checkpoint evaluated later.
discriminator = Discriminator()

In [7]:
# selected_features_nb15 = ['dsport', 'dur', 'Spkts', 'Dpkts', 'sbytes', 'dbytes', 'smeansz', 
#                           'dmeansz', 'flow_bytes/s', 'flow_packets/s', 'fwd_packets/s', 'bwd_packets/s']

In [8]:
# Sanity check: list the column names of the sampled frame (Label is expected last,
# since a later cell drops the final column to obtain the features).
print(nb15_dataset.columns)

Index(['sport', 'dsport', 'dur', 'sbytes', 'dbytes', 'sttl', 'dttl', 'sloss',
       'dloss', 'Sload',
       ...
       'service_ftp-data', 'service_http', 'service_irc', 'service_pop3',
       'service_radius', 'service_smtp', 'service_snmp', 'service_ssh',
       'service_ssl', 'Label'],
      dtype='object', length=203)


In [9]:
# Load the persisted scaler from the file 'minmaxscaler' in the working directory.
# NOTE(review): presumably the scaler fitted on the training data -- confirm.
# joblib.load deserialises arbitrary Python objects; only load trusted artifacts.
minmaxscaler = joblib.load('minmaxscaler')

In [10]:
# Set the ground-truth labels aside before they are stripped from the frame.
nb15_dataset_labels = nb15_dataset['Label']

# Feature-subset selection is currently disabled; every feature column is used.
#nb15_dataset = nb15_dataset[selected_features_nb15]

# Drop the last column (Label), then turn +/-inf into NaN so the imputer fills them.
nb15_dataset = nb15_dataset.iloc[:, :-1]
nb15_dataset = nb15_dataset.replace([np.inf, -np.inf], np.nan)

# Impute missing values (imputer is fitted on this frame; all-NaN columns are kept),
# then apply the previously loaded scaler to obtain the model inputs.
simp = SimpleImputer(keep_empty_features=True)
nb15_dataset_features = minmaxscaler.transform(simp.fit_transform(nb15_dataset))


In [11]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Evaluate every discriminator checkpoint on the prepared features, print a
# per-class precision/recall/F1 table, write the metrics to ./test_eval2/ and
# remember the checkpoint with the highest overall accuracy.
num_samples = len(nb15_dataset_features)
label_counts = nb15_dataset_labels.value_counts()
# .get() instead of attribute access: a class missing from the sample yields 0
# rather than an AttributeError.
total_benign_conn = label_counts.get('BENIGN', 0)
total_attack_conn = label_counts.get('ATTACK', 0)
best_accuracy = -1
best_pair = {}

# Labels and features are identical for every checkpoint, so prepare them once.
is_benign = (nb15_dataset_labels == 'BENIGN').to_numpy()
is_attack = (nb15_dataset_labels == 'ATTACK').to_numpy()
feature_tensor = torch.tensor(nb15_dataset_features).float()

for PATH_discr in discr_models_lst:
    # map_location='cpu' lets checkpoints saved on a GPU load on a CPU-only machine.
    # torch.load unpickles the file: only load checkpoints from a trusted source.
    discriminator.load_state_dict(torch.load(PATH_discr, map_location='cpu'))
    discriminator.eval()

    # One batched forward pass without autograd bookkeeping instead of a
    # Python-level loop over single rows.
    with torch.no_grad():
        scores = discriminator(feature_tensor).reshape(-1).numpy()

    # A discriminator score below 0.5 is interpreted as "attack".
    predicted_attack = scores < 0.5
    predicted_benign = ~predicted_attack

    # ATTACK is the positive class; plain ints keep str(data) free of numpy reprs.
    true_positive = int((is_attack & predicted_attack).sum())
    true_negative = int((is_benign & predicted_benign).sum())
    false_positive = int((is_benign & predicted_attack).sum())
    false_negative = int((is_attack & predicted_benign).sum())

    # _safe_ratio reports 0.0 instead of raising ZeroDivisionError when a
    # checkpoint predicts a single class or a class is absent from the sample.
    benign_recall = _safe_ratio(true_negative, true_negative + false_positive)
    attack_recall = _safe_ratio(true_positive, true_positive + false_negative)

    benign_precision = _safe_ratio(true_negative, true_negative + false_negative)
    attack_precision = _safe_ratio(true_positive, true_positive + false_positive)

    benign_f1 = _safe_ratio(2 * (benign_recall * benign_precision), benign_recall + benign_precision)
    attack_f1 = _safe_ratio(2 * (attack_recall * attack_precision), attack_recall + attack_precision)

    accuracy = _safe_ratio(
        true_positive + true_negative,
        true_positive + true_negative + false_positive + false_negative,
    )
    # Strict '>' keeps the first checkpoint encountered on ties.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_pair = {PATH_discr: accuracy}

    data = {
        "BENIGN": {"Precision": benign_precision, "Recall": benign_recall, "F1-Score": benign_f1},
        "ATTACK": {"Precision": attack_precision, "Recall": attack_recall, "F1-Score": attack_f1}
    }

    # One table row per class: [class name, precision, recall, f1].
    table = [[row_name] + list(row_data.values()) for row_name, row_data in data.items()]
    column_names = ["Precision", "Recall", "F1-Score"]

    str1 = 'Results of ' + str(PATH_discr) + ' ...'
    str2 = "Overall Accuracy: " + str(accuracy)
    str3 = tabulate(table, headers=column_names, tablefmt="fancy_grid")
    output = str1 + "\n" + str2 + "\n" + str3 + "\n" + "###################################################"
    print(output)

    # Derive the checkpoint's base name (no directory, no extension) from the path
    # itself instead of relying on fixed slice offsets; handles '/' and '\\'.
    model_name = PATH_discr.replace('\\', '/').rsplit('/', 1)[-1].rsplit('.', 1)[0]
    output2 = str2 + "\n" + str(data)
    file_name = './test_eval2/' + 'output-' + model_name + '.txt'
    # Context manager guarantees the file is closed even if the write fails.
    # The ./test_eval2/ directory must already exist.
    with open(file_name, 'w') as file:
        file.write(output2)

print("Best model overall is: " + str(best_pair))

Results of ./models\discr_model-lr=0.0002-batch=64-epochs=0.pth ...
Overall Accuracy: 0.9812992125984252
╒════════╤═════════════╤══════════╤════════════╕
│        │   Precision │   Recall │   F1-Score │
╞════════╪═════════════╪══════════╪════════════╡
│ BENIGN │    0.991393 │ 0.987145 │   0.989264 │
├────────┼─────────────┼──────────┼────────────┤
│ ATTACK │    0.914286 │ 0.941176 │   0.927536 │
╘════════╧═════════════╧══════════╧════════════╛
###################################################
Results of ./models\discr_model-lr=0.0002-batch=64-epochs=1.pth ...
Overall Accuracy: 0.9875
╒════════╤═════════════╤══════════╤════════════╕
│        │   Precision │   Recall │   F1-Score │
╞════════╪═════════════╪══════════╪════════════╡
│ BENIGN │    0.996253 │ 0.9894   │   0.992815 │
├────────┼─────────────┼──────────┼────────────┤
│ ATTACK │    0.930525 │ 0.974458 │   0.951985 │
╘════════╧═════════════╧══════════╧════════════╛
###################################################
Results of .