In [1]:
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer

# Location of the 12-feature NB15 test CSV. The default is the path the notebook
# was originally written against; set the NB15_TEST_CSV environment variable to
# point at another copy instead of editing the notebook.
NB15_TEST_CSV = os.environ.get(
    "NB15_TEST_CSV",
    "C:/Users/katsa/OneDrive/Jupyter_files/shallow_models_cic_nb15/nb_12_feat_test_dataset.csv",
)

nb15_dataset = pd.read_csv(NB15_TEST_CSV)

In [2]:
# Work on a 10% random sample of the test set. The seed makes the sample (and
# therefore every metric computed below) reproducible across kernel restarts.
# NOTE: this cell rebinds nb15_dataset, so running it twice samples the sample;
# re-run the loading cell first if you need to re-execute it.
SAMPLE_SEED = 42
nb15_dataset = (
    nb15_dataset
    .sample(frac=0.1, random_state=SAMPLE_SEED)
    .reset_index(drop=True)  # labels are later looked up by position 0..n-1
)
nb15_dataset.head()

Unnamed: 0,dsport,dur,Spkts,Dpkts,sbytes,dbytes,smeansz,dmeansz,flow_bytes/s,flow_packets/s,fwd_packets/s,bwd_packets/s,Label
0,53.0,1.148,2,2,146,178,73,89,282230000.0,3484321.0,1742160.0,1742160.0,BENIGN
1,80.0,668.762,10,8,810,354,81,44,1740530.0,26915.4,14953.0,11962.4,ATTACK
2,37436.0,20.494,56,58,3390,41676,61,719,2198985000.0,5562604.0,2732507.0,2830097.0,BENIGN
3,33741.0,27.939,64,66,3806,52378,59,794,2010952000.0,4652994.0,2290705.0,2362289.0,BENIGN
4,13439.0,104.09,4,4,544,304,136,76,8146796.0,76856.57,38428.28,38428.28,BENIGN


In [3]:
# Define the discriminator model
import torch
from torch import nn

class Discriminator(nn.Module):
    """Small fully connected network that maps a feature vector to a score in (0, 1).

    Layout: Linear(input_dim, 8) -> LeakyReLU(0.2) -> Linear(8, 4) -> LeakyReLU(0.2)
    -> Linear(4, 1) -> Sigmoid. The layers live in ``self.model`` so the
    state_dict keys stay ``model.<idx>.weight`` / ``model.<idx>.bias``.

    Args:
        input_dim: Number of input features. When omitted, the notebook-level
            global ``data_dim`` is used, which keeps ``Discriminator()`` working
            exactly as before (and raises NameError if ``data_dim`` is undefined).
    """

    def __init__(self, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback: read the width from the global.
            input_dim = data_dim
        self.model = nn.Sequential(
            nn.Linear(input_dim, 8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(8, 4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(4, 1),
            nn.Sigmoid()
        )

    def forward(self, data):
        """Return the sigmoid output of the network for ``data``.

        ``data`` must have ``input_dim`` values along its last dimension; the
        result has the same leading dimensions and a last dimension of size 1.
        """
        validity = self.model(data)
        return validity
    
   

In [4]:
# Collect the paths of all saved discriminator checkpoints to evaluate
import glob

# glob makes no promise about result order (it depends on the file system), so
# sort for a reproducible evaluation order. This is a plain string sort, hence
# 'epochs=10' comes before 'epochs=2'.
discr_models_lst = sorted(glob.glob('./models/discr_model*.pth'))
print(discr_models_lst)

['./models\\discr_model-lr=0.0002-batch=64-epochs=0.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=1.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=10.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=11.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=12.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=13.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=14.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=15.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=16.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=17.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=18.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=19.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=2.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=20.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=21.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=22.pth', './models\\discr_model-lr=0.0002-batch=64-epochs=23.pth', './models\\discr

In [5]:
# Preview the name fragment reused for result files: the slice drops the first
# 9 characters (the './models' prefix plus its path separator) and the last 3
# ('pth'), so the fragment keeps the trailing '.' of the extension.
discr_models_lst[0][9:-3]

'discr_model-lr=0.0002-batch=64-epochs=0.'

In [6]:
# Input width of the discriminator. The Discriminator class reads this global
# when it builds its first Linear layer, so it must be set before instantiation.
data_dim = 12
# Freshly initialised network; checkpoint weights are loaded into it later.
discriminator = Discriminator()

In [7]:
# Feature columns fed to the discriminator, in the order they are passed on.
selected_features_nb15 = [
    'dsport',
    'dur',
    'Spkts',
    'Dpkts',
    'sbytes',
    'dbytes',
    'smeansz',
    'dmeansz',
    'flow_bytes/s',
    'flow_packets/s',
    'fwd_packets/s',
    'bwd_packets/s',
]
    

In [8]:
# Sanity check: list the columns of the sampled frame before the feature
# columns and the 'Label' column are split apart.
print(nb15_dataset.columns)

Index(['dsport', 'dur', 'Spkts', 'Dpkts', 'sbytes', 'dbytes', 'smeansz',
       'dmeansz', 'flow_bytes/s', 'flow_packets/s', 'fwd_packets/s',
       'bwd_packets/s', 'Label'],
      dtype='object')


In [9]:
# Load the previously saved scaler object from the file 'minmaxscaler' in the
# current working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load scaler files that come from a trusted source.
# NOTE(review): presumably this is the scaler fitted on the training data - confirm.
minmaxscaler = joblib.load('minmaxscaler')

In [10]:
# Turn the sampled frame into the scaled feature matrix the discriminator expects.
simp = SimpleImputer(keep_empty_features=True)

# Keep the labels aside before the frame is narrowed to the feature columns.
# The guard lets this cell be re-run after nb15_dataset has already lost 'Label'
# (the labels extracted on the first run are then kept as they are).
if 'Label' in nb15_dataset.columns:
    nb15_dataset_labels = nb15_dataset['Label']

# Select the features and turn +/-inf into NaN in one non-inplace chain, so the
# replacement never operates in place on a slice of the original frame.
nb15_dataset = nb15_dataset[selected_features_nb15].replace([np.inf, -np.inf], np.nan)

# Fill the NaNs. NOTE(review): the imputer is fitted on this test sample itself;
# confirm whether the imputer fitted on the training data should be reused instead.
nb15_dataset_features = simp.fit_transform(nb15_dataset)

# Apply the pre-fitted scaler (transform only, no re-fitting on test data).
nb15_dataset_features = minmaxscaler.transform(nb15_dataset_features)


In [11]:
# Evaluate every discriminator checkpoint on the pre-processed test sample.
# For each checkpoint this reports how many BENIGN and how many ATTACK rows were
# classified correctly (as a share of the rows of that class), writes the report
# to ./test_eval/output-<checkpoint name>.txt and tracks the best checkpoint per class.
num_samples = len(nb15_dataset_features)
best_accuracy_normal = 0
best_accuracy_attack = 0
# Defined up front so the summary at the end cannot raise NameError when no
# checkpoint scores above zero or when the checkpoint list is empty.
best_pair_normal = None
best_pair_attack = None

# Boolean class masks taken positionally, so they line up with the rows of the
# feature matrix whatever the index of the label Series is. Counting through the
# masks also yields 0 (rather than an AttributeError) if a class is absent.
label_values = nb15_dataset_labels.to_numpy()
is_benign = label_values == 'BENIGN'
is_attack = label_values == 'ATTACK'
total_benign_conn = int(is_benign.sum())
total_attack_conn = int(is_attack.sum())

# Convert the features once; every checkpoint is scored on the same tensor.
features_tensor = torch.tensor(nb15_dataset_features).float()

output_dir = './test_eval/'
os.makedirs(output_dir, exist_ok=True)

for PATH_discr in discr_models_lst:
    # NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
    # map_location lets checkpoints saved on a GPU load on a CPU-only machine.
    discriminator.load_state_dict(torch.load(PATH_discr, map_location='cpu'))
    discriminator.eval()
    # One batched forward pass, without autograd bookkeeping.
    with torch.no_grad():
        scores = discriminator(features_tensor).reshape(-1)
    # A score below 0.5 is treated as an ATTACK prediction.
    predicted_attack = (scores < 0.5).numpy()
    correct_prediction_normal = int((is_benign & ~predicted_attack).sum())
    correct_prediction_attack = int((is_attack & predicted_attack).sum())
    # NaN (never "best") instead of a division by zero when a class is absent.
    current_accuracy_normal = (correct_prediction_normal / total_benign_conn
                               if total_benign_conn else float('nan'))
    current_accuracy_attack = (correct_prediction_attack / total_attack_conn
                               if total_attack_conn else float('nan'))
    output = (f'Number of total samples: {num_samples} | '
              f'Number of correct BENIGN predictions: {correct_prediction_normal} | '
              f'Number of total BENIGN connections: {total_benign_conn} | '
              f'Number of correct ATTACK predictions: {correct_prediction_attack} | '
              f'Number of total ATTACK connections: {total_attack_conn} | '
              f'Accuracy BENIGN: {current_accuracy_normal} | '
              f'Accuracy ATTACK: {current_accuracy_attack} ')
    # Strict '>' keeps the first checkpoint (in list order) among equal scores.
    if current_accuracy_normal > best_accuracy_normal:
        best_pair_normal = {PATH_discr: current_accuracy_normal}
        best_accuracy_normal = current_accuracy_normal
    if current_accuracy_attack > best_accuracy_attack:
        best_pair_attack = {PATH_discr: current_accuracy_attack}
        best_accuracy_attack = current_accuracy_attack
    print('Results of ' + str(PATH_discr) + ' ...')
    print(output)
    # Checkpoint file name without directory and extension, e.g.
    # 'discr_model-lr=0.0002-batch=64-epochs=0' -> 'output-<that>.txt'.
    checkpoint_stem = os.path.splitext(os.path.basename(PATH_discr))[0]
    file_name = output_dir + 'output-' + checkpoint_stem + '.txt'
    # The context manager closes the file even if the write fails.
    with open(file_name, 'w') as file:
        file.write(output)

print('The best pair for BENIGN is ' + str(best_pair_normal))
print('The best pair for ATTACK is ' + str(best_pair_attack))

Results of ./models\discr_model-lr=0.0002-batch=64-epochs=0.pth ...
Number of total samples: 10160 | Number of correct BENIGN predictions: 8586 | Number of total BENIGN connections: 8842 | Number of correct ATTACK predictions: 227 | Number of total ATTACK connections: 1318 | Accuracy BENIGN: 0.971047274372314 | Accuracy ATTACK: 0.17223065250379363 
Results of ./models\discr_model-lr=0.0002-batch=64-epochs=1.pth ...
Number of total samples: 10160 | Number of correct BENIGN predictions: 8065 | Number of total BENIGN connections: 8842 | Number of correct ATTACK predictions: 940 | Number of total ATTACK connections: 1318 | Accuracy BENIGN: 0.9121239538565935 | Accuracy ATTACK: 0.7132018209408194 
Results of ./models\discr_model-lr=0.0002-batch=64-epochs=10.pth ...
Number of total samples: 10160 | Number of correct BENIGN predictions: 8821 | Number of total BENIGN connections: 8842 | Number of correct ATTACK predictions: 901 | Number of total ATTACK connections: 1318 | Accuracy BENIGN: 0.99