# **1. Paths - Libs**

## **Paths**

In [1]:
# Root folder of the processed NSL-KDD data; the train/test CSVs live beneath it.
DatasetPath = "E:/An toàn thông tin/Code/Dataset/NSL-KDD Processed/Final - For Using/"
train_dataset_path = f"{DatasetPath}Trainset/IDS.csv"
test_dataset_path = f"{DatasetPath}Testset/KDDTest+.csv"

# Root folder under which the trained IDS models are pickled.
SavedModelPath = "E:/An toàn thông tin/Code/Saved Model/IDSModel/"

## **Libs**

In [2]:
# IMPORT LIBS
import pandas as pd
import numpy as np
import torch as th
from torch.autograd import Variable as V
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB, BernoulliNB, CategoricalNB
import pickle

from datetime import date
import timeit
import os

## **Consts**

In [3]:
# Attack classes accepted by the data helpers; each one is paired with 'Normal'
# traffic to form a separate binary detection task.
ATTACK_CATEGORIES = [
    "DOS",
    "U2R_AND_R2L",
]

# **2. Functions**

## **Processing Data Functions**

In [4]:
# Data preparation helpers
# preprocess_data - data preprocessing function
def preprocess_data(dataset, attack_category):
    """Build a shuffled, binary-labelled sample set for one attack category.

    Keeps only the rows whose 'class' is `attack_category` or 'Normal',
    shuffles them, and recodes 'class' to 0 for 'Normal' and 1 otherwise.

    Args:
        dataset: DataFrame containing a 'class' column with the traffic label.
        attack_category: one of ATTACK_CATEGORIES.

    Returns:
        (data, label): NumPy array of every column except 'class', and NumPy
        array of the 0/1 labels, row-aligned.

    Raises:
        ValueError: if `attack_category` is not in ATTACK_CATEGORIES.
    """
    if attack_category not in ATTACK_CATEGORIES:
        raise ValueError("Preprocess Data: Invalid Attack Category")

    is_attack = dataset['class'] == attack_category
    is_normal = dataset['class'] == 'Normal'

    # Attack rows are stacked ahead of the normal rows, then the order is randomised.
    stacked = pd.concat([dataset[is_attack], dataset[is_normal]], ignore_index=True)
    mixed = shuffle(stacked).reset_index(drop=True)

    def to_binary(value):
        # 0 = normal traffic, 1 = attack traffic
        return 0 if value == "Normal" else 1

    mixed["class"] = mixed["class"].map(to_binary)

    # Split the feature columns from the label column.
    feature_columns = mixed.columns[mixed.columns != "class"]
    data = np.array(mixed[feature_columns])
    label = np.array(mixed["class"])
    return data, label

# load_dataset - load the train/test data for one attack category
def load_dataset(attack_category):
    """Read the train and test CSVs and preprocess both for one attack category.

    The files are read from the module-level `train_dataset_path` and
    `test_dataset_path`; each frame is then passed through `preprocess_data`.

    Args:
        attack_category: one of ATTACK_CATEGORIES.

    Returns:
        (trainx, trainy, testx, testy) as returned by `preprocess_data` for the
        train set and the test set respectively.

    Raises:
        ValueError: if `attack_category` is not in ATTACK_CATEGORIES.
    """
    # Validate before touching the disk so a typo fails fast, and report the
    # function that actually rejected the value.
    if attack_category not in ATTACK_CATEGORIES:
        raise ValueError("Load Dataset: Invalid Attack Category")
    # Load Dataset
    train = pd.read_csv(train_dataset_path)
    test = pd.read_csv(test_dataset_path)
    # Preprocess Data
    trainx, trainy = preprocess_data(train, attack_category)
    testx, testy = preprocess_data(test, attack_category)
    return trainx, trainy, testx, testy

# **3. Chạy - Run**

In [5]:
# Model-family tag; used as a sub-folder name when building the save path below.
ml_ids_model = "NB" #@param ["NB"]

In [6]:
# Display the configured root folder for saved models.
SavedModelPath

'E:/An toàn thông tin/Code/Saved Model/IDSModel/'

In [7]:
# Naive Bayes variants to benchmark. The key is the name shown in the report
# and embedded in the saved file name; the value is the estimator class.
NBMODELS1 = {
    'GaussianNB': GaussianNB,
    'MultinomialNB': MultinomialNB,
    'ComplementNB': ComplementNB,
    'BernoulliNB': BernoulliNB,
}

for attack_category in ATTACK_CATEGORIES:
    print(f"{100* '='}\n ATTACK CATEGORY: {attack_category}")
    labels = ['NB Model', 'Accuracy(%)', 'DR(%)', 'Runtime(s)']
    print("     {: <20} {: >15} {: >15} {: >15}".format(*labels))
    print('    ', 65*'-')
    for model_name, nb_model in NBMODELS1.items():
        # The timer starts before the data is loaded, so Runtime(s) covers
        # loading + fitting + prediction for this model.
        start = timeit.default_timer()
        out_val = [model_name]

        trainx, trainy, testx, testy = load_dataset(attack_category)
        nb = nb_model()
        nb.fit(trainx, trainy)

        pred_testy = nb.predict(testx)
        # scikit-learn flattens the binary confusion matrix as (tn, fp, fn, tp).
        # labels=[0, 1] guarantees a 2x2 matrix even if one class is never
        # observed or predicted, so the 4-way unpack cannot fail on shape.
        tn, fp, fn, tp = confusion_matrix(testy, pred_testy, labels=[0, 1]).ravel()
        accuracy = (tn + tp)/len(testy)*100
        out_val.append(f"{accuracy:.2f}")
        # Detection rate = share of real attacks that were flagged (recall).
        attack_total = tp + fn
        dr = tp/attack_total*100 if attack_total else float('nan')
        out_val.append(f"{dr:.2f}")
        runtime = f"{timeit.default_timer() - start:.2f}"
        out_val.append(runtime)
        print("     {: <20} {: >15} {: >15} {: >15}".format(*out_val))

        # Save Model
        save_category_path = attack_category + '/Machine_Learning/' + ml_ids_model + '/'
        # exist_ok avoids the check-then-create race and the extra stat call.
        os.makedirs(SavedModelPath + save_category_path, exist_ok=True)

        today = str(date.today())

        short_model_path = save_category_path + "created_date_" + today + "_" + model_name + ".pkl"
        full_model_path = SavedModelPath + short_model_path
        with open(full_model_path, 'wb') as file:
            pickle.dump(nb, file)
        # Report only after the file has been written and closed.
        print(f"      > Saved Model to disk: /{short_model_path}")



 ATTACK CATEGORY: DOS
     NB Model                 Accuracy(%)           DR(%)      Runtime(s)
     -----------------------------------------------------------------
     GaussianNB                     78.68           78.36            0.53
      > Saved Model to disk: /DOS/Machine_Learning/NB/created_date_2023-11-27_GaussianNB.pkl
     MultinomialNB                  85.99           69.71            0.44
      > Saved Model to disk: /DOS/Machine_Learning/NB/created_date_2023-11-27_MultinomialNB.pkl
     ComplementNB                   86.04           70.01            0.41
      > Saved Model to disk: /DOS/Machine_Learning/NB/created_date_2023-11-27_ComplementNB.pkl
     BernoulliNB                    86.93           71.17            0.42
      > Saved Model to disk: /DOS/Machine_Learning/NB/created_date_2023-11-27_BernoulliNB.pkl
 ATTACK CATEGORY: U2R_AND_R2L
     NB Model                 Accuracy(%)           DR(%)      Runtime(s)
     --------------------------------------------------

***--> Cannot run with CategoricalNB***

The saved models are stored in [Google Drive - BlackBox IDS Model](https://drive.google.com/drive/u/1/folders/1M-xotvruMlkFNaQWPf9bpBpzgPhPLpiH)  
The code and its results are stored in [Github - Thesis](https://github.com/thetinybug/thesis-IDSGAN)