# **1. Paths - Libs**

## **Paths**

In [1]:
# Dataset locations. DatasetPath must keep its trailing slash because the
# file paths below are built by plain string concatenation.
DatasetPath = "E:/An toàn thông tin/Code/Dataset/NSL-KDD Processed/Final - For Using/"
train_dataset_path = f"{DatasetPath}Trainset/IDS.csv"
test_dataset_path = f"{DatasetPath}Testset/KDDTest+.csv"

# Root directory for pickled models (trailing slash required: later cells
# append sub-paths to it by string concatenation).
SavedModelPath = "E:/An toàn thông tin/Code/Saved Model/IDSModel/"

## **Libs**

In [2]:
# INSTALL LIBS
# !pip3 install numpy adabound torch scikit-learn matplotlib pandas

In [3]:
# IMPORT LIBS
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.linear_model import LogisticRegression
import pickle

from datetime import date
import timeit
import os

## **Consts**

In [4]:
# Attack categories handled by this notebook. preprocess_data/load_dataset
# reject any other value, and each entry is compared against the dataset's
# 'class' column, so the spelling must match the CSV labels exactly.
ATTACK_CATEGORIES = [
    'DOS',
    'U2R_AND_R2L',
]

# **2. Functions**

## **Processing Data Functions**

In [5]:
# Processing Data
# preprocess_data - data preprocessing function
def preprocess_data(dataset, attack_category, random_state=None):
    """Build a shuffled binary (Normal vs. one attack category) dataset.

    Keeps only the rows whose 'class' column equals ``attack_category`` or
    'Normal', shuffles them, and encodes the label as 0 for 'Normal' and 1
    for the attack category.

    Args:
        dataset: pandas DataFrame containing a 'class' column; every other
            column is returned as a feature.
        attack_category: one of ATTACK_CATEGORIES.
        random_state: optional seed forwarded to sklearn.utils.shuffle so the
            row order can be reproduced. The default (None) keeps the
            original unseeded behaviour.

    Returns:
        (data, label): two NumPy arrays with one entry per kept row; ``data``
        holds all non-'class' columns and ``label`` holds the 0/1 targets.

    Raises:
        ValueError: if ``attack_category`` is not in ATTACK_CATEGORIES.
    """
    if attack_category not in ATTACK_CATEGORIES:
        raise ValueError("Preprocess Data: Invalid Attack Category")
    attack_data = dataset[dataset['class'] == attack_category]
    normal_data = dataset[dataset['class'] == 'Normal']
    # Combine attack and normal rows, then shuffle (seeded when requested)
    combine_data = shuffle(
        pd.concat([attack_data, normal_data], ignore_index=True),
        random_state=random_state,
    ).reset_index(drop=True)
    # Binary target: 0 = Normal, 1 = attack. Vectorized instead of a per-row
    # lambda; int64 is pinned so the dtype does not depend on the platform.
    label = (combine_data["class"] != "Normal").astype("int64").to_numpy()
    # Every column except the target is a feature
    data = combine_data.drop(columns="class").to_numpy()
    return data, label

# load_dataset - load the train/test data for one attack category
def load_dataset(attack_category):
    """Read the train and test CSV files and preprocess both for one category.

    Args:
        attack_category: one of ATTACK_CATEGORIES.

    Returns:
        (trainx, trainy, testx, testy) as produced by preprocess_data for the
        files at train_dataset_path and test_dataset_path.

    Raises:
        ValueError: if ``attack_category`` is not in ATTACK_CATEGORIES
            (checked up front, before any file is read).
    """
    if attack_category not in ATTACK_CATEGORIES:
        raise ValueError("Preprocess Data: Invalid Attack Category")
    # Read both files first, then preprocess train before test
    frames = [pd.read_csv(csv_path) for csv_path in (train_dataset_path, test_dataset_path)]
    (trainx, trainy), (testx, testy) = [
        preprocess_data(frame, attack_category) for frame in frames
    ]
    return trainx, trainy, testx, testy

# **3. Chạy - Run**

In [6]:
# Model selector; the trailing `#@param` annotation lists "LR" as the only choice.
# NOTE(review): the training cell visible in this notebook does not read this
# variable - it always builds LogisticRegression. Confirm whether it is used elsewhere.
ml_ids_model = "LR" #@param ["LR"]

In [7]:
# Echo the model output directory as the cell result (sanity check before saving).
SavedModelPath

'E:/An toàn thông tin/Code/Saved Model/IDSModel/'

In [8]:
# Train and evaluate one Logistic Regression IDS per attack category, print a
# results table, and pickle each fitted model under SavedModelPath.
labels = ['Attack Category', 'Accuracy(%)', 'DR(%)', 'Runtime(s)']
row_format = "{: <20} {: >15} {: >15} {: >15}"
print(row_format.format(*labels))
print(65*'-')

for attack_category in ATTACK_CATEGORIES:
    # The reported runtime covers data loading, training and evaluation.
    start = timeit.default_timer()
    trainx, trainy, testx, testy = load_dataset(attack_category)

    # NOTE(review): the recorded run of this cell emitted "TOTAL NO. of
    # ITERATIONS REACHED LIMIT", i.e. the fit stopped before converging.
    # Raising max_iter or scaling the features would change the reported
    # numbers, so the estimator is deliberately left at its defaults here.
    ids = LogisticRegression()
    ids.fit(trainx, trainy)

    score = ids.score(testx, testy)
    pred_testy = ids.predict(testx)
    # For binary labels sklearn lays the matrix out as [[tn, fp], [fn, tp]],
    # so ravel() yields tn, fp, fn, tp (not tn, fn, fp, tp). labels=[0, 1]
    # keeps the 2x2 shape even if one class is missing from testy/pred_testy.
    tn, fp, fn, tp = confusion_matrix(testy, pred_testy, labels=[0, 1]).ravel()
    accuracy = (tn + tp)/len(testy)*100
    # Detection rate = recall on the attack class: TP / (TP + FN).
    attack_count = tp + fn
    dr = tp/attack_count*100 if attack_count else float("nan")
    runtime = f"{timeit.default_timer() - start:.2f}"
    out_val = [attack_category, f"{accuracy:.2f}", f"{dr:.2f}", runtime]
    print(row_format.format(*out_val))
    print(score)

    # Save Model
    save_category_path = attack_category + '/Machine_Learning/'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(SavedModelPath + save_category_path, exist_ok=True)

    today = str(date.today())

    # NOTE: the "LogicticRegression" spelling is kept on purpose so that
    # previously saved models and any code loading them keep the same path.
    short_model_path = save_category_path + "created_date_" + today + "_" + "LogicticRegression.pkl"
    full_model_path = SavedModelPath + short_model_path
    with open(full_model_path, 'wb') as file:
        pickle.dump(ids, file)
    # Printed only after the file has been written and closed
    print(f" > Model Path: /{short_model_path}")

Attack Category          Accuracy(%)           DR(%)      Runtime(s)
-----------------------------------------------------------------


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


DOS                            89.83           79.40            1.31
0.8982586919806651
 > Model Path: /DOS/Machine_Learning/created_date_2023-11-28_LogicticRegression.pkl
U2R_AND_R2L                    76.74            0.44            0.83
0.7674326778804391
 > Model Path: /U2R_AND_R2L/Machine_Learning/created_date_2023-11-28_LogicticRegression.pkl


The saved models are stored in [Google Drive - BlackBox IDS Model](https://drive.google.com/drive/u/1/folders/1M-xotvruMlkFNaQWPf9bpBpzgPhPLpiH)  
The code and its results are stored in [GitHub - Thesis](https://github.com/thetinybug/thesis-IDSGAN)