In [1]:
!pip install hiclass



In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import Pipeline

from hiclass import LocalClassifierPerParentNode
import pandas as pd
import numpy as np

In [3]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
# Attach Google Drive to this Colab runtime so the dataset CSVs can be read.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
def perf_evaluation(y_true,y_pred,class_labels,labels=None):
    """Plot a row-normalised confusion matrix and print per-class metrics.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground-truth class codes.
    y_pred : array-like of shape (n_samples,)
        Predicted class codes.
    class_labels : sequence of str
        Display name of each class, in the row/column order of the
        confusion matrix.
    labels : array-like, optional
        Class codes in the same order as ``class_labels``; forwarded to
        ``confusion_matrix`` and ``classification_report``. When omitted,
        scikit-learn infers the sorted set of values found in ``y_true``
        and ``y_pred``, which must then contain exactly
        ``len(class_labels)`` distinct values.

    Raises
    ------
    ValueError
        If the confusion matrix does not have one row per entry of
        ``class_labels``. Without this check the heatmap ticks and the
        per-class counts would be attributed to the wrong classes.

    Returns
    -------
    None
        Everything is reported through the plot and ``print``.
    """
    class_labels = list(class_labels)

    # Raw counts first: they drive both the size check and the TP/TN/FP/FN figures.
    conf_matrix = confusion_matrix(y_true,y_pred,labels=labels)
    if conf_matrix.shape[0] != len(class_labels):
        raise ValueError(
            f"Confusion matrix has {conf_matrix.shape[0]} classes but "
            f"{len(class_labels)} class_labels were given; pass `labels` with "
            "one class code per entry of class_labels."
        )

    # Row-normalised version (each true class sums to 1) for the heatmap.
    matrix = confusion_matrix(y_true,y_pred,labels=labels,normalize='true')
    print("Confusion Matrix:")

    # Plot the confusion matrix as a heatmap
    plt.figure(figsize=(15, 15))
    sns.heatmap(matrix, annot=True,fmt='.4f',cmap="viridis", square=True,
                xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

    # Accuracy and Classification Report
    accuracy = accuracy_score(y_true, y_pred) * 100
    print("Total Accuracy: ",accuracy)
    report = classification_report(y_true,y_pred,labels=labels,
                                   target_names=class_labels)
    print("Classification Report")
    print(report)

    # One-vs-rest counts for every class at once:
    #   TP = diagonal, FP = column total - TP, FN = row total - TP,
    #   TN = everything that is none of the above.
    tp = conf_matrix.diagonal()
    fp = conf_matrix.sum(axis=0) - tp
    fn = conf_matrix.sum(axis=1) - tp
    tn = conf_matrix.sum() - tp - fp - fn

    # Print true positives, true negatives, false positives, and false negatives for each class
    for i, label in enumerate(class_labels):
        print(f"Class {label}:")
        print("True Positives (TP):", tp[i])
        print("True Negatives (TN):", tn[i])
        print("False Positives (FP):", fp[i])
        print("False Negatives (FN):", fn[i])
        print()

In [6]:
"""# Read Dataset"""
# Locations of the pre-processed train / test CSV files on the mounted Drive.
train_file_path = (
    '/content/drive/MyDrive/ITU_Competition_Intrusion_and_Vulnerability_Detection_in_Software_Defined_Networks(SDN)/train_data_70features_mean_resampled.csv'
)
test_file_path = (
    '/content/drive/MyDrive/ITU_Competition_Intrusion_and_Vulnerability_Detection_in_Software_Defined_Networks(SDN)/test_data_70features_mean_resampled.csv'
)

# Load both files with the same parser settings (train first, then test).
train_df, test_df = (
    pd.read_csv(csv_path, skipinitialspace=True)
    for csv_path in (train_file_path, test_file_path)
)

# First split into X (input features) and y (labels): the final column is the
# label, every other column is a feature. The same four names are assigned
# again further down, after the hierarchy label columns have been added.
last_train_col = train_df.columns[-1]
X_train = train_df.drop(columns=last_train_col)
y_train = train_df[last_train_col]

last_test_col = test_df.columns[-1]
X_test = test_df.drop(columns=last_test_col)
y_test = test_df[last_test_col]


In [7]:
# Sanity check: (number of rows, number of columns) of the training frame.
print(train_df.shape)

(1810200, 71)


In [8]:
# Count missing values per column of the training frame.
print(train_df.isna().sum())

Destination Port               0
Flow Duration                  0
Total Fwd Packets              0
Total Backward Packets         0
Total Length of Fwd Packets    0
                              ..
Idle Mean                      0
Idle Std                       0
Idle Max                       0
Idle Min                       0
Label                          0
Length: 71, dtype: int64


Add the root level labels (Normal:0, Attack:1)

In [9]:
# Root level of the hierarchy: 0.0 where Label is 0, 1.0 for every other label.
binary_con = train_df['Label'].eq(0)
train_df['BinaryLabel'] = (~binary_con).astype(float)

In [10]:
# Same root-level encoding for the test frame: 0.0 where Label is 0, else 1.0.
binary_con_test = test_df['Label'].eq(0)
test_df['BinaryLabel'] = (~binary_con_test).astype(float)

In [11]:
# Preview the first rows to confirm the new BinaryLabel column.
train_df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label,BinaryLabel
0,-0.434042,1.572338,-0.012273,-0.012496,-0.012682,-0.011594,0.15842,-0.308078,0.240864,0.347875,...,-0.119369,-0.106736,-0.144379,-0.098658,2.551058,-0.114468,2.452727,2.599887,4,1.0
1,-0.435527,-0.444928,-0.023137,-0.016672,-0.041324,-0.016693,-0.245443,0.276552,-0.126039,-0.248627,...,-0.1207,-0.106736,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0
2,-0.431676,-0.444931,-0.023137,-0.016672,-0.040555,-0.016728,-0.228561,0.493701,-0.056712,-0.248627,...,-0.1207,-0.106736,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0
3,-0.434042,-0.436272,-0.016347,-0.00832,-0.010434,-0.009699,1.116782,-0.308078,0.536002,1.20692,...,-0.1207,-0.106736,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0
4,-0.434042,0.015188,-0.016347,-0.01354,-0.031353,-0.016569,0.198676,-0.308078,-0.002621,0.253248,...,0.36028,-0.106736,0.173284,0.457036,0.063469,-0.114468,0.044664,0.082097,0,0.0


Add level 1 labels (Normal: 0, DoS: 1, Malware: 2, WebAttack: 3)

In [12]:
# Group the fine-grained Label codes into level-1 categories.
label_codes = train_df['Label']
condDoS = label_codes.isin([2, 3, 4, 5, 6])
condMalware = label_codes.isin([1, 7, 8, 9, 10, 11])
condWeb = label_codes.isin([12, 13, 14])

# First matching condition wins; rows matching none of them get 0.0.
train_df['Level1Label'] = np.select(
    [condDoS, condMalware, condWeb],
    [1.0, 2.0, 3.0],
    default=0.0,
)

In [13]:
# Preview the first rows to confirm the new Level1Label column.
train_df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label,BinaryLabel,Level1Label
0,-0.434042,1.572338,-0.012273,-0.012496,-0.012682,-0.011594,0.15842,-0.308078,0.240864,0.347875,...,-0.106736,-0.144379,-0.098658,2.551058,-0.114468,2.452727,2.599887,4,1.0,1.0
1,-0.435527,-0.444928,-0.023137,-0.016672,-0.041324,-0.016693,-0.245443,0.276552,-0.126039,-0.248627,...,-0.106736,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0,0.0
2,-0.431676,-0.444931,-0.023137,-0.016672,-0.040555,-0.016728,-0.228561,0.493701,-0.056712,-0.248627,...,-0.106736,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0,0.0
3,-0.434042,-0.436272,-0.016347,-0.00832,-0.010434,-0.009699,1.116782,-0.308078,0.536002,1.20692,...,-0.106736,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0,0.0
4,-0.434042,0.015188,-0.016347,-0.01354,-0.031353,-0.016569,0.198676,-0.308078,-0.002621,0.253248,...,-0.106736,0.173284,0.457036,0.063469,-0.114468,0.044664,0.082097,0,0.0,0.0


Add level 2 labels: the original fine-grained `Label` codes (0–14), which form the leaves under the level 1 categories

In [14]:
# Level 2 (leaf) labels are the original Label codes, copied unchanged.
train_df['Level2Label'] = train_df['Label']

In [15]:
# Preview the first rows to confirm the new Level2Label column.
train_df.head()

Unnamed: 0,Destination Port,Flow Duration,Total Fwd Packets,Total Backward Packets,Total Length of Fwd Packets,Total Length of Bwd Packets,Fwd Packet Length Max,Fwd Packet Length Min,Fwd Packet Length Mean,Fwd Packet Length Std,...,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label,BinaryLabel,Level1Label,Level2Label
0,-0.434042,1.572338,-0.012273,-0.012496,-0.012682,-0.011594,0.15842,-0.308078,0.240864,0.347875,...,-0.144379,-0.098658,2.551058,-0.114468,2.452727,2.599887,4,1.0,1.0,4
1,-0.435527,-0.444928,-0.023137,-0.016672,-0.041324,-0.016693,-0.245443,0.276552,-0.126039,-0.248627,...,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0,0.0,0
2,-0.431676,-0.444931,-0.023137,-0.016672,-0.040555,-0.016728,-0.228561,0.493701,-0.056712,-0.248627,...,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0,0.0,0
3,-0.434042,-0.436272,-0.016347,-0.00832,-0.010434,-0.009699,1.116782,-0.308078,0.536002,1.20692,...,-0.145261,-0.1002,-0.351628,-0.114468,-0.357163,-0.33804,0,0.0,0.0,0
4,-0.434042,0.015188,-0.016347,-0.01354,-0.031353,-0.016569,0.198676,-0.308078,-0.002621,0.253248,...,0.173284,0.457036,0.063469,-0.114468,0.044664,0.082097,0,0.0,0.0,0


In [16]:
# Split data into X (input features) and y (labels).
# Columns are picked by name instead of by position from the end, so a missing
# or reordered label column fails loudly instead of silently shifting the split.
label_columns = ['Label', 'BinaryLabel', 'Level1Label', 'Level2Label']
X_train = train_df.drop(columns=label_columns)  # features only: all four label columns removed
y_train = train_df[label_columns]  # original Label plus the three hierarchy levels

X_test = test_df.drop(columns=['Label', 'BinaryLabel'])  # test frame only carries these two label columns
y_test = test_df['Label']  # fine-grained label, used by the multi-class evaluation

Try out the HiClass method (local classifier per parent node)

In [17]:
from hiclass import LocalClassifierPerParentNode

In [18]:
from sklearn import tree
# Base estimator for the hierarchical model. The seed is fixed (matching the
# random_state=0 used for the random forest below) so repeated runs build the
# same trees.
DT_clf = tree.DecisionTreeClassifier(random_state=0)

In [19]:
# Hierarchical model: HiClass's LocalClassifierPerParentNode, with the decision
# tree passed in as its local classifier.
lcppn = LocalClassifierPerParentNode(local_classifier=DT_clf)

In [20]:
# Quick checksum of the label column: prints the sum of the Label codes in
# y_train (this is a sum of codes, not a count of samples).
print(y_train['Label'].sum())

2175238


In [None]:
# Each row of the target describes one root-to-leaf path, so its columns must be
# ordered coarse -> fine. y_train also carries the original 'Label' column first;
# passing it as-is would make the 15-class label the root of the hierarchy.
# Select the three hierarchy levels explicitly instead.
hierarchy_columns = ['BinaryLabel', 'Level1Label', 'Level2Label']
lcppn.fit(X_train, y_train[hierarchy_columns])

In [None]:
# Predict with the fitted hierarchical model; the next cell indexes the result
# as a 2-D array, one column per hierarchy level.
y_pred = lcppn.predict(X_test)

In [None]:
# Keep only the deepest (leaf) level of each predicted path. Indexing the LAST
# column, rather than a fixed position, always selects the Level2Label
# prediction regardless of how many levels were passed to fit().
y_pred_l2 = y_pred[:, -1]
print(y_pred_l2.shape)

# Cast to float so the predictions can be compared with the numeric codes in y_test.
y_pred = np.array(y_pred_l2, dtype=float)
print(y_pred)

In [None]:
# Display names for the fine-grained classes; position i names label code i.
# NOTE(review): 'Hearbleed' looks like a typo for 'Heartbleed' - left unchanged
# because it is the text shown in the plots and reports.
class_labels = [
    'Benign',
    'Bot',
    'DDoS',
    'DoS_Golden_Eye',
    'DoS_Hulk',
    'DoS_Slowhttptest',
    'DoS_Slowloris',
    'FTP_Patator',
    'Hearbleed',
    'Infiltration',
    'PortScan',
    'SSH_Patator',
    'WebAttack_Brute_Force',
    'WebAttack_Sql_Injection',
    'WebAttack_XSS',
]

In [None]:
# Score the leaf-level predictions against the fine-grained test labels (y_test is test_df's 'Label' column).
perf_evaluation(y_test,y_pred,class_labels)

Binary Classification

In [None]:
from sklearn import tree
# Binary (Normal vs Attack) baseline. The target is the single BinaryLabel
# column; fitting on the whole multi-column y_train would train a multi-output
# tree whose 2-D predictions cannot be scored as a binary problem.
DT_clf = tree.DecisionTreeClassifier(random_state=0)  # fixed seed for reproducible trees
DT_clf.fit(X_train, train_df['BinaryLabel'])



In [None]:
# Predict on the test features with the decision tree fitted in the previous cell.
y_pred = DT_clf.predict(X_test) # original author note: "change to x_test_0" (x_test_0 is not defined in the visible notebook)

In [None]:
class_labels = ['Normal', 'Attack']
# Score against the binary ground truth. y_test holds the 15-class 'Label'
# column, which does not match a two-class Normal/Attack evaluation.
y_test_binary = test_df['BinaryLabel']
perf_evaluation(y_test_binary, y_pred, class_labels)

In [None]:
from sklearn.ensemble import RandomForestClassifier
RF_clf = RandomForestClassifier(n_estimators=20, max_depth=None,min_samples_split=2, random_state=0,verbose=2)

# Train final model on the binary (Normal vs Attack) target. y_train holds
# several label columns, so the BinaryLabel column is selected explicitly.
RF_clf = RF_clf.fit(X_train, train_df['BinaryLabel'])

# Predict once (the original cell ran the same predict call twice).
y_pred = RF_clf.predict(X_test)

# Score against the binary ground truth; class_labels is the
# ['Normal', 'Attack'] list defined in the preceding evaluation cell.
perf_evaluation(test_df['BinaryLabel'], y_pred, class_labels)