# Gradient Boosting Classifier

## 'Label' Feature

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from scripts import preprocess as ref
from sklearn import metrics # is used to create classification results
from sklearn.ensemble import GradientBoostingClassifier


# Relative path to the UNSW-NB15 balanced training CSV.
ORIGINAL_CSV = '../data/UNSW-NB15-BALANCED-TRAIN.csv'

# Read the raw CSV, then pass it through the project's preprocessing helper.
origin = pd.read_csv(ORIGINAL_CSV, encoding='ISO-8859-1', low_memory=False)
df = ref.preprocess_data(origin)

# Replace the values of the `srcip` and `dstip` columns with the integer codes
# produced by pd.factorize (codes follow order of first appearance).
for ip_column in ('srcip', 'dstip'):
    df[ip_column] = pd.factorize(df[ip_column])[0]

In [9]:
# Feature matrix: every column except the two columns used as targets.
x = df.drop(columns=['attack_cat', 'Label'])
y_Label = df['Label']

# Shuffled split, stratified on 'Label', holding out 30% of the rows for testing.
(x_Label_train, x_Label_test,
 y_Label_train, y_Label_test) = train_test_split(
    x,
    y_Label,
    test_size=0.3,
    shuffle=True,
    stratify=y_Label,
    random_state=42,
)

In [10]:
# Gradient-boosting model for the 'Label' target.
# Non-default settings: n_estimators=20 (library default 100) and
# learning_rate=0.5 (library default 0.1); max_depth=3 is the library default.
# random_state is pinned so that Restart & Run All rebuilds the same model.
classifier_Label = GradientBoostingClassifier(n_estimators=20,
                                              learning_rate=0.5,
                                              max_depth=3,
                                              random_state=42)
classifier_Label.fit(x_Label_train, y_Label_train)

In [11]:
# Predict 'Label' for the held-out test rows.
y_Label_pred = classifier_Label.predict(x_Label_test)

In [12]:
# Per-class precision / recall / F1 / support for the 'Label' predictions.
label_report = metrics.classification_report(y_true=y_Label_test,
                                             y_pred=y_Label_pred)
print(label_report)

              precision    recall  f1-score   support

           0       1.00      0.99      0.99     67470
           1       0.99      1.00      0.99     67469

    accuracy                           0.99    134939
   macro avg       0.99      0.99      0.99    134939
weighted avg       0.99      0.99      0.99    134939



## 'Attack_cat' Feature

In [13]:
y_attack_cat = df['attack_cat']

# Shuffled split, stratified on 'attack_cat', holding out 30% of the rows for
# testing.
# NOTE(review): this split is seeded with 32 while the 'Label' split uses 42 —
# confirm the difference is intentional.
(x_attack_cat_train, x_attack_cat_test,
 y_attack_cat_train, y_attack_cat_test) = train_test_split(
    x,
    y_attack_cat,
    test_size=0.3,
    shuffle=True,
    stratify=y_attack_cat,
    random_state=32,
)


In [14]:
# Gradient-boosting model for the multi-class 'attack_cat' target.
# learning_rate=0.1 and max_depth=3 are the library defaults, spelled out for
# clarity; n_estimators is left at its default.
# random_state is pinned so that Restart & Run All rebuilds the same model.
classifier_attack_cat = GradientBoostingClassifier(learning_rate=0.1,
                                                   max_depth=3,
                                                   random_state=42)

classifier_attack_cat.fit(x_attack_cat_train, y_attack_cat_train)

In [15]:
# Predict 'attack_cat' for the held-out test rows.
y_attack_cat_pred = classifier_attack_cat.predict(x_attack_cat_test)

In [16]:
# Display names for the attack_cat classes. classification_report pairs
# target_names with the sorted label values, so this order must match how
# attack_cat was encoded upstream.
# NOTE(review): that encoding is not visible in this notebook — verify the
# order against scripts.preprocess.
vulnerabilities = [
    "None", "Generic", "Fuzzers", "Exploits", "Dos",
    "Reconnaissance", "Analysis", "Shellcode", "Backdoors", "Worms",
]

# zero_division=0 reports 0 (without a warning) for any undefined metric.
attack_cat_report = metrics.classification_report(
    y_true=y_attack_cat_test,
    y_pred=y_attack_cat_pred,
    target_names=vulnerabilities,
    zero_division=0,
)
print(attack_cat_report)

                precision    recall  f1-score   support

          None       1.00      0.98      0.99     67470
       Generic       1.00      0.98      0.99     45251
       Fuzzers       0.82      0.87      0.84      5090
      Exploits       0.60      0.90      0.72      9348
           Dos       0.42      0.18      0.26      3425
Reconnaissance       0.92      0.77      0.83      2953
      Analysis       0.73      0.08      0.15       495
     Shellcode       0.61      0.11      0.19       558
     Backdoors       0.82      0.84      0.83       314
         Worms       0.02      0.46      0.05        35

      accuracy                           0.94    134939
     macro avg       0.69      0.62      0.58    134939
  weighted avg       0.94      0.94      0.93    134939

