In [21]:
from autogluon.tabular import TabularDataset, TabularPredictor
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import classification_report






In [3]:
# Load the four saved AutoGluon predictors used by classify_flow.
# Each path is a predictor directory resolved relative to the working directory.
# The load order matches the order of the target names on the left.
(
    benign_malicious_classifier,
    persistent_non_persistent_classifier,
    non_persistent_multiclass_classifier,
    persistent_multiclass_classifier,
) = (
    TabularPredictor.load(path=model_dir)
    for model_dir in (
        'benign_malicious_model_finetune',
        'persistent_non_persistent_model_finetune',
        'non_persistent_model_finetune',
        'persistent_model_finetune',
    )
)


In [4]:
def classify_flow(flow):
    """Classify one network flow by walking the cascade of loaded predictors.

    The flow is first passed to ``benign_malicious_classifier``. A 'Benign'
    result is returned as-is. A 'malicious' result is passed to
    ``persistent_non_persistent_classifier``, whose answer selects either
    ``persistent_multiclass_classifier`` or
    ``non_persistent_multiclass_classifier`` to produce the final label.

    Parameters
    ----------
    flow : pandas.DataFrame
        The flow to classify. Only the prediction for the first row is used.
        NOTE: the frame's index is reset IN PLACE (old index dropped), so the
        caller's object is modified by this call.

    Returns
    -------
    str
        'Benign', or the label produced by the selected multiclass predictor,
        or the empty string '' when a stage returns a value other than the
        ones checked for here ('Benign'/'malicious', then
        'persistent'/'non_persistent').
    """
    # After the reset the single row carries index label 0, which is what the
    # `[0]` lookups on each prediction Series below rely on.
    flow.reset_index(drop=True, inplace=True)

    # Stage 1: benign vs. malicious.
    stage_one = benign_malicious_classifier.predict(flow)[0]
    if stage_one == 'Benign':
        return stage_one
    if stage_one != 'malicious':
        return ''

    # Stage 2: persistent vs. non-persistent attack.
    stage_two = persistent_non_persistent_classifier.predict(flow)[0]

    # Stage 3: the attack-type predictor for the chosen family.
    if stage_two == 'persistent':
        return persistent_multiclass_classifier.predict(flow)[0]
    if stage_two == 'non_persistent':
        return non_persistent_multiclass_classifier.predict(flow)[0]
    return ''


# Import non_persistent test data

In [30]:
# Labels that are dropped from the non-persistent evaluation set.
unused = ['Infiltration - Portscan', 'Portscan']

# Raw non-persistent test rows from both CSV sources, stacked in file order.
non_persistent_raw = pd.concat([
    pd.read_csv('TEST_DFs/cic_ids_2017_test.csv'),
    pd.read_csv('TEST_DFs/non_persistent_testbed_test.csv'),
])

# Build the cleaned frame in a single chain so that no column is assigned onto
# a filtered slice (the pattern behind pandas' SettingWithCopyWarning) and the
# cell gives the same result every time it is re-run:
#   1. drop rows whose Label is in `unused`;
#   2. rewrite 'BENIGN' to 'Benign', the spelling classify_flow compares against.
non_persistent_df = (
    non_persistent_raw
    .loc[lambda frame: ~frame['Label'].isin(unused)]
    .assign(Label=lambda frame: frame['Label'].replace('BENIGN', 'Benign'))
)


In [31]:
# Display the distinct Label values left in non_persistent_df after the filtering and relabelling above.
non_persistent_df['Label'].unique()

array(['DoS Slowloris', 'DoS Slowhttptest', 'DoS Hulk', 'DoS GoldenEye',
       'SSH-Patator', 'DDoS', 'Benign', 'FTP-Patator', 'Botnet'],
      dtype=object)

In [32]:
# Read the two persistent-attack test CSVs (same order as the target names).
# NOTE(review): the saved output of this cell echoes the unraveled read_csv
# line, which looks like the tail of a pandas warning (possibly DtypeWarning
# for mixed-type columns) - confirm, and consider passing an explicit `dtype=`.
unraveled, persistent_testbed = map(
    pd.read_csv,
    ('TEST_DFs/unraveled_test.csv', 'TEST_DFs/persistent_testbed_test.csv'),
)

# Stack both sources into one persistent test frame; original index labels are kept.
persistent_df = pd.concat((unraveled, persistent_testbed))

  unraveled = pd.read_csv('TEST_DFs/unraveled_test.csv')


In [33]:
# Full evaluation set: persistent rows first, then non-persistent rows.
# ignore_index is not set, so each source frame keeps its own index labels and
# labels can repeat in combined_df - access rows positionally (.iloc).
combined_df = pd.concat([persistent_df, non_persistent_df])

In [34]:
# Run every flow of combined_df through classify_flow one row at a time and
# collect the ground-truth label next to the predicted one.
expected_prediction = []  # true Label of each flow, in combined_df row order
actual_prediction = []    # label returned by classify_flow for the same flow

for flow_index in range(len(combined_df)):
    # Double brackets keep the row as a one-row DataFrame, which is the shape
    # classify_flow hands to the predictors.
    test_flow = combined_df.iloc[[flow_index]]

    # Take the label positionally, before calling classify_flow. The previous
    # `test_flow['Label'][0]` was a label-based lookup that only succeeded
    # because classify_flow resets the frame's index in place as a side effect.
    expected_prediction.append(test_flow['Label'].iloc[0])

    classification = classify_flow(test_flow)
    actual_prediction.append(classification)




In [35]:
# One boolean per evaluated flow: did the predicted label equal the true label?
outcomes = [
    bool(predicted_label == true_label)
    for true_label, predicted_label in zip(expected_prediction, actual_prediction)
]
correct_predictions = outcomes.count(True)
incorrect_predictions = outcomes.count(False)

# Headline metrics, each printed as a percentage.
accuracy_fraction = correct_predictions / len(expected_prediction)
mcc_value = matthews_corrcoef(expected_prediction, actual_prediction)
balanced_accuracy_value = balanced_accuracy_score(expected_prediction, actual_prediction)

print(f'Accuracy: {accuracy_fraction * 100}')
print(f'MCC: {mcc_value * 100}')
print(f'Balanced Accuracy: {balanced_accuracy_value * 100}')

# Per-class precision / recall / F1 table.
report = classification_report(expected_prediction, actual_prediction)
print(report)


Accuracy: 89.89692879192172
MCC: 88.7344142746216
Balanced Accuracy: 89.92541746970039
                    precision    recall  f1-score   support

            Benign       0.99      0.95      0.97     11069
            Botnet       0.95      0.95      0.95       526
              DDoS       0.99      0.99      0.99      1998
 Data Exfiltration       0.73      0.90      0.81      2187
     DoS GoldenEye       0.97      1.00      0.99      2009
          DoS Hulk       0.99      0.97      0.98      2011
  DoS Slowhttptest       0.96      0.96      0.96      1407
     DoS Slowloris       0.98      0.99      0.99      1757
Establish Foothold       0.64      0.99      0.78      4211
       FTP-Patator       0.99      0.95      0.97      1758
  Lateral Movement       0.50      0.07      0.12      2896
    Reconnaissance       0.96      0.99      0.98      4835
       SSH-Patator       0.95      0.98      0.97      1562

          accuracy                           0.90     38226
         ma