# Analysis of Results of Pure Anomaly Detection
Results of the original GEE variational autoencoder (VAE)

## Environment Setting
Import libraries/packages/modules

In [None]:
import pandas as pd
from sklearn import metrics
import matplotlib.pyplot as plt
import seaborn as sns

## Load Results and Run Analysis

In [None]:
#load the test-set results; the cells below read its 'labels' and 'mse' columns
results = pd.read_feather('results_ad_test.feather.with_mse')

In [None]:
#per-class summary statistics of the reconstruction error (labels as rows, statistics as columns)
#'mse' is selected explicitly so the cell stays re-runnable: the previous unstack/pivot chain
#fails on duplicate entries as soon as `results` holds a second numeric column (such as the
#'bpredictions' column added further below); statistics now appear in describe()'s natural order
results.groupby('labels')['mse'].describe()

In [None]:
#number of samples per class in the 'labels' column
results['labels'].value_counts()

In [None]:
#reconstruction error (MSE) statistics of the training background traffic:
#train background mean = 0.003405, train background std = 0.005413
train_mean = 0.003405
train_std = 0.005413

#number of standard deviations above the training mean at which a sample is flagged
coef = 1 #1/3, 1/2, 1, 2, 3
threshold = train_mean + train_std * coef

#binarize labels: 'background' -> 0, every other class -> 1
#whole-column assignment replaces the chained `results.blabels[mask] = ...` form, which raises
#SettingWithCopyWarning and does not update `results` at all under pandas Copy-on-Write
results['blabels'] = (results['labels'] != 'background').astype(int)

#binary predictions according to MSE crossing the threshold (1 = anomaly, 0 = background)
#NOTE: a missing (NaN) MSE compares as False and is therefore predicted as 0
results['bpredictions'] = (results['mse'] > threshold).astype(int)

print(results['blabels'].value_counts())
print(results['bpredictions'].value_counts())

In [None]:
def print_result(data, title):
    """Print AUC, classification report and confusion matrix for ``data``.

    ``data`` is a dataframe with ``blabels`` and ``bpredictions`` columns;
    ``title`` is a string used to tell the printed results apart.

    When ``blabels`` holds fewer than two distinct values only a notice is
    printed, and the metrics are skipped.
    """
    truth = data['blabels']
    if len(truth.unique()) < 2:
        print(title + ' subset contains a single label')
        return

    y_true = truth.to_list()
    y_pred = data['bpredictions'].to_list()

    print('AUC (' + title + '):', metrics.roc_auc_score(y_true, y_pred))
    print(metrics.classification_report(y_true, y_pred, digits=4))
    print(metrics.confusion_matrix(y_true, y_pred))
    print()

In [None]:
#evaluate the thresholded predictions on every class except 'blacklist'
print_result(results[results['labels'] != 'blacklist'], 'without blacklist')

In [None]:
#report the metrics for several test subsets (the full set, the set without
#'blacklist', and background paired with one anomaly class at a time)
print('Threshold = ', threshold)

print_result(results, 'all')

#kept as a named dataframe because later cells reuse it
results_without_blacklist = results[results['labels'] != 'blacklist']
print_result(results_without_blacklist, 'without blacklist')

for anomaly_class in ('dos', 'nerisbotnet', 'anomaly-spam', 'scan11', 'scan44'):
    subset = results[results['labels'].isin(['background', anomaly_class])]
    print_result(subset, 'background + ' + anomaly_class)

In [None]:
#AUC ROC (based on MSE) without blacklist

fig, ax = plt.subplots(figsize=(5, 5))

#the raw MSE is used as the anomaly score, so the curve covers all thresholds rather than only `threshold`
fpr, tpr, thresholds = metrics.roc_curve(results_without_blacklist.blabels.to_list(), results_without_blacklist.mse.to_list())
auc = metrics.auc(fpr, tpr)

#chance-level diagonal for reference
ax.plot([0, 1], [0, 1], 'k--')
#'.2f' rather than ' .2f': the space flag put an extra blank in front of the value in the legend
ax.plot(fpr, tpr, label=f'MSE (AUC = {auc:.2f})')
ax.set_xlabel('False positive rate')
ax.set_ylabel('True positive rate')
ax.legend(loc='lower right')

#fig.savefig('ad_auc-roc.pdf')

#plt.show() rather than fig.show(): Figure.show() only emits a warning on non-GUI backends
#(e.g. the notebook inline backend) and does not manage a GUI event loop in scripts
plt.show()

In [None]:
#confusion matrix without blacklist

cmd = metrics.ConfusionMatrixDisplay(metrics.confusion_matrix(results_without_blacklist.blabels.to_list(), results_without_blacklist.bpredictions.to_list()), display_labels=['0','1'])
cmd.plot()
#cmd.figure_.savefig('ad_cm.pdf')

In [None]:
#KDE of MSE without blacklist

#reconstruction errors split into background and everything else; kept as plain lists
#because the histogram cell reuses both variables
normal_recon_error = results_without_blacklist[results_without_blacklist['labels'] == 'background']['mse'].tolist()
malicious_recon_error = results_without_blacklist[results_without_blacklist['labels'] != 'background']['mse'].tolist()

fig, ax = plt.subplots(figsize=(5, 5))

sns.kdeplot(normal_recon_error, ax=ax, label='Background MSE')
sns.kdeplot(malicious_recon_error, ax=ax, label='Anomaly MSE')

#ax.set_title(f'Reconstruction Error Distribution of background traffic and anomalies')
#list inputs carry no name, so the x axis is labelled explicitly
ax.set_xlabel('MSE')
ax.legend(loc='lower right')

#fig.savefig('ad_kde.pdf')

#plt.show() rather than fig.show(): Figure.show() only emits a warning on non-GUI backends
#(e.g. the notebook inline backend) and does not manage a GUI event loop in scripts
plt.show()

In [None]:
#histogram of MSE without blacklist (with upper limits to better see the distribution of anomalies)

fig, ax = plt.subplots(figsize=(10, 5))

#only MSE values within [0, 0.06] are binned
ax.hist([normal_recon_error, malicious_recon_error], bins=100, range=(0, 0.06), label=['Background', 'Anomaly'])
#bars taller than 100 samples are clipped by the y limit
ax.set_ylim(0, 100)
ax.set_xlabel("MSE")
#hist() is called without density=True, so the bars are sample counts rather than densities
ax.set_ylabel("Count")
ax.legend(loc='upper right')

#fig.savefig('ad_hist.pdf')

#plt.show() rather than fig.show(): Figure.show() only emits a warning on non-GUI backends
#(e.g. the notebook inline backend) and does not manage a GUI event loop in scripts
plt.show()