In [2]:
import sys
import os
sys.path.insert(0, os.path.abspath('../src/'))

### Computing t-SNE

In [5]:
import cicids2017
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.manifold import TSNE
from umap import UMAP


RES_PATH = "BestGruLinearTL"
MODELPATH = Path(f"../res/GruLinearTL.torch")
DATASETPATH_CACHE = Path(f"../dataset/cache")


netdata = cicids2017.load_dataset(DATASETPATH_CACHE)["DT"]
net = cicids2017.configureAnchor(None, checkpoint=MODELPATH)
netdata = net.pointwise_embedding(netdata)
netdata = pd.concat(netdata)


# Remove truncated embeddings
netdata = netdata.loc[netdata["_embedding"].dropna().index]
tsne_input = np.stack(netdata["_embedding"].values)
netdata["_ebs2D"] = list(TSNE(n_components=2).fit_transform(tsne_input))

### DBScan

In [6]:
from sklearn.cluster import DBSCAN

cluster_algorithm = DBSCAN(eps=4, min_samples=5)

cluster_str = str(cluster_algorithm).split('(')[0]
clustering_input = np.stack(netdata["_ebs2D"].values)
clustering = cluster_algorithm.fit(clustering_input)
netdata["_clustering"] = clustering.labels_.astype(str)

### Anomaly detection

In [7]:
normal_cluster = netdata["_clustering"].mode().iloc[0]
netdata["_yhat"] = (netdata["_clustering"] == normal_cluster)
Y = netdata["_isanomaly"]
Y = (Y=="none").replace({True: 0, False: 1}).values
y_hat = netdata["_yhat"].replace({True: 0, False: 1}).values

In [8]:
from sklearn import metrics

metrics_rep = [ metrics.roc_auc_score,
                metrics.precision_score, metrics.recall_score,
                metrics.accuracy_score, metrics.f1_score]
for m in metrics_rep:
    mres = m(Y, y_hat)
    print(f"{m.__name__}(moday+attacks): {mres}")

tn, fp, fn, tp = metrics.confusion_matrix(Y, y_hat, normalize="all").ravel()
print("\n Confusion matrix")
print(f"\ttp: {tp} \tfp: {fp} \n\tfn: {fn} \ttn: {tn}")

roc_auc_score(moday+attacks): 0.8134252049659529
precision_score(moday+attacks): 0.8405286343612335
recall_score(moday+attacks): 0.6498637602179836
accuracy_score(moday+attacks): 0.9255330547519555
f1_score(moday+attacks): 0.733000384172109

 Confusion matrix
	tp: 0.10221793635486982 	fp: 0.01939354976963463 
	fn: 0.05507339547840994 	tn: 0.8233151183970856
