In [None]:
import sys
import os
# Put the absolute path of ../src at the front of the import search path so
# modules stored there can be imported by the cells below
# (presumably this is where `cicids2017` lives — TODO confirm).
sys.path.insert(0, os.path.abspath('../src/'))

### Loading model and computing embeddings

In [2]:
import cicids2017
import numpy as np
from pathlib import Path
import pandas as pd
from sklearn.manifold import TSNE

RES_PATH = "."
MODELPATH = Path(f"../res/{RES_PATH}/ts2vec.torch")
# DATASETPATH_CACHE = Path(f"../res/{RES_PATH}/cache")
DATASETPATH_CACHE = Path("../dataset/cache")
DATASETPATH = Path("../dataset/CICIDS2017_ntop.pkl")
TARGET_DSET = "DT"
# Fixed seed for t-SNE: without it the 2D projection (and every plot and
# clustering derived from it) changes on each "Restart & Run All".
TSNE_SEED = 0


# Split the selected dataset into one frame per distinct day-of-month found in
# the `_time` index level.
df = cicids2017.load_dataset(DATASETPATH_CACHE)[TARGET_DSET]
sample_days = df.index.get_level_values("_time").day
days = np.unique(sample_days)
netdata = [df[sample_days == d] for d in days]


# Configure the network from the saved checkpoint and compute the embeddings
# of each daily frame (the `pool` attribute is switched to "last" beforehand).
net = cicids2017.configureAnchor(None, df, checkpoint=MODELPATH)
net.module_.pool = "last"
netdata = net.pointwise_embedding(netdata)
netdata = pd.concat(netdata)


# Remove truncated embeddings (rows whose `_embedding` is missing).
# A boolean mask is used instead of `.loc[<index of kept rows>]` so rows are
# never duplicated when the index contains repeated labels; `.copy()` makes
# the column assignment below operate on an independent frame.
netdata = netdata[netdata["_embedding"].notna()].copy()
tsne_input = np.stack(netdata["_embedding"].values)
# One 2-element array per row, stored as an object column.
netdata["_ebs2D"] = list(
    TSNE(n_components=2, random_state=TSNE_SEED).fit_transform(tsne_input))



### Minor changes to the dataset

In [3]:
# Coarse attack family of each row; starts as a copy of the fine-grained label
# and is then collapsed by the `rename_anomaly` calls below.
netdata["_group_anomaly"] = netdata["_isanomaly"]

def rename_anomaly(pattern, newname):
    """Set `_group_anomaly` to `newname` on every row matching `pattern`.

    Args:
        pattern: regular expression (the `Series.str.contains` default)
            searched in the *current* `_group_anomaly` value, so earlier
            calls influence what later calls can match.
        newname: replacement group label.

    Rows with a missing label never match (`na=False`), which also keeps the
    boolean mask free of NaN so it is always valid for `.loc`.
    """
    matches = netdata["_group_anomaly"].str.contains(pattern, na=False)
    netdata.loc[matches, "_group_anomaly"] = newname

rename_anomaly("Web Attack", "Web Attack")
rename_anomaly("Brute Force", "Brute Force")
# "DoS" is a substring of "DDoS": a plain "DoS" pattern would swallow every
# DDoS row and leave nothing for the "DDoS" call to match. The negative
# lookbehind only accepts a "DoS" that is not preceded by a "D".
rename_anomaly(r"(?<!D)DoS", "DoS")
rename_anomaly("DDoS", "DDoS")
rename_anomaly("NMap", "NMap")
rename_anomaly("Port Scan", "NMap")

# Move the index levels (including `_time`) into regular columns; the cells
# below access `_time` as a column.
netdata = netdata.reset_index()

### Computing clustering

In [4]:
from sklearn.cluster import KMeans, DBSCAN

# cluster_algorithm = DBSCAN(eps=.2, min_samples=4)
# KMeans starts from random centroids: seed it so the cluster ids shown in the
# widgets are the same on every run.
cluster_algorithm = KMeans(n_clusters=5, random_state=0)
# Estimator class name (e.g. "KMeans"), used in the checkbox description.
cluster_str = type(cluster_algorithm).__name__
# Clustering is computed on the 2D t-SNE coordinates, not on the raw embeddings.
clustering_input = np.stack(netdata["_ebs2D"].values)
clustering = cluster_algorithm.fit(clustering_input)
# Stored as strings so the ids can be used directly as dropdown options and
# as colour-map keys.
netdata["_clustering"] = clustering.labels_.astype(str)

In [5]:
# Keep rows whose `_aperc` is exactly 0 or at least 0.25; everything strictly
# in between is dropped (presumably `_aperc` is the anomalous share of the
# sample, so this removes ambiguous, mostly-clean samples — TODO confirm).
netdata = netdata[netdata["_aperc"].eq(0) | netdata["_aperc"].ge(.25)]

### Display

In [6]:
import warnings
from mpl_toolkits.mplot3d import Axes3D
from ipywidgets import Button, Layout
from datetime import datetime
# warnings.filterwarnings("ignore")
from umap import UMAP
import pandas as pd
import matplotlib.patches as mpatches
from pprint import pprint
import numpy as np
from ipywidgets import HBox, VBox, interactive
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.dates as mdates
from IPython.display import display, Markdown
import ipywidgets as widgets
import math
from sklearn.manifold import TSNE
import torch
from ipywidgets import interact, interact_manual
from IPython.display import set_matplotlib_formats
from importlib import reload
from pathlib import Path
from tqdm import tqdm
from sklearn.cluster import KMeans, DBSCAN

In [7]:
import ipywidgets as widgets
import matplotlib.pyplot as plt
from ipywidgets import HBox, VBox, interactive



# Weekday name -> index, using the same numbering as pandas `dt.dayofweek`
# (Monday is 0). Only the five working days are listed.
DAY2INT = dict(zip(
    ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday"),
    range(5),
))
# Reverse lookup: index -> weekday name.
INT2DAY = {index: name for name, index in DAY2INT.items()}


# ----- ----- COLORMAPS ----- ----- #
# ----- ----- --------- ----- ----- #
def ncolors(n):
    """Return `n` random colours as '#RRGGBB' hex strings.

    Each of the six hex digits is drawn independently with `np.random.choice`,
    so colours are not guaranteed to be distinct and depend on NumPy's global
    random state.
    """
    digits = list('0123456789ABCDEF')
    drawn = np.random.choice(digits, size=(n, 6))
    return [f"#{''.join(row)}" for row in drawn]

# Distinct (device category, host) pairs present in the data.
devices = set(zip(netdata["_device_category"], netdata["_host"]))
# Every value that can be used as a scatter label needs a colour. The grouped
# anomaly names are included as well: `whandler` colours by `_group_anomaly`
# when "Group anomalies" is ticked, and a group name that is not also an
# `_isanomaly` value would otherwise raise KeyError in `labeled_scatter`.
colorsmap = np.concatenate([ np.unique(netdata["_isanomaly"]),
                             np.unique(netdata["_group_anomaly"]),
                             np.unique(netdata["_clustering"]),
                             [ dev[0] for dev in devices],
                             [ dev[1] for dev in devices]])
# label -> random hex colour; a label listed more than once keeps the colour
# of its last occurrence.
colorsmap = dict(zip(colorsmap, ncolors(len(colorsmap))))


# ----- ----- WIDGETS ----- ----- #
# ----- ----- ------- ----- ----- #
# Device selector: one "<host> (<category>)" entry per device, alphabetical.
devices_str = sorted(f"{h} ({c})" for c, h in devices)
device_w_list = widgets.Dropdown(options=["ALL"] + devices_str)

# Day selector: only the weekdays that actually occur in the data.
weekdays = np.unique(netdata["_time"].dt.dayofweek)
available_days = [INT2DAY[weekday] for weekday in weekdays]
days_w_list = widgets.Dropdown(options=["ALL"] + available_days)

# Attack selector: every distinct `_isanomaly` value.
available_attacks = [*np.unique(netdata["_isanomaly"])]
show_attacks_dropdown = widgets.Dropdown(options=["ALL"] + available_attacks)

# Cluster selector: every distinct cluster id, "NONE" disables the filter.
clusters = [*np.unique(netdata["_clustering"])]
show_cluster_dropdown = widgets.Dropdown(options=["NONE"] + clusters)

# Toggles
clustering_descr = f"Show clustering ({cluster_str})"
hide_attacks_checkbox = widgets.Checkbox(description="Hide all attacks", value=False)
show_category_checkbox = widgets.Checkbox(description="Show category", value=False)
group_anomaly_checkbox = widgets.Checkbox(description="Group anomalies", value=False)
show_clustering_checkbox = widgets.Checkbox(description=clustering_descr, value=False)

# Layout: one row per (dropdown, checkbox) pair, rows stacked vertically.
ts0_selector = HBox([days_w_list, group_anomaly_checkbox])
ts1_selector = HBox([device_w_list, show_category_checkbox])
ts2_selector = HBox([show_attacks_dropdown, hide_attacks_checkbox])
ts3_selector = HBox([show_cluster_dropdown, show_clustering_checkbox])
wlist = VBox([
    ts0_selector,
    ts1_selector,
    ts2_selector,
    ts3_selector,
])


# ----- ----- INTERACTOR ----- ----- #
# ----- ----- ---------- ----- ----- #
def labeled_scatter(mask, labelname):
    """Scatter the 2D embeddings of the selected rows, one series per label.

    Args:
        mask: boolean selector applied to the global `netdata` frame.
        labelname: `netdata` column whose values name the legend entries and
            select each point's colour from the global `colorsmap`.

    Points are drawn on the current matplotlib axes. Nothing is drawn when the
    mask selects no rows.
    """
    selected = netdata[mask]
    if not len(selected):
        return

    # (n_points, 2) array built from the per-row coordinate arrays.
    points = np.stack(selected["_ebs2D"])
    xs, ys = points[:, 0], points[:, 1]

    labels = selected[labelname]
    point_colors = np.array([colorsmap[label] for label in labels])

    axes = plt.gca()
    for label in np.unique(labels):
        # Positional (not index-based) locations of the rows with this label.
        (positions,) = np.where(labels == label)
        axes.scatter(xs[positions], ys[positions],
                     color=point_colors[positions], label=label)


def split_device_str(s):
    """Split a "<host> (<category>)" string into `(host, category)`.

    The host is the text before the first space; the category is everything
    after it with all parentheses removed. Both parts are stripped of
    surrounding whitespace.
    """
    head, *rest = s.split(" ")
    category = " ".join(rest)
    for paren in "()":
        category = category.replace(paren, "")
    return head.strip(), category.strip()


def whandler(day, device, show_category, attack, hide_attacks, cluster_focus, show_clusters, attack_label):
    """Redraw the embedding scatter plot for the current widget selection.

    Args:
        day: weekday name to keep, or "ALL".
        device: "<host> (<category>)" entry to keep, or "ALL".
        show_category: label clean traffic by `_device_category`.
        attack: `_isanomaly` value to show, or "ALL" for every attack.
        hide_attacks: when true, attack points are not drawn at all.
        cluster_focus: cluster id to keep, or "NONE".
        show_clusters: label points by `_clustering` (for clean traffic this
            applies only when `show_category` is off).
        attack_label: label attacks by `_group_anomaly` instead of
            `_isanomaly` (ignored when `show_clusters` is on).
    """
    # Label column for the clean traffic.
    if show_category:
        clean_label = "_device_category"
    elif show_clusters:
        clean_label = "_clustering"
    else:
        clean_label = "_host"

    # Row filter shared by clean and attack traffic.
    selection = np.full(len(netdata), True)
    if day != "ALL":
        selection &= (netdata["_time"].dt.dayofweek == DAY2INT[day])
    if device != "ALL":
        selection &= (netdata["_host"] == split_device_str(device)[0])
    if cluster_focus != "NONE":
        selection &= (netdata["_clustering"] == cluster_focus)

    # Plotting all but attack traffic
    labeled_scatter(selection & (netdata["_isanomaly"] == "none"), clean_label)

    # Plotting attacks
    if not hide_attacks:
        if attack == "ALL":
            wanted = netdata["_isanomaly"] != "none"
        else:
            wanted = netdata["_isanomaly"] == attack

        if show_clusters:
            attack_column = "_clustering"
        elif attack_label:
            attack_column = "_group_anomaly"
        else:
            attack_column = "_isanomaly"

        labeled_scatter(selection & wanted, attack_column)

    # Fixing repeating labels: keep a single legend handle per label.
    axes = plt.gca()
    handles, labels = axes.get_legend_handles_labels()
    unique_entries = dict(zip(labels, handles))
    plt.legend(unique_entries.values(), unique_entries.keys(), bbox_to_anchor=(1, 0.5))
    plt.gcf().set_size_inches(10, 10)
    plt.show()
    
%matplotlib inline
output = widgets.interactive(whandler,
                             day=days_w_list,
                             device=device_w_list, 
                             show_category=show_category_checkbox, 
                             attack=show_attacks_dropdown, 
                             hide_attacks=hide_attacks_checkbox,
                             cluster_focus=show_cluster_dropdown, 
                             show_clusters=show_clustering_checkbox,
                             attack_label=group_anomaly_checkbox).children[-1]
display(wlist)
display(output)

VBox(children=(HBox(children=(Dropdown(description='day', options=('ALL', 'Monday', 'Tuesday', 'Wednesday', 'T…

Output()