In [21]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import matplotlib.patches as mpatches
from pprint import pprint
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import matplotlib.dates as mdates
from IPython.display import display, Markdown
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

# Render floats with two decimals in every DataFrame/Series display.
# The fully-qualified option key is used on purpose: pandas resolves
# abbreviated keys (such as 'float_format') by pattern matching, which stops
# working as soon as more than one option matches the abbreviation.
pd.set_option('display.float_format', '{:.2f}'.format)
# plt.rcParams.update({'font.size': 28})

In [17]:
# Constants ----- #
# Name of the dataset bucket to analyse. It selects which pickled DataFrame is
# loaded: the file `<BUCKET>.pkl` under ../dataset/CIC-IDS-2017-dframes/.
BUCKET = 'CIC_IDS_2017_Monday'

In [122]:
# Functions ----- #
def mprint(s):
    """Show the string `s` in the cell output, rendered as Markdown."""
    rendered = Markdown(s)
    display(rendered)
    

def describe_mtimeseries(key, mtimeserie):
    """Show a markdown summary and a min-max scaled plot of a multivariate time series.

    Parameters
    ----------
    key : str
        Label rendered as the section heading and used as the figure title.
    mtimeserie : pandas.DataFrame
        Samples to describe. The smallest and largest index values are
        reported as the time range, and each column is drawn in its own
        subplot.

    Returns
    -------
    None
        All output goes through `mprint` and matplotlib.

    Notes
    -----
    An empty selection (no rows or no columns) is reported with a short
    notice instead of raising, so interactive callers can pass any window.
    """
    separator = f"<br>{'-'*50}<br>{'-'*50}<br>"
    mprint(f"### {key}")

    # Nothing to summarise or plot: min()/max() of an empty index and
    # plotting a frame without columns would both raise.
    if mtimeserie.empty:
        mprint("**No data to show**: the selection contains no samples or no columns.")
        mprint(separator)
        return

    # Data description ..... #
    start = mtimeserie.index.min()
    end = mtimeserie.index.max()
    mprint(f"**Time range**: {start} **/** {end}")
    mprint(f"**Total data range:** {end-start}")
    mprint(f"**Samples shown**: {len(mtimeserie)}")

    # Scaling ..... #
    # Column-wise min-max scaling so that features with different magnitudes
    # are comparable; the small epsilon keeps constant columns (max == min)
    # from dividing by zero.
    col_min = mtimeserie.min()
    col_range = mtimeserie.max() - col_min
    clean_scaled_ts = (mtimeserie - col_min) / (col_range + 1e-7)

    # Plotting clean data ..... #
    # NOTE: shading of sampling gaps ("blind spots", consecutive index values
    # more than 30 s apart) was prototyped here but is currently disabled.
    clean_scaled_ts.plot(subplots=True)

    # Titles ..... #
    plt.gcf().suptitle(key)
    plt.show()

    mprint(separator)

# Plotting

In [123]:
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
df = pd.read_pickle(f'../dataset/CIC-IDS-2017-dframes/{BUCKET}.pkl')

# Feature names singled out for plotting.
plots_series = [
    'active_flows:flows_as_client',
    'active_flows:flows_as_server',
    'contacts:num_as_client',
    'contacts:num_as_server',
]

# Unique pairs left after dropping the third index level; each pair is
# unpacked as (category, host) to build the "host (category)" device labels.
idxs = df.index.droplevel(2).unique()
devices = sorted(f"{host} ({cat})" for cat, host in idxs)

# Alphabetically ordered feature (column) names.
cols = sorted(df.columns)

In [124]:
# Alphabetically ordered feature names offered by the 'Features' selector below.
cols = sorted(df.columns)

@interact
def show_articles_more_than(device=devices, 
                            columns=widgets.SelectMultiple(
                                options=cols,
                                value=cols[:1],
                                description='Features',
                                disabled=False
                            ),
                            trange=widgets.FloatSlider(min=.05, max=1., step=.05),
                            offset=widgets.FloatSlider(min=.0, max=1., step=.05)):
    """Interactively describe a window of one device's time series.

    Parameters
    ----------
    device : str
        Device label in the form "host (category)", as listed in `devices`.
    columns : sequence of str
        Feature columns to describe and plot.
    trange : float
        Window length as a fraction of the device's total number of samples.
    offset : float
        Window start as a fraction of the device's total number of samples.

    Notes
    -----
    NOTE(review): the function name does not describe what it does (it looks
    like a leftover from an example); it is kept so existing references to
    it keep working.
    """
    # Split "host (category)" at the first space and drop only the outer pair
    # of parentheses, so categories that themselves contain parentheses are
    # preserved intact.
    host, _, category = device.partition(" ")
    host = host.strip()
    category = category.strip()
    if category.startswith("(") and category.endswith(")"):
        category = category[1:-1].strip()

    columns = list(columns)
    if not columns:
        # An empty feature selection leaves nothing to plot.
        mprint("**Select at least one feature to plot.**")
        return

    host_ts = df.loc[category, host]
    ts_len = len(host_ts)
    start_idx = int(ts_len * offset)
    end_idx = min(start_idx + int(ts_len * trange), ts_len)

    window = host_ts.iloc[start_idx:end_idx][columns]
    if window.empty:
        # Happens e.g. with offset = 1.0, or when the requested range
        # truncates to zero samples for a short series.
        mprint(f"**No samples in the selected window** "
               f"(offset={offset:.2f}, range={trange:.2f}, "
               f"{ts_len} samples available).")
        return

    describe_mtimeseries(device.strip(), window)

interactive(children=(Dropdown(description='device', options=('192.168.10.1 (unknown device class)', '192.168.…