In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams
# Use a 12-point base font for every figure drawn in this notebook.
rcParams['font.size'] = 12

In [None]:
# Dataset names; each one is also the sub-directory name under data/<source>/.
datasets = ["ugr16", "cidds", "ton"]

# One dict per trace source, keyed by dataset name:
#   traces          -> real trace          (data/raw/<name>/raw.csv)
#   traces_netshare -> NetShare synthetic  (data/netshare/<name>/syn.csv)
#   traces_stan     -> STAN synthetic      (data/stan/<name>/syn.csv)
traces, traces_netshare, traces_stan = {}, {}, {}
for d in datasets:
    traces[d] = pd.read_csv(f"data/raw/{d}/raw.csv")
    traces_netshare[d] = pd.read_csv(f"data/netshare/{d}/syn.csv")
    traces_stan[d] = pd.read_csv(f"data/stan/{d}/syn.csv")

In [None]:
# Add a 'delta_ts' column to every loaded frame: the difference between each
# row's 'ts' and the previous row's 'ts', in file order. The first row has no
# predecessor, so its NaN is replaced by 0.
for d in datasets:
    for frames in (traces, traces_netshare, traces_stan):
        frames[d]['delta_ts'] = frames[d]['ts'].diff().fillna(0)

In [None]:
# Empirical CDFs of the per-record packet ("pkt") and byte ("byt") counts.
# Grid layout: one row per field, one column per dataset; every panel overlays
# the real trace with the NetShare and STAN synthetic traces.
fields = ["pkt", "byt"]
# squeeze=False keeps `axes` two-dimensional even for a single row or column.
fig, axes = plt.subplots(len(fields), len(datasets), figsize=(12, 8), squeeze=False)
for i, field in enumerate(fields):
    for j, d in enumerate(datasets):
        # .assign() returns a new labelled frame. Writing the column into the
        # result of `df[[field]]` directly raises SettingWithCopyWarning.
        pkt = traces[d][[field]].assign(label="real")
        pkt_netshare = traces_netshare[d][[field]].assign(label="NetShare")
        pkt_stan = traces_stan[d][[field]].assign(label="STAN")

        # ignore_index=True gives the stacked frame a fresh unique index
        # without the extra "index" column that reset_index() would leave.
        all_pkt = pd.concat([pkt, pkt_netshare, pkt_stan], ignore_index=True)
        # Counts are plotted as integers; astype(int) raises if the column
        # contains NaN.
        all_pkt[field] = all_pkt[field].astype(int)

        ax = axes[i, j]
        sns.ecdfplot(data=all_pkt, x=field, hue="label", log_scale=True, ax=ax)
        ax.set_xlabel("#packets per record" if field == "pkt" else "#bytes per record")
        if i == 0:
            # Dataset name only on the top row, as a column header.
            ax.set_title(d.upper())
        # Y-axis label only on the left-most column.
        ax.set_ylabel("CDF" if j == 0 else "")

In [None]:
# Empirical CDFs of the flow-start interarrival ("delta_ts") and the flow
# duration ("td"). Grid layout: one row per field, one column per dataset;
# every panel overlays the real trace with the NetShare and STAN traces.
fields = ["delta_ts", "td"]
# Multiplier applied to each field before plotting. The axis labels are in
# seconds, so this presumably means 'ts' is in microseconds and 'td' in
# milliseconds -- TODO confirm against the CSV schema.
scale = {"delta_ts": 1e-6, "td": 1e-3}
xlabels = {"delta_ts": "Interarrival of flow start time(s)", "td": "Flow duration(s)"}

# squeeze=False keeps `axes` two-dimensional even for a single row or column.
fig, axes = plt.subplots(len(fields), len(datasets), figsize=(12, 8), squeeze=False)
for i, field in enumerate(fields):
    for j, d in enumerate(datasets):
        # .assign() returns a new labelled frame. Writing the column into the
        # result of `df[[field]]` directly raises SettingWithCopyWarning.
        pkt = traces[d][[field]].assign(label="real")
        pkt_netshare = traces_netshare[d][[field]].assign(label="NetShare")
        pkt_stan = traces_stan[d][[field]].assign(label="STAN")

        # ignore_index=True gives the stacked frame a fresh unique index
        # without the extra "index" column that reset_index() would leave.
        all_pkt = pd.concat([pkt, pkt_netshare, pkt_stan], ignore_index=True)
        all_pkt[field] = all_pkt[field] * scale[field]

        ax = axes[i, j]
        # NOTE(review): 'delta_ts' is 0 for the first row of every trace, and
        # zero has no position on a log axis -- confirm how seaborn treats
        # those points.
        sns.ecdfplot(data=all_pkt, x=field, hue="label", log_scale=True, ax=ax)
        ax.set_xlabel(xlabels[field])
        if i == 0:
            # Dataset name only on the top row, as a column header.
            ax.set_title(d.upper())
        # Y-axis label only on the left-most column.
        ax.set_ylabel("CDF" if j == 0 else "")