In [None]:
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime
from netaddr import IPNetwork, IPAddress

from helper import get_parametrizations

warnings.filterwarnings("ignore")

In [None]:
# Location of the gzip-compressed, header-less NetFlow export used for this experiment.
netflow: str = '/home/max/WORK/ipd-implementation/netflow/netflow_25k_overflow.csv.gz'

# Read the export, name its 15 positional columns, discard the fields this
# analysis never looks at, and order the flows by their end timestamp.
# What remains: peer_src_ip, in_iface, src_ip, dst_net, ts_end.
net_df = (
    pd.read_csv(netflow, compression='gzip', header=None)
    .set_axis(
        [
            'tag', 'peer_src_ip', 'in_iface', 'out_iface', 'src_ip',
            'dst_net', 'src_port', 'dst_port', 'proto', '__',
            '_', 'ts_start', 'ts_end', 'pkts', 'bytes',
        ],
        axis=1,
    )
    .drop(columns=[
        'tag', 'out_iface', 'src_port', 'dst_port', 'proto',
        '__', '_', 'ts_start', 'pkts', 'bytes',
    ])
    .sort_values('ts_end')
)

In [None]:
# First / last flow end time of the trace as local datetimes.
# `.loc[0]` would return the row *labelled* 0 (the first line of the CSV), which is
# not necessarily the earliest flow once the frame has been sorted; min()/max() do
# not depend on row order or index labels.
net_start = datetime.fromtimestamp(net_df['ts_end'].min())
net_end = datetime.fromtimestamp(net_df['ts_end'].max())

# Offload window bounds (unit not visible in this notebook — TODO confirm).
offload_start = 8
offload_end = 10

# AS the prefix is moved away from / moved to; `to_as` selects the `ext_<n>` ingress interfaces.
from_as = 3
to_as = 4

iteration = 150

# Prefix whose ingress point changes during the experiment.
off_pref = IPNetwork('3.105.0.0/28')

In [None]:
net_start.timestamp()

In [None]:
# Flows that entered through an interface of the target AS. `.copy()` makes `ext`
# an independent frame so the column assignment below is not a write to a slice
# of `net_df` (SettingWithCopy).
ext = net_df[net_df['in_iface'].str.contains(f'ext_{to_as}')].copy()

# ip_in == 0 -> source address lies inside the offloaded prefix, 1 -> outside.
ext['ip_in'] = ext['src_ip'].apply(lambda x: 0 if IPAddress(x) in off_pref else 1)

# End timestamp of the earliest flow from the offloaded prefix seen at the new ingress.
offloaded_flow_ends = ext.loc[ext['ip_in'] == 0, 'ts_end']
if offloaded_flow_ends.empty:
    raise ValueError(f'no flow from {off_pref} entered via an ext_{to_as} interface')
start_timestamp = offloaded_flow_ends.min()

In [None]:
# Empty result table with columns 'param' and 'time'.
# NOTE(review): `times` is re-created with additional columns ('q', 'c', 'cidr') right
# before the parameter sweep further down, so this initialisation looks redundant.
times = pd.DataFrame(columns=['param', 'time'])

In [None]:
def calc_time_diff(param: str):
    """Return how long one parametrization needed to see the offloaded prefix at the new ingress.

    Reads the range file ``range_{param}.csv.gz`` and returns the difference between
    the earliest ``t`` of a range whose ``netid_string`` lies inside the notebook-level
    ``off_pref`` and whose ingress router contains ``'ext_4'``, and the notebook-level
    ``start_timestamp``.

    Args:
        param: parametrization string as used in the range file name.

    Returns:
        ``first_found - start_timestamp``, or the sentinel ``-1000`` when the range file
        is empty or contains no matching range.

    Raises:
        FileNotFoundError: if the range file does not exist (not converted to the sentinel).
    """
    not_detected = -1000

    try:
        ranges = pd.read_csv(f'/home/max/WORK/masterthesis/pipeline/data/ranges/netflow_25k_overflow/range_{param}.csv.gz', compression='gzip', header=None)
    except pd.errors.EmptyDataError:
        return not_detected

    ranges.columns = ['t', 'ip_version', 'confidence', 'ingress_router',
        'parameter_q', 'parameter_c4', 'parameter_c6', 'parameter_cidr_max4',
        'parameter_cidr_max6', 'parameter_e', 'parameter_t', 'parameter_decay',
        'parameter_study_name', 'prefix_asn', 'netid_string', 'mask',
        'counter_samples', 'counter_samples_needed', 'pni', 'ipd_ranges_count',
        'ipd_cpu_runtime', 'iteration_cpu_runtime', 'ram_usage']
    ranges = ranges.sort_values('t')

    # NOTE(review): 'ext_4' is hard-coded here while the netflow cell derives the
    # interface from `to_as`; keep the two in sync if `to_as` ever changes.
    at_new_ingress = ranges[ranges['ingress_router'].str.contains('ext_4', regex=False, na=False)]

    # Boolean mask instead of a helper column: nothing is written to a slice of `ranges`.
    inside_prefix = (
        at_new_ingress['netid_string']
        .apply(lambda x: IPAddress(x) in off_pref)
        .astype(bool)
        .to_numpy()
    )
    hits = at_new_ingress.loc[inside_prefix, 't']

    # Explicit "nothing found" check replaces the former blanket `except Exception`,
    # which would also have hidden unrelated errors (e.g. a missing `start_timestamp`).
    if hits.empty:
        return not_detected

    # Rows are sorted by 't', so the first hit is the earliest one.
    return hits.iloc[0] - start_timestamp

In [None]:
# Evaluate every parametrization and collect one result row per run.
params = get_parametrizations()

# Rows are gathered in a list and turned into a frame once. Growing the frame with
# pd.concat inside the loop is quadratic, leaves every row with index label 0 and
# degrades the column dtypes to `object`.
sweep_records = []
for _row_label, row in params.iterrows():
    # File-name fragment identifying the run, e.g. q0.501_c0.1-30.0_cidr_max20-32_t30_e120_decaydefault
    param_name = (f"q{row['q']}_c{row['c4']}-{row['c6']}_cidr_max{row['cidr4']}-{row['cidr6']}_t"
                  f"{row['t']}_e{row['e']}_decay{row['decay']}")
    sweep_records.append({
        'param': param_name,
        'q': row['q'],
        'c': row['c4'],
        'cidr': row['cidr4'],
        'time': calc_time_diff(param_name),  # -1000 means "not detected"
    })

times = pd.DataFrame(sweep_records, columns=['param', 'q', 'c', 'cidr', 'time'])

print(times)

In [None]:
times.to_csv('test_offload.csv', index=False)

In [None]:
calc_time_diff('q0.501_c0.1-30.0_cidr_max20-32_t30_e120_decaydefault')

In [None]:
times

In [None]:
# Number of parametrizations that never detected the offload (time == -1000),
# broken down first by q, then by cidr, then by c; a blank line separates the groups.
for q_value in (0.501, 0.7, 0.95, 0.99):
    print(len(times.query(f'q=={q_value} and time==-1000')))
print()

for i in range(20, 31):
    print(len(times.query(f'cidr=={i} and time==-1000')))
print()

for i in (0.05, 0.025, 0.005, 0.1, 0.5, 1, 2, 5):
    print(len(times.query(f'c=={i} and time==-1000')))


In [None]:
times.query('q==0.99 and time==-1000').shape

In [None]:
# Number of q == 0.501 parametrizations with a time above the -1000 sentinel.
# NOTE(review): `plottings` is first assigned in the plotting cell further down, so on a
# fresh kernel (Restart & Run All) this cell raises NameError.
len(plottings.query('q==0.501 and time > -1000')['time'].to_list())

In [None]:
# Number of parametrizations with q == 0.501.
# NOTE(review): `parametrizations` is only assigned much later in the notebook (the
# check_param sweep cell); on a fresh kernel this cell raises NameError.
len(parametrizations.query('q==0.501'))

In [None]:
# Detection-time curves: one line per value of the selected IPD parameter.
# For each value the detected runs are plotted in ascending order of detection time
# (x = rank, y = minutes); the legend shows how many runs never detected the offload.

NOT_DETECTED = -1000  # sentinel returned by calc_time_diff

# Parameter to break the runs down by, plus the value grid for each choice.
# Switching `param` is enough; `values` follows.
param_values = {
    'q': [0.501, 0.7, 0.95, 0.99],
    'c': [0.05, 0.025, 0.005, 0.1, 0.5, 1, 2, 5],
    'cidr': np.arange(20, 31, 1).tolist(),
}
param = 'c'
values = param_values[param]

colors1 = ['#cc79a7', '#009e74', '#0071b2', '#d55c00', '#e69d00', '#f0e442', '#337539', '#9f4a96', '#5da899', '#dcce7d', '#2f2585', '#7e2954']
# Alternative palette, currently unused.
colors2 = ['#000000', '#009e74', '#0071b2', '#56b4e9', '#f0e442', '#e69d00', '#d55c00', '#cc79a7']

plottings = times.sort_values('time')

# Decide "detected vs. not detected" on the raw values, then convert every real
# measurement from seconds to minutes. The previous `x >= 0` test left negative
# measurements in seconds on a minutes axis.
detected_rows = (plottings['time'] != NOT_DETECTED).to_numpy()
plottings['time'] = np.where(detected_rows, plottings['time'] / 60, plottings['time'])

fig, ax = plt.subplots()

detected_counts = []
for position, m in enumerate(values):
    in_group = (plottings[param] == m).to_numpy()
    detected_times = plottings.loc[in_group & detected_rows, 'time'].to_list()
    missed = int((in_group & ~detected_rows).sum())

    ax.plot(
        np.arange(1, len(detected_times) + 1, 1),
        detected_times,
        label=f'{m} ({missed})',
        color=colors1[position],
        linewidth=2,
    )
    detected_counts.append(len(detected_times))

# One x tick at the end of each curve. The former de-duplication compared against an
# empty list and therefore never removed anything; a set does.
ticks = sorted(set(detected_counts))

ax.legend(title=f'{param} (not detected)')
ax.grid()
ax.set_xticks(ticks)

ax.set_ylabel('detection time in minutes', fontweight='bold')
ax.set_xlabel('parametrizations', fontweight='bold')
ax.set_title('Detection time per parametrization', fontweight='bold')
fig.savefig(f'offload_{param}.pdf')

In [None]:
# Box plots of the detection time grouped by q, c and cidr (one panel each).
# NOTE(review): `times` still contains the -1000 "not detected" sentinel at this point,
# so those rows are part of the boxes — confirm that this is intended.
grouping_columns = ['q', 'c', 'cidr']
panel_titles = ['q', 'c', 'cidr_max']
panel_ylabels = ['detection time in seconds', '', '']

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
colors = {'boxes': 'k', 'whiskers': 'k', 'medians': 'r', 'caps': 'k'}

# Draw all three panels first ...
for panel, column_name in zip(axes, grouping_columns):
    times.boxplot(by=column_name, column='time', ax=panel, color=colors)

# ... then replace the titles and labels pandas generated for them.
for panel, heading, ylabel in zip(axes, panel_titles, panel_ylabels):
    panel.set_title(heading, fontweight='bold')
    panel.set_xlabel('', fontweight='bold')
    panel.set_ylabel(ylabel, fontweight='bold')

fig.suptitle('time to detect offload', fontweight='bold')
fig.tight_layout()
fig.savefig('offload_boxplot.pdf')

In [None]:
# Load the range table of a single parametrization for manual inspection: read the
# header-less gzip CSV, name its 23 columns and order the rows by time `t`.
ranges = (
    pd.read_csv(
        '/home/max/WORK/masterthesis/pipeline/data/ranges/netflow_25k_overflow/range_q0.501_c0.1-30.0_cidr_max20-32_t30_e120_decaydefault.csv.gz',
        compression='gzip',
        header=None,
    )
    .set_axis(
        [
            't', 'ip_version', 'confidence', 'ingress_router',
            'parameter_q', 'parameter_c4', 'parameter_c6', 'parameter_cidr_max4',
            'parameter_cidr_max6', 'parameter_e', 'parameter_t', 'parameter_decay',
            'parameter_study_name', 'prefix_asn', 'netid_string', 'mask',
            'counter_samples', 'counter_samples_needed', 'pni', 'ipd_ranges_count',
            'ipd_cpu_runtime', 'iteration_cpu_runtime', 'ram_usage',
        ],
        axis=1,
    )
    .sort_values('t')
)

In [None]:
# Ranges whose ingress router contains 'ext_4'. `.copy()` gives `t` its own data so the
# column assignments in this and the following cells are not writes to a slice of `ranges`.
t = ranges[ranges['ingress_router'].str.contains('ext_4')].copy()

# ip_in == 0 -> the range's network address lies inside `off_pref`, 1 -> outside.
t['ip_in'] = t['netid_string'].apply(lambda x: 0 if IPAddress(x) in off_pref else 1)

# Time of the first such range (rows are sorted by `t`).
t[t['ip_in'] == 0]['t'].iloc[0]

In [None]:
1693551870-1693551044.0

In [None]:
ranges['t'].iloc[0]

In [None]:
t[t['ingress_router'] == 'SANF.ext_4_LOND'].to_csv('test_offload2.csv', index=False)

In [None]:
t['diff'] = t['t'].diff()

In [None]:
t['diff'].unique()

In [None]:
t[t['ip_in'] == 0]['t'].diff()

In [None]:
start_timestamp

In [None]:
(1693555890-start_timestamp)

In [None]:
# Restrict the range table to the offload window and split it by ingress interface
# ('ext_3' vs. 'ext_4').
# NOTE(review): `start_offload_timestamp` and `end_offload_timestamp` are not assigned
# anywhere in the visible notebook, so this cell raises NameError on a fresh kernel.
# Later cells read `.day` / `.month` / `.hour` from them, which suggests datetime
# objects; interpolated into this query string they would not be comparable with the
# numeric `t` column — confirm the intended values.
time_ranges = ranges.query(f't>={start_offload_timestamp} and t<={end_offload_timestamp}')
old_as_ranges = time_ranges[time_ranges['ingress_router'].str.contains('ext_3')]
new_as_ranges = time_ranges[time_ranges['ingress_router'].str.contains('ext_4')]

In [None]:
new_as_ranges

In [None]:
# Rows whose network address contains the literal text '3.1'. `regex=False` is needed
# because str.contains treats its pattern as a regular expression by default, where
# '.' matches any character (so e.g. '301' or '3x1' would match as well).
new_as_ranges[new_as_ranges['netid_string'].str.contains('3.1', regex=False)]

In [None]:
old_as_ranges

In [None]:
f'{start_offload_timestamp.day}.{start_offload_timestamp.month}.{start_offload_timestamp.year} {start_offload_timestamp.hour}:{start_offload_timestamp.minute}:{start_offload_timestamp.second}'

In [None]:
f'{end_offload_timestamp.day}.{end_offload_timestamp.month}.{end_offload_timestamp.year} {end_offload_timestamp.hour}:{end_offload_timestamp.minute}:{end_offload_timestamp.second}'

In [None]:
time_to_first_range = (datetime.fromtimestamp(1693551870)-net_start).total_seconds()/60

In [None]:
datetime.fromtimestamp(1693551870)

In [None]:
1693551900%30

In [None]:
ranges

In [None]:
ranges['t'][:100]

In [None]:
# Reset the global result table to an empty ('param', 'time') frame; check_param()
# below appends one row per call to this global.
# NOTE(review): this discards the ('param', 'q', 'c', 'cidr', 'time') table built by the
# earlier sweep, so the plotting cells above cannot be re-run after this point.
times = pd.DataFrame(columns=['param', 'time'])

In [None]:
# Inputs for the second experiment (consumed by check_param and the `df` load cell below).
# The commented line is an alternative range file kept by the author.
# range_file: str = '/home/max/WORK/masterthesis/pipeline/data/ranges/offload/range_q0.501_c64.0-24.0_cidr_max23-38_t30_e120_decaydefault.csv.gz'
range_file: str = '/home/max/WORK/masterthesis/pipeline/data/ranges/offload/range_q0.7_c48.0-18.0_cidr_max20-32_t30_e120_decaydefault.csv.gz'

# Bounds used by check_param's time filter; the two values are 3000 apart
# (presumably epoch seconds, i.e. 50 minutes — TODO confirm).
# NOTE(review): `range_file` points into .../ranges/offload/ while check_param reads
# from .../ranges/netflow_25k_overflow/ — confirm which data set these bounds belong to.
offstart = 1693050709
offend = 1693053709

# NOTE(review): `offpref` is not referenced anywhere else in the visible notebook
# (the earlier cells use `off_pref`).
offpref = IPNetwork('2.101.0.0/28')

# NOTE(review): rebinds `from_as`, which the first configuration cell set to 3.
# check_param uses `from_as` as the leading octet of the address and `targ_as` as the
# `ext_<n>` ingress suffix.
from_as = 2
targ_as = 5

In [None]:
def get_parametrizations() -> pd.DataFrame:
    """Build the table of IPD parametrizations evaluated in this notebook.

    Every q is combined with every (c4, cidr4) pair. The IPv6 counterparts are not
    crossed independently: ``c6`` cycles through its four values in step with ``c4``,
    and ``cidr6`` advances in lock-step with ``cidr4``. ``e``, ``t`` and ``decay`` are
    constant.

    Note: defining this function rebinds the name imported from ``helper`` at the top
    of the notebook.

    Returns:
        DataFrame with columns q, c4, c6, cidr4, cidr6, e, t, decay and
        ``len(qs) * len(cs) * len(cidrs)`` rows.
    """
    # Value grids
    q_values = [0.501, 0.7, 0.95, 0.99]
    c4_values = [0.05, 0.025, 0.005, 0.1, 0.5, 1, 2, 5]
    c6_values = [12.0, 18, 24, 30]
    cidr4_values = np.arange(20, 31, 1).tolist()
    cidr6_values = np.arange(32, 51, 2).tolist() + [54]
    e_values = [120]
    decay_values = ['default']
    t_values = [30]

    n_cidr = len(cidr4_values)

    # Each q spans one complete (c4 x cidr4) grid.
    q_column = [q for q in q_values for _ in range(len(c4_values) * n_cidr)]
    n_rows = len(q_column)

    def tile_to_length(block: list) -> list:
        """Repeat ``block`` as many whole times as fit into the table length."""
        return int(n_rows / len(block)) * block

    # Each c value is held constant while cidr runs through its grid once.
    c4_block = [c for c in c4_values for _ in range(n_cidr)]
    c6_block = [c for c in c6_values for _ in range(n_cidr)]

    return pd.DataFrame({
        'q': q_column,
        'c4': tile_to_length(c4_block),
        'c6': tile_to_length(c6_block),
        'cidr4': tile_to_length(cidr4_values),
        'cidr6': tile_to_length(cidr6_values),
        'e': n_rows * e_values,
        't': n_rows * t_values,
        'decay': n_rows * decay_values,
    })

In [None]:
def check_param(param: str):
    """Append one ``(param, time)`` row for the given parametrization to the global ``times``.

    Loads ``range_{param}.csv.gz``, keeps the rows with ``t`` before ``offstart`` or
    after ``offend``, narrows them to network addresses starting with ``'{from_as}.'``
    that were seen on an ``ext_{targ_as}`` ingress router, and stores
    ``(earliest t - offstart) / 60``. If nothing matches, or the file is empty, ``-100``
    is stored instead.

    NOTE(review): the time filter selects rows *outside* [offstart, offend] — confirm
    that this is intended. The sentinel here is -100 while calc_time_diff uses -1000.

    Args:
        param: parametrization string as used in the range file name.
    """
    global times

    time: float = -100  # stays in place unless a usable measurement is found
    try:
        df = pd.read_csv(f'/home/max/WORK/masterthesis/pipeline/data/ranges/netflow_25k_overflow/range_{param}.csv.gz', compression='gzip', header=None)
        df.columns = ['t', 'ip_version', 'confidence', 'ingress_router',
                      'parameter_q', 'parameter_c4', 'parameter_c6', 'parameter_cidr_max4',
                      'parameter_cidr_max6', 'parameter_e', 'parameter_t', 'parameter_decay',
                      'parameter_study_name', 'prefix_asn', 'netid_string', 'mask',
                      'counter_samples', 'counter_samples_needed', 'pni', 'ipd_ranges_count',
                      'ipd_cpu_runtime', 'iteration_cpu_runtime', 'ram_usage']
        df.sort_values('t', inplace=True)

        timeframe = df.query(f't < {offstart} or t > {offend}')
        from_source_as = timeframe[timeframe['netid_string'].str.startswith(f'{from_as}.')]
        at_target = from_source_as[from_source_as['ingress_router'].str.contains(f'ext_{targ_as}')]

        # min() of an empty selection is NaN; in that case the sentinel is kept.
        measured = (at_target['t'].min() - offstart) / 60
        if not np.isnan(measured):
            time = measured
    except pd.errors.EmptyDataError:
        # Empty range file: recorded with the sentinel as well.
        pass

    times = pd.concat([times, pd.DataFrame({'param': [param], 'time': [time]})])

In [None]:
1693053750; 1693053750

In [None]:
check_param('q0.501_c32.0-12.0_cidr_max22-36_t30_e120_decaydefault')

In [None]:
# Load the range table referenced by `range_file`, name its 23 columns, order it by
# time `t`, then print its shape and the covered `t` interval as "<min>-<max>".
df = (
    pd.read_csv(range_file, compression='gzip', header=None)
    .set_axis(
        [
            't', 'ip_version', 'confidence', 'ingress_router',
            'parameter_q', 'parameter_c4', 'parameter_c6', 'parameter_cidr_max4',
            'parameter_cidr_max6', 'parameter_e', 'parameter_t', 'parameter_decay',
            'parameter_study_name', 'prefix_asn', 'netid_string', 'mask',
            'counter_samples', 'counter_samples_needed', 'pni', 'ipd_ranges_count',
            'ipd_cpu_runtime', 'iteration_cpu_runtime', 'ram_usage',
        ],
        axis=1,
    )
    .sort_values('t')
)
print(df.shape)
print('{}-{}'.format(df['t'].min(), df['t'].max()))

In [None]:
df['t'][:30]

In [None]:
# Run check_param for every parametrization; each call appends one row to the
# global `times`, which is printed at the end.
parametrizations = get_parametrizations()

for combo in parametrizations.itertuples(index=False):
    check_param(
        f"q{combo.q}_c{combo.c4}-{combo.c6}_cidr_max{combo.cidr4}-{combo.cidr6}"
        f"_t{combo.t}_e{combo.e}_decay{combo.decay}"
    )

print(times)


In [None]:
from datetime import datetime, timedelta

# Local datetime for epoch 1693047709, then the instant 100 minutes later —
# printed once as a datetime and once as an epoch timestamp.
start: datetime = datetime.fromtimestamp(1693047709)
for rendered in (start + timedelta(minutes=100), (start + timedelta(minutes=100)).timestamp()):
    print(rendered)

In [None]:
times[times['time'].notnull()].to_csv('test2.csv', index=None)

In [None]:
times.to_csv('test.csv', index=None)

In [None]:
get_parametrizations()['q'].unique()

In [None]:
len(get_parametrizations())