In [None]:
import os, sys
import pandas as pd
import numpy as np
import math
import glob
from scipy import stats
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import subprocess
import json
import itertools
import re

from IPython.display import display, HTML

%matplotlib notebook

# Congestion-control algorithm names; the timeline plots iterate over this list and use
# each entry as the <cc> component of the log folder path.
cc_algorithms = ['lia', 'olia', 'balia', 'wvegas', 'cubic']

In [None]:
# Reading JSON config files
def load_config(topology):
    """
    Load the JSON description of a topology from './topologies/<topology>.json'.

    Returns the decoded JSON content, or None (after printing a notice) when the
    file does not exist.
    """
    path = './topologies/{}.json'.format(topology)
    if os.path.isfile(path):
        with open(path, 'r') as handle:
            return json.load(handle)

    print('JSON topology file not found! {}'.format(path))
    return None


def get_iperf_pairings(topology):
    """
    Derive the iperf (client, server) host pairs of a topology.

    Hosts are the config nodes whose id starts with 'h'. A host whose 'properties'
    contain a 'server' entry is treated as a client of that server.

    Returns a list of (client_id, server_id) string tuples and prints a warning for
    every host that does not appear in any pair.

    Raises FileNotFoundError when the topology config could not be loaded.
    """
    config = load_config(topology)
    if config is None:
        # load_config already reported the missing file; fail with a clear error
        # instead of a TypeError on the subscript below.
        raise FileNotFoundError('No topology config available for {!r}'.format(topology))

    host_nodes = [node for node in config['nodes'] if node['id'].startswith('h')]
    pairs = [(str(node['id']), str(node['properties']['server']))
             for node in host_nodes if 'server' in node['properties']]

    # make sure every host is included in some connection
    # A set (not a one-shot iterator) so every membership test sees all paired hosts.
    paired_hosts = set(itertools.chain.from_iterable(pairs))
    for node in host_nodes:
        if str(node['id']) not in paired_hosts:
            print('Host {} not contained in any host pairings!'.format(node['id']))
    return pairs

In [None]:
def read_iperf_pair_tp(folder, client, server, repetitions):
    """
    Read the iperf logs of one client/server pair for all repetitions.

    For every ``rep`` in ``range(repetitions)`` two files are parsed:
    ``<folder>/<rep>-<client>_iperf.csv`` (the 7th whitespace-separated token of its last
    line is taken as the throughput of that run) and ``<folder>/<rep>-<server>_iperf.csv``
    (per-interval trace).

    Returns ``((mean_tp, tp_confidence), trace_df)`` where ``tp_confidence`` is computed by
    ``confidence_interval`` over the per-repetition throughputs and ``trace_df`` holds one
    row per parsed line of the server logs (with 'repetition', 'client' and 'server' columns).
    """
    file_name = folder + '/{}-{}_iperf.csv'
    trace_columns = ['ID', 'Interval', 'Interval_unit', 'Transfer', 'Transfer_unit',
                     'Bandwidth', 'Bandwidth_unit']
    goodputs = []
    frames = []

    for rep in range(repetitions):
        cli_file = file_name.format(rep, client)
        ser_file = file_name.format(rep, server)

        with open(cli_file, 'r') as f:
            content = f.read().replace('iperf Done.', '').strip().splitlines()
        recv_line = content[-1].split()
        recv_tp = float(recv_line[6])
        # error when encountering non standard unit for throughput
        if 'Mbits' not in recv_line[7]:
            print('ERROR: iperf reported in another unit than Mbps! See {}'.format(cli_file))
        goodputs.append(recv_tp)

        with open(ser_file, 'r') as f:
            content = f.read().replace('iperf3: interrupt - the server has terminated', '').strip().splitlines()
        # Drop the first 6 and the last 4 lines of the server log.
        content = content[6:-4] # TODO replace with conditional cutting
        df = pd.DataFrame([l.replace('[', '').replace(']', '').split() for l in content],
                          columns=trace_columns)
        df['repetition'] = rep
        frames.append(df)

    # DataFrame.append was removed in pandas 2.0; concatenate all repetitions once.
    tp_dfs = pd.concat(frames, ignore_index=True)

    goodputs = pd.Series(goodputs)
    tp_dfs['client'] = client
    tp_dfs['server'] = server
    # 'Interval' has the form '<start>-<end>'
    tp_dfs['Interval_start'] = tp_dfs['Interval'].str.split('-').str[0].astype(float)
    tp_dfs['Interval_end'] = tp_dfs['Interval'].str.split('-').str[1].astype(float)
    tp_dfs['ID'] = pd.to_numeric(tp_dfs['ID'])
    tp_dfs['Transfer'] = pd.to_numeric(tp_dfs['Transfer'])
    tp_dfs['Bandwidth'] = pd.to_numeric(tp_dfs['Bandwidth'])
    return (goodputs.mean(), confidence_interval(goodputs)), tp_dfs


def read_iperf_srv_tp(folder, file):
    """
    Return the throughput figure of an iperf log: the 7th whitespace-separated token
    of the last line, after removing the 'iperf Done.' marker and surrounding whitespace.
    """
    path = '{}/{}'.format(folder, file)
    with open(path, 'r') as handle:
        text = handle.read()
    lines = text.replace('iperf Done.', '').strip().splitlines()
    last_tokens = lines[-1].split()
    return float(last_tokens[6])


# TODO simplify and use in read_iperf_pair_tp to eliminate duplicate code
def read_iperf_srv_tp_trace(folder, client, server, repetitions):
    """
    Read the per-interval throughput trace from the server iperf logs of all repetitions.

    For every ``rep`` in ``range(repetitions)`` the file ``<folder>/<rep>-<server>_iperf.csv``
    is parsed; its first 6 and last 4 lines are dropped and each remaining line is split
    into ID, Interval, Transfer and Bandwidth (plus their unit tokens).

    Returns one DataFrame with numeric 'ID', 'Transfer', 'Bandwidth', 'Interval_start'
    and 'Interval_end' columns and the constant 'client' / 'server' labels;
    'repetition' marks the run each row came from.
    """
    file_name = folder + '/{}-{}_iperf.csv'
    trace_columns = ['ID', 'Interval', 'Interval_unit', 'Transfer', 'Transfer_unit',
                     'Bandwidth', 'Bandwidth_unit']
    frames = []

    for rep in range(repetitions):
        ser_file = file_name.format(rep, server)

        with open(ser_file, 'r') as f:
            content = f.read().replace('iperf3: interrupt - the server has terminated', '').strip().splitlines()
        content = content[6:-4] # TODO replace with conditional cutting
        df = pd.DataFrame([l.replace('[', '').replace(']', '').split() for l in content],
                          columns=trace_columns)
        df['repetition'] = rep
        frames.append(df)

    # DataFrame.append was removed in pandas 2.0; concatenate all repetitions once.
    tp_dfs = pd.concat(frames, ignore_index=True)

    tp_dfs['client'] = client
    tp_dfs['server'] = server
    # 'Interval' has the form '<start>-<end>'
    tp_dfs['Interval_start'] = tp_dfs['Interval'].str.split('-').str[0].astype(float)
    tp_dfs['Interval_end'] = tp_dfs['Interval'].str.split('-').str[1].astype(float)
    tp_dfs['ID'] = pd.to_numeric(tp_dfs['ID'])
    tp_dfs['Transfer'] = pd.to_numeric(tp_dfs['Transfer'])
    tp_dfs['Bandwidth'] = pd.to_numeric(tp_dfs['Bandwidth'])
    return tp_dfs


def confidence_interval(series, z=1.96):
    """
    Half-width of the confidence interval of the mean of `series`, computed as
    z * std / sqrt(count). The default z=1.96 corresponds to a 95% confidence interval.
    See https://en.wikipedia.org/wiki/Confidence_interval#Basic_steps for further values.
    """
    summary = series.agg(['mean', 'count', 'std'])
    scaled_std = z*summary['std']
    sample_size = summary['count']
    return scaled_std/math.sqrt(sample_size)

    
def read_pcap_csv(file_name):
    """
    Load a tab-separated packet export and convert its 'tcp.analysis.ack_rtt'
    column from [s] to [ms].
    """
    rtt_col = 'tcp.analysis.ack_rtt'
    frame = pd.read_csv(file_name, sep='\t')
    # put rtt into [ms] instead of [s]
    frame[rtt_col] = frame[rtt_col].mul(1000)
    return frame


def read_iperf_pair_rtt(folder, client, repetitions):
    """
    Read the packet dumps of one client for all repetitions and summarise the ACK RTT.

    For every ``rep`` in ``range(repetitions)`` the file
    ``<folder>/<rep>-<client>_iperf_dump.csv`` is loaded through ``read_pcap_csv``.

    Returns ``((mean_rtt, rtt_confidence), df)``: the mean of the per-repetition mean
    RTTs, its confidence value from ``confidence_interval``, and the concatenated dump
    rows of all repetitions (including rows without an RTT value) with a 'repetition' column.
    """
    file_name = folder + '/{}-{}_iperf_dump.csv'
    frames = []
    for rep in range(repetitions):
        pcap = read_pcap_csv(file_name.format(rep, client))
        pcap['repetition'] = rep
        frames.append(pcap)

    # DataFrame.append was removed in pandas 2.0; concatenate all repetitions once.
    df = pd.concat(frames, ignore_index=True)

    # Only rows that carry an RTT sample contribute to the statistics.
    rtt_df = df.dropna(subset=['tcp.analysis.ack_rtt'])
    rtt_stats = rtt_df.groupby(['repetition'])['tcp.analysis.ack_rtt'].mean()
    return (rtt_stats.mean(), confidence_interval(rtt_stats)), df


def extract_rep_host_name(filename):
    """ Returns a tuple (host_name, rep) parsed from a '<rep>-<host_name>_...' file name. """
    prefix = filename.split('_')[0]
    parts = prefix.split('-')
    host_name = parts[1]
    return host_name, int(parts[0])





def load_iperf_experiments_new(topology, repetitions=3):
    """
    Read in log data from experiments, every throughput should come out in [Mbps] and times in [ms].
    One exception to this rule is the relative time since experiment start in the rtt trace.

    Walks './logs/<topology>' and treats every leaf folder
    ('.../<cc>/<bandwidths>/<delays>', e.g. './logs/two_paths/cubic/25Mbps-9Mbps/10ms-10ms')
    as one experiment.

    Returns a DataFrame with one row per leaf folder holding 'cc', per-server
    '<server>_tp' / '<server>_tp_conf', per-client '<client>_rtt' / '<client>_rtt_conf' and
    the 'bw_<group>' / 'de_<group>' values (groups 'a'..'d') parsed from the folder names.
    """
    def parse_number(text):
        # First decimal number in the text. Stripping all non-digits would turn
        # '10.0ms' into 100; integral values are returned as int, others as float.
        match = re.search(r'[0-9]+(?:\.[0-9]+)?', text)
        if match is None:
            raise ValueError('No number found in folder name part {!r}'.format(text))
        value = float(match.group())
        return int(value) if value.is_integer() else value

    groups = ['a', 'b', 'c', 'd']
    pairs = get_iperf_pairings(topology)
    rows = []

    for dirpath, dirnames, filenames in os.walk('./logs/{}'.format(topology)):
        if dirnames:
            continue

        # Leaf folder, read in files and analyze
        # dirpath has form "./logs/two_paths/cubic/25Mbps-9Mbps/10ms-10ms"
        # Normalise the separator so the split also works where os.sep is not '/'.
        dirpath_split = dirpath.replace(os.sep, '/').split('/')
        cc = dirpath_split[-3]
        bws = dirpath_split[-2].split('-')
        des = dirpath_split[-1].split('-')

        # read in data for pairings
        row = {'cc': cc}
        for cli, ser in pairs:
            # TODO handle trace df (the per-interval / per-packet traces are currently discarded)
            (tp_mean, tp_conf), _ = read_iperf_pair_tp(dirpath, cli, ser, repetitions)
            row[ser + '_tp'] = tp_mean
            row[ser + '_tp_conf'] = tp_conf

            (rtt_mean, rtt_conf), _ = read_iperf_pair_rtt(dirpath, cli, repetitions)
            row[cli + '_rtt'] = rtt_mean
            row[cli + '_rtt_conf'] = rtt_conf

        # Add bw and de groups to the row; zip() limits this to the first four groups.
        for bandwidth, group in zip(bws, groups):
            row['bw_' + group] = parse_number(bandwidth)
        for delay, group in zip(des, groups):
            row['de_' + group] = parse_number(delay)

        rows.append(row)

    # Build the frame once instead of appending row by row (DataFrame.append was
    # removed in pandas 2.0).
    return pd.DataFrame(rows)

def load_single_iperf_experiment(topology, ccs, rates, delays):
    """
    Load the per-repetition client throughput of a single experiment folder
    './logs/<topology>/<ccs>/<rates>/<delays>/'.

    Only files ending in 'iperf.csv' whose host name is a client of the topology are
    read (via ``read_iperf_srv_tp``).

    Returns a DataFrame indexed by repetition ('rep') with one '<client>_tp' column per
    client plus the constant columns 'topology', 'ccs', 'bw' and 'delays'.
    """
    pairs = get_iperf_pairings(topology)
    client_names = [c for c, _ in pairs]

    columns = {n + '_tp': {} for n in client_names}

    for dirpath, dirnames, filenames in os.walk('./logs/{}/{}/{}/{}/'.format(topology, ccs, rates, delays)):
        for file_name in filenames:
            # Filter before parsing so unrelated files never reach extract_rep_host_name.
            if not file_name.endswith('iperf.csv'):
                continue
            try:
                host_name, rep = extract_rep_host_name(file_name)
            except (IndexError, ValueError):
                # Not of the form '<rep>-<host>_iperf.csv'
                continue
            if host_name not in client_names:
                continue

            # add each client as a tp datapoint
            columns[host_name + '_tp'][rep] = read_iperf_srv_tp(dirpath, file_name)

    df = pd.DataFrame(columns)
    df.index.name = 'rep'
    df['topology'] = topology
    df['ccs'] = ccs
    df['bw'] = rates
    # The original bare lookup df['delays'] raised KeyError; the column must be assigned.
    df['delays'] = delays
    return df
        


# load_iperf_experiments_new('single_bottleneck')[0]
# Show the per-repetition client throughput of one single_bottleneck experiment folder.
load_single_iperf_experiment('single_bottleneck', 'lia-lia', '10Mbps', '10.0ms')

In [None]:
def init_plots(df, delay_point=30, bw_point=10):
    """
    Plot throughput and RTT against the delay of group a, one row of subplots per client.

    df -- result table with 'cc', 'de_a' (optionally 'de_b'), 'bw*' columns and the
          '<host>_tp', '<host>_tp_conf', '<host>_rtt', '<host>_rtt_conf' columns
    delay_point -- value 'de_b' is fixed to (marked by a dotted vertical line)
    bw_point -- value every 'bw*' column is filtered to

    The number of clients is the number of '*_rtt_conf' columns; client i is assumed to
    be host h(2i+1) and its server h(2i+2). Prints a notice and returns without plotting
    when df has more than two 'de_' columns.
    """
    delay_columns = sum(1 for name in df.columns if name.startswith('de_'))
    if delay_columns > 2:
        print('Plotting does not yet support more than two delay groups!')
        return

    n_clients = sum(1 for name in df.columns if name.endswith('_rtt_conf'))
    fig = plt.figure(figsize=(9.5, 3.5*n_clients))

    # TODO handle this better, this only takes out part of what should be removed
    bw_columns = [name for name in df.columns if name.startswith('bw')]
    for name in bw_columns:
        df = df[df[name] == bw_point]

    has_second_delay = 'de_b' in df.columns
    for row in range(n_clients):
        tp_ax = fig.add_subplot(n_clients, 2, 2*row+1)
        rtt_ax = fig.add_subplot(n_clients, 2, 2*row+2)
        cli = 'h{}'.format(row*2+1)
        serv = 'h{}'.format(row*2+2)
        tp_col = '{}_tp'.format(serv)
        rtt_col = '{}_rtt'.format(cli)

        for cc in df['cc'].unique():
            per_cc = df[df['cc'] == cc]
            if has_second_delay:
                per_cc = per_cc[per_cc['de_b'] == delay_point]

            per_cc = per_cc.sort_values('de_a')

            per_cc.plot(x='de_a', y=tp_col, yerr='{}_tp_conf'.format(serv), label=cc,
                        ax=tp_ax, grid=True) #, ylim=(0,22))
            per_cc.plot(x='de_a', y=rtt_col, yerr='{}_rtt_conf'.format(cli), label=cc,
                        ax=rtt_ax, grid=True) #, ylim=(0,130))

        panels = ((tp_ax, 'Iperf Throughput on {}'.format(serv), 'Mbps Throughput'),
                  (rtt_ax, 'Iperf Packet RTT on {}'.format(cli), 'ms'))
        for ax, title, ylabel in panels:
            ax.set_title(title)
            ax.set_ylabel(ylabel)
            ax.set_xlabel('ms delay of (second) link')
            ax.autoscale(True, axis='x')
            if has_second_delay:
                ax.axvline(delay_point, color='black', ls=':')

    plt.tight_layout()
        

def de_3d_plot(df):
    """
    Draw one 3D surface per congestion control in a 3x2 grid: 'h2_tp' over the
    delays of group a and group b, restricted to rows with bw_a == bw_b == 10.
    """
    fig = plt.figure(figsize=(9.5, 9.5))
    for position, cc in enumerate(df['cc'].unique(), start=1):
        ax = fig.add_subplot(3, 2, position, projection='3d')
        ax.set_title('Throughput for {}'.format(cc))
        ax.set_xlabel('ms delay group a')
        ax.set_ylabel('ms delay group b')

        per_cc = df[df['cc'] == cc]
        at_10 = (per_cc['bw_a'] == 10) & (per_cc['bw_b'] == 10)
        surface = per_cc[at_10]
        ax.plot_trisurf(surface['de_a'], surface['de_b'], surface['h2_tp'], cmap='viridis')
        ax.set_zlim(0, 25)
        ax.view_init(25, 25)
    plt.tight_layout()
        
def tp_3d_plot(df):
    """
    Open a separate figure per congestion control showing 'h2_tp' as a 3D surface over
    the delays of group a and group b, restricted to rows with bw_a == bw_b == 10.
    """
    for cc in df['cc'].unique():
        plt.figure()
        surface_ax = plt.axes(projection='3d')
        surface_ax.set_title('Throughput for {}'.format(cc))
        surface_ax.set_xlabel('ms delay group a')
        surface_ax.set_ylabel('ms delay group b')

        per_cc = df[df['cc'] == cc]
        at_10 = (per_cc['bw_a'] == 10) & (per_cc['bw_b'] == 10)
        surface = per_cc[at_10]
        surface_ax.plot_trisurf(surface['de_a'], surface['de_b'], surface['h2_tp'], cmap='viridis')
        surface_ax.view_init(25, 25)
    # Called once after the loop, i.e. it applies to the current (last) figure only.
    plt.tight_layout()
    
# init_plots(two_paths_df)
# de_3d_plot(two_paths_df)

In [None]:
def plot_rtt_timeline_per_cc(topo, bw_dir, de_dir, client='h1', repetitions=3, min_rtt_ms=0.06):
    """
    Plot the ACK RTT samples of one client over experiment time, one subplot per
    congestion control in ``cc_algorithms`` (3x2 grid).

    topo, bw_dir, de_dir -- path components of './logs/<topo>/<cc>/<bw_dir>/<de_dir>/'
    client -- host whose packet dumps are read
    repetitions -- number of repetitions to load per experiment
    min_rtt_ms -- samples with an RTT not above this value are hidden (default 0.06,
                  the threshold that was previously hard-coded)

    Every (repetition, tcp.stream) combination is drawn as its own '+' series.
    """
    fig = plt.figure(figsize=(9.5, 9.5))
    for i, cc in enumerate(cc_algorithms):
        ax = fig.add_subplot(3, 2, i+1)
        folder = './logs/{}/{}/{}/{}/'.format(topo, cc, bw_dir, de_dir)
        _, df = read_iperf_pair_rtt(folder, client, repetitions)
        df = df.dropna(subset=['tcp.analysis.ack_rtt'])
        df = df[df['tcp.analysis.ack_rtt'] > min_rtt_ms]

        for key, grp in df.groupby(['repetition', 'tcp.stream']):
            grp.plot(ax=ax, style='+', x='frame.time_relative', y='tcp.analysis.ack_rtt',
                     label='stream {}'.format(key))

        ax.set_title('{}: RTT Timeline on Host {}'.format(cc, client))
        ax.set_ylabel('ms RTT')
        ax.set_xlabel('s Experiment Time')
        ax.autoscale(True)
        ax.legend()

    plt.tight_layout()
    
def autolabel(rects, ax):
    """
    Attach a vertical text label above each bar in *rects*, displaying its height
    with two decimals. The label offset grows by 2 points per bar (3, 5, 7, ...).
    """
    for index, bar in enumerate(rects):
        bar_height = bar.get_height()
        label = '{:.2f}'.format(bar_height)
        anchor = (bar.get_x() + bar.get_width() / 2, bar_height)
        offset = (0, 3 + index*2)
        ax.annotate(label, xy=anchor, xytext=offset,
                    textcoords="offset points", ha='center', va='bottom', rotation=90)
        
def plot_tp_timeline_per_cc(topo, bw_dir, de_dir, servers=['h2'], repetitions=3, smoothing=1):
    """
    Plot the per-interval throughput from the server iperf logs, one subplot per
    (congestion control, server) combination.

    Each repetition is drawn as its own series ('+' markers, or a rolling mean over
    `smoothing` intervals when smoothing > 1). A bar to the right of the timeline shows
    the mean bandwidth of that repetition, labelled via ``autolabel``.

    topo, bw_dir, de_dir -- path components of './logs/<topo>/<cc>/<bw_dir>/<de_dir>'
    servers -- host names whose logs are plotted (the list is only read, never modified)
    repetitions -- number of repetitions to load per experiment
    smoothing -- rolling window size; values <= 1 disable smoothing
    """
    fig = plt.figure(figsize=(9.5, 9.5*len(servers)))
    for i, cc in enumerate(cc_algorithms):
        for j, serv in enumerate(servers):
            ax = fig.add_subplot(3*len(servers), 2, len(servers)*i+j+1)
            folder = './logs/{}/{}/{}/{}'.format(topo, cc, bw_dir, de_dir)
            df = read_iperf_srv_tp_trace(folder, client='h1', server=serv, repetitions=repetitions)

            # Group by the column name, not a one-element list: newer pandas yields
            # 1-tuples as keys for list groupers, which breaks the (rep+1) arithmetic below.
            for rep, grp in df.groupby('repetition'):
                width = grp['Interval_start'].max() / 25
                if smoothing > 1:
                    ax.plot(grp['Interval_start'], grp['Bandwidth'].rolling(window=smoothing).mean(),
                            label='rep {}'.format(rep))
                else:
                    grp.plot(ax=ax, x='Interval_start', y='Bandwidth', style='+',
                             label='rep {}'.format(rep))
                # Summary bar placed behind the end of the timeline, one slot per repetition.
                b = ax.bar(grp['Interval_start'].max()+(rep+1)*width+5, grp['Bandwidth'].mean(), width=width)
                ax.autoscale(True)
                autolabel(b, ax)

            ax.title.set_text('{}: Throughput Timeline on Host {}'.format(cc, serv))
            ax.set_ylabel('Mbps Throughput')
            ax.set_xlabel('s Experiment Time')
            ax.set_ylim((0, 50))
            ax.legend()
    plt.tight_layout()

# plot_rtt_timeline_per_cc('shared_link', '15Mbps', '10ms')
# plot_tp_timeline_per_cc('shared_link', '15Mbps', '10ms', smoothing=10)

# Two Paths

In [None]:
# Aggregate all logged 'two_paths' runs (one row per congestion control / bandwidth / delay folder).
two_paths_df = load_iperf_experiments_new('two_paths')

In [None]:
# Throughput and RTT over the delay of group a, with the delay of group b fixed at 30.
init_plots(two_paths_df, 30)
# init_plots(two_paths_df, 90)
# 3D throughput surface over both delay groups.
de_3d_plot(two_paths_df)

In [None]:
# RTT timeline (one repetition) and throughput timeline (three repetitions) of the
# 10Mbps-10Mbps / 10ms-10ms two_paths runs.
plot_rtt_timeline_per_cc('two_paths', '10Mbps-10Mbps', '10ms-10ms', 'h1', 1)
plot_tp_timeline_per_cc('two_paths', '10Mbps-10Mbps', '10ms-10ms', ['h2'], 3)

# Shared Bottleneck

In [None]:
# Aggregate all logged 'shared_link' runs (one row per congestion control / bandwidth / delay folder).
shared_link_df = load_iperf_experiments_new('shared_link')

In [None]:
# Overview: which group-a bandwidth / delay values were loaded, plus the first rows of the table.
print('BW: ', shared_link_df['bw_a'].unique())
print('De: ', shared_link_df['de_a'].unique())
shared_link_df.head(10)

In [None]:
# Plot only the runs whose group-a delay differs from 10 (init_plots additionally
# keeps only rows whose bandwidth columns equal its default bw_point of 10).
tmp_shared_link_df = shared_link_df[shared_link_df['de_a'] != 10]
init_plots(tmp_shared_link_df)
# de_3d_plot(shared_link_df)

In [None]:
# weird results for 0ms/30ms
# Timelines of the 10Mbps / 30ms shared_link runs: RTT on h1 and throughput on h2 and h4
# (rolling mean over 5 intervals).
bw, de = '10Mbps', '30ms'
plot_rtt_timeline_per_cc('shared_link', bw, de, 'h1', 3)
plot_tp_timeline_per_cc('shared_link', bw, de, ['h2', 'h4'], 3, smoothing=5)

# MP vs SP

In [None]:
# Aggregate all logged 'mp_vs_sp' runs (one row per congestion control / bandwidth / delay folder).
mp_vs_sp_df = load_iperf_experiments_new('mp_vs_sp')

In [None]:
# Absolute throughput of h2 (mp) and h4 (sp) against the bandwidth ratio bw_a / bw_b,
# one panel per congestion control, for the runs with bw_a == 15.
fig = plt.figure(figsize=(9.5, 9.5))
# .copy() gives an independent frame, so adding the 'x' column does not write to a
# slice of mp_vs_sp_df (avoids pandas' SettingWithCopyWarning).
cur_df = mp_vs_sp_df[mp_vs_sp_df['bw_a'] == 15].copy()
cur_df['x'] = cur_df['bw_a']/cur_df['bw_b']
cur_df = cur_df.sort_values('x')

for i, cc in enumerate(mp_vs_sp_df['cc'].unique()):
    ax = fig.add_subplot(3, 2, i+1)
    tmp = cur_df[cur_df['cc'] == cc]

    ax.plot(tmp['x'], tmp['h2_tp'], label='h2 (mp)')
    ax.plot(tmp['x'], tmp['h4_tp'], label='h4 (sp)')
    # Dashed reference lines: the configured bandwidths of both groups.
    ax.plot(tmp['x'], tmp['bw_b'], label='bw_b', linestyle='dashed', c='blue')
    ax.plot(tmp['x'], tmp['bw_a'], label='bw_a', linestyle='dashed', c='orange')
    ax.set_title('{}'.format(cc))
    ax.set_xlabel('bw_a / bw_b')
    ax.set_ylabel('Mbps Throughput')
    ax.set_ylim((0, cur_df['bw_b'].max()+1))
    ax.legend()
    ax.grid()
plt.tight_layout()

In [None]:
# Normalized throughput against the bandwidth ratio bw_a / bw_b, one panel per
# congestion control, for the runs with bw_a == 15: h2 is divided by bw_a, h4 by bw_b.
fig = plt.figure(figsize=(9.5, 9.5))
# .copy() gives an independent frame, so adding the 'x' column does not write to a
# slice of mp_vs_sp_df (avoids pandas' SettingWithCopyWarning).
cur_df = mp_vs_sp_df[mp_vs_sp_df['bw_a'] == 15].copy()
cur_df['x'] = cur_df['bw_a']/cur_df['bw_b']
cur_df = cur_df.sort_values('x')

for i, cc in enumerate(mp_vs_sp_df['cc'].unique()):
    ax = fig.add_subplot(3, 2, i+1)
    tmp = cur_df[cur_df['cc'] == cc]

    ax.plot(tmp['x'], tmp['h2_tp'] / tmp['bw_a'], label='h2 (mp)')
    ax.plot(tmp['x'], tmp['h4_tp'] / tmp['bw_b'], label='h4 (sp)')
    #ax.plot(tmp['x'], tmp['bw_b'], label='bw_b', linestyle='dashed')
    #ax.plot(tmp['x'], tmp['bw_a'], label='bw_a', linestyle='dashed')
    ax.set_title('{}'.format(cc))
    ax.set_xlabel('bw_a / bw_b')
    ax.set_ylabel('Throughput normalized')
    ax.set_ylim((0, 2))
    ax.legend()
    ax.grid()
plt.tight_layout()

In [None]:
# init_plots(mp_vs_sp_df)
# de_3d_plot(mp_vs_sp_df)


 # Single Bottleneck

In [None]:
# Aggregate all logged 'single_bottleneck' runs (one row per congestion control / bandwidth / delay folder).
single_bottleneck_df = load_iperf_experiments_new('single_bottleneck')

In [None]:
# Display the aggregated single_bottleneck results table.
single_bottleneck_df

In [None]:
# Default plots (de_b fixed at 30, bandwidth 10) and 3D throughput surfaces for single_bottleneck.
init_plots(single_bottleneck_df)
de_3d_plot(single_bottleneck_df)

# Asymmetric MP

In [None]:
# Aggregate all logged 'asym_mp' runs (one row per congestion control / bandwidth / delay folder).
asym_mp_df = load_iperf_experiments_new('asym_mp')

In [None]:
# Default plots (de_b fixed at 30, bandwidth 10) and 3D throughput surfaces for asym_mp.
init_plots(asym_mp_df)
de_3d_plot(asym_mp_df)

# OLD CODE

In [None]:
# filenames: ./logs/{topo}/{cc}/{0Mbps-0Mbps}/{0ms-0ms}/{rep}-{hostname}_iperf.txt
base_path = 'logs'
# Path template up to and including '<rep>-'; callers append the host name and file suffix.
# Placeholders in order: topology, cc, bandwidth a, bandwidth b, delay a, delay b, repetition.
dir_path = './' + base_path + '/{}/{}/{}Mbps-{}Mbps/{}ms-{}ms/{}-'

def read_flow(sender_file_name, receiver_file_name):
    """
    Join the tab-separated sender and receiver packet logs of one flow on 'pkt_id'
    (outer join) and derive timing columns:

    'latency [s]' / 'latency [ms]' -- receive time minus send time
    'sec'       -- receive time relative to the send time in row 0 of the merged frame
    'disk_sec'  -- 'sec' rounded down to whole seconds
    'disk_msec' -- 'sec' in milliseconds, rounded down
    """
    sent = pd.read_csv(sender_file_name, sep='\t')
    received = pd.read_csv(receiver_file_name, sep='\t')
    flow = sent.merge(received, how='outer', on='pkt_id')

    latency_s = flow['rcv_t [s]'] - flow['snd_t [s]']
    flow['latency [s]'] = latency_s
    flow['latency [ms]'] = latency_s * 1000

    elapsed = flow['rcv_t [s]'] - flow.at[0, 'snd_t [s]']
    flow['sec'] = elapsed
    flow['disk_sec'] = np.floor(elapsed)
    flow['disk_msec'] = np.floor(elapsed * 1000)
    return flow

def tp_array(df, bucket_size_ms=100):
    """
    Sum 'payload [bytes]' into time buckets of `bucket_size_ms` along 'disk_msec' and
    convert each sum to Mbps.

    The bucket edges are 0, bucket_size_ms, ..., (m-1)*bucket_size_ms with
    m = int(max(disk_msec) / bucket_size_ms); samples beyond the last edge are not counted.

    Returns a DataFrame with the bucket start ('msec') and its throughput ('tp [Mbps]').
    """
    bucket_count = int(df['disk_msec'].max() / bucket_size_ms)
    edges = [idx*bucket_size_ms for idx in range(bucket_count)]
    byte_sums, borders, _ = stats.binned_statistic(df['disk_msec'], df['payload [bytes]'],
                                                   'sum', bins=edges)
    # bytes per bucket -> Mbit per second: * 8 bit / 1e6 / (bucket_size_ms / 1e3 s)
    throughput = byte_sums * 0.008 / bucket_size_ms
    return pd.DataFrame({'msec': borders[:-1], 'tp [Mbps]': throughput})

def mean_tp(df, cutoff_s=2):
    """
    Mean bucket throughput [Mbps] of a flow, ignoring the first and the last
    `cutoff_s` seconds (relative to the largest 'disk_msec' value).
    """
    cutoff_ms = cutoff_s*1e3
    last_ms = df['disk_msec'].max()
    buckets = tp_array(df)
    inside = (buckets['msec'] >= cutoff_ms) & (buckets['msec'] < last_ms - cutoff_ms)
    return buckets.loc[inside, 'tp [Mbps]'].mean()

def mean_latency(df, cutoff_s=2):
    """
    Mean 'latency [ms]' of the rows whose 'disk_msec' lies strictly between `cutoff_s`
    seconds after the start and `cutoff_s` seconds before the largest 'disk_msec' value.
    """
    cutoff_ms = cutoff_s * 1000
    times = df['disk_msec']
    inside = (times > cutoff_ms) & (times < times.max() - cutoff_ms)
    return df.loc[inside, 'latency [ms]'].mean()

# TODO enable single flow and multiflow loading
def load_experiments(topology, cc_algorithms, tps_a, tps_b, delays_a, delays_b, repetitions=3, postfix=''):
    """
    Load the packet logs of two flows (h1->h2 and h3->h4) for every combination of
    congestion control, bandwidths and delays and summarise them over the repetitions.

    File names are built from the module-level ``dir_path`` template plus
    '<host><postfix>.txt'.

    Returns a DataFrame with one row per parameter combination containing the mean and
    the standard deviation (over the per-repetition means) of throughput and latency
    of both flows.
    """
    metrics = ['mean_tp_flow1', 'mean_de_flow1', 'mean_tp_flow2', 'mean_de_flow2']
    rows = []
    # Same iteration order as five nested loops.
    for cc, tp_a, tp_b, delay_a, delay_b in itertools.product(cc_algorithms, tps_a, tps_b,
                                                             delays_a, delays_b):
        per_rep = []
        for rep in range(repetitions):
            # file name without host specified!
            file_name = dir_path.format(topology, cc, tp_a, tp_b, delay_a, delay_b, rep)
            flow1 = read_flow(file_name + 'h1{}.txt'.format(postfix), file_name + 'h2{}.txt'.format(postfix))
            flow2 = read_flow(file_name + 'h3{}.txt'.format(postfix), file_name + 'h4{}.txt'.format(postfix))
            per_rep.append({
                'mean_tp_flow1': mean_tp(flow1),
                'mean_de_flow1': mean_latency(flow1),
                'mean_tp_flow2': mean_tp(flow2),
                'mean_de_flow2': mean_latency(flow2)
            })
        # Explicit columns so that zero repetitions yield NaN statistics instead of a KeyError.
        temp = pd.DataFrame(per_rep, columns=metrics)
        # FIXME std of means is not good enough, calculate real std
        rows.append({
            'cc': cc,
            'tp_a': tp_a,
            'tp_b': tp_b,
            'delay_a': delay_a,
            'delay_b': delay_b,
            'mean_tp_flow1': temp['mean_tp_flow1'].mean(),
            'std_tp_flow1': temp['mean_tp_flow1'].std(),
            'mean_de_flow1': temp['mean_de_flow1'].mean(),
            'std_de_flow1': temp['mean_de_flow1'].std(),
            'mean_tp_flow2': temp['mean_tp_flow2'].mean(),
            'std_tp_flow2': temp['mean_tp_flow2'].std(),
            'mean_de_flow2': temp['mean_de_flow2'].mean(),
            'std_de_flow2': temp['mean_de_flow2'].std()
        })
    # Build the frame once instead of appending row by row (DataFrame.append was
    # removed in pandas 2.0).
    return pd.DataFrame(rows)


# Topology names of the older experiments.
topologies = ['mptcp-host-pair', 'MPflow_lpkt','MPvsSP', 'MPvsSP_lpkt']
# NOTE: re-binds cc_algorithms to the same values already assigned in the first cell.
cc_algorithms = ['lia', 'olia', 'balia', 'wvegas', 'cubic']

# Changing Latency Analysis

In [None]:
# logs/mptcp-host-pair/balia/1ms-1ms/0-h1.txt

# Mean throughput / latency of the h1->h2 flow for every congestion control and every
# combination of the two link delays, at 10 Mbps on both links.
topo = 'two_paths'

delays = np.arange(1, 102, 20)
tp = 10

# df = load_experiments(topo, cc_algorithms, [10], [10], delays, delays)

# Rows are collected in lists and turned into frames once (DataFrame.append was
# removed in pandas 2.0).
rows = []
for cc in cc_algorithms:
    for delay_a in delays:
        for delay_b in delays:
            rep_rows = []
            for rep in range(1): # TODO reenable repetitions 3
                # file name without host specified!
                file_name = dir_path.format(topo, cc, tp, tp, delay_a, delay_b, rep)
                flow = read_flow(file_name + 'h1.txt', file_name + 'h2.txt')
                rep_rows.append({
                    'cc': cc,
                    'delay_a': delay_a,
                    'delay_b': delay_b,
                    'rep': rep,
                    'mean_tp': mean_tp(flow),
                    'mean_de': mean_latency(flow)
                })
                # print(flow)
            temp_df = pd.DataFrame(rep_rows)
            rows.append({
                'cc': cc,
                'delay_a': delay_a,
                'delay_b': delay_b,
                'mean_tp': temp_df['mean_tp'].mean(),
                'std_tp': temp_df['mean_tp'].std(),
                'mean_de': temp_df['mean_de'].mean(),
                'std_de': temp_df['mean_de'].std(),
            })
df = pd.DataFrame(rows)

df

In [None]:
# iperf csv
# timestamp,source_address,source_port,destination_address,destination_port,interval,transferred_bytes,bits_per_second
def read_iperf(sender_file_name, receiver_file_name):
    """
    Read a header-less iperf CSV report of the sender and add an 'Mbps' column
    (bits_per_second / 1e6). `receiver_file_name` is accepted but not read.
    """
    columns = ('timestamp source_address source_port destination_address '
               'destination_port xxx interval transferred_bytes bits_per_second').split()
    report = pd.read_csv(sender_file_name, names=columns, index_col=False)
    report['Mbps'] = report['bits_per_second'].div(1000000)
    return report



# TODO: remove when more data is available
def load_iperf_experiments(topology, cc_algorithms, tps_a, tps_b, delays_a, delays_b, repetitions=range(3)):
    """
    Load the iperf CSV report of h1 for every combination of congestion control,
    bandwidths and delays and summarise the throughput over the repetitions.

    File names are built from the module-level ``dir_path`` template. The 'Mbps' value
    of the last CSV row is taken as the throughput of a repetition.

    Returns a DataFrame with one row per parameter combination containing
    'mean_tp_flow1' and 'std_tp_flow1' (std over the per-repetition values).
    """
    rows = []
    # Same iteration order as five nested loops.
    for cc, tp_a, tp_b, delay_a, delay_b in itertools.product(cc_algorithms, tps_a, tps_b,
                                                             delays_a, delays_b):
        per_rep = []
        for rep in repetitions:
            # file name without host specified!
            file_name = dir_path.format(topology, cc, tp_a, tp_b, delay_a, delay_b, rep)
            flow1 = read_iperf(file_name + 'h1_iperf.csv', file_name + 'h2_iperf.csv')
            per_rep.append({'mean_tp_flow1': flow1['Mbps'].iloc[-1]})
        # Explicit columns so that zero repetitions yield NaN statistics instead of a KeyError.
        temp = pd.DataFrame(per_rep, columns=['mean_tp_flow1'])
        # FIXME: std of means is not good enough, calculate real std
        rows.append({
            'cc': cc,
            'tp_a': tp_a,
            'tp_b': tp_b,
            'delay_a': delay_a,
            'delay_b': delay_b,
            'mean_tp_flow1': temp['mean_tp_flow1'].mean(),
            'std_tp_flow1': temp['mean_tp_flow1'].std(),
        })
    # Build the frame once instead of appending row by row (DataFrame.append was
    # removed in pandas 2.0).
    return pd.DataFrame(rows)
    
#delays = np.arange(1, 102, 20)
#read_iperf('./logs/two_paths/lia/10Mbps-10Mbps/0ms-0ms/0-h1_iperf.txt', '')

#df_iperf = load_iperf_experiments('mptcp-host-pair', cc_algorithms, [10], [10], delays, delays, repetitions=[0])
#df_iperf

In [None]:
# from scapy.utils import

def read_iperf_text(sender_file_name, receiver_file_name):
    """
    Extract the summary throughput values from the text log of the sender.

    The 4th-last line is read as the sender summary (value = 4th-last token) and the
    3rd-last line as the receiver summary (value = 3rd-last token).
    `receiver_file_name` is accepted but not read.

    Returns a Series with the entries 'client Mbps' and 'server Mbps'.
    """
    with open(sender_file_name, 'r') as handle:
        all_lines = handle.read().splitlines()
    sender_tokens = all_lines[-4].split()
    receiver_tokens = all_lines[-3].split()

    result = pd.Series()
    result['client Mbps'] = float(sender_tokens[-4])
    result['server Mbps'] = float(receiver_tokens[-3])
    return result
    
def load_iperf_experiments_txt(topology, cc_algorithms, tps_a, tps_b, delays_a, delays_b, repetitions=range(3),
                              pcap=False):
    """
    Load the iperf text logs of h1 for the given congestion controls, bandwidths and
    delays and summarise the throughput (and optionally the RTT) over the repetitions.

    File names are built from the module-level ``dir_path`` template. With ``pcap=True``
    the mean RTT of '<...>h1_iperf_dump.pcap' is read through ``read_rtt_from_pcap``;
    otherwise 'mean_rtt_flow1' is NaN.

    Only the first entry of ``tps_b`` is processed for each ``tp_a`` (see the ``break``).

    Returns a DataFrame with one row per processed parameter combination containing
    mean / std of the server- and client-side throughput and 'mean_rtt_flow1'.
    """
    rows = []
    for cc in cc_algorithms:
        for tp_a in tps_a:
            for tp_b in tps_b:
                for delay_a in delays_a:
                    for delay_b in delays_b:
                        per_rep = []
                        for rep in repetitions:
                            # file name without host specified!
                            file_name = dir_path.format(topology, cc, tp_a, tp_b, delay_a, delay_b, rep)
                            flow1 = read_iperf_text(file_name + 'h1_iperf.csv', file_name + 'h2_iperf.csv')

                            # read rtt; NaN (not None) keeps the column numeric when no pcap is read
                            rtt1 = np.nan
                            if pcap:
                                rtt1 = read_rtt_from_pcap(file_name + 'h1_iperf_dump.pcap')

                            per_rep.append({
                                'tp_flow1_c': flow1['client Mbps'],
                                'tp_flow1': flow1['server Mbps'],
                                'rtt_flow1': rtt1,
                            })
                        # Explicit columns so that zero repetitions yield NaN statistics
                        # instead of a KeyError.
                        temp = pd.DataFrame(per_rep, columns=['tp_flow1_c', 'tp_flow1', 'rtt_flow1'])
                        # FIXME: std of means is not good enough, calculate real std
                        rows.append({
                            'cc': cc,
                            'tp_a': tp_a,
                            'tp_b': tp_b,
                            'delay_a': delay_a,
                            'delay_b': delay_b,
                            'mean_tp_flow1': temp['tp_flow1'].mean(),
                            'std_tp_flow1': temp['tp_flow1'].std(),
                            'mean_c_tp_flow1': temp['tp_flow1_c'].mean(),
                            'std_c_tp_flow1': temp['tp_flow1_c'].std(),
                            'mean_rtt_flow1': temp['rtt_flow1'].mean(),
                        })
                # NOTE(review): stops after the first tp_b value; kept as in the original,
                # presumably a leftover from a single-bandwidth experiment -- confirm.
                break
    # Build the frame once instead of appending row by row (DataFrame.append was
    # removed in pandas 2.0).
    return pd.DataFrame(rows)

# Load the two_paths iperf text logs at 10 Mbps for all delay combinations
# 0, 20, ..., 100 of both links, three repetitions each, and display the summary.
delays = np.arange(0, 101, 20)

df_iperf = load_iperf_experiments_txt('two_paths', cc_algorithms, [10], [10], delays, delays, repetitions=[0,1,2])
df_iperf

In [None]:
def read_rtt_from_pcap(file_name):
    """
    Mean of the tcp.analysis.ack_rtt values that tshark prints for a capture file.

    Runs 'tshark -r <file_name> -e tcp.analysis.ack_rtt -T fields' (stderr is merged
    into stdout) and averages every output line that parses as a number; all other
    lines are dropped.
    """
    tshark_cmd = ['tshark', '-r', file_name, '-e', 'tcp.analysis.ack_rtt', '-T', 'fields']
    proc = subprocess.Popen(tshark_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    raw_output = proc.communicate()[0]

    fields = pd.Series(raw_output.split(b'\n'))
    numeric = pd.to_numeric(fields, errors='coerce')
    return numeric.dropna().mean()

# read_rtt_from_pcap('./logs/two_paths/lia/10Mbps-10Mbps/0ms-0ms/0-h1_iperf_dump.pcap')

In [None]:
import matplotlib
import pandas
## comparing different iperf runtimes
# Left panel: 'two_paths_d' logs (repetition 0 only); right panel: 'two_paths' logs
# (3 repetitions, with std error bars). Both show server-side throughput over the delay
# of the second link while the first link's delay is fixed at delay_point.
delay_point = 40

delays = np.arange(0, 102, 20)

df_iperf = load_iperf_experiments_txt('two_paths', cc_algorithms, [10], [10],
                                      [delay_point], delays, repetitions=[0,1,2])
df_iperf_long = load_iperf_experiments_txt('two_paths_d', cc_algorithms, [10], [10],
                                       [delay_point], delays, repetitions=[0])


x_liperf = df_iperf_long[df_iperf_long['delay_a'] == delay_point]
x_iperf = df_iperf[df_iperf['delay_a'] == delay_point]


fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5.5))

for cc in x_liperf['cc'].unique():
    tmp = x_liperf[x_liperf['cc'] == cc]
    tmp.plot(x='delay_b', y='mean_tp_flow1', label=cc, ax=axes[0], legend=True, grid=True, ylim=(0,22))
    # tmp.plot(x='delay_b', y='mean_c_tp_flow1', label=cc + 'c', ax=axes[0], legend=True, grid=True, ylim=(0,22))

axes[0].set_title('Iperf with tcpdump but no repetitions')
axes[0].axvline(delay_point, color='black', ls=':')
axes[0].set_ylabel('Mbps')
axes[0].set_xlabel('ms delay of second link')

for cc in x_iperf['cc'].unique():
    tmp = x_iperf[x_iperf['cc'] == cc]
    tmp.plot(x='delay_b', y='mean_tp_flow1', yerr='std_tp_flow1', label=cc, ax=axes[1], legend=True, grid=True, ylim=(0,22))
    # tmp.plot(x='delay_b', y='mean_c_tp_flow1', yerr='std_c_tp_flow1', label=cc + 'c', ax=axes[1], legend=True, grid=True, ylim=(0,22))
    
axes[1].set_title('Iperf test')
axes[1].set_ylabel('Mbps')
axes[1].axvline(delay_point, color='black', ls=':')
axes[1].set_xlabel('ms delay of second link')

In [None]:
import matplotlib
import pandas

# Throughput (left panel) and RTT (right panel) against 'delay_b' for the rows
# recorded at a single 'delay_a' value.
delay_point = 60
delays = np.arange(0, 102, 30)

# NOTE(review): df_iperf is not assigned in this cell -- the load below is
# commented out, so the frame comes from whichever earlier cell last set it.
# If that frame holds no rows with delay_a == delay_point, the loop further
# down never runs and both panels stay empty. Confirm the intended data source.
#df_iperf = load_iperf_experiments_txt('two_paths', cc_algorithms, [10], [10],
#                                      [delay_point], delays, repetitions=[0,1,2], pcap=True)

x_iperf = df_iperf.loc[df_iperf['delay_a'] == delay_point]

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5.5))
tp_ax, rtt_ax = axes
second_link_label = 'ms delay of second link'

# The throughput panel is labelled before any line is drawn on it.
tp_ax.set_title('Iperf Throughput')
tp_ax.axvline(delay_point, color='black', ls=':')
tp_ax.set_ylabel('Mbps')
tp_ax.set_xlabel(second_link_label)

# One throughput line (with 'std_tp_flow1' error bars) and one RTT line per
# distinct 'cc' value; only the throughput axis gets a fixed y-range.
shared_line_kwargs = dict(x='delay_b', legend=True, grid=True)
for cc in x_iperf['cc'].unique():
    tmp = x_iperf[x_iperf['cc'] == cc]
    tmp.plot(y='mean_tp_flow1', yerr='std_tp_flow1', label=cc, ax=tp_ax,
             ylim=(0,22), **shared_line_kwargs)
    tmp.plot(y='mean_rtt_flow1', label=cc, ax=rtt_ax, **shared_line_kwargs)

rtt_ax.set_title('Iperf rtt')
rtt_ax.set_ylabel('s')
rtt_ax.axvline(delay_point, color='black', ls=':')
rtt_ax.set_xlabel(second_link_label)

In [None]:
## with delay comparison which is not yet working
# NOTE(review): the author marked this cell as work in progress. Whether
# 'mean_rtt_flow1' is present in df_iperf_long depends on
# load_iperf_experiments_txt, which is defined elsewhere -- confirm.

import matplotlib
import pandas
## comparing different iperf runtimes
delay_point = 40

delays = np.arange(0, 102, 20)

df_iperf = load_iperf_experiments_txt('two_paths', cc_algorithms, [10], [10],
                                      [delay_point], delays, repetitions=[0,1,2])
df_iperf_long = load_iperf_experiments_txt('two_paths_d', cc_algorithms, [10], [10],
                                       [delay_point], delays, repetitions=[0])


# Keep only the rows whose 'delay_a' equals the selected delay point.
x_liperf = df_iperf_long[df_iperf_long['delay_a'] == delay_point]
x_iperf = df_iperf[df_iperf['delay_a'] == delay_point]


# 2x2 grid: top row = throughput, bottom-left = RTT of the 'two_paths_d' set.
# The bottom-right panel is intentionally left empty for now (see TODO below).
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 5.5))

for cc in x_liperf['cc'].unique():
    tmp = x_liperf[x_liperf['cc'] == cc]
    tmp.plot(x='delay_b', y='mean_tp_flow1', label=cc, ax=axes[0,0], legend=True, grid=True, ylim=(0,22))
    # The RTT axis is auto-scaled: the fixed (0, 22) range belongs to the Mbps
    # throughput panels and was copy-pasted here, which hides the RTT curves.
    tmp.plot(x='delay_b', y='mean_rtt_flow1', label=cc, ax=axes[1,0], legend=True, grid=True)

axes[0,0].set_title('Iperf with tcpdump but no repetitions')
axes[0,0].axvline(delay_point, color='black', ls=':')
axes[0,0].set_ylabel('Mbps')
axes[0,0].set_xlabel('ms delay of second link')

# Label the RTT panel the same way the two-panel throughput/RTT cell does.
axes[1,0].set_title('Iperf rtt')
axes[1,0].axvline(delay_point, color='black', ls=':')
axes[1,0].set_ylabel('s')
axes[1,0].set_xlabel('ms delay of second link')

for cc in x_iperf['cc'].unique():
    tmp = x_iperf[x_iperf['cc'] == cc]
    tmp.plot(x='delay_b', y='mean_tp_flow1', yerr='std_tp_flow1', label=cc, ax=axes[0,1], legend=True, grid=True, ylim=(0,22))
    # TODO: RTT for this data set is not plotted yet; when enabled it should
    # also be auto-scaled rather than reuse the throughput y-range:
    # tmp.plot(x='delay_b', y='mean_rtt_flow1', label=cc, ax=axes[1,1], legend=True, grid=True)
    
axes[0,1].set_title('Iperf test')
axes[0,1].set_ylabel('Mbps')
axes[0,1].axvline(delay_point, color='black', ls=':')
axes[0,1].set_xlabel('ms delay of second link')

In [None]:
## comparing different iperf runtimes
def plot(delay_point=0):
    """Draw two side-by-side throughput panels for one fixed ``delay_a`` value.

    Two result sets are loaded, in this order:

    * ``'two_paths'`` via ``load_iperf_experiments_txt`` with the sweep
      ``np.arange(0, 102, 20)`` -- shown on the right panel ('Iperf').
    * ``'two_paths_m'`` via ``load_iperf_experiments`` with the sweep
      ``np.arange(0, 101, 10)`` -- shown on the left panel
      ('Iperf smaller steps').

    Each frame is reduced to the rows whose ``'delay_a'`` equals
    ``delay_point``; for every distinct ``'cc'`` value one line of
    ``'mean_tp_flow1'`` over ``'delay_b'`` is drawn with ``'std_tp_flow1'``
    error bars.

    Parameters
    ----------
    delay_point : optional
        Value handed to both loaders as a one-element list, used as the
        ``'delay_a'`` filter, and marked on both panels with a dotted
        vertical line. Defaults to 0.

    Returns
    -------
    None
        The function only creates a matplotlib figure.
    """
    delays = np.arange(0, 102, 20)

    df_iperf = load_iperf_experiments_txt(
        'two_paths', cc_algorithms, [10], [10],
        [delay_point], delays, repetitions=[0,1,2])
    df_iperf_long = load_iperf_experiments(
        'two_paths_m', cc_algorithms, [10], [10],
        [delay_point], np.arange(0, 101, 10), repetitions=[0,1,2])

    # Both frames are filtered before the figure is created.
    x_liperf = df_iperf_long[df_iperf_long['delay_a'] == delay_point]
    x_iperf = df_iperf[df_iperf['delay_a'] == delay_point]

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5.5))

    # (axis, rows to draw, panel title) -- left panel first, then right.
    panels = (
        (axes[0], x_liperf, 'Iperf smaller steps'),
        (axes[1], x_iperf, 'Iperf'),
    )
    for panel_ax, rows, title in panels:
        for algo in rows['cc'].unique():
            rows[rows['cc'] == algo].plot(
                x='delay_b', y='mean_tp_flow1', yerr='std_tp_flow1', capsize=5,
                label=algo, ax=panel_ax, legend=True, grid=True, ylim=(0,22))

        panel_ax.set_title(title)
        panel_ax.axvline(delay_point, color='black', ls=':')
        panel_ax.set_ylabel('Mbps')
        panel_ax.set_xlabel('ms delay of second link')