In [1]:
import os
import re
import shutil
import subprocess

import pandas as pd
import numpy as np

import plotly.plotly as py
from plotly.graph_objs import *

In [2]:
home = os.getcwd()
print(home)

/home/theano/mountpoint/tsch/mininetpipe


In [5]:
def remove_file_whitespace(fpath):
    # Removes whitespace from output file
    # No return value; replaces original .out file
    fname = os.path.basename(fpath)

    with open("tmp.txt", "w") as tmp:
        with open(fpath, "r") as f:
            for line in f:
                subbed = re.sub("\s*", "", line.strip())
                tmp.write("%s\n" % subbed)
    shutil.move("tmp.txt", fpath)
    
def convert_to_usec(fpath):
    # Converts the file at fpath to us by calling an octave program
    # No return value; will write a file fpath.dat
    # TODO: decide if this is reasonable or should just skip write-to-file
    remove_file_whitespace(fpath)
    
    subprocess.call(["octave", 
                    os.path.join(home, "convert_raw_to_usec.oct"),
                   os.path.basename(fpath).split(".")[0],
                os.path.split(fpath)[:-1][0]])
    
def load_datfiles(dirname, fname_filter="*"):
    # Loads all the files in dirname/, filtered by fname_filter
    # Returns a list of (filename, df) tuples
    files = os.listdir(dirname)
    files = [fname for fname in files 
             if fname.startswith(fname_filter) and ".out" in fname]
    
    dfs = []
    
    for fname in files: 
        stem = os.path.join(dirname, fname.split(".")[0])
        print("Working on %s..." % fname)
        if not os.path.exists(stem + ".dat"):
            print("Converting to usec...")
            convert_to_usec(stem + ".out")
        dfs.append(load_datfile(stem + ".dat"))
            
    return dfs

def load_datfile(fpath):
    # Returns a (filename, df) tuple where df has latency computed
    df = pd.read_csv(fpath, 
                     index_col=False, 
                     names=["send_secs", "recv_secs"])
    df["latency(us)"] = df["recv_secs"] - df["send_secs"]
    return (os.path.basename(fpath), df)

def plot_multiple_cdfs(df_tuple_list):
    # Takes a list of (fname, df) tuples and returns a plotly Fig of a CDF of RTTs
    # and a new df where each column is an experiment and values are computed RTTs
    # for that experiment
    
    # Sort by recv-send delay
    df_tuple_list.sort(key=lambda x: int(re.sub("[^\d]+", "", x[0].split("-")[-1])))
    
    dfs = [pd.Series(x[1]["latency(us)"], name=x[0]) for x in df_tuple_list]
    latencies = pd.concat(dfs, axis=1)
    latencies.columns = [", ".join([x.split("-")[0]] + x.split(".")[0].split("-")[2:]) for x in latencies.columns]

    data = []
    
    for column in latencies.columns:
        # Bins (bases) returned by np.histogram are open on the RIGHT edge, except the last one: 
        # [0, 1), [1, 2), ... [n-1, n]
        values, base = np.histogram(latencies[column], bins=1000)
        cumsum = np.cumsum(values)
        
        cdf = Scatter(x=base[:-1],
                      y=cumsum/float(len(latencies[column])),
                      name=column)
        data.append(cdf)
        
    buttons = []
    for i, column in enumerate(latencies.columns):
        buttons.append(
            dict(
                args=['visible', [False if x != i else True for x in range(len(latencies.columns))]],
                label=column,
                method='restyle'
            )
        )
    
    buttons.append(
        dict(
            args=['visible', [True for x in latencies.columns]],
            label='All',
            method='restyle'
        )
    )
    
    layout = Layout(
        title="CDF of RTT (us)",
        width=900,
        height=600,
        xaxis={'title' : 'RTT (us)'},
        yaxis={'title' : 'frequency'},
        updatemenus=list([
            dict(
                yanchor='top',
                buttons=buttons
            ),
        ]),
    )
    fig = Figure(data=Data(data), layout=layout)
    
    return fig, latencies

def plot_multiple_hists(df_tuple_list):
    # Takes a list of (fname, df) tuples and returns a plotly Fig for a histogram of RTTs
    # and a new df where each column is an experiment and values are computed RTTs
    # for that experiment
    
    # Sort by recv-send delay
    df_tuple_list.sort(key=lambda x: int(re.sub("[^\d]+", "", x[0].split("-")[-1])))
    
    dfs = [pd.Series(x[1]["latency(us)"], name=x[0]) for x in df_tuple_list]
    latencies = pd.concat(dfs, axis=1)
    latencies.columns = [", ".join([x.split("-")[0]] + x.split(".")[0].split("-")[2:]) for x in latencies.columns]

    data = []
    
    for column in latencies.columns:
        hist = Histogram(x=latencies[column].tolist(),
                         opacity=0.6,
                         name=column)
        data.append(hist)
        
    buttons = []
    for i, column in enumerate(latencies.columns):
        buttons.append(
            dict(
                args=['visible', [False if x != i else True for x in range(len(latencies.columns))]],
                label=column,
                method='restyle'
            )
        )
    
    buttons.append(
        dict(
            args=['visible', [True for x in latencies.columns]],
            label='All',
            method='restyle'
        )
    )
    
    layout = Layout(
        title="CDF of RTT (us)",
        width=900,
        height=600,
        xaxis={'title' : 'RTT (us)'},
        yaxis={'title' : 'count'},
        barmode='overlay',
        updatemenus=list([
            dict(
                yanchor='top',
                buttons=buttons
            ),
        ]),
    )
    fig = Figure(data=Data(data), layout=layout)
    
    return fig, latencies

def get_expdir(expname):
    # If your exp directory is under "results"
    return os.path.join(home, "results", expname)

In [6]:
dfs_tcp = load_datfiles(get_expdir("xlong_exps"), "TCP")  # search for files with "TCP" in name
fig_tcp, latencies_tcp = plot_multiple_cdfs(dfs_tcp)

py.iplot(fig_tcp, filename="cumulative histogram")

Working on TCP-1507734784-r1000000-s640.out...
Working on TCP-1507733920-r1000000-s160.out...
Working on TCP-1507734229-r1000000-s320.out...
Working on TCP-1507733295-r1000000-s20.out...
Working on TCP-1507733486-r1000000-s40.out...
Working on TCP-1507732990-r1000000-s0.out...
Working on TCP-1507733683-r1000000-s80.out...
Converting to usec...


In [7]:
dfs_udp = load_datfiles(get_expdir("xlong_exps"), "UDP")  # search for files with "TCP" in name
fig_udp, latencies_udp = plot_multiple_cdfs(dfs_udp)

py.iplot(fig_udp, filename="cumulative histogram")

Working on UDP-1507737518-r1000000-s160.out...
Converting to usec...
Working on UDP-1507737022-r1000000-s40.out...
Converting to usec...
Working on UDP-1507737817-r1000000-s320.out...
Converting to usec...
Working on UDP-1507738311-r1000000-s640.out...
Converting to usec...
Working on UDP-1507736871-r1000000-s20.out...
Converting to usec...
Working on UDP-1507737283-r1000000-s80.out...
Converting to usec...
Working on UDP-1507736742-r1000000-s0.out...
Converting to usec...


In [10]:
dfs_mtcp = load_datfiles(get_expdir("xlong_exps"), "mTCP")  # search for files with "TCP" in name


Working on mTCP-1507729668-r1000000-s80.out...
Working on mTCP-1507730539-r1000000-s640.out...
Working on mTCP-1507729227-r1000000-s0.out...
Working on mTCP-1507729326-r1000000-s20.out...
Working on mTCP-1507729477-r1000000-s40.out...
Working on mTCP-1507731297-r1000000-s1280.out...
Working on mTCP-1507729847-r1000000-s160.out...
Working on mTCP-1507730119-r1000000-s320.out...


ValueError: range parameter must be finite.

In [None]:
fig_mtcp, latencies_udp = plot_multiple_cdfs(dfs_mtcp)

py.iplot(fig_mtcp, filename="cumulative histogram")