In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import math
# Font type 42 asks matplotlib's PDF backend to embed TrueType (Type 42) fonts
# in saved PDFs (per the matplotlib fonts documentation).
plt.rcParams["pdf.fonttype"] = 42
# Select the Tk-based Agg backend for the figures created in this notebook.
matplotlib.use("TkAgg")
# Result directories to scan: one per simulated scheme, followed by the
# waterfilling output directory.
simulated_schemes = ["sperc", "rcp", "pfabric"]
result_dirs = ["../%s-scripts/files_to_run/results" % (name) for name in simulated_schemes]
result_dirs += ["../../waterfilling/output_from_ideal-144"]

import os
def get_fct_files(result_dirs):
    """
    Recursively walks every directory in result_dirs and returns the paths
    (formatted as "<dirpath>/<file name>") of all files whose name ends
    with "flow.tr", in the order os.walk yields them.
    """
    return ["%s/%s" % (folder, name)
            for root in result_dirs
            for folder, _, names in os.walk(root)
            for name in names
            if name.endswith("flow.tr")]

In [None]:
# Collect every flow trace under the result directories and show a short preview.
fct_files = get_fct_files(result_dirs)
preview = fct_files[:5]
print("%d files found: %s" % (len(fct_files), str(preview)))

In [None]:
# Update this cell when there is a change in the convention for naming files
def get_config(filename):
    """
    Parses the experiment configuration encoded in a flow trace filename.

    Expected form:
      <dir>/cdf<cdf>_load<load>_topo<topo>_<servers>_<scheme>_<kw>_<date>.flow.tr

    Returns a dict with the keys cdf, load, topo, servers, scheme, kw, date
    (all values are the raw strings from the filename), or an empty dict if
    the filename does not match the naming convention.
    """
    # Imported locally so this function works regardless of which other
    # cells have already been executed.
    import re

    c = {}
    # The dots of the ".flow.tr" suffix are escaped and the pattern is anchored
    # at the end, so only names that really end in ".flow.tr" are accepted.
    config_match = re.match(
        r".*/cdf(.*)_load(.*)_topo(.*)_(.*)_(.*)_(.*)_(.*)\.flow\.tr$", filename)
    if config_match:
        c["cdf"], c["load"], c["topo"], c["servers"], c["scheme"], c["kw"], c["date"] = \
            config_match.groups()
    return c

    

In [None]:
# Update this cell to change appearance of plots

fontsize = 12

# scheme: {kw: [line_label, style, color, alpha]}
styles = {
    "sperc": {
        "wt1-pri2-match-ign": ["s-PERC (basic)", "solid", "green", 0.9],
        "wt1-pri2-match-start100-short-ign": ["s-PERC (short)", "solid", "blue", 0.9],
    },
    "rcp": {"a0.4b0.2": ["RCP", "dashed", "red", 0.9]},
    "pfabric": {"basic": ["p-Fabric", "-.", "orange", 0.9]},
    "waterfilling": {"ideal-maxmin": ["Ideal", "dotted", "black", 0.9]},
}

# (scheme, kw) pairs; files of one workload are sorted into this order before plotting
schemes_in_order = [
    ("pfabric", "basic"),
    ("rcp", "a0.4b0.2"),
    ("sperc", "wt1-pri2-match-start100-short-ign"),
    ("sperc", "wt1-pri2-match-ign"),
    ("waterfilling", "ideal-maxmin"),
]

# legend anchor per "<cdf><load>" key
legend_rel_pos = {
    "learning60": (0.45, 0.05),
    "learning80": (0.45, 0.05),
    "search60": (0.35, 1.52),
    "search80": (0.60, 1.48),
}

# per CDF: upper bounds of the byte-fraction buckets that get one subplot each
buckets_by_cdf = {
    "search": [0.01, 0.33, 0.66, 1.0],
    "learning": [0.01, 1.0],
}

def get_styles(filename):
    """
    Looks up the [line_label, style, color, alpha] entry registered in
    `styles` for the scheme and kw parsed from filename.
    """
    parsed = get_config(filename)
    scheme_name = parsed["scheme"]
    keyword = parsed["kw"]
    return styles[scheme_name][keyword]

In [None]:
import re

# assuming spine-leaf topology
def flow_rtt(gid, hosts_per_tor=16, intra_tor_rtt=10.8e-6, inter_tor_rtt=11.6e-6):
    """
    Returns the RTT value to use for the flow identified by gid.

    gid: "<src host>-<dst host>" string of integer host ids.
    hosts_per_tor: number of consecutive host ids that share one ToR.
    intra_tor_rtt: value returned when both hosts map to the same ToR.
    inter_tor_rtt: value returned otherwise.

    The RTT values are presumably in seconds (they are added to a transfer
    time computed from a rate in Gb/s) -- TODO confirm.
    Raises ValueError if gid does not consist of exactly two integer ids.
    """
    # Floor division keeps the ToR index an exact integer (no float round trip).
    src_tor, dst_tor = [int(host) // hosts_per_tor for host in gid.split("-")]
    if src_tor == dst_tor:
        return intra_tor_rtt
    return inter_tor_rtt

def fct_file_to_df(filename):
    """
    Loads one whitespace-separated flow trace file into a DataFrame.

    The configuration parsed from the filename is printed, and its scheme
    selects the column layout:
      * "waterfilling": bytes is rescaled by 1460/1500 (presumably wire bytes
        to payload bytes -- TODO confirm) and fldur_data is derived as
        fldur + flow_rtt(gid).
      * any other scheme: the trace already provides fldur_data.

    Columns whose names end in "_" presumably hold the textual field labels
    that precede each value in the trace -- TODO confirm against the format.

    Raises KeyError if the filename does not follow the naming convention
    (get_config returns an empty dict in that case).
    """
    config = get_config(filename)

    print(", ".join(["%s: %s" % (key, config[key]) for key in
                     ["cdf", "load", "topo", "servers",
                      "scheme", "kw", "date"]]))

    if config["scheme"] == "waterfilling":
        # sep=r"\s+" is the documented equivalent of the deprecated
        # delim_whitespace=True.
        df = pd.read_csv(filename, sep=r"\s+", header=None,
                         names=["fid_", "fid", "end_time_", "end_time",
                                "start_time_", "start_time", "fldur_", "fldur",
                                "bytes_", "bytes", "tmp_pkts_", "tmp_pkts",
                                "gid_", "gid"])

        df["bytes"] = (df["bytes"] / 1500) * 1460
        df["fldur_data"] = df["fldur"] + df["gid"].map(flow_rtt)
    else:
        # Names follow the label_/value pattern of every other pair. A repeated
        # "actfl_" name would be rejected by read_csv as a duplicate, so the
        # value columns are called "actfl" and "bps".
        df = pd.read_csv(filename, sep=r"\s+", header=None,
                         names=["flow_stats", "flow_end", "gid_", "gid",
                                "start_time_", "start_time", "pid_", "pid",
                                "fid_", "fid", "bytes_", "bytes", "fldur_",
                                "fldur", "fldur_data_", "fldur_data", "actfl_",
                                "actfl", "bps_", "bps", "tmp_pkts_", "tmp_pkts",
                                "bytes_scheduled_", "bytes_scheduled",
                                "bytes_sent_", "bytes_sent"])
    return df

In [None]:

def transfer_time(num_bytes, line_rate=100): #Gb/s
    """
    Time needed to send num_bytes at line_rate (given in Gb/s):
    number of bits divided by bits per second.
    """
    bits = num_bytes * 8.0
    bits_per_second = line_rate * 1.0e9
    return bits / bits_per_second

def augment_df(df):
    """
    Adds two columns to df in place and returns df:
      min_fct  = transfer_time(bytes) + flow_rtt(gid)
      norm_fct = fldur_data / min_fct
    """
    serialization = df["bytes"].map(transfer_time)
    round_trip = df["gid"].map(flow_rtt)
    df["min_fct"] = serialization + round_trip
    df["norm_fct"] = df["fldur_data"] / df["min_fct"]
    return df
    

In [None]:
def split_df_by_num_flows(df, buckets, sort_key="bytes"):
    """
    Sorts df in place by sort_key (ascending) and cuts it into consecutive
    slices, one per entry of buckets.

    Each entry of buckets is a fraction; it is turned into the row index
    int(fraction * (len(df) - 1)), which is the exclusive end of that slice
    and the start of the next one. Note that the row at the final index is
    therefore not part of any slice.

    Returns (dfs, thresh): the list of slices and the list of end indices.
    """
    df.sort_values(by=[sort_key], ascending=[True], inplace=True)
    last_index = len(df) - 1
    thresh = [int(fraction * last_index) for fraction in buckets]
    starts = [0] + thresh[:-1]
    dfs = [df.iloc[lo:hi, :] for lo, hi in zip(starts, thresh)]
    return dfs, thresh

In [None]:
def split_df_by_num_bytes(df, buckets, sort_key="bytes"):
    """
    Sorts df in place by sort_key (ascending) and cuts it into consecutive
    slices according to the cumulative share of the "bytes" column.

    buckets lists increasing cumulative byte fractions and must end with 1.
    A slice is closed at the first row whose cumulative fraction reaches the
    current bucket's bound; that row is the exclusive end of the slice and
    becomes the first row of the next one (so the row closing the last
    bucket is in no slice). At most one bucket is closed per row, so fewer
    slices than buckets are returned if a single row crosses several bounds.

    Returns (dfs, thresh): the list of slices and the row index at which
    each slice was closed.
    """
    assert(len(buckets) >= 1)
    assert(buckets[-1] == 1)

    df.sort_values(by=[sort_key], ascending=[True], inplace=True)
    total_bytes = float(sum(df.bytes.values))
    byte_fraction = np.cumsum(df.bytes.values) / total_bytes

    dfs, thresh = [], []
    slice_start = 0
    current = 0
    for flow, fraction in enumerate(byte_fraction):
        # Rows remaining after the last bucket was closed are unexpected.
        assert(current < len(buckets))
        if fraction >= buckets[current]:
            thresh.append(flow)
            dfs.append(df.iloc[slice_start:flow, :])
            slice_start = flow
            current += 1

    return dfs, thresh

In [None]:
# Sanity check: show the style entry resolved for every trace file.
for trace_file in fct_files:
    style_entry = get_styles(trace_file)
    print("%s: %s" % (trace_file, str(style_entry)))

In [None]:
# we group files by CDF, load
# (cdf, load in percent) -> list of trace files for that workload
files_by_cdf_load = {}
for f in fct_files:
    config = get_config(f)  # parse each filename once
    cdf = config["cdf"]
    # round() instead of plain int(): float("0.29") * 100 is 28.999999999999996,
    # which int() alone would truncate to 28.
    load = int(round(float(config["load"]) * 100))
    files_by_cdf_load.setdefault((cdf, load), []).append(f)

cdf_loads = files_by_cdf_load.keys()
print(cdf_loads)
print(files_by_cdf_load)


def _scheme_order(path):
    """Position of the file's (scheme, kw) pair in schemes_in_order."""
    parsed = get_config(path)
    return schemes_in_order.index((parsed["scheme"], parsed["kw"]))


# Within each workload, order the files as listed in schemes_in_order.
# Only values of existing keys are replaced, so iterating the keys view is safe.
for cdf, load in cdf_loads:
    files_by_cdf_load[(cdf, load)] = sorted(files_by_cdf_load[(cdf, load)],
                                            key=_scheme_order)
    print([(get_config(f)["scheme"], get_config(f)["kw"])
           for f in files_by_cdf_load[(cdf, load)]])

In [None]:
def get_xticks(xmin, xmax):
    """
    Chooses x-axis tick positions for the integer axis range [xmin, xmax].

      * range wider than 40: every 10 from xmin up to xmax
      * range wider than 20: 0, 10, 15 plus every 10 from xmin up to xmax
      * otherwise: the ticks of [0, 1, 2, 3, 4, 5, 10, 15, 20] not above xmax

    Returns a sorted list without duplicates, so that callers drawing one
    grid line per tick do not draw the same line twice.
    """
    span = xmax - xmin
    if span > 40:
        ticks = range(xmin, xmax + 1, 10)
    elif span > 20:
        ticks = [0, 10, 15] + list(range(xmin, xmax + 1, 10))
    else:
        ticks = [tick for tick in [0, 1, 2, 3, 4, 5, 10, 15, 20] if tick <= xmax]
    return sorted(set(ticks))

In [None]:

labeled_subplots = {}
matplotlib.use("TkAgg")
print(matplotlib.get_backend())

from matplotlib.legend_handler import HandlerLine2D
myHandlerMap = {}

# One figure per (cdf, load) workload; one subplot per byte-fraction bucket;
# one CDF line per result file (scheme) in every subplot.
for cdf, load in cdf_loads:
    buckets = buckets_by_cdf[cdf]  # by max, starting at 0.0
    num_buckets = len(buckets)
    # Per-bucket x-range, accumulated over all files of this workload.
    xmin = {}
    xmax = {}
    if num_buckets <= 2: figsize = (6, 3)
    else: figsize = None
    fig, ax = plt.subplots(nrows=int(math.ceil(num_buckets/2.0)), ncols=2, figsize=figsize)
    fig.tight_layout()

    for file_num, f in enumerate(files_by_cdf_load[(cdf, load)]):
        df = augment_df(fct_file_to_df(f))
        total_flows = len(df)
        dfs, thresh = split_df_by_num_bytes(df, buckets)

        line_label, style, color, alpha = get_styles(filename=f)

        for i in range(num_buckets):
            # With a single row of subplots `ax` is 1-D, otherwise 2-D.
            if num_buckets <= 2: axi = ax[i]
            else: axi = ax[int(i/2)][i%2]

            norm_fct = sorted([float(fct) for fct in dfs[i].norm_fct.values])
            cum_dist = np.linspace(0., 1., len(norm_fct))

            line = axi.plot(norm_fct, cum_dist, label=line_label,
                            linestyle=style, color=color, alpha=alpha)
            print(line)
            myHandlerMap[str(line)] = HandlerLine2D()
            if file_num == 0:
                # Titles, labels and the sample-size marker are set once per
                # subplot, using the first file of the workload.
                min_frac = 0
                min_flow = 0
                if i > 0:
                    min_frac = buckets[i-1]
                    min_flow = thresh[i-1]
                max_frac = buckets[i]
                max_flow = thresh[i]
                num_flows = len(dfs[i])
                # Share of ALL flows of this file that fall into the bucket.
                # (Dividing by the bucket's own size would always yield 1.)
                if total_flows:
                    pct_flows = 100.0 * (max_flow - min_flow) / total_flows
                else:
                    pct_flows = 0.0
                title = "%.0f-%.0f%% of bytes, %.0f%% of flows" % \
                    (min_frac*100, max_frac*100, pct_flows)

                axi.set_title(title, fontsize=fontsize)
                # NOTE(review): the num_flows/3000 formula is kept as is;
                # the intended sample-size criterion should be confirmed.
                max_cdf_with_enough_samples = min(1.0, (num_flows/30.0)/100.0)
                line = axi.axhline(y=max_cdf_with_enough_samples,
                                   linestyle='dashed', color='r')
                axi.set_ylabel("CDF", fontsize=fontsize, labelpad=0)
                axi.set_xlabel("FCT (norm. by min. FCT)", fontsize=fontsize)
                xmin[i] = norm_fct[0]
                xmax[i] = norm_fct[-1]

            xmin[i] = min(xmin[i], norm_fct[0])
            xmax[i] = max(xmax[i], norm_fct[-1])
            if file_num == len(files_by_cdf_load[(cdf, load)]) - 1:
                # After the last file the x-range is final: fix limits and grid.
                # round to nearest ten on left, int on right
                xmin[i] = int(xmin[i]/10.0) * 10
                xmax[i] = min(100, int(math.ceil(xmax[i])))
                # The first bucket's upper x-limit is forced to 10.
                xmax[0] = 10
                axi.set_xlim(xmin=xmin[i], xmax=xmax[i])
                axi.set_ylim(ymin=0, ymax=1.0)
                xticks = get_xticks(xmin[i], xmax[i])

                axi.set_xticks(xticks)
                # Faint reference lines at selected CDF values and at every tick.
                for y in [0.5, 0.999]:
                    line = axi.axhline(y=y, linestyle='solid', color='black', alpha=0.1)
                for x in xticks:
                    line = axi.axvline(x=x, linestyle='solid', color='black', alpha=0.1)

            # end for bucket in range(num_buckets)
        # end for f in files_by_cdf[cdf]

    fig.subplots_adjust(hspace=.5)
    # plt.legend acts on the current axes, i.e. the last subplot created above.
    legend = plt.legend(handler_map=myHandlerMap,
                        loc=(0.5, 0.0),
                        bbox_to_anchor=legend_rel_pos[cdf+str(load)])
    plt.savefig("maxmin-%s-%d.pdf" % (cdf, load), bbox_inches='tight')
    # end for cdf, load in cdf_loads