In [4]:
# Proper names for plots
# and filters to select which files / runs to plot for each experiment based on config

import pandas as pd
import numpy as np
import glob

proper_name = {\
    "naive": "n-PERC", "sperc_basic" : "s-PERC",\
    "perc": "PERC", "cpg": "CPG", "skivee": "SL",\
    "cpg1": "Depth of WF1 Dependency Tree",\
    "cpg2": "Depth of WF2 Dependency Tree",\
    "wf": "Number of Distinct Bottleneck Rates",\
    "distinct": "Number of Distinct Bottleneck Rates"}

formula = {"conv_time_rtts_per_wf": "(RTTs / $N_{\infty}$)",\
    "conv_time_rtts_per_cpg1" : "(RTTs / $N_{1}$)",\
    "conv_time_rtts_per_cpg2" : "(RTTs / $N_{2}$)"}

def get_proper_name(conv_time_str):
    conv_time, thresh = conv_time_str.split("-")
    thresh = int(thresh)
    if thresh > 1: thresh_str = "to %dpc" % thres
    else: thresh_str = ""    
    return "Convergence Times %s %s" % (thresh_str, formula[conv_time])

metric = {}
metric["sparse-ct"] = ["conv_time_rtts_per_cpg1"]
metric["dense-ct"] = ["conv_time_rtts_per_cpg2"]
metric["robustness-approx"] = ["conv_time_rtts_per_cpg1-1"]
metric["robustness-drop"] = ["conv_time_rtts_per_cpg1-1"]

filters_by_experiment = {}

config_filter = {}
config_filter["alg"] = ["naive", "sperc_basic", "perc", "skivee"] # "sperc_ignore"
# sperc-basic, which propagates max(b, max_e) is faster in practice than sperc-ignore,
# which uses an ignore bit to propagate infty, when b < maxe, and maxe otherwise,
# though both have the same worst-case bounds.
config_filter["links-flows-path"]=[(1000,1000,5),\
              (1000,1000,10),\
              (100, 10000,5),\
              (100, 1000, 5),\
              (100, 100, 5),\
              (100, 10000, 10),\
              (100, 1000, 10),\
              (100, 100, 10)]        
config_filter["seed-path-links"]=range(1, 51)
config_filter["experiment"] = "sparse-ct"
config_filter["kw"] = "sep29a"
filters_by_experiment["sparse-ct"] = config_filter


config_filter = {}
config_filter["alg"] = ["naive", "sperc_basic", "perc", "skivee"]
config_filter["links-flows-path"]=[(100,100,80)]
config_filter["seed-path-links"]=range(1, 51)
config_filter["experiment"] = "dense-ct"
config_filter["kw"] = "sep29a"
filters_by_experiment["dense-ct"] = config_filter



config_filter = {}
config_filter["alg"] = ["sperc_robust"] #, "skivee"]
config_filter["links-flows-path"]= [(1000,1000,5),\
                                    (1000,1000,10),\
                                   #(100, 10000,5),\
                                   (100, 1000, 5),\
                                   (100, 100, 5),\
                                   (100, 100, 10),\
                                  #(100, 10000, 10),\
                                   (100, 1000, 10)]
#config_filter["N-l-m-err"] = [ (32, 12, 10, "0.01"), (32, 10, 8, "0.02"),\
#                               (32, 10, 6, "0.04"), (32, 14, 12, "0.0")]
config_filter["N-l-m"] = [ (32, 12, 10), (32, 10, 8), (32, 10, 6), (32, 14, 12)]
config_filter["seed-path-links"]=range(1, 16)
config_filter["experiment"] = "robustness-approx"
filters_by_experiment["robustness-approx"] = config_filter

config_filter = {}
config_filter["alg"] = ["sperc_robust"] #, "skivee"]
config_filter["links-flows-path"]= [(1000,1000,5),\
                                    (1000,1000,10),\
                                   #(100, 10000,5),\
                                   (100, 1000, 5),\
                                   (100, 100, 5),\
                                   (100, 100, 10),\
                                  #(100, 10000, 10),\
                                   (100, 1000, 10)]
config_filter["drop-prob"] = ["0.01", "0.001"]
config_filter["seed-path-links"]=range(1, 21)
config_filter["experiment"] = "robustness-drop"
filters_by_experiment["robustness-drop"] = config_filter




In [6]:
# Get pandas dataframe df_agg from convergence time output files for experiment

experiment = "robustness-drop"
# 2019oct13:To correct

config_filter = filters_by_experiment[experiment]
results_dirs=["/home/lavanya_jose/numerical/run-files/%s/results"%experiment]
png_file="/home/lavanya_jose/numerical/%s.png"%experiment

def get_png_file(metric):
    return "/home/lavanya_jose/numerical/%s-%s.png"%(experiment, metric)
# read all the CT files in output_directory
# parse the parameters and 
# if this is a file we want to look at
# save contents into a dataframe

def IsCtFile(filename):
    return filename.startswith("ct-")

import re

def ParseFilenameToConfigUsingRegex(filename, regex_str):
    match = re.match(regex_str, filename)
    if not match:
        print("Couldn't parse CtFilename %s" % filename)
        return {}
    config_keys = ["experiment", "alg", "links", "flows", "path", "seed-path-links", "kw",\
                   "N", "m", "l", "drop-prob", "rto"]
    values = match.groups()
    config = {"experiment":values[0]}
    assert(len(values) > 2)
    for key, value in zip(values[1::2], values[2::2]):
        assert(key in config_keys)        
        if key in ["links", "flows", "N", "m", "l"] or key.startswith("seed"):
            config[key] = int(value)
        else:
            config[key] = value
    
    return config

def ParseCtFilenameToConfig(filename):
    # ct-robustness-approx_alg=sperc_robust_links=1000_flows=1000_path=_seed-path-links=1_
    regex_str = "ct-(.*)_(.+)=(.*)_(.+)=(\d+)_(.+)=(\d+)_(.+)=(.*)_(.+)=(\d+)_"
    # ct-robustness-approx_alg=sperc_robust_links=1000_flows=1000_path=_seed-path-links=1_
    # N=32_m=10_l=12_kw=sep29a
    if "robustness-approx" in filename:
        regex_str += "(.+)=(\d+)_(.+)=(\d+)_(.+)=(\d+)_(.+)=(.+)"
     # ct-robustness-drop_alg=sperc_robust_links=1000_flows=1000_path=_seed-path-links=1_
    # drop-prob=0.001_rto=2_kw=sep29a
    elif "robustness-drop" in filename:
        regex_str += "(.+)=(0\.\d+)_(.+)=(\d+)_(.+)=(.+)"
    # ct-sparse-ct_alg=naive_links=1000_flows=1000_path=_seed-path-links=1_
    # kw=sep29a
    else:
        assert("sparse-ct" in filename or "dense-ct" in filename)
        regex_str += "(.+)=(.+)"
    return ParseFilenameToConfigUsingRegex(filename, regex_str)

def GetNumMatches(config, config_filter):
    matches = {}
    for key in config:
        value = config[key]
        if key in config_filter:
            if value not in config_filter[key]:
                print("Value for key %s did not match" % (key))
                print("... actual %s, expected one of %s" % (str(value), str(config_filter[key])))
                return -1
            matches[key] = True
    return len(matches)
        
        
#filename="ct-sparse-ct_alg=naive_links=1000_flows=1000_path=_seed-path-links=1_kw=sep29a"
#IsCtFile(filename)
#config = ParseCtFilenameToConfig(filename)
#config["path"] = 5
#print(GetNumMatches(config, config_filter))

import os
import pandas as pd
run_data_to_plot = {}
for results_dir in results_dirs: 
    for dirpath, _, files in os.walk(results_dir):   
        for filename in files:
            if not IsCtFile(filename): continue
            
            config = ParseCtFilenameToConfig(filename)  
            
            config["links-flows-path"] = (config["links"], config["flows"], config["path"])
            
            if "N" in config:
                config["N-l-m"] = (config["N"], config["l"], config["m"])

            num_matches = GetNumMatches(config, config_filter)
                
            if (num_matches != len(config_filter)):
                print("config_filter has %d keys, but only %d matched" % (len(config_filter), num_matches))
                continue
                
            df =  pd.read_csv("%s/%s"%(dirpath,filename), delim_whitespace=False, skiprows=0)
                        
            for key in config:
                value = str(config[key])
                if type(value) in [str, int]: df[key] = value
            df["filename"] = filename
            
            # pre-compute some fields
            df["cpg1_tens"] = df["cpg1"].round(decimals=-1)
            df["wf_tens"] = df[" wf"].round(decimals=-1)
            df["RTT"] = df[" max_rtt"]
            # check assert(df["RTT"] == df["path"] * 10 * 2)
            # after all flows are seen once (subtract 1 RTT)
            
            for col in df.columns:
                if col.startswith("convergence_time-"):
                    thresh = int(col.split("-")[1] )  
                    conv_time_rtts = "conv_time_rtts-%d"%thresh
                    df[conv_time_rtts]= (df[col]-df["RTT"]) / df["RTT"]
                    df["conv_time_rtts_per_wf-%d"%thresh] = df[conv_time_rtts]/df[" wf"]
                    df["conv_time_rtts_per_cpg1-%d"%thresh] = df[conv_time_rtts]/df["cpg1"]
                    df["conv_time_rtts_per_cpg2-%d"%thresh] = df[conv_time_rtts]/df[" cpg2"]
            
            run_data_to_plot[filename] = df
            
df_agg = pd.concat([v for v in run_data_to_plot.values()])
print(len(df_agg))
#print(df_agg)



12


In [9]:
# Summary of experiment runs
if experiment in ["sparse-ct", "dense-ct"]: 
    key1 = "alg"
    thresh = 1
elif experiment == "robustness-drop":
    key1 = "drop-prob"
else:
    assert(experiment == "robustness-approx")
    key1 = "N-l-m"
    thresh = 1

print("Breakdown of runs by %s" % key)
for value1 in config_filter[key1]:
    if type(value1) not in [int, str]: value1 = str(value1)
    print (" ", len(df_agg[df_agg[key1]==value1]), " runs of ", value1)
    for key2 in ["path", "conv_time_rtts-%d"%thresh, "conv_time_rtts_per_cpg1-%d"%thresh]:
        value2 = [float(f) for f in (df_agg[df_agg[key1]==value1][key2]).values]
        print("  ", key2),
        if len(value2) > 0:
            print("     min: %.0f, max: %.0f, " % (np.min(value2), np.max(value2))),
            print("     median: %.0f, mean: %.0f" % (np.median(value2), np.mean(value2)))
        print("     len: %d, samples: %s, " % (len(value2), str(value2[:5]))),
            
     

Breakdown of runs by path
  6  runs of  0.01
   path
     min: 5, max: 5, 
     median: 5, mean: 5
     len: 6, samples: [5.0, 5.0, 5.0, 5.0, 5.0], 
   conv_time_rtts-1
     min: 48, max: 54, 
     median: 50, mean: 51
     len: 6, samples: [50.6, 53.6, 47.6, 50.3, 54.5], 
   conv_time_rtts_per_cpg1-1
     min: 2, max: 5, 
     median: 2, mean: 3
     len: 6, samples: [2.2, 2.0615384615384618, 4.327272727272727, 2.095833333333333, 1.9464285714285714], 
  6  runs of  0.001
   path
     min: 5, max: 5, 
     median: 5, mean: 5
     len: 6, samples: [5.0, 5.0, 5.0, 5.0, 5.0], 
   conv_time_rtts-1
     min: 44, max: 53, 
     median: 51, mean: 50
     len: 6, samples: [53.3, 44.3, 52.4, 50.3, 52.4], 
   conv_time_rtts_per_cpg1-1
     min: 2, max: 5, 
     median: 2, mean: 3
     len: 6, samples: [1.9035714285714285, 4.027272727272727, 2.0153846153846153, 2.1869565217391305, 2.183333333333333], 


In [11]:
# Order of box-plots on horizontal axis
if experiment in ["sparse-ct", "dense-ct"]: 
    boxplot_key = "alg"
    sorted_names = ["naive", "sperc_ignore", "sperc_basic", "perc", "skivee", "cpg"]
elif experiment == "robustness-drop":
    boxplot_key = "drop-prob"
    sorted_names = ["0.001", "0.01"]
else:
    assert(experiment == "robustness-approx")
    boxplot_key = "N-l-m"
    sorted_names = ["(32, 12, 10)", "(32, 10, 8)", "(32, 10, 6)", "(32, 14, 12)"]


In [12]:
# Plot first metric only (e.g., convergence time to get within 1% of ideal).

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


y = metric[config_filter["experiment"]][0] 

fig, ax = plt.subplots() #nrows=1,ncols=1)
boxplot_names = list(np.unique(df_agg[boxplot_key].values))
boxplot_names = sorted(boxplot_names, key= lambda k: sorted_names.index(k))
print(boxplot_names)


df = df_agg[df_agg[y]>0]
data = []
xticklabels = []
for name in boxplot_names:
    data_b = df[df[boxplot_key]==name][y].values
    data_b.shape = (-1, 1)
    assert(type(data_b)==np.ndarray)
    assert(data_b.shape[1] == 1)
    num_values = data_b.shape[0]
    if (num_values == 0):
        print("No values for %s" % name)
        continue
    if name in proper_name: name = proper_name[name] 
    data_b = np.reshape(data_b, -1)
    median = "%.2f" % np.median(data_b)
    ninety_fifth = "%2.f" % np.percentile(data_b, 95)
    maximum =  "%.2f" % np.max(data_b)
    print (name, y, "median", median, "95th", ninety_fifth, "maximum", maximum, "num", num_values)   
    
    data.append(data_b)
    xticklabels.append("%s \n (%d)"%(name, len(data_b)))

ax.boxplot(data)
ax.set_xticklabels(xticklabels, fontsize=18)
ax.tick_params(axis='y', labelsize=18)
fig.suptitle(get_proper_name(y),fontsize=18)

plt.savefig(png_file)



['0.001', '0.01']
0.001 conv_time_rtts_per_cpg1-1 median 2.19 95th  4 maximum 4.52 num 6
0.01 conv_time_rtts_per_cpg1-1 median 2.15 95th  5 maximum 4.94 num 6


In [13]:
# Plot each metric (e.g., convergence time for a given threshold) on a different plot.

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


for y in metric[config_filter["experiment"]]: 
    fig, ax = plt.subplots() #nrows=1,ncols=1)


    df = df_agg[df_agg[y]>0]
    data = []
    xticklabels = []
    for name in boxplot_names:
        data_b = df[df[boxplot_key]==name][y].values
        data_b.shape = (-1, 1)
        assert(type(data_b)==np.ndarray)
        assert(data_b.shape[1] == 1)
        num_values = data_b.shape[0]
        if (num_values == 0):
            print("No values for %s" % name)
            continue
        if name in proper_name: name = proper_name[name] 
        data_b = np.reshape(data_b, -1)
        median = "%.2f" % np.median(data_b)
        ninety_fifth = "%2.f" % np.percentile(data_b, 95)
        maximum =  "%.2f" % np.max(data_b)
        print (name, y, "median", median, "95th", ninety_fifth, "maximum", maximum, "num", num_values)   
    
        data.append(data_b)
        xticklabels.append("%s \n (%d)"%(name, len(data_b)))

    ax.boxplot(data)
    ax.set_xticklabels(xticklabels, fontsize=18)
    ax.tick_params(axis='y', labelsize=18)
    fig.suptitle(get_proper_name(y),fontsize=18)
    plt.savefig(get_png_file(y))



0.001 conv_time_rtts_per_cpg1-1 median 2.19 95th  4 maximum 4.52 num 6
0.01 conv_time_rtts_per_cpg1-1 median 2.15 95th  5 maximum 4.94 num 6
