In [None]:
import itertools
import pandas
import numpy
import glob
import math
import tqdm
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Create the output directory that the plotting cells save their PDFs into.
# Unlike `!mkdir results`, this does not depend on the shell and does not
# complain when the directory already exists (e.g. on Restart & Run All).
import os
os.makedirs("results", exist_ok=True)

In [None]:
# Path of the benchmark log that the next cell opens and parses line by line.
file = "./results.txt"

In [None]:
# Parse the benchmark log into one record per line. Each line is read as a flat
# sequence of alternating keys and values (key1 value1 key2 value2 ...).
records = []
with open(file) as f:
    for line in f:
        # split() without an argument splits on any run of whitespace, so
        # repeated spaces or tabs cannot produce empty tokens that would shift
        # keys and values against each other.
        tokens = line.split()
        if not tokens:
            # Blank line: skip it instead of adding an all-NaN row.
            continue
        # Even positions are keys, odd positions are values; a dangling key
        # without a value is dropped by zip.
        records.append(dict(zip(tokens[0::2], tokens[1::2])))

results = pandas.DataFrame(records)
results

In [None]:
# Column dtypes straight after parsing, before the explicit conversions below.
results.dtypes

In [None]:
# Every parsed column holds strings (or NaN where a line lacked the key);
# give each column the dtype the later cells rely on.
_text_columns = ["name", "tag", "--sWorkload", "--sSystem", "--sQuery"]
_count_columns = [
    "--nIterations",
    "--nActors",
    "--nEvents",
    "--nChainLength",
    "--nAtomSize",
    "--nWorkflows",
    "--nParallelism",
    "--nPartitions",
]

for _column in _text_columns:
    # let pandas pick the best nullable dtype for textual columns
    results[_column] = results[_column].convert_dtypes()

for _column in _count_columns:
    # nullable integer, so missing entries stay missing
    results[_column] = results[_column].astype("Int64")

results["average(s)"] = results["average(s)"].astype("float64")

results.dtypes

In [None]:
# add tag to name: "full-name" is the tag immediately followed by the name
# (no separator); a missing tag or name contributes an empty string.
results["full-name"] = results["tag"].fillna('') + results["name"].fillna('')
results

In [None]:
# Derived metric: number of events divided by the "average(s)" column
# (presumably the mean run time in seconds — confirm against the benchmark harness).
results["average-throughput (events/s)"] = results["--nEvents"] / results["average(s)"]
results

In [None]:
# Distinct "full-name" values; most plotting cells below filter on this column.
results["full-name"].unique()

In [None]:
# Dtypes after the conversions and the two derived columns.
results.dtypes

In [None]:
# Styles Mapping
#
# Positional palettes: entry k of every list belongs to entry k of `sSystems`.
# The lists are also indexed directly by plot_line when it is asked for
# unnamed (positional) styles.
colours = ["r", "g", "b", "c", "y"]
markers = ["o", "v", "^", "x", "p"]
linestyles = ['-', '--', ':', '-.', '']
linewidths = [3, 2, 1, 1, 0.5]
# Longer than `sSystems`; only the leading entries end up in `s_hatches`.
hatches = ['/', '\\', '|', '-', '+', 'x', 'o', 'O', '.', '*']

sSystems = ["async", "microBatching", "noGuarantees", "sync", "Akka"]

# Per-system lookups, keyed by the value of the "--sSystem" column.
s_colours = {system: colour for system, colour in zip(sSystems, colours)}
s_markers = {system: marker for system, marker in zip(sSystems, markers)}
s_linestyles = {system: style for system, style in zip(sSystems, linestyles)}
s_linewidths = {system: lw for system, lw in zip(sSystems, linewidths)}
s_hatches = {system: hatch for system, hatch in zip(sSystems, hatches)}

In [None]:
# line chart

def plot_line(
    name,
    benchmarks,
    filter_col,
    group_cols,
    x_axis,
    y_axis,
    x_label,
    y_label,
    log_scale_y = False,
    log_scale_x = False,
    ignore_cols = None,
    ignore_vals = None,
    filename = "",
    withUnnamedStyles = False,
):
    """Plot one line per group of rows taken from the module-level `results` frame.

    Rows are selected where `results[filter_col]` is one of `benchmarks`,
    optionally pruned with `ignore_cols` / `ignore_vals`, and grouped by
    `group_cols`. Each group is drawn as `y_axis` against `x_axis` in the
    order the rows appear in the frame.

    Parameters
    ----------
    name : figure title.
    benchmarks : list of values of `filter_col` to keep.
    filter_col : column used to select rows.
    group_cols : grouping columns; the second one provides the legend label
        (and the style key unless `withUnnamedStyles` is set). With a single
        grouping column that column is used instead.
    x_axis, y_axis : columns plotted on the x and y axes.
    x_label, y_label : axis labels.
    log_scale_y, log_scale_x : switch the respective axis to a log scale.
    ignore_cols, ignore_vals : drop every row in which any of `ignore_cols`
        holds one of `ignore_vals`. `None`/empty disables the pruning.
    filename : if non-empty, the figure is also saved to this path.
    withUnnamedStyles : take styles from the positional palettes (`colours`,
        `markers`, ...) by group index instead of the per-system tables.

    Raises
    ------
    KeyError : if `withUnnamedStyles` is false and a label has no entry in
        the per-system style tables.
    """

    def _as_list(value):
        # None -> [], a lone string -> [string], any other iterable -> list
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    ignore_cols = _as_list(ignore_cols)
    ignore_vals = _as_list(ignore_vals)

    plt.figure(figsize=(1.618 * 3, 3))

    filtered = results[results[filter_col].isin(benchmarks)]

    if ignore_cols:
        # Reduce the per-cell matches to ONE boolean per row. Indexing with the
        # raw 2-D `.values` array only behaves for a single ignore column.
        ignored = filtered[ignore_cols].isin(ignore_vals).any(axis=1)
        filtered = filtered[~ignored]

    grouped = filtered.groupby(group_cols)

    # Plotting
    for i, (key, df) in enumerate(grouped):
        # Group keys are tuples for multi-column grouping and may be scalars
        # for a single column; the label is the second grouping level when
        # there is one.
        if isinstance(key, tuple):
            label = key[1] if len(key) > 1 else key[0]
        else:
            label = key

        if withUnnamedStyles:
            # Cycle through the palettes so that more groups than palette
            # entries reuse styles instead of raising IndexError.
            style = dict(
                marker=markers[i % len(markers)],
                ls=linestyles[i % len(linestyles)],
                c=colours[i % len(colours)],
                linewidth=linewidths[i % len(linewidths)],
            )
        else:
            style = dict(
                marker=s_markers[label],
                ls=s_linestyles[label],
                c=s_colours[label],
                linewidth=s_linewidths[label],
            )

        plt.plot(df[x_axis], df[y_axis], label=label, **style)

    # formatting
    plt.ylabel(y_label)
    plt.xlabel(x_label)
    plt.title(name)
    plt.grid(which="both", linestyle='--')
    plt.tick_params(which='minor', color='r')
    plt.legend()

    if log_scale_y:
        plt.yscale("log")
    if log_scale_x:
        plt.xscale("log")

    plt.tight_layout()

    # Anchor the y-axis at zero only on a linear axis: a log axis cannot
    # include 0 and matplotlib rejects such a limit with a warning.
    if not log_scale_y:
        plt.ylim(bottom=0)

    if filename:
        plt.savefig(filename)


In [None]:
# bar chart

def plot_bar(
    name,
    benchmarks,
    filter_col,
    group_cols,
    y_axis,
    y_label,
    x_label,
    log_scale_y = False,
    ignore_cols = None,
    ignore_vals = None,
    translation = None,
    filename = "",
):
    """Draw a clustered bar chart from the module-level `results` frame.

    Rows are selected where `results[filter_col]` is one of `benchmarks`,
    optionally pruned with `ignore_cols` / `ignore_vals`, and grouped by
    `group_cols`. The first grouping column defines the clusters (one x tick
    each); the second defines the bars inside a cluster and is the key into
    the per-system style tables and the legend.

    Parameters
    ----------
    name : figure title.
    benchmarks : list of values of `filter_col` to keep.
    filter_col : column used to select rows.
    group_cols : two grouping columns, `[cluster_column, bar_column]`.
    y_axis : column providing the bar heights.
    y_label, x_label : axis labels.
    log_scale_y : switch the y-axis to a log scale.
    ignore_cols, ignore_vals : drop every row in which any of `ignore_cols`
        holds one of `ignore_vals`. `None`/empty disables the pruning.
    translation : optional mapping from cluster key to tick label; clusters
        without an entry keep their key as the label.
    filename : if non-empty, the figure is also saved to this path.

    Raises
    ------
    ValueError : if no rows are left to plot after filtering.
    KeyError : if a bar label has no entry in the per-system style tables.
    """

    def _as_list(value):
        # None -> [], a lone string -> [string], any other iterable -> list
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    ignore_cols = _as_list(ignore_cols)
    ignore_vals = _as_list(ignore_vals)
    translation = {} if translation is None else translation

    filtered = results[results[filter_col].isin(benchmarks)]
    if ignore_cols:
        # One boolean per row; the raw 2-D `.values` mask only behaves for a
        # single ignore column.
        ignored = filtered[ignore_cols].isin(ignore_vals).any(axis=1)
        filtered = filtered[~ignored]

    groups = list(filtered.groupby(group_cols))
    if not groups:
        raise ValueError(
            f"plot_bar({name!r}): no rows left to plot for {filter_col} in {benchmarks!r}"
        )

    # Distinct cluster / bar keys in order of first appearance.
    outer_keys = list(dict.fromkeys(key[0] for key, _ in groups))
    inner_keys = list(dict.fromkeys(key[1] for key, _ in groups))
    outer_slot = {key: slot for slot, key in enumerate(outer_keys)}
    inner_slot = {key: slot for slot, key in enumerate(inner_keys)}

    n = len(outer_keys)        # number of clusters
    n_inner = len(inner_keys)  # bars per cluster
    # The layout below is expressed in multiples of `width`, so its value only
    # scales the x-axis. Guard the single-bar case, where the formula gives 0
    # and every bar would be invisible.
    width = (n_inner - 1.0) / n_inner if n_inner > 1 else 1.0

    plt.figure(figsize=(1.618 * 3, 3))

    labelled = set()
    for key, df in groups:
        outer, label = key[0], key[1]

        # Each cluster occupies n_inner bar slots plus one empty slot as a gap.
        # Using the key's own slot (not a running index) keeps bars aligned
        # even when a cluster lacks some of the inner groups.
        position = (outer_slot[outer] * (n_inner + 1) + inner_slot[label]) * width

        bar_style = dict(
            align='center',
            ls=s_linestyles[label],
            color=s_colours[label],
            linewidth=s_linewidths[label],
            hatch=s_hatches[label],
        )
        # Only the first bar of each label gets a legend entry.
        if label not in labelled:
            bar_style["label"] = label
            labelled.add(label)

        plt.bar(position, df[y_axis], width, **bar_style)

    # One tick per cluster, centred under its n_inner bar slots.
    tick_positions = numpy.arange(n) * (n_inner + 1) * width + ((n_inner - 1) * width / 2.0)
    plt.xticks(tick_positions, [translation.get(k, k) for k in outer_keys])

    # formatting
    plt.ylabel(y_label)
    plt.xlabel(x_label)
    plt.title(name)
    plt.grid(which="both", linestyle='--', axis="y")
    plt.legend()
    if log_scale_y:
        plt.yscale("log")

    plt.tight_layout()

    if filename:
        plt.savefig(filename)


In [None]:
# NEXMarkBenchmark
# Bar chart of average throughput for the rows whose full-name is
# "NEXMarkBenchmark": one cluster per query (--sQuery), one bar per system
# (--sSystem), leaving out the "sync" system. Saved to results/nexmark.pdf.
plot_bar(
    name = "NEXMark",
    benchmarks = ["NEXMarkBenchmark"],
    filter_col = "full-name",
    group_cols = ["--sQuery", "--sSystem"],
    y_axis = "average-throughput (events/s)",
    y_label = "throughput (events/s)",
    x_label = "",
#     log_scale_y = True,
    ignore_cols = ["--sSystem"],
    ignore_vals = ["sync"],
    filename = "results/nexmark.pdf",
)


In [None]:
# MICROBENCHMARKS

# PingPong
# Single cluster (the benchmark itself, relabelled "PingPong" on the x-axis)
# with one bar per system; the "sync" system is left out.

plot_bar(
    name = "PingPong",
    benchmarks = ["PingPongBenchmark"],
    filter_col = "full-name",
    group_cols = ["full-name", "--sSystem"],
    y_axis = "average-throughput (events/s)",
    y_label = "throughput (events/s)",
    x_label = "",
#     log_scale_y = True,
    ignore_cols = ["--sSystem"],
    ignore_vals = ["sync"],
    translation = {"PingPongBenchmark": "PingPong"},
    filename = "results/pingpongmicro.pdf",
)

In [None]:
# MICROBENCHMARKS
# All five charts share the row filter, the grouping by system, the throughput
# y-axis, a log x-axis (y stays linear) and the exclusion of the "sync" system;
# only the benchmark, the swept parameter and the output file differ.
_micro_common = dict(
    filter_col = "full-name",
    group_cols = ["full-name", "--sSystem"],
    y_axis = "average-throughput (events/s)",
    y_label = "throughput (events/s)",
    log_scale_x = True,
    ignore_cols = ["--sSystem"],
    ignore_vals = ["sync"],
)

# (title, benchmark, swept column, x-axis label, output file; "" = not saved)
_micro_charts = [
    (
        "Thread Ring of Tasks",
        "ThreadRingTasks",
        "--nChainLength",
        "chain length (no. of tasks)",
        "results/threadringmicro.pdf",
    ),
    (
        "Thread Ring of Workflows",
        "ThreadRingWorkflows",
        "--nChainLength",
        "chain length (no. of workflows)",
        "",
    ),
    (
        "Thread Ring of Alternating Workflows",
        "ThreadRingWorkflowsAlternatingSequencers",
        "--nChainLength",
        "chain length (no. of workflows)",
        "results/threadringwfsmicro.pdf",
    ),
    (
        "Counting Actor",
        "CountingActorBenchmark",
        "--nAtomSize",
        "atom size (#events)",
        "results/countingactormicro.pdf",
    ),
    (
        "Fork Join Throughput",
        "ForkJoinThroughputBenchmark",
        "--nWorkflows",
        "fork-width (#workflows)",
        "",
    ),
]

for _title, _benchmark, _swept, _x_label, _out in _micro_charts:
    plot_line(
        name = _title,
        benchmarks = [_benchmark],
        x_axis = _swept,
        x_label = _x_label,
        filename = _out,
        **_micro_common,
    )

In [None]:
# Parallelism-threads, Data-parallelism
# Both charts draw one line per workload (--sWorkload) with positional styles;
# they differ in the swept parameter, and only the partitions chart uses a
# log x-axis.
_scaling_common = dict(
    filter_col = "full-name",
    group_cols = ["full-name", "--sWorkload"],
    y_axis = "average-throughput (events/s)",
    y_label = "throughput (events/s)",
    withUnnamedStyles = True,
)

plot_line(
    name = "Partitions",
    benchmarks = ["partitionsDataParallelThroughputBenchmark"],
    x_axis = "--nPartitions",
    x_label = "number of partitions",
    log_scale_x = True,
    filename = "results/parallelismpartitions.pdf",
    **_scaling_common,
)

plot_line(
    name = "Parallelism",
    benchmarks = ["parallelismDataParallelThroughputBenchmark"],
    x_axis = "--nParallelism",
    x_label = "number of threads",
    filename = "results/parallelismthreads.pdf",
    **_scaling_common,
)

In [None]:
# Highest over lowest average throughput, per workload, across the partitions sweep.
_partition_rows = results[results["full-name"] == "partitionsDataParallelThroughputBenchmark"]
ca, pp, tr = (
    _partition_rows[_partition_rows["--sWorkload"] == _workload]
    for _workload in ("countingActor", "pingPong", "threadRingTasks")
)
_throughput = "average-throughput (events/s)"
print(*(_rows[_throughput].max() / _rows[_throughput].min() for _rows in (ca, pp, tr)))

In [None]:
# Highest over lowest average throughput, per workload, across the parallelism sweep.
_parallelism_rows = results[results["full-name"] == "parallelismDataParallelThroughputBenchmark"]
ca, pp, tr = (
    _parallelism_rows[_parallelism_rows["--sWorkload"] == _workload]
    for _workload in ("countingActor", "pingPong", "threadRingTasks")
)
_throughput = "average-throughput (events/s)"
print(*(_rows[_throughput].max() / _rows[_throughput].min() for _rows in (ca, pp, tr)))

In [None]:
# AtomAlignmentBenchmark
# Throughput against atom size, one line per system, log x-axis. The plain
# variant leaves out the "sync" system; the "with work" variant plots every
# system present in the data.
_alignment_common = dict(
    filter_col = "full-name",
    group_cols = ["full-name", "--sSystem"],
    x_axis = "--nAtomSize",
    y_axis = "average-throughput (events/s)",
    y_label = "throughput (events/s)",
    x_label = "atom size (#events)",
    log_scale_x = True,
)

plot_line(
    name = "Atom Alignment",
    benchmarks = ["AtomAlignmentBenchmark"],
    ignore_cols = ["--sSystem"],
    ignore_vals = ["sync"],
    filename = "results/alignmentmicro.pdf",
    **_alignment_common,
)

plot_line(
    name = "Atom Alignment With Work",
    benchmarks = ["withWorkAtomAlignmentBenchmark"],
    filename = "results/alignmentmicrowithwork.pdf",
    **_alignment_common,
)

In [None]:
# Akka benchmarks
# Rows are selected by tag (not full-name): one cluster per workload
# (--sWorkload), one bar per system (--sSystem), with no system excluded.

plot_bar(
    name = "Akka Benchmark",
    benchmarks = ["AkkaBenchmarks"],
    filter_col = "tag",
    group_cols = ["--sWorkload", "--sSystem"],
    y_axis = "average-throughput (events/s)",
    y_label = "throughput (events/s)",
    x_label = "",
#     log_scale_y = True,
#     ignore_cols = ["--sSystem"],
#     ignore_vals = ["sync"],
#     translation = {"PingPongBenchmark": "PingPong"},
    filename = "results/akkamicro.pdf",
)


In [None]:
# ChainOfTasks Benchmark
# Two sweeps of the same benchmark, told apart by the tag prefix of the
# full-name: one over the chain length, one over the atom size. Both use a
# log x-axis and one line per system.
_chain_common = dict(
    name = "Chain of Tasks",
    filter_col = "full-name",
    group_cols = ["full-name", "--sSystem"],
    y_axis = "average-throughput (events/s)",
    y_label = "throughput (events/s)",
    log_scale_x = True,
)

# (benchmark full-name, swept column, x-axis label, output file)
_chain_charts = [
    (
        "--nChainLengthChainOfTasksWithWork",
        "--nChainLength",
        "chain length (#tasks)",
        "results/chainoftaskslength.pdf",
    ),
    (
        "--nAtomSizeChainOfTasksWithWork",
        "--nAtomSize",
        "atom size (#events)",
        "results/chainoftaskssize.pdf",
    ),
]

for _benchmark, _swept, _x_label, _out in _chain_charts:
    plot_line(
        benchmarks = [_benchmark],
        x_axis = _swept,
        x_label = _x_label,
        filename = _out,
        **_chain_common,
    )

In [None]:
# Rows tagged "syncbenchmark"; the bar charts in the next cell are drawn from them.
results[results["tag"] == "syncbenchmark"]

In [None]:
# Sync benchmark comparison ("syncbenchmark" tag): three bar charts with a
# shared throughput y-axis and an unlabelled x-axis.
_sync_common = dict(
    y_axis = "average-throughput (events/s)",
    y_label = "throughput (events/s)",
    x_label = "",
)

# All micro-benchmarks of the tag except the two that get their own chart below.
plot_bar(
    name = "MicroBenchmarks",
    benchmarks = ["syncbenchmark"],
    filter_col = "tag",
    group_cols = ["name", "--sSystem"],
    ignore_cols = ["name"],
    ignore_vals = ["ChainOfTasksWithWork", "NEXMarkBenchmark"],
    translation = {"CountingActorBenchmark": "CountingActor", "PingPongBenchmark": "PingPong", "ThreadRingTasks": "ThreadRing"},
    filename = "results/syncmicros.pdf",
    **_sync_common,
)

# Chain of tasks on its own.
plot_bar(
    name = "Chain of Tasks",
    benchmarks = ["syncbenchmarkChainOfTasksWithWork"],
    filter_col = "full-name",
    group_cols = ["name", "--sSystem"],
    translation = {"ChainOfTasksWithWork": "Chain of Tasks"},
    filename = "results/syncchain.pdf",
    **_sync_common,
)

# NEXMark on its own, clustered by query.
plot_bar(
    name = "NEXMark",
    benchmarks = ["syncbenchmarkNEXMarkBenchmark"],
    filter_col = "full-name",
    group_cols = ["--sQuery", "--sSystem"],
    filename = "results/syncnexmark.pdf",
    **_sync_common,
)

In [None]:
# Every row named "DataParallelThroughputBenchmark", whatever its tag
# (the cells below narrow this down to the "parallelism" tag).
filtered = results[results["name"].isin({"DataParallelThroughputBenchmark"})]

In [None]:
# Parallelism-tagged rows with --nParallelism equal to 1 or 16.
# NOTE: `xxx` is rebound by the next cell, so this selection is for display only.
xxx = filtered[((filtered["--nParallelism"] == 1) | (filtered["--nParallelism"] == 16)) & (filtered["tag"] == "parallelism")]
xxx

In [None]:
# Parallelism-tagged rows with --nParallelism equal to 1 or 8; the two cells
# below read this selection.
xxx = filtered[((filtered["--nParallelism"] == 1) | (filtered["--nParallelism"] == 8)) & (filtered["tag"] == "parallelism")]
xxx

In [None]:
# Throughput at --nParallelism 8 divided by throughput at --nParallelism 1.
# The division is element-wise by position on raw arrays, so it assumes both
# selections have the same length and row order — TODO confirm.
xxx[(xxx["--nParallelism"] == 8)]["average-throughput (events/s)"].values / xxx[(xxx["--nParallelism"] == 1)]["average-throughput (events/s)"].values

In [None]:
# Baseline throughputs at --nParallelism 1 (the denominators of the ratio above).
xxx[(xxx["--nParallelism"] == 1)]["average-throughput (events/s)"].values

In [None]:
# Rebinds `filtered` to the rows tagged "AkkaBenchmarks".
filtered = results[results["tag"].isin({"AkkaBenchmarks"})]
filtered

In [None]:
# Akka throughput divided by async throughput, element-wise by position on raw
# arrays; assumes both systems have the same number of rows in the same
# order — TODO confirm.
filtered[ filtered["--sSystem"] == "Akka"]["average-throughput (events/s)"].values / filtered[ filtered["--sSystem"] == "async"]["average-throughput (events/s)"].values

In [None]:
# Rebinds `filtered` to the rows named "AtomAlignmentBenchmark" (any tag).
filtered = results[results["name"].isin({"AtomAlignmentBenchmark"})]
filtered

In [None]:
#  Akka Benchmarks
# Rows tagged "AkkaBenchmarks", kept in `tmp` for the comparison in the next cell.
tmp = results[results["tag"].isin({"AkkaBenchmarks"})]
tmp

In [None]:
# Throughput of the "Akka" rows next to the "async" rows of `tmp`, then their ratio.
_throughput = "average-throughput (events/s)"
akka = tmp[tmp["--sSystem"] == "Akka"]
asnc = tmp[tmp["--sSystem"] == "async"]
akkavals = akka[_throughput].values
asncvals = asnc[_throughput].values
print(akkavals, asncvals, sep="\n")
# element-wise by position on the raw arrays
print(akkavals/asncvals)

In [None]:
# All rows named "PingPongBenchmark", regardless of tag.
results[results["name"] == "PingPongBenchmark"]

In [None]:
# Rows named "CountingActorBenchmark" that were run with --nAtomSize equal to 1.
results[(results["name"] == "CountingActorBenchmark") & (results["--nAtomSize"] == 1)]