# Microbenchmark Experiment Analysis

## Notebook Configuration

In [None]:
########## GENERAL
# Path of the experiment directory. The notebook reads "metadata.yml",
# "conf/system.yml" and the per-node subdirectories of "logs" from it.
# NOTE(review): the value below looks like a strftime-style placeholder;
# presumably it must be replaced with the actual directory name before running.
EXPERIMENT_DIRPATH = "BuzzBlogBenchmark_%Y-%m-%d-%H-%M-%S"

########## EXECUTION LOGS
# Name of the aggregation applied to the execution times of each time window in
# the point-in-time (PIT) plot; it is also shown in that plot's y-axis label.
PIT_AGGREGATE_FUNC = "mean"

## Notebook Setup

In [None]:
# Import libraries
%matplotlib inline
import matplotlib.pyplot as plt
import os
import pandas as pd
import tarfile
import warnings
import yaml
warnings.filterwarnings("ignore")

## Experiment Metadata

In [None]:
########## UTILITIES
def load_experiment_metadata():
    """Return a YAML object with experiment metadata.

    The metadata is parsed from "metadata.yml" inside EXPERIMENT_DIRPATH.
    NOTE: yaml.Loader is the full (unsafe) loader, so this should only be
    pointed at experiment directories that are trusted.
    """
    metadata_path = os.path.join(EXPERIMENT_DIRPATH, "metadata.yml")
    with open(metadata_path) as stream:
        metadata = yaml.load(stream, Loader=yaml.Loader)
    return metadata

In [None]:
# Display the experiment metadata as YAML; default_flow_style=False requests
# block-style output for nested collections.
print(yaml.dump(load_experiment_metadata(), default_flow_style=False))

## Execution Logs

In [None]:
########## UTILITIES
def get_microbenchmark_tarball_path():
    """Return the path to the microbenchmark tarball, or None if there is none.

    Scans every node directory under "<EXPERIMENT_DIRPATH>/logs" for a file
    whose name ends with "_microbench.tar.gz" and returns the path of the first
    match. Entries are visited in sorted order so that the result is
    deterministic when more than one tarball exists.
    """
    logs_dirpath = os.path.join(EXPERIMENT_DIRPATH, "logs")
    for node_hostname in sorted(os.listdir(logs_dirpath)):
        node_dirpath = os.path.join(logs_dirpath, node_hostname)
        # A stray regular file next to the node directories would otherwise
        # make os.listdir() raise NotADirectoryError.
        if not os.path.isdir(node_dirpath):
            continue
        for filename in sorted(os.listdir(node_dirpath)):
            if filename.endswith("_microbench.tar.gz"):
                return os.path.join(node_dirpath, filename)
    # Explicit "not found" result (the same value callers received before).
    return None

def get_microbenchmarked_service():
    """Return the name of the microbenchmarked service.

    The name is the part of the tarball's file name that precedes the first
    underscore.

    Raises:
        FileNotFoundError: if no microbenchmark tarball was found.
    """
    tarball_path = get_microbenchmark_tarball_path()
    if tarball_path is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise FileNotFoundError(
            "No '*_microbench.tar.gz' tarball found in the experiment logs")
    # os.path.basename honors the platform's path separator, unlike splitting
    # on a hard-coded "/" (the path is built with os.path.join).
    return os.path.basename(tarball_path).split("_")[0]
            
def list_microbenchmarks():
    """Return a list of microbenchmarks in the tarball.

    Each microbenchmark is identified by the base name (directory and ".csv"
    extension removed) of a CSV member of the microbenchmark tarball.
    """
    csv_suffix = ".csv"
    # The context manager closes the tarball even if iteration fails.
    with tarfile.open(get_microbenchmark_tarball_path()) as tarball:
        # Tar member names always use "/" as separator. Stripping only the
        # suffix keeps names that themselves contain dots intact.
        return [member.name.rsplit("/", 1)[-1][:-len(csv_suffix)]
                for member in tarball if member.name.endswith(csv_suffix)]

def load_microbenchmark_logs(microbenchmark):
    """Return a DataFrame with execution logs of the specified microbenchmark.

    The logs are read from the member
    "./microbench_<service>/<microbenchmark>.csv" of the microbenchmark tarball,
    where <service> is the value returned by get_microbenchmarked_service().
    """
    member_name = "./microbench_%s/%s.csv" % (get_microbenchmarked_service(), microbenchmark)
    # Both the tarball and the extracted member are closed on exit; read_csv
    # loads the whole file eagerly, so the DataFrame does not depend on them.
    with tarfile.open(get_microbenchmark_tarball_path()) as tarball:
        with tarball.extractfile(member_name) as microbenchmark_file:
            return pd.read_csv(microbenchmark_file)

### Throughput

In [None]:
# Plot, for each microbenchmark, the number of logged calls per time window
# (one subplot per microbenchmark, stacked vertically).
microbenchmarks = list_microbenchmarks()
fig = plt.figure(figsize=(16 * len(microbenchmarks), 32 * len(microbenchmarks)))
for (i, microbenchmark) in enumerate(microbenchmarks):
    df = load_microbenchmark_logs(microbenchmark)
    # Window index = timestamp_milli / 1000 truncated to an integer. The
    # vectorized form gives the same values as a row-wise int(...) without
    # running a Python-level function call for every log row.
    df["window"] = (df["timestamp_milli"] / 1000).astype("int64")
    # Number of log rows in each window.
    df = df.groupby(["window"])["window"].count()
    # Windows without any call are shown as zero instead of being skipped.
    df = df.reindex(range(0, int(df.index.max()) + 1), fill_value=0)
    ax = fig.add_subplot(len(microbenchmarks), 1, i + 1)
    ax.grid(alpha=0.75)
    df.plot(ax=ax, kind="bar", title="Throughput: %s microbenchmark" % microbenchmark,
            xlabel="Time (seconds)", ylabel="Calls per second",
            color="blue", grid=True, xticks=range(0, int(df.index.max()) + 1, 10))

### Point-in-Time Execution Time

In [None]:
# Plot, for each microbenchmark, the execution time aggregated per time window
# with PIT_AGGREGATE_FUNC (one subplot per microbenchmark, stacked vertically).
microbenchmarks = list_microbenchmarks()
fig = plt.figure(figsize=(16 * len(microbenchmarks), 32 * len(microbenchmarks)))
for (i, microbenchmark) in enumerate(microbenchmarks):
    df = load_microbenchmark_logs(microbenchmark)
    # Window index = timestamp_milli / 1000 truncated to an integer. The
    # vectorized form gives the same values as a row-wise int(...) without
    # running a Python-level function call for every log row.
    df["window"] = (df["timestamp_milli"] / 1000).astype("int64")
    df = df.groupby(["window"])["exec_time_milli"].agg(PIT_AGGREGATE_FUNC)
    # Windows without any call are shown as zero instead of being skipped.
    df = df.reindex(range(0, int(df.index.max()) + 1), fill_value=0)
    ax = fig.add_subplot(len(microbenchmarks), 1, i + 1)
    ax.grid(alpha=0.75)
    df.plot(ax=ax, kind="bar", title="PIT Execution Time: %s microbenchmark" % microbenchmark,
            xlabel="Time (seconds)", ylabel="%s Execution Time (milliseconds)" % PIT_AGGREGATE_FUNC,
            color="purple", grid=True, xticks=range(0, int(df.index.max()) + 1, 60))

### Execution Time Distribution

In [None]:
# Plot, for each microbenchmark, a log-scale histogram of execution times using
# one bin per integer value of exec_time_milli (one subplot per microbenchmark).
microbenchmarks = list_microbenchmarks()
fig = plt.figure(figsize=(16 * len(microbenchmarks), 32 * len(microbenchmarks)))
for (i, microbenchmark) in enumerate(microbenchmarks):
    df = load_microbenchmark_logs(microbenchmark)
    # Truncate each execution time to an integer; vectorized equivalent of a
    # row-wise int(...).
    df["exec_time_bin"] = df["exec_time_milli"].astype("int64")
    ax = fig.add_subplot(len(microbenchmarks), 1, i + 1)
    ax.grid(alpha=0.75)
    ax.set_yscale("log")
    # The x axis must extend to the right edge of the last bin, max + 1.
    ax.set_xlim((0, df["exec_time_bin"].max() + 1))
    # Bin edges 0, 1, ..., max + 1: values falling outside the edges are left
    # out of a histogram, so the edges have to reach past the largest value or
    # the slowest calls would silently disappear from the plot.
    df["exec_time_bin"].plot(ax=ax, kind="hist",
                             title="Execution Time Distribution: %s microbenchmark" % microbenchmark,
                             xlabel="Execution Time (milliseconds)", ylabel="Count",
                             bins=range(df["exec_time_bin"].max() + 2),
                             grid=True, color="green")

## Experiment Configuration

In [None]:
########## UTILITIES
def load_system_conf():
    """Return a YAML object with system configuration.

    The configuration is parsed from "conf/system.yml" inside
    EXPERIMENT_DIRPATH. NOTE: yaml.Loader is the full (unsafe) loader, so this
    should only be pointed at experiment directories that are trusted.
    """
    system_conf_path = os.path.join(EXPERIMENT_DIRPATH, "conf", "system.yml")
    with open(system_conf_path) as stream:
        system_conf = yaml.load(stream, Loader=yaml.Loader)
    return system_conf

### System Configuration File

In [None]:
# Display the system configuration as YAML. default_flow_style=False requests
# block-style output explicitly instead of relying on the installed PyYAML
# version's default, matching how the experiment metadata is printed.
print(yaml.dump(load_system_conf(), default_flow_style=False))