# Experiment Analysis

## Notebook Configuration

In [None]:
########## GENERAL
# Directory scanned for "<benchmark>.csv" execution logs (one file per benchmark)
EXPERIMENT_DIRPATH = "sample"

########## EXECUTION LOGS
# Unit prefixes the logged timestamps and latencies are scaled to before
# plotting (options: "milli", "micro", "nano"); any other value leaves the
# logged values unscaled
TIMESTAMP_UNIT_PREFIX = "milli"
LATENCY_UNIT_PREFIX = "nano"
# Function combining the latencies that fall into the same timestamp window
# (options: "mean", "min", "max")
LATENCY_AGGREGATE_FUNC = "mean"
# Number of bins of the latency distribution histogram
N_LATENCY_BINS = 100

## Notebook Setup

In [None]:
# Import libraries
%matplotlib inline
import matplotlib.pyplot as plt
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

# Constants
# Multiplier that rescales a logged value to the unit with the given prefix.
UNIT_PREFIX_FACTOR = {
  "nano": 10**9,
  "micro": 10**6,
  "milli": 10**3,
}
# Symbol placed in front of the unit in axis labels.
UNIT_PREFIX_SYMBOL = {
  "nano": "n",
  "micro": "u",
  "milli": "m",
}

# Utilities
def get_benchmark_logs_df(benchmark):
  """Load the execution logs of `benchmark` as a DataFrame.

  The logs are read from the file "<benchmark>.csv" located in
  EXPERIMENT_DIRPATH.
  """
  logs_filepath = os.path.join(EXPERIMENT_DIRPATH, benchmark + ".csv")
  return pd.read_csv(logs_filepath)

def list_benchmarks(dirpath=None):
  """List the benchmarks that have a CSV log file in a directory.

  A benchmark name is the file name without its trailing ".csv", so that
  `name + ".csv"` is always the file it came from, even when the name itself
  contains dots (e.g. "run.v2.csv" -> "run.v2").

  Args:
    dirpath: Directory to scan; defaults to EXPERIMENT_DIRPATH.

  Returns:
    The benchmark names, sorted so that the order does not depend on the
    arbitrary order in which os.listdir reports directory entries.
  """
  if dirpath is None:
    dirpath = EXPERIMENT_DIRPATH
  extension = ".csv"
  return sorted(
    filename[:-len(extension)]
    for filename in os.listdir(dirpath)
    # Directories whose name happens to end in ".csv" are not log files.
    if filename.endswith(extension) and os.path.isfile(os.path.join(dirpath, filename))
  )

## Graphs

### Throughput

In [None]:
# Throughput: number of logged executions per timestamp window, one subplot
# per benchmark. A window is one TIMESTAMP_UNIT_PREFIX unit wide.
benchmarks = list_benchmarks()
fig = plt.figure(figsize=(8 * len(benchmarks), 12 * len(benchmarks)))
# Loop-invariant lookups; an unknown prefix falls back to unscaled timestamps
# and an empty unit symbol.
timestamp_factor = UNIT_PREFIX_FACTOR.get(TIMESTAMP_UNIT_PREFIX, 1)
timestamp_symbol = UNIT_PREFIX_SYMBOL.get(TIMESTAMP_UNIT_PREFIX, "")
for (i, benchmark) in enumerate(benchmarks):
  logs_df = get_benchmark_logs_df(benchmark)
  # Vectorized scaling instead of a row-by-row apply; the explicit int64 cast
  # truncates toward zero like int() and does not depend on the platform's
  # default integer width.
  windows = (logs_df["timestamp"] * timestamp_factor).astype("int64").rename("window")
  throughput = windows.groupby(windows).count()
  # Windows without any execution are missing from the counts; insert them as
  # zeros so the line drops to zero instead of bridging the gap.
  # NOTE(review): the range starts at 0, which presumably expects timestamps
  # relative to the start of the experiment -- confirm.
  throughput = throughput.reindex(range(0, int(throughput.index.max()) + 1), fill_value=0)
  ax = fig.add_subplot(len(benchmarks), 1, i + 1)
  ax.set_xlim((0, throughput.index.max()))
  ax.grid(alpha=0.75)
  throughput.plot(ax=ax,
                  title="Throughput: %s" % benchmark,
                  xlabel="Time (%ss)" % timestamp_symbol,
                  ylabel="Count (Executions)",
                  color="blue",
                  grid=True)

### Instantaneous Latency

In [None]:
# Instantaneous latency: latencies aggregated per timestamp window with
# LATENCY_AGGREGATE_FUNC, one subplot per benchmark.
benchmarks = list_benchmarks()
fig = plt.figure(figsize=(8 * len(benchmarks), 12 * len(benchmarks)))
# Loop-invariant lookups; an unknown prefix falls back to unscaled values and
# an empty unit symbol.
timestamp_factor = UNIT_PREFIX_FACTOR.get(TIMESTAMP_UNIT_PREFIX, 1)
timestamp_symbol = UNIT_PREFIX_SYMBOL.get(TIMESTAMP_UNIT_PREFIX, "")
latency_factor = UNIT_PREFIX_FACTOR.get(LATENCY_UNIT_PREFIX, 1)
latency_symbol = UNIT_PREFIX_SYMBOL.get(LATENCY_UNIT_PREFIX, "")
for (i, benchmark) in enumerate(benchmarks):
  logs_df = get_benchmark_logs_df(benchmark)
  # Vectorized scaling instead of row-by-row applies; the explicit int64 cast
  # truncates toward zero like int() and does not depend on the platform's
  # default integer width.
  windows = (logs_df["timestamp"] * timestamp_factor).astype("int64").rename("window")
  latencies = (logs_df["latency"] * latency_factor).astype("int64")
  window_latency = latencies.groupby(windows).agg(LATENCY_AGGREGATE_FUNC)
  ax = fig.add_subplot(len(benchmarks), 1, i + 1)
  ax.set_xlim((0, window_latency.index.max()))
  ax.grid(alpha=0.75)
  # Interpolation only fills windows whose aggregate is NaN; windows without
  # any execution are absent from the series and are bridged by the line.
  window_latency.interpolate(method="linear").plot(ax=ax,
                                                   title="Latency: %s" % benchmark,
                                                   xlabel="Time (%ss)" % timestamp_symbol,
                                                   ylabel="Latency (%ss)" % latency_symbol,
                                                   color="purple",
                                                   grid=True)

### Latency Distribution

In [None]:
# Latency distribution: histogram of all logged latencies on a logarithmic
# count axis, one subplot per benchmark.
benchmarks = list_benchmarks()
fig = plt.figure(figsize=(8 * len(benchmarks), 12 * len(benchmarks)))
# Loop-invariant lookups; an unknown prefix falls back to unscaled latencies
# and an empty unit symbol.
latency_factor = UNIT_PREFIX_FACTOR.get(LATENCY_UNIT_PREFIX, 1)
latency_symbol = UNIT_PREFIX_SYMBOL.get(LATENCY_UNIT_PREFIX, "")
for (i, benchmark) in enumerate(benchmarks):
  logs_df = get_benchmark_logs_df(benchmark)
  # Vectorized scaling instead of a row-by-row apply; the explicit int64 cast
  # truncates toward zero like int() and does not depend on the platform's
  # default integer width.
  latencies = (logs_df["latency"] * latency_factor).astype("int64")
  ax = fig.add_subplot(len(benchmarks), 1, i + 1)
  ax.grid(alpha=0.75)
  ax.set_yscale("log")
  ax.set_xlim((0, latencies.max()))
  latencies.plot(ax=ax,
                 bins=N_LATENCY_BINS,
                 kind="hist",
                 title="Latency Distribution: %s benchmark" % benchmark,
                 grid=True,
                 color="green")
  # Labels are set on the axes rather than through plot(): pandas only honours
  # the xlabel/ylabel arguments for histograms from version 2.0 on.
  ax.set_xlabel("Latency (%ss)" % latency_symbol)
  ax.set_ylabel("Count (Executions)")