In [117]:
import itertools as it
import warnings
from collections import deque
from typing import Dict

import multiprocess as mp
import numpy as np
import pandas as pd
import scipy.stats as stats
from tqdm.notebook import tqdm

from edgedroid.models import *
from sampling_util import *

# Display label of the baseline scheme (the adaptive sampler built on a Gaussian-fitted model). It is the
# key of that sampler in `experimental_run` and is used later to pick out the reference rows.
# The embedded newline presumably wraps the label on plot axes.
reference_name = "Adaptive\nGaussian fit"

def experimental_run(
        repetition: int,
        task_steps: int,
        rtt: float,
) -> pd.DataFrame:
    """Simulate one repetition of a task for every ground truth/sampler pairing.

    For each ground-truth execution time model ("Low" and "High" neuroticism)
    and each sampling scheme, ``task_steps`` steps are played out: the ground
    truth is advanced with the previous time-to-feedback and yields an
    execution time, after which the sampler is called with that execution time
    and the round-trip time.

    Args:
        repetition: Identifier of this repetition; only recorded in the output.
        task_steps: Number of steps simulated for each pairing.
        rtt: Round-trip time passed to every sampler call; it also serves as
            the time-to-feedback preceding the first step.

    Returns:
        One row per (ground truth, sampler, step) with the columns
        ``ground_truth``, ``sampler``, ``rtt``, ``repetition``, ``step``,
        ``exec_time``, ``duration``, ``ttf``, ``wait_time`` and
        ``num_samples``. ``ground_truth`` and ``sampler`` are unordered
        categoricals whose categories follow the definition order below.
    """
    min_sr = 0.5
    alpha = 3.0

    def adaptive(model) -> Sampler:
        # All adaptive schemes share min_sr/alpha and differ only in the model providing the CDF.
        return JunjuesSampler(cdf=model.get_cdf_at_instant, min_sr=min_sr, alpha=alpha)

    # Models are instantiated with all warnings silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        ground_truths: Dict[str, ExecutionTimeModel] = {
            "Low": ExpKernelRollingTTFETModel(neuroticism=0.0),
            "High": ExpKernelRollingTTFETModel(neuroticism=1.0),
        }

        samplers: Dict[str, Sampler] = {
            "Greedy": GreedySampler(),
            reference_name: adaptive(FittedNaiveExecutionTimeModel(dist=stats.norm)),
            "Adaptive\nED2, low neuro": adaptive(ExpKernelRollingTTFETModel(neuroticism=0.0)),
            "Adaptive\nED2, high neuro": adaptive(ExpKernelRollingTTFETModel(neuroticism=1.0)),
            "Adaptive\nED2 ExG, low neuro": adaptive(
                DistExpKernelRollingTTFETModel(neuroticism=0.0, dist=stats.exponnorm)
            ),
            "Adaptive\nED2 ExG, high neuro": adaptive(
                DistExpKernelRollingTTFETModel(neuroticism=1.0, dist=stats.exponnorm)
            ),
        }

    records = deque()

    for gt_name, truth in ground_truths.items():
        for s_name, scheme in samplers.items():
            # The ground truth is reset for each sampler; the sampler objects themselves are reused as-is.
            truth.reset()
            last_ttf = rtt

            for step in range(1, task_steps + 1):
                exec_time = truth.advance(last_ttf).get_execution_time()
                outcome = scheme(exec_time, rtt)

                # Time-to-feedback is whatever part of the sampling duration exceeds the execution time;
                # the wait time is the part of it not accounted for by the RTT.
                ttf = outcome.duration - exec_time

                records.append(dict(
                    ground_truth=gt_name,
                    sampler=s_name,
                    rtt=rtt,
                    repetition=repetition,
                    step=step,
                    exec_time=exec_time,
                    duration=outcome.duration,
                    ttf=ttf,
                    wait_time=ttf - rtt,
                    num_samples=outcome.num_samples,
                ))

                last_ttf = ttf

    frame = pd.DataFrame(records)
    for column, labelled in (("ground_truth", ground_truths), ("sampler", samplers)):
        frame[column] = frame[column].astype(pd.CategoricalDtype(labelled.keys(), ordered=False))
    return frame


# Experiment grid: every RTT (in seconds) is simulated `repetitions` times, each run lasting `num_steps` steps.
rtts = [0.15, 0.3, 0.6]
repetitions = 100
num_steps = 100

combs = list(it.product(rtts, range(1, repetitions + 1)))

with tqdm(total=len(combs)) as bar, mp.Pool() as pool:
    # Keep the AsyncResult handles rather than relying on callbacks. An exception raised from an
    # `error_callback` is thrown inside the pool's result-handler thread: it never reaches the notebook
    # and can leave `pool.join()` waiting forever. `AsyncResult.get()` re-raises worker errors right here.
    pending = []
    for rtt, rep in combs:
        pending.append(
            pool.apply_async(
                experimental_run,
                kwds=dict(repetition=rep, task_steps=num_steps, rtt=rtt),
            )
        )

    pool.close()

    # Collect in submission order so the concatenated frame has a deterministic row order.
    run_frames = []
    for handle in pending:
        run_frames.append(handle.get())
        bar.update()

    pool.join()

results = pd.concat(run_frames)
results

In [118]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Global seaborn styling for the figures in this notebook: white grid background,
# "paper" context with fonts scaled by 2.5, and the "Dark2" colour palette.
sns.set_style("whitegrid")
sns.set_context("paper", font_scale=2.5)
sns.set_palette("Dark2")

In [119]:
# Mean number of samples per step within each repetition, for every (ground truth, sampler, RTT) combination.
# `observed=True` matches the grouping used for the LaTeX table and avoids relying on the implicit default
# for categorical group keys.
samples_per_rep = (
    results
    .groupby(["ground_truth", "sampler", "rtt", "repetition"], observed=True)
    ["num_samples"]
    .mean()
    .reset_index()
)
# Turn the RTT (seconds) into a millisecond label. Round before converting to int: plain truncation
# mislabels values whose float product lands just below an integer (e.g. 1.005 * 1000 -> 1004).
samples_per_rep["rtt"] = samples_per_rep["rtt"].apply(lambda e: f"{int(round(e * 1000))} ms").astype(str)
samples_per_rep

In [124]:
# Horizontal bar chart of the mean samples per step: one panel per ground truth,
# one group of bars per sampling scheme, coloured by RTT.
fg = sns.catplot(
    data=samples_per_rep,
    kind="bar",
    x="num_samples",
    y="sampler",
    hue="rtt",
    col="ground_truth",
    height=8,
    aspect=3,
)
fg.legend.set_title("RTT")
fg.set_axis_labels("Samples per step", "Sampling\nscheme")
plt.show()

In [131]:
ix = pd.IndexSlice

# Mean samples per step for every (ground_truth, sampler) row, with one column per RTT label.
table = (
    samples_per_rep
    .groupby(["ground_truth", "sampler", "rtt"], observed=True)
    ["num_samples"]
    .mean()
    .unstack()
)
# Column labels look like "150 ms"; wrap the numeric part in a siunitx \SI macro for the LaTeX output.
table.columns = [r"\SI{" f"{int(e.split()[0]):3d}" r"}{\milli\second}" for e in table.columns]
print(
    table
    # After unstacking the RTT, rows are indexed by (ground_truth, sampler) only: slice exactly those two
    # levels and address the columns explicitly instead of passing a third slicer to the row index.
    .loc[ix[:, ["Greedy", reference_name]], :]
    .style
    .format(lambda e: r"\num{" f"{e:0.2f}" r"}")
    .to_latex()
)

In [122]:
import pingouin as pg

# Shorthand for building MultiIndex slicers. NOTE(review): not referenced by the code visible in this cell.
ix = pd.IndexSlice


def diff_wrt_ref_rtt(df: pd.DataFrame) -> pd.DataFrame:
    """Compare each sampler's ``num_samples`` in ``df`` against the reference sampler.

    The rows whose ``sampler`` equals ``reference_name`` form the baseline.
    Every sampler group (the reference included) is then t-tested against that
    baseline (two-sided, via pingouin), and the difference in means is
    reported both raw and relative to the baseline mean.

    Args:
        df: Frame with at least the columns ``sampler`` and ``num_samples``.

    Returns:
        Per sampler, the entries ``diff``, ``diff_raw``, ``p-val``,
        ``p < 0.05``, ``CI95_Low`` and ``CI95_High``; the CI bounds are the
        t-test's ``CI95%`` values divided by the baseline mean.
        NOTE(review): annotated as a DataFrame, but the groupby/apply appears
        to yield a Series indexed by (sampler, statistic) -- confirm.
    """
    baseline = df.loc[df["sampler"] == reference_name, "num_samples"].to_numpy()
    baseline_mean = baseline.mean()

    def diff_wrt_ref(samples: pd.Series) -> pd.Series:
        outcome = pg.ttest(samples.to_numpy(), baseline, alternative="two-sided")
        p_value = outcome.at["T-test", "p-val"]
        interval = outcome.at["T-test", "CI95%"]

        raw_difference = samples.mean() - baseline_mean

        stats_by_name = {
            "diff": raw_difference / baseline_mean,
            "diff_raw": raw_difference,
            "p-val": p_value,
            "p < 0.05": p_value < 0.05,
            "CI95_Low": interval[0] / baseline_mean,
            "CI95_High": interval[1] / baseline_mean,
        }
        return pd.Series(stats_by_name, name="Result")

    per_sampler = df.groupby("sampler")["num_samples"]
    return per_sampler.apply(diff_wrt_ref)


# Run the reference comparison separately for every (ground_truth, rtt) group, then reshape the result.
# NOTE(review): the stack/stack/unstack chain presumably ends with one row per (ground_truth, rtt, sampler)
# and one column per statistic -- confirm against the pandas version in use.
samples_diff = samples_per_rep.groupby(["ground_truth", "rtt"]).apply(diff_wrt_ref_rtt).stack().stack().unstack(level=2).reset_index()
# The reference compared against itself carries no information: drop those rows and the now-unused category.
samples_diff = samples_diff[samples_diff["sampler"] != reference_name]
samples_diff["sampler"] = samples_diff["sampler"].cat.remove_unused_categories()
samples_diff

In [129]:
import matplotlib.ticker as tkr

# Long format: one row per (ground_truth, sampler, rtt, statistic), keeping only the relative difference
# and the two bounds of its confidence interval.
plot_data = (
    samples_diff
    .set_index(["ground_truth", "sampler", "rtt"])
    [["CI95_Low", "CI95_High", "diff"]]
    .stack()
    .reset_index()
    .rename(columns={0: "values"})
)

fg = sns.catplot(
    kind="bar",
    data=plot_data,
    # Each bar aggregates exactly three values (CI low, diff, CI high): the bar height is the middle one
    # (presumably the point estimate `diff`) and the error bar spans from the smallest to the largest.
    estimator=lambda e: sorted(e)[1],
    errorbar=lambda e: (min(e), max(e)),
    y="sampler",
    hue="rtt",
    x="values",
    col="ground_truth",
    aspect=3,
    height=4,
)
fg.legend.set_title("RTT")
# Axis label typo fixed ("Samplign" -> "Sampling").
fg.set_axis_labels("Diff. in mean number of samples w.r.t. reference", "Sampling\nScheme")
for ax in fg.axes.flat:
    # Percent ticks; the explicit zero case avoids rendering negative zero as "-0%".
    ax.xaxis.set_major_formatter(tkr.FuncFormatter(lambda x, p: f"{x:0.0%}" if x != 0.0 else "0%"))
plt.show()

def plot_by_ground_truth(data: pd.DataFrame, *args, **kwargs) -> None:
    """Draw the bars for one ``ground_truth`` facet on the current axes.

    Only the samplers whose label contains the neuroticism level matching the
    facet ("low" for the "Low" ground truth, "high" otherwise) are kept,
    together with the reference sampler. Extra positional and keyword
    arguments are accepted and ignored.

    Args:
        data: Facet subset with the columns ``ground_truth``, ``sampler``
            (categorical), ``rtt`` and ``values``.
    """
    ax = plt.gca()

    # The first ground-truth label in the subset decides which samplers are shown.
    neuro_tag = "low" if data["ground_truth"].unique()[0] == "Low" else "high"

    matches_level = data["sampler"].astype(str).str.contains(neuro_tag)
    is_reference = data["sampler"] == reference_name

    subset = data[matches_level | is_reference].copy()
    # Drop filtered-out samplers from the categories so they do not appear as empty rows.
    subset["sampler"] = subset["sampler"].cat.remove_unused_categories()

    sns.barplot(data=subset, x="values", hue="rtt", y="sampler", ax=ax)
    ax.legend()

# One panel per ground truth; the y axis is not shared because each panel shows a different subset of samplers.
fg = sns.FacetGrid(plot_data, col="ground_truth", aspect=2.5, sharey=False, height=4)
fg.map_dataframe(plot_by_ground_truth)
fg.add_legend(title="RTT", borderpad=2)
# Axis label typo fixed ("Samplign" -> "Sampling").
fg.set_axis_labels("Difference in mean number of\nsamples w.r.t. reference", "Sampling\nScheme")
fg.set_titles(col_template="{col_name} neuroticism external timing model")
fg.set(xticks=np.linspace(-0.3, 0.0, 6))
for ax in fg.axes.flat:
    # Signed percent ticks; zero is special-cased so it is rendered without a sign.
    ax.xaxis.set_major_formatter(tkr.FuncFormatter(lambda x, p: f"{x:+0.0%}" if x != 0.0 else "0%"))
plt.show()
