In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from edgedroid.models.timings import *
import edgedroid.data as default_data

# Load edgedroid's default datasets. The loader hands back four objects, which
# are unpacked here in the order it returns them.
(
    exec_time_data,
    neuroticism_data,
    imp_data,
    dur_data,
) = default_data.load_default_exec_time_data()

# Cell output: the execution time data.
exec_time_data

In [2]:
# Run edgedroid's preprocessing over the four raw inputs loaded above; the
# result is what the evaluation below slices by `run_id`.
data = preprocess_data(
    exec_time_data,
    neuroticism_data,
    imp_data,
    dur_data,
)

# Cell output: the preprocessed data.
data

In [3]:
import itertools
from collections import deque
import pandas as pd
from typing import Tuple, Deque
from tqdm.notebook import tqdm

# Evaluation parameters, hoisted out of the loops so they can be tuned in one place.
N_REPETITIONS = 30  # number of passes over each held-out run
KERNEL_WINDOW_SIZE = 12  # window_size handed to ExponentialTTFWindowKernel
TTF_BIN_COUNTS = range(1, 10)  # candidate values for the models' ttf_levels


def make_held_out_data_getter(held_out_run_id):
    """Build a ``get_data`` replacement that hides one run from the models.

    The rows of ``exec_time_data`` that do not belong to ``held_out_run_id`` are
    selected once, when the getter is built, and are bound to the getter itself.
    This avoids re-filtering the full frame for every model that gets
    instantiated, and it makes the getter independent of whatever value the
    loop variable ``run_id`` holds at the moment the getter is called.

    Args:
        held_out_run_id: value of the ``run_id`` column to exclude.

    Returns:
        A callable that accepts (and ignores) any arguments, so that it works
        whether or not it is invoked as a bound method. Each call returns a
        fresh copy of the filtered execution time data together with
        ``neuroticism_data``, ``imp_data`` and ``dur_data``.
    """
    train_exec_times = exec_time_data[exec_time_data["run_id"] != held_out_run_id]

    def get_test_data(*args, **kwargs) -> Tuple[pd.DataFrame, pd.arrays.IntervalArray, pd.arrays.IntervalArray, pd.arrays.IntervalArray]:
        # Copy on every call so that no two callers ever share the same frame.
        return train_exec_times.copy(), neuroticism_data, imp_data, dur_data

    return get_test_data


# One dict per (repetition, row, model) prediction; turned into a frame at the end.
result_rows = deque()

for run_id in tqdm(data["run_id"].unique(), desc="Run"):
    # Rows of the run being evaluated; the models are built without them.
    test_data = data[data["run_id"] == run_id].copy()
    get_test_data = make_held_out_data_getter(run_id)

    # Subclasses whose only change is where the model data comes from.
    class EmpiricalTestModel(EmpiricalETM):
        get_data = get_test_data

    class TheoreticalTestModel(FittedETM):
        get_data = get_test_data

    class NaiveTestModel(FirstOrderETM):
        get_data = get_test_data

    # Take the first distinct neuroticism_raw value found in this run's rows.
    neuroticism = test_data["neuroticism_raw"].unique()[0]
    naive_model = NaiveTestModel()

    # Entries are (label, model, ttf_bins, cleanup).
    models: Deque[Tuple[str, ExecutionTimeModel, int, CleanupMode]] = deque()
    # The naive model is built without ttf_levels/cleanup arguments, so it is
    # recorded with 0 bins and CleanupMode.NONE.
    models.append(
        ("1st order", naive_model, 0, CleanupMode.NONE)
    )

    # One empirical and one theoretical model per (bin count, cleanup mode) pair.
    for (ttf_bins, cleanup) in itertools.product(TTF_BIN_COUNTS, CleanupMode):
        kernel = ExponentialTTFWindowKernel(window_size=KERNEL_WINDOW_SIZE)
        emp_model = EmpiricalTestModel(kernel=kernel, neuroticism=neuroticism, ttf_levels=ttf_bins, cleanup=cleanup)
        theo_model = TheoreticalTestModel(kernel=kernel, neuroticism=neuroticism, ttf_levels=ttf_bins, cleanup=cleanup)

        models.extend((
            ("empirical", emp_model, ttf_bins, cleanup),
            ("theoretical", theo_model, ttf_bins, cleanup)
        ))

    for _ in range(N_REPETITIONS):
        for i, (prev_ttf, exec_time) in enumerate(test_data[["ttf", "next_exec_time"]].itertuples(index=False)):
            for model_name, model, ttf_bins, cleanup in models:
                if i == 0:
                    # Start of a pass: reset the model. The first row has no
                    # previous ttf, so there is nothing to advance with.
                    model.reset()
                else:
                    model.advance(prev_ttf)

                prediction = model.get_execution_time()
                result_rows.append({
                    "real": exec_time,
                    "prediction": prediction,
                    "error": prediction - exec_time,
                    "model": model_name,
                    "ttf_bins": ttf_bins,
                    "cleanup": str(cleanup)
                })

results = pd.DataFrame(result_rows)
results

In [4]:
import numpy as np
# Squared error of every individual prediction (error multiplied by itself).
results["sqr_error"] = results["error"] * results["error"]

results

In [5]:
# Combined label "<model> <cleanup>", joined by a single space.
results["model_tag"] = results["model"].str.cat(
    results["cleanup"].astype(str),
    sep=" ",
)
results

In [6]:
# %matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Point plot of the squared error against the number of TTF bins, one hue per
# model tag. The point plot aggregates `sqr_error` per x value (seaborn's
# default estimator is the mean), hence the "MSE" axis label.
fg = sns.catplot(
    data=results,
    x="ttf_bins",
    y="sqr_error",
    hue="model_tag",
    kind="point",
    height=5,
    aspect=2,
)
fg.set(ylim=(0, None)).set_ylabels("MSE")

# Re-apply the current ticks and labels on every facet, rotated and
# right-aligned.
for ax in fg.axes.flat:
    ax.set_xticks(
        ticks=ax.get_xticks(),
        labels=ax.get_xticklabels(),
        rotation=45,
        ha="right",
    )
plt.show()

In [7]:
# Mean squared error per (model, ttf_bins, cleanup) combination, lowest first.
(
    results
    .groupby(["model", "ttf_bins", "cleanup"])["sqr_error"]
    .mean()
    .sort_values()
)