In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from edgedroid.models.timings import *
import edgedroid.data as default_data

# Load the curve-fitting dataset through edgedroid's data module; the bare
# expression on the last line makes the notebook display the resulting frame.
data = default_data.load_curve_fitting_data()
data

Unnamed: 0,participant,prev_ttf,exec_time,prev_duration,neuro,neuroticism
0,134146,0.0,3.029,"[-inf, 5.0)","[-inf, 0.5)",0.375
1,134146,0.0,4.004,"[-inf, 5.0)","[-inf, 0.5)",0.375
2,134146,0.0,2.625,"[-inf, 5.0)","[-inf, 0.5)",0.375
3,134146,0.0,4.680,"[-inf, 5.0)","[-inf, 0.5)",0.375
4,134146,0.0,5.197,"[-inf, 5.0)","[-inf, 0.5)",0.375
...,...,...,...,...,...,...
6755,137353,0.0,6.358,"[5.0, 9.0)","[0.5, inf)",0.625
6756,137353,0.0,3.840,"[5.0, 9.0)","[0.5, inf)",0.625
6757,137353,0.0,2.932,"[9.0, inf)","[0.5, inf)",0.625
6758,137353,0.0,1.874,"[9.0, inf)","[0.5, inf)",0.625


In [2]:
from collections import deque
import pandas as pd
from typing import Tuple, Deque
from tqdm.notebook import tqdm
import numpy as np

# Collects one dict per prediction made in the loop below; it is turned into a
# DataFrame once all participants have been processed.
result_rows = deque()
# Each iteration holds out one participant: the model classes defined in the
# loop body drop that participant's rows from the data they load, and the
# participant's own rows (run_data) are then used to score the predictions.
for participant in tqdm(data["participant"].unique(), desc="Run"):
    class CurveFittingTestModel(CurveFittingExecutionTimeModel):
        """Curve-fitting model whose data excludes the current ``participant``."""

        @staticmethod
        def get_data() -> Tuple[
            pd.DataFrame,
            pd.arrays.IntervalArray,
            pd.arrays.IntervalArray,
            pd.arrays.IntervalArray,
        ]:
            """Return the parent's data tuple without the current participant's rows.

            Only the first element (the frame) is filtered; the remaining
            elements are passed through untouched. ``participant`` is resolved
            when this method is called, not when the class is defined.
            """
            full_frame, *remaining = CurveFittingExecutionTimeModel.get_data()
            keep_rows = full_frame["participant"] != participant
            return (full_frame[keep_rows].copy(), *remaining)
        
    class PowerFitTestModel(CurveFittingTestModel):
        """CurveFittingTestModel variant using a power curve."""

        # Human-readable formula; also used as the model label in the results.
        curve = "a * x^b + c"

        @staticmethod
        def _exec_time_func(x, a, b, c) -> float:
            """Evaluate ``a * x**b + c``."""
            power_term = np.power(x, b)
            return a * power_term + c
        
    class SquareFitTestModel(CurveFittingTestModel):
        """CurveFittingTestModel variant using a second-degree polynomial."""

        # Human-readable formula; also used as the model label in the results.
        curve = "a * x^2 + b * x + c"

        @staticmethod
        def _exec_time_func(x, a, b, c) -> float:
            """Evaluate ``a * x**2 + b * x + c``."""
            squared_term = a * np.square(x)
            linear_term = b * x
            return squared_term + linear_term + c
    
    class CubeFitTestModel(CurveFittingTestModel):
        """CurveFittingTestModel variant using a third-degree polynomial."""

        # Human-readable formula; also used as the model label in the results.
        curve = "a * x^3 + b * x^2 + c * x + d"

        @staticmethod
        def _exec_time_func(x, a, b, c, d) -> float:
            """Evaluate ``a * x**3 + b * x**2 + c * x + d``."""
            cubic_term = a * np.power(x, 3)
            squared_term = b * np.square(x)
            linear_term = c * x
            return cubic_term + squared_term + linear_term + d
        
    class ExponentialFitTestModel(CurveFittingTestModel):
        """CurveFittingTestModel variant using an exponential curve."""

        # Human-readable formula; also used as the model label in the results.
        curve = "a * e^x + b"

        @staticmethod
        def _exec_time_func(x, a, b) -> float:
            """Evaluate ``a * e**x + b``."""
            growth = np.exp(x)
            return a * growth + b
        
    def get_test_data(
        *args, **kwargs
    ) -> Tuple[
        pd.DataFrame,
        pd.arrays.IntervalArray,
        pd.arrays.IntervalArray,
        pd.arrays.IntervalArray,
    ]:
        """Return ``ExecutionTimeModel.get_data()`` without the current participant.

        Positional and keyword arguments are accepted and ignored, so the
        function can be called with or without a bound instance. Only the
        first element of the tuple (the frame) is filtered; the rest is passed
        through unchanged.
        """
        # NOTE(review): this filters on "run_id" whereas CurveFittingTestModel
        # filters on "participant" -- confirm that run_id values in this
        # dataset correspond to participant ids.
        full_frame, *remaining = ExecutionTimeModel.get_data()
        keep_rows = full_frame["run_id"] != participant
        return (full_frame[keep_rows].copy(), *remaining)

    class EmpiricalTestModel(EmpiricalETM):
        """EmpiricalETM variant that loads its data through ``get_test_data``."""

        # Wrapped in staticmethod so that instance access does not bind
        # ``self``; get_data is invoked without an instance elsewhere in this
        # notebook, so the override keeps that calling convention explicit
        # instead of relying on get_test_data swallowing extra arguments.
        get_data = staticmethod(get_test_data)
        
    class EmpiricalMeanTestModel(EmpiricalAggregateETM):
        """EmpiricalAggregateETM variant using ``np.mean`` and ``get_test_data``."""

        # Wrapped in staticmethod so that instance access does not bind
        # ``self``; get_data is invoked without an instance elsewhere in this
        # notebook, so the override keeps that calling convention explicit
        # instead of relying on get_test_data swallowing extra arguments.
        get_data = staticmethod(get_test_data)

        def __init__(self, *args, **kwargs):
            """Forward all arguments to the parent with ``aggregate_fn=np.mean``."""
            super().__init__(*args, aggregate_fn=np.mean, **kwargs)
            
    class EmpiricalMedianTestModel(EmpiricalAggregateETM):
        """EmpiricalAggregateETM variant using ``np.median`` and ``get_test_data``."""

        # Wrapped in staticmethod so that instance access does not bind
        # ``self``; get_data is invoked without an instance elsewhere in this
        # notebook, so the override keeps that calling convention explicit
        # instead of relying on get_test_data swallowing extra arguments.
        get_data = staticmethod(get_test_data)

        def __init__(self, *args, **kwargs):
            """Forward all arguments to the parent with ``aggregate_fn=np.median``."""
            super().__init__(*args, aggregate_fn=np.median, **kwargs)
        
    class TheoreticalTestModel(FittedETM):
        """FittedETM variant that loads its data through ``get_test_data``."""

        # Wrapped in staticmethod so that instance access does not bind
        # ``self``; get_data is invoked without an instance elsewhere in this
        # notebook, so the override keeps that calling convention explicit
        # instead of relying on get_test_data swallowing extra arguments.
        get_data = staticmethod(get_test_data)

    class NaiveTestModel(FirstOrderETM):
        """FirstOrderETM variant that loads its data through ``get_test_data``."""

        # Wrapped in staticmethod so that instance access does not bind
        # ``self``; get_data is invoked without an instance elsewhere in this
        # notebook, so the override keeps that calling convention explicit
        # instead of relying on get_test_data swallowing extra arguments.
        get_data = staticmethod(get_test_data)
        
    class NaiveAggTestModel(FirstOrderAggregateETM):
        """FirstOrderAggregateETM variant that loads its data through ``get_test_data``."""

        # Wrapped in staticmethod so that instance access does not bind
        # ``self``; get_data is invoked without an instance elsewhere in this
        # notebook, so the override keeps that calling convention explicit
        # instead of relying on get_test_data swallowing extra arguments.
        get_data = staticmethod(get_test_data)
        
    # Only referenced by the disabled kernel-model sweep further down.
    cleanup = CleanupMode.TRUNCATE

    # Rows belonging to the participant currently being evaluated.
    run_data = data.loc[data["participant"] == participant].copy()

    # The first distinct neuroticism value in the participant's rows is used
    # to parametrise the curve-fitting models.
    neuroticism = run_data["neuroticism"].unique()[0]

    # Instantiate the models; the order of construction is kept as-is.
    naive_model = NaiveTestModel()
    naive_mean_model = NaiveAggTestModel(np.mean)
    naive_median_model = NaiveAggTestModel(np.median)
    # curve_fitting_model = CurveFittingTestModel(neuroticism)

    power_fit_model = PowerFitTestModel(neuroticism)
    square_fit_model = SquareFitTestModel(neuroticism)
    cube_fit_model = CubeFitTestModel(neuroticism)
    exponential_fit_model = ExponentialFitTestModel(neuroticism)

    # deque containing (model name, model object, ttf_bins, window_size, kernel name);
    # the curve-fitting models are labelled with their `curve` formula.
    models: Deque[Tuple[str, ExecutionTimeModel, int, int, str]] = deque(
        [
            ("1st order", naive_model, 0, 0, "none"),
            ("1st order (median)", naive_median_model, 0, 0, "none"),
            ("1st order (mean)", naive_mean_model, 0, 0, "none"),
            *(
                (fit_model.curve, fit_model, 0, 0, "none")
                for fit_model in (
                    power_fit_model,
                    square_fit_model,
                    cube_fit_model,
                    exponential_fit_model,
                )
            ),
        ]
    )
    
    # for (ttf_bins, window_size) in itertools.product(range(1, 10), (4, 8, 12)):
    #     exp_kernel = ExponentialTTFWindowKernel(window_size=window_size)
    #     steep_linear_kernel = LinearTTFWindowKernel(window_size=window_size, max_relative_weight=10)
    #     shallow_linear_kernel = LinearTTFWindowKernel(window_size=window_size, max_relative_weight=2)
    #     average_kernel = AverageTTFWindowKernel(window_size=window_size)
    #     
    #     kernels = (
    #         ("exponential", exp_kernel),
    #         ("linear_steep", steep_linear_kernel),
    #         ("linear_shallow", shallow_linear_kernel),
    #         ("average", average_kernel)
    #     )
    #     model_classes = (
    #         ("empirical", EmpiricalTestModel),
    #         ("empirical (mean)", EmpiricalMeanTestModel),
    #         ("empirical (median)", EmpiricalMedianTestModel),
    #         ("theoretical", TheoreticalTestModel)
    #     )
    #     
    #     for (class_name, model_cls), (kernel_name, kernel) in itertools.product(model_classes, kernels):
    #         m = model_cls(kernel=kernel, neuroticism=neuroticism, ttf_levels=ttf_bins, cleanup=cleanup)
    #         models.append((class_name, m, ttf_bins, window_size, kernel_name))
        
    # Only these columns are needed while replaying the participant's rows.
    replay_frame = run_data[["prev_ttf", "exec_time", "neuro", "prev_duration"]]

    # Replay the participant's rows 30 times.
    # NOTE(review): presumably the repetitions average out randomness in the
    # models' predictions -- confirm.
    for _ in range(30):
        for step, (prev_ttf, exec_time, neuro, prev_duration) in enumerate(
            replay_frame.itertuples(index=False)
        ):
            for model_name, model, ttf_bins, window_size, kernel_name in models:
                if step > 0:
                    model.advance(prev_ttf)
                else:
                    # first row has no previous ttf, so the model is reset
                    # here, which also separates consecutive repetitions
                    model.reset()

                prediction = model.get_execution_time()
                error = prediction - exec_time
                result_rows.append(
                    dict(
                        real=exec_time,
                        prediction=prediction,
                        error=error,
                        sqr_error=np.square(error),
                        model=model_name,
                        duration=prev_duration,
                        neuro=neuro,
                        # kernel=kernel_name,
                        # ttf_bins=ttf_bins,
                        # window_size=window_size,
                    )
                )

# Build one table from all collected prediction records and store the model
# label as a categorical column ("kernel" would join it if re-enabled).
results = pd.DataFrame(result_rows).astype({"model": "category"})

# Persist the table next to the notebook, then display it.
results.to_parquet("./full_validation.parquet")
results

Run:   0%|          | 0/40 [00:00<?, ?it/s]

Unnamed: 0,real,prediction,error,sqr_error,model,duration,neuro
0,3.029,7.599540,4.570540,20.889837,1st order,"[-inf, 5.0)","[-inf, 0.5)"
1,3.029,5.261648,2.232648,4.984719,1st order (median),"[-inf, 5.0)","[-inf, 0.5)"
2,3.029,5.873496,2.844496,8.091159,1st order (mean),"[-inf, 5.0)","[-inf, 0.5)"
3,3.029,3.937954,0.908954,0.826197,a * x^b + c,"[-inf, 5.0)","[-inf, 0.5)"
4,3.029,3.876765,0.847765,0.718705,a * x^2 + b * x + c,"[-inf, 5.0)","[-inf, 0.5)"
...,...,...,...,...,...,...,...
1419595,4.241,5.821658,1.580658,2.498480,1st order (mean),"[9.0, inf)","[0.5, inf)"
1419596,4.241,3.685106,-0.555894,0.309018,a * x^b + c,"[9.0, inf)","[0.5, inf)"
1419597,4.241,3.659760,-0.581240,0.337840,a * x^2 + b * x + c,"[9.0, inf)","[0.5, inf)"
1419598,4.241,3.229473,-1.011527,1.023186,a * x^3 + b * x^2 + c * x + d,"[9.0, inf)","[0.5, inf)"


In [3]:
# results.groupby(["model", "kernel", "window_size", "ttf_bins"], observed=True)["sqr_error"].mean().sort_values(ascending=True)

# Mean squared error per (model, neuro bin, duration bin), lowest error first.
(
    results
    .groupby(["model", "neuro", "duration"], observed=True)["sqr_error"]
    .mean()
    .sort_values()
)

# look at neuroticism
# look at durations
# look at different functions
# look at delta in ttf to reset duration (maybe not instantaneous delta)

# look at different curves for each quadrant

model                          neuro        duration   
a * e^x + b                    [-inf, 0.5)  [5.0, 9.0)      6.853862
a * x^b + c                    [-inf, 0.5)  [5.0, 9.0)      6.934405
a * x^2 + b * x + c            [-inf, 0.5)  [5.0, 9.0)      6.965101
a * x^3 + b * x^2 + c * x + d  [-inf, 0.5)  [5.0, 9.0)      6.975263
1st order (median)             [-inf, 0.5)  [5.0, 9.0)      7.006482
1st order (mean)               [-inf, 0.5)  [5.0, 9.0)      7.876696
1st order (median)             [0.5, inf)   [5.0, 9.0)      8.980667
a * e^x + b                    [0.5, inf)   [5.0, 9.0)      9.105339
a * x^3 + b * x^2 + c * x + d  [0.5, inf)   [5.0, 9.0)      9.230164
a * x^2 + b * x + c            [0.5, inf)   [5.0, 9.0)      9.240160
1st order (mean)               [0.5, inf)   [5.0, 9.0)      9.288172
a * x^b + c                    [0.5, inf)   [5.0, 9.0)      9.294628
1st order (mean)               [-inf, 0.5)  [-inf, 5.0)     9.394525
1st order (median)             [-inf, 0.5)  [-i

In [4]:
# Mean squared error per model over all rows, lowest error first.
(
    results
    .groupby(["model"], observed=True)["sqr_error"]
    .mean()
    .sort_values()
)


model
1st order (median)               10.084425
a * e^x + b                      10.197333
1st order (mean)                 10.268799
a * x^2 + b * x + c              10.271327
a * x^b + c                      10.279599
a * x^3 + b * x^2 + c * x + d    10.292186
1st order                        20.441406
Name: sqr_error, dtype: float64

In [5]:
# # %matplotlib inline
# import matplotlib.pyplot as plt
# import seaborn as sns
# 
# fg = sns.catplot(results, kind="point", x="ttf_bins", y="sqr_error", hue="model_tag", height=5, aspect=2)
# fg.set(ylim=(0, None))
# fg.set_ylabels("MSE")
# for ax in fg.axes.flat:
#     ax.set_xticks(ax.get_xticks(), ax.get_xticklabels(), rotation=45, ha='right')
# plt.show()

In [6]:
# results.groupby(["model", "ttf_bins", "cleanup"])["sqr_error"].mean().sort_values()