
Commit

model selection
molguin92 committed Oct 22, 2023
1 parent 0a94664 commit 0fb2a10
Showing 6 changed files with 1,907 additions and 254 deletions.
898 changes: 768 additions & 130 deletions analysis_2023/errors.ipynb

Large diffs are not rendered by default.

312 changes: 312 additions & 0 deletions analysis_2023/full_validation.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions analysis_2023/requirements.txt
@@ -6,3 +6,4 @@ numpy
 seaborn
 matplotlib
 pmdarima
+tqdm
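tqdm is presumably pulled in for progress reporting in the analysis notebooks; a minimal sketch of the usual pattern (the loop below is a stand-in, not code from the notebooks):

import sys
from tqdm import tqdm

# Wrap any iterable to get a live progress bar; a dummy range stands in here
# for the per-sample validation work presumably done in the notebooks.
total = 0
for i in tqdm(range(1_000), desc="validating", file=sys.stderr):
    total += i
print(total)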
914 changes: 798 additions & 116 deletions analysis_2023/validation.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions edgedroid/models/__init__.py
@@ -39,6 +39,7 @@
     "EmpiricalETM",
     "FittedETM",
     "LegacyETM",
+    "CleanupMode",
 ]


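Since CleanupMode is now re-exported from the models package, it can be imported next to the model classes. A minimal sketch (assuming the edgedroid package is installed):

from edgedroid.models import CleanupMode

# The enum defined in timings.py below; member values are assigned by enum.auto().
print(list(CleanupMode))  # NONE, WINSORIZE, TRUNCATE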
35 changes: 27 additions & 8 deletions edgedroid/models/timings.py
@@ -23,7 +23,6 @@
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
-from nptyping import Shape
 from pandas import arrays
 from scipy import stats

@@ -415,16 +414,33 @@ def _convolve_kernel(arr: pd.Series, kernel: npt.NDArray):
     return pd.Series(result[kernel.size :], index=index)


-def _winsorize(arr: npt.NDArray) -> npt.NDArray:
-    low_bound = np.percentile(arr, 5)
-    high_bound = np.percentile(arr, 95)
+def _winsorize(
+    arr: npt.NDArray, low_percentile: int = 5, high_percentile: int = 95
+) -> npt.NDArray:
+    low_bound = np.percentile(arr, low_percentile)
+    high_bound = np.percentile(arr, high_percentile)

     arr[arr < low_bound] = low_bound
     arr[arr > high_bound] = high_bound

     return arr


+def _truncate(
+    arr: npt.NDArray, low_percentile: int = 5, high_percentile: int = 95
+) -> npt.NDArray:
+    low_bound = np.percentile(arr, low_percentile)
+    high_bound = np.percentile(arr, high_percentile)
+
+    return np.copy(arr[np.logical_and(arr >= low_bound, arr <= high_bound)])
+
+
+class CleanupMode(enum.Enum):
+    NONE = enum.auto()
+    WINSORIZE = enum.auto()
+    TRUNCATE = enum.auto()
+
+
 class EmpiricalETM(ExecutionTimeModel):
     @staticmethod
     def make_kernel(window: int, exp_factor: float = 0.7):
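For reference, the two cleanup strategies above treat outliers differently: winsorizing clamps values to the 5th/95th percentile bounds and keeps every sample (and the _winsorize helper modifies its input in place), while truncating discards anything outside those bounds and returns a copy. A small self-contained sketch on synthetic data (not the packaged traces):

import numpy as np

# Synthetic, heavy-tailed "execution times" purely for illustration.
rng = np.random.default_rng(seed=0)
samples = rng.exponential(scale=2.0, size=1000)

low = np.percentile(samples, 5)
high = np.percentile(samples, 95)

# Winsorizing: clamp values beyond the bounds, preserving the sample count.
winsorized = samples.copy()
winsorized[winsorized < low] = low
winsorized[winsorized > high] = high

# Truncating: drop values outside the bounds, so roughly 10% of samples disappear.
truncated = np.copy(samples[np.logical_and(samples >= low, samples <= high)])

print(samples.size, winsorized.size, truncated.size)  # 1000, 1000, ~900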
@@ -439,7 +455,7 @@ def __init__(
         neuroticism: float | None,
         window: int = 12,
         ttf_levels: int = 4,
-        winsorize: bool = True,
+        cleanup: CleanupMode = CleanupMode.WINSORIZE,
     ):
         data, neuro_bins, *_ = self.get_data()

@@ -469,8 +485,11 @@ def __init__(
         self._views: Dict[pd.Interval, npt.NDArray] = {}
         for binned_rolling_ttf, df in data.groupby("binned_rolling_ttf", observed=True):
             exec_times = df["next_exec_time"].to_numpy()
-            if winsorize:
+
+            if cleanup == CleanupMode.WINSORIZE:
                 exec_times = _winsorize(exec_times)
+            elif cleanup == CleanupMode.TRUNCATE:
+                exec_times = _truncate(exec_times)

             self._views[binned_rolling_ttf] = exec_times

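With the dispatch above, the cleanup strategy is applied to the execution-time samples of each TTF bin when the model is built; CleanupMode.NONE leaves the raw samples untouched. A hedged usage sketch — the neuroticism value is illustrative, and constructing the model assumes the packaged trace data is available via get_data():

from edgedroid.models import CleanupMode, EmpiricalETM

# Default behaviour is unchanged: per-bin execution times are winsorized.
etm_default = EmpiricalETM(neuroticism=0.5)

# Truncate outliers instead of clamping them, or skip cleanup entirely.
etm_truncated = EmpiricalETM(neuroticism=0.5, cleanup=CleanupMode.TRUNCATE)
etm_raw = EmpiricalETM(neuroticism=0.5, cleanup=CleanupMode.NONE)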
@@ -534,13 +553,13 @@ def __init__(
         dist: stats.rv_continuous = stats.exponnorm,
         window: int = 12,
         ttf_levels: int = 4,
-        winsorize: bool = True,
+        cleanup: CleanupMode = CleanupMode.WINSORIZE,
     ):
         super(FittedETM, self).__init__(
             neuroticism=neuroticism,
             window=window,
             ttf_levels=ttf_levels,
-            winsorize=winsorize,
+            cleanup=cleanup,
         )

         self._dists: Dict[pd.Interval, stats.rv_continuous] = {}
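FittedETM simply forwards the new cleanup argument to the EmpiricalETM constructor, so the per-bin continuous distributions are presumably fitted to the cleaned samples. A hedged construction sketch (the neuroticism value and explicit distribution choice are illustrative; exponnorm is already the default):

from scipy import stats

from edgedroid.models import CleanupMode, FittedETM

# Fit exponentially modified Gaussians to truncated per-bin execution times.
model = FittedETM(
    neuroticism=0.5,
    dist=stats.exponnorm,
    cleanup=CleanupMode.TRUNCATE,
)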
