Skip to content

Commit

Permalink
cleaning up
Browse files Browse the repository at this point in the history
  • Loading branch information
molguin92 committed Mar 4, 2024
1 parent c0a75cb commit 05be8b4
Show file tree
Hide file tree
Showing 12 changed files with 75 additions and 905 deletions.
15 changes: 6 additions & 9 deletions edgedroid/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
from collections import deque
from dataclasses import asdict, dataclass
from typing import List, Optional, Dict, Any, Iterator, Generator
from typing import List, Optional

import pandas as pd
from numpy import typing as npt

from . import sampling
from . import timings
from .sampling import *
from .timings import *


@dataclass(frozen=True)
Expand Down Expand Up @@ -176,7 +171,9 @@ def _init_iter() -> Generator[ModelFrame, sampling.FrameTimings, None]:
# clear the frame timestamp buffer
step_frame_timestamps.clear()

def _frame_iter_for_step() -> Generator[ModelFrame, sampling.FrameTimings, None]:
def _frame_iter_for_step() -> (
Generator[ModelFrame, sampling.FrameTimings, None]
):
# TODO: implement sampling records
# replay frames for step
frame_iter = self._frame_dists.step_iterator(
Expand Down
1 change: 0 additions & 1 deletion edgedroid/models/timings/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from .base import *
from .curve import *
from .realistic import *
from .reference import *
122 changes: 3 additions & 119 deletions edgedroid/models/timings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,7 @@

import abc
import copy

import enum
from collections import deque
from typing import TypeVar, Iterator, Tuple, Dict, Any

import pandas as pd
from pandas import arrays
from typing import TypeVar, Iterator, Dict, Any


class ModelException(Exception):
Expand All @@ -33,105 +27,6 @@ class ModelException(Exception):
pass


class Transition(str, enum.Enum):
    """
    Direction of the most recent change in impairment level.

    Members are also plain strings, so they compare equal to their values
    and can be stored directly in a DataFrame column.
    """

    # previous impairment bin was higher than the current one
    H2L = "Higher2Lower"
    # previous impairment bin was lower than the current one
    L2H = "Lower2Higher"
    # no change in impairment has been observed yet
    NONE = "NoTransition"


def preprocess_data(
    exec_time_data: pd.DataFrame,
    neuro_bins: arrays.IntervalArray | pd.IntervalIndex,
    impair_bins: arrays.IntervalArray | pd.IntervalIndex,
    duration_bins: arrays.IntervalArray | pd.IntervalIndex,
) -> pd.DataFrame:
    """
    Processes a DataFrame with raw execution time data into a DataFrame
    usable by the model.

    The argument DataFrame must be in order (of steps) and have the following
    columns:

    - run_id (categorical or int)
    - neuroticism (float)
    - exec_time (float)
    - ttf (float)

    The input is not modified; all work is done on a copy.

    Parameters
    ----------
    exec_time_data
        Raw experimental data
    neuro_bins
        Bins to use for neuroticism values.
    impair_bins
        Bins to use for time-to-feedback (impairment).
    duration_bins
        Bins to use for sequences of same impairment.

    Returns
    -------
    A DataFrame in which, for every run:

    - ``ttf`` has been shifted by one step (first step filled with 0),
    - ``exec_time`` has been renamed to ``next_exec_time``,
    - ``neuroticism``, ``impairment`` and ``duration`` are binned into
      intervals (raw values kept in ``neuroticism_raw``/``duration_raw``),
    - ``transition`` holds the value of the latest ``Transition`` seen so
      far in the run.

    Raises
    ------
    ModelException
        If any of the required columns is missing from the input.
    """

    proc_data = exec_time_data.copy()

    for col in ("run_id", "neuroticism", "exec_time", "ttf"):
        if col not in proc_data.columns:
            raise ModelException(f"Base data missing required column: {col}")

    proc_data["neuroticism_raw"] = proc_data["neuroticism"]
    proc_data["neuroticism"] = pd.cut(
        proc_data["neuroticism"], pd.IntervalIndex(neuro_bins)
    )

    processed_dfs = []
    for _, df in proc_data.groupby("run_id"):
        df = df.copy()

        # pair each step with the ttf of the step before it; the first step
        # of a run has no predecessor and gets a ttf of 0
        df["ttf"] = df["ttf"].shift().fillna(0)

        df["impairment"] = pd.cut(df["ttf"], pd.IntervalIndex(impair_bins))
        df = df.rename(columns={"exec_time": "next_exec_time"})

        df["prev_impairment"] = df["impairment"].shift()

        # for each segment with the same impairment, count the number of steps
        # (starting from 1)
        diff_imp_groups = df.groupby(
            (df["impairment"].ne(df["prev_impairment"])).cumsum()
        )
        df["duration"] = diff_imp_groups.cumcount() + 1

        # mark only the steps where the impairment level changes...
        df["transition"] = None
        df.loc[
            df["prev_impairment"] < df["impairment"], "transition"
        ] = Transition.L2H.value
        df.loc[
            df["prev_impairment"] > df["impairment"], "transition"
        ] = Transition.H2L.value

        # ...then carry the latest transition forward; steps before the first
        # change in a run get the explicit "no transition" value.
        # Series.ffill() replaces the deprecated fillna(method="ffill").
        df["transition"] = df["transition"].ffill().fillna(Transition.NONE.value)

        processed_dfs.append(df)

    proc_data = pd.concat(processed_dfs, ignore_index=False)

    # coerce some types for proper functionality
    proc_data["transition"] = proc_data["transition"].astype("category")
    proc_data["neuroticism"] = proc_data["neuroticism"].astype(pd.IntervalDtype(float))
    proc_data["impairment"] = proc_data["impairment"].astype(pd.IntervalDtype(float))
    proc_data["duration_raw"] = proc_data["duration"]
    proc_data["duration"] = pd.cut(
        proc_data["duration"], pd.IntervalIndex(duration_bins)
    ).astype(pd.IntervalDtype(float))
    proc_data = proc_data.drop(columns="prev_impairment")

    return proc_data

# Workaround for annotating methods as returning the type of the enclosing
# class in a way that also works for subclasses.
TTimingModel = TypeVar("TTimingModel", bound="ExecutionTimeModel")
Expand All @@ -143,17 +38,8 @@ class ExecutionTimeModel(Iterator[float], metaclass=abc.ABCMeta):
"""

@staticmethod
def get_data() -> (
Tuple[
pd.DataFrame,
pd.arrays.IntervalArray,
pd.arrays.IntervalArray,
pd.arrays.IntervalArray,
]
):
import edgedroid.data as e_data

return e_data.load_default_exec_time_data()
def get_data() -> Any:
pass

def __iter__(self):
return self
Expand Down Expand Up @@ -252,5 +138,3 @@ def fresh_copy(self: TTimingModel) -> TTimingModel:
@abc.abstractmethod
def get_model_params(self) -> Dict[str, Any]:
pass


51 changes: 0 additions & 51 deletions edgedroid/models/timings/curve.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from scipy.optimize import curve_fit

from .base import ExecutionTimeModel, TTimingModel, ModelException
from .realistic import CleanupMode, _winsorize, _truncate


class CurveFit:
Expand Down Expand Up @@ -204,53 +203,3 @@ def reset(self) -> None:

def get_model_params(self) -> Dict[str, Any]:
raise Exception("Not implemented yet!")


class LegacyModel(ExecutionTimeModel):
    """
    Execution time model that ignores time-to-feedback entirely.

    At construction, a single execution time is drawn at random for each
    duration bin (using only the samples with the lowest ``prev_ttf``).
    The model then just counts steps and returns the time associated with
    the bin containing the current step count.
    """

    @staticmethod
    def get_data() -> pd.DataFrame:
        """Load the curve fitting data bundled in ``edgedroid.data``."""
        import edgedroid.data as e_data

        return e_data.load_curve_fitting_data()

    def __init__(self, seed: int = 4):  # https://xkcd.com/221/
        """
        Parameters
        ----------
        seed
            Seed for the random generator used to pick the execution times.
        """
        super().__init__()
        rng = np.random.default_rng(seed)

        # keep only the samples recorded at the lowest previous ttf
        raw = self.get_data()
        lowest_ttf = raw["prev_ttf"].min()
        samples = raw[raw["prev_ttf"] == lowest_ttf].copy()

        # one execution time per duration
        per_duration = samples.groupby(["prev_duration"], observed=True)["exec_time"]
        self.times = per_duration.apply(
            lambda group: rng.choice(group)
        ).reset_index()

        self._current_duration = 0

    def advance(self: TTimingModel, ttf: float | int) -> TTimingModel:
        """Move to the next step; the ``ttf`` argument is not used."""
        self._current_duration += 1
        return self

    def get_execution_time(self) -> float:
        """
        Return the execution time picked for the current step count.

        NOTE(review): relies on ``prev_duration`` being interval-typed so
        that its array exposes ``contains`` -- confirm against the data.
        """
        duration_bins = self.times["prev_duration"].array
        in_bin = duration_bins.contains(self._current_duration)
        matching_rows = self.times.loc[(in_bin,)]
        # first matching row, second column
        return matching_rows.iat[0, 1]

    def get_expected_execution_time(self) -> float:
        """Same as ``get_execution_time``."""
        return self.get_execution_time()

    def get_mean_execution_time(self) -> float:
        """Same as ``get_execution_time``."""
        return self.get_execution_time()

    def get_cdf_at_instant(self, instant: float):
        raise Exception("Not implemented yet!")

    def state_info(self) -> Dict[str, Any]:
        raise Exception("Not implemented yet!")

    def reset(self) -> None:
        """Restart the step count from zero."""
        self._current_duration = 0

    def get_model_params(self) -> Dict[str, Any]:
        raise Exception("Not implemented yet!")
Loading

0 comments on commit 05be8b4

Please sign in to comment.