analysis_2023

molguin92 · Nov 30, 2023 · 75406b6 · 75406b6
1 parent e9bd818
commit 75406b6
Show file tree

Hide file tree

Showing 12 changed files with 1,204 additions and 812 deletions.
diff --git a/analysis_2023/experiment_util.py b/analysis_2023/experiment_util.py
@@ -0,0 +1,89 @@
+import itertools
+import uuid
+from collections import deque
+from dataclasses import asdict, dataclass, fields
+from typing import Any, Callable, Collection, Deque, Iterable, Optional, Tuple, TypeAlias
+
+import multiprocess as mp
+import pandas as pd
+
+from edgedroid.models.timings import ExecutionTimeModel
+
+ModelBuilder: TypeAlias = Callable[[], Tuple[str, ExecutionTimeModel]]
+TTFSeqBuilder: TypeAlias = Callable[[], Iterable[float]]
+
+
+@dataclass(frozen=True, eq=True)
+class StepRow:
+    """
+    Immutable record describing a single step of a simulated task run.
+
+    ``simulated_task`` builds one instance per step and converts it with ``asdict``, so the fields below
+    (in this order) become the columns of the DataFrame it returns.
+    """
+    # identifier shared by all steps of one run (``simulated_task`` uses a ``uuid4`` hex string)
+    run_id: str
+    # name returned by the model builder alongside the model
+    model_name: str
+    # TTF of the preceding step; 0.0 for the first step of a run
+    prev_ttf: float
+    # zero-based position of the step within the run
+    step_index: int
+    # value returned by the model's ``get_execution_time()`` for this step
+    exec_time: float
+    # ``exec_time`` plus half of the TTF consumed for this step
+    step_time: float
+    # running sum of ``step_time`` up to and including this step
+    task_duration: float
+
+
+def simulated_task(
+        model_builder: ModelBuilder,
+        ttfs_sequence_builder: TTFSeqBuilder,
+) -> pd.DataFrame:
+    """
+    Executes a simulated run with a sequence of TTFs.
+    This function assumes an implicit 0-valued TTF before the first step.
+    The final TTF produced by the TTF iterator is not used to generate a new step, but influences the step duration of the final step.
+    """
+    run_id = uuid.uuid4().hex
+    model_name, model = model_builder()
+    ttfs = ttfs_sequence_builder()
+
+    steps: Deque[dict] = deque()
+
+    total_duration = 0
+
+    # first step has a previous TTF of 0
+    prev_ttf = 0.0
+    for step_idx, ttf in enumerate(ttfs):
+        exec_time = model.get_execution_time()
+        step_time = exec_time + (ttf / 2.0)
+        total_duration += step_time
+
+        steps.append(
+            asdict(StepRow(
+                run_id=run_id,
+                model_name=model_name,
+                prev_ttf=prev_ttf,
+                step_index=step_idx,
+                exec_time=exec_time,
+                step_time=step_time,
+                task_duration=total_duration,
+            ))
+        )
+
+        model.advance(ttf)
+        prev_ttf = ttf
+
+    return pd.DataFrame(steps)
+
+
+def _map_helper(args: Tuple[ModelBuilder, TTFSeqBuilder]) -> pd.DataFrame:
+    """
+    Unpacks a ``(model_builder, ttfs_sequence_builder)`` pair and forwards it to ``simulated_task``.
+
+    ``run_experiment`` maps this function over such pairs with ``Pool.imap_unordered``, which passes each
+    work item to the callable as a single argument.
+    """
+    return simulated_task(*args)
+
+
+def run_experiment(
+        model_builders: Collection[ModelBuilder],
+        ttf_seq_builder: TTFSeqBuilder,
+        reps_per_setup: int,
+        processes: Optional[int] = None,
+) -> pd.DataFrame:
+    tasks = itertools.chain.from_iterable(itertools.repeat(model_builders, reps_per_setup))
+    with mp.Pool(processes=processes) as pool:
+        return pd.concat(
+            pool.imap_unordered(
+                _map_helper,
+                zip(
+                    tasks,
+                    itertools.repeat(ttf_seq_builder)
+                )
+            )
+        )
diff --git a/analysis_2023/experiments.ipynb b/analysis_2023/experiments.ipynb
@@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2023-11-30T18:06:19.183672Z",
+     "start_time": "2023-11-30T18:06:14.678306Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "                              run_id                     model_name  prev_ttf  \\\n0   c5372fd77dcf40ef9feba61a03ca42f9                       Baseline       0.0   \n1   c5372fd77dcf40ef9feba61a03ca42f9                       Baseline       0.1   \n2   c5372fd77dcf40ef9feba61a03ca42f9                       Baseline       0.1   \n3   c5372fd77dcf40ef9feba61a03ca42f9                       Baseline       0.1   \n4   c5372fd77dcf40ef9feba61a03ca42f9                       Baseline       0.1   \n..                               ...                            ...       ...   \n95  46d86f9cb94f49bfa0e8a722e140cc36  Realistic Fitted (high neuro)       3.0   \n96  46d86f9cb94f49bfa0e8a722e140cc36  Realistic Fitted (high neuro)       3.0   \n97  46d86f9cb94f49bfa0e8a722e140cc36  Realistic Fitted (high neuro)       3.0   \n98  46d86f9cb94f49bfa0e8a722e140cc36  Realistic Fitted (high neuro)       3.0   \n99  46d86f9cb94f49bfa0e8a722e140cc36  Realistic Fitted (high neuro)       3.0   \n\n    step_index  exec_time  step_time  task_duration ttf_level  \n0            0   5.170087   5.220087       5.220087       low  \n1            1   6.124708   6.174708      11.394795       low  \n2            2   8.156299   8.206299      19.601094       low  \n3            3   6.243062   6.293062      25.894156       low  \n4            4   1.841317   1.891317      27.785473       low  \n..         ...        ...        ...            ...       ...  \n95          95   3.165549   4.665549     697.534427      high  \n96          96   5.973634   7.473634     705.008061      high  \n97          97   4.659607   6.159607     711.167667      high  \n98          98   5.843266   7.343266     718.510933      high  \n99          99   7.599466   9.099466     727.610399      high  \n\n[96000 rows x 8 columns]",
+      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>run_id</th>\n      <th>model_name</th>\n      <th>prev_ttf</th>\n      <th>step_index</th>\n      <th>exec_time</th>\n      <th>step_time</th>\n      <th>task_duration</th>\n      <th>ttf_level</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>c5372fd77dcf40ef9feba61a03ca42f9</td>\n      <td>Baseline</td>\n      <td>0.0</td>\n      <td>0</td>\n      <td>5.170087</td>\n      <td>5.220087</td>\n      <td>5.220087</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>c5372fd77dcf40ef9feba61a03ca42f9</td>\n      <td>Baseline</td>\n      <td>0.1</td>\n      <td>1</td>\n      <td>6.124708</td>\n      <td>6.174708</td>\n      <td>11.394795</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>c5372fd77dcf40ef9feba61a03ca42f9</td>\n      <td>Baseline</td>\n      <td>0.1</td>\n      <td>2</td>\n      <td>8.156299</td>\n      <td>8.206299</td>\n      <td>19.601094</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>c5372fd77dcf40ef9feba61a03ca42f9</td>\n      <td>Baseline</td>\n      <td>0.1</td>\n      <td>3</td>\n      <td>6.243062</td>\n      <td>6.293062</td>\n      <td>25.894156</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>c5372fd77dcf40ef9feba61a03ca42f9</td>\n      <td>Baseline</td>\n      <td>0.1</td>\n      <td>4</td>\n      <td>1.841317</td>\n      <td>1.891317</td>\n      <td>27.785473</td>\n      <td>low</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      
<td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>95</th>\n      <td>46d86f9cb94f49bfa0e8a722e140cc36</td>\n      <td>Realistic Fitted (high neuro)</td>\n      <td>3.0</td>\n      <td>95</td>\n      <td>3.165549</td>\n      <td>4.665549</td>\n      <td>697.534427</td>\n      <td>high</td>\n    </tr>\n    <tr>\n      <th>96</th>\n      <td>46d86f9cb94f49bfa0e8a722e140cc36</td>\n      <td>Realistic Fitted (high neuro)</td>\n      <td>3.0</td>\n      <td>96</td>\n      <td>5.973634</td>\n      <td>7.473634</td>\n      <td>705.008061</td>\n      <td>high</td>\n    </tr>\n    <tr>\n      <th>97</th>\n      <td>46d86f9cb94f49bfa0e8a722e140cc36</td>\n      <td>Realistic Fitted (high neuro)</td>\n      <td>3.0</td>\n      <td>97</td>\n      <td>4.659607</td>\n      <td>6.159607</td>\n      <td>711.167667</td>\n      <td>high</td>\n    </tr>\n    <tr>\n      <th>98</th>\n      <td>46d86f9cb94f49bfa0e8a722e140cc36</td>\n      <td>Realistic Fitted (high neuro)</td>\n      <td>3.0</td>\n      <td>98</td>\n      <td>5.843266</td>\n      <td>7.343266</td>\n      <td>718.510933</td>\n      <td>high</td>\n    </tr>\n    <tr>\n      <th>99</th>\n      <td>46d86f9cb94f49bfa0e8a722e140cc36</td>\n      <td>Realistic Fitted (high neuro)</td>\n      <td>3.0</td>\n      <td>99</td>\n      <td>7.599466</td>\n      <td>9.099466</td>\n      <td>727.610399</td>\n      <td>high</td>\n    </tr>\n  </tbody>\n</table>\n<p>96000 rows × 8 columns</p>\n</div>"
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import functools\n",
+    "from typing import Callable\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from experiment_util import run_experiment, ModelBuilder\n",
+    "from edgedroid.models import timings\n",
+    "import numpy as np\n",
+    "\n",
+    "window_size = 8\n",
+    "ttf_bins = 6\n",
+    "cleanup = timings.CleanupMode.TRUNCATE\n",
+    "\n",
+    "num_steps = 100\n",
+    "num_reps = 40\n",
+    "\n",
+    "def build_realistic_empirical(high_neuro: bool) -> tuple[str, timings.ExecutionTimeModel]:\n",
+    "    \"\"\"Build an EmpiricalETM with neuroticism 1.0 (high) or 0.0 (low) and return it with its display name.\"\"\"\n",
+    "    level = \"high\" if high_neuro else \"low\"\n",
+    "    etm = timings.EmpiricalETM(\n",
+    "        kernel=timings.LinearTTFWindowKernel(window_size=window_size, max_relative_weight=10),\n",
+    "        neuroticism=1.0 if high_neuro else 0.0,\n",
+    "        ttf_levels=ttf_bins,\n",
+    "        cleanup=cleanup,\n",
+    "    )\n",
+    "    return f\"Realistic ({level} neuro)\", etm\n",
+    "\n",
+    "def build_realistic_empirical_mean(high_neuro: bool) -> tuple[str, timings.ExecutionTimeModel]:\n",
+    "    \"\"\"Build an EmpiricalAggregateETM (aggregating with np.mean) with neuroticism 1.0 or 0.0, plus its display name.\"\"\"\n",
+    "    level = \"high\" if high_neuro else \"low\"\n",
+    "    etm = timings.EmpiricalAggregateETM(\n",
+    "        aggregate_fn=np.mean,\n",
+    "        kernel=timings.LinearTTFWindowKernel(window_size=window_size, max_relative_weight=10),\n",
+    "        neuroticism=1.0 if high_neuro else 0.0,\n",
+    "        ttf_levels=ttf_bins,\n",
+    "        cleanup=cleanup,\n",
+    "    )\n",
+    "    return f\"Realistic Mean ({level} neuro)\", etm\n",
+    "\n",
+    "def build_realistic_fitted(high_neuro: bool) -> tuple[str, timings.ExecutionTimeModel]:\n",
+    "    \"\"\"Build a FittedETM with neuroticism 1.0 (high) or 0.0 (low) and return it with its display name.\"\"\"\n",
+    "    level = \"high\" if high_neuro else \"low\"\n",
+    "    etm = timings.FittedETM(\n",
+    "        kernel=timings.LinearTTFWindowKernel(window_size=window_size, max_relative_weight=10),\n",
+    "        neuroticism=1.0 if high_neuro else 0.0,\n",
+    "        ttf_levels=ttf_bins,\n",
+    "        cleanup=cleanup,\n",
+    "    )\n",
+    "    return f\"Realistic Fitted ({level} neuro)\", etm\n",
+    "\n",
+    "def high_neuro(builder_fn: Callable[[bool], tuple[str, timings.ExecutionTimeModel]]) -> ModelBuilder:\n",
+    "    \"\"\"Bind ``high_neuro=True`` so that ``builder_fn`` can be called without arguments.\"\"\"\n",
+    "    bound_builder = functools.partial(builder_fn, high_neuro=True)\n",
+    "    return bound_builder\n",
+    "\n",
+    "def low_neuro(builder_fn: Callable[[bool], tuple[str, timings.ExecutionTimeModel]]) -> ModelBuilder:\n",
+    "    \"\"\"Bind ``high_neuro=False`` so that ``builder_fn`` can be called without arguments.\"\"\"\n",
+    "    return functools.partial(builder_fn, high_neuro=False)\n",
+    "\n",
+    "def build_legacy() -> tuple[str, timings.ExecutionTimeModel]:\n",
+    "    \"\"\"Build a LegacyETM on the shared window kernel settings and return it with the name \"Legacy\".\"\"\"\n",
+    "    legacy_kernel = timings.LinearTTFWindowKernel(window_size=window_size, max_relative_weight=10)\n",
+    "    return \"Legacy\", timings.LegacyETM(legacy_kernel, ttf_levels=ttf_bins)\n",
+    "\n",
+    "def build_baseline() -> tuple[str, timings.ExecutionTimeModel]:\n",
+    "    \"\"\"Build a default-constructed FirstOrderETM and return it with the name \"Baseline\".\"\"\"\n",
+    "    baseline_model = timings.FirstOrderETM()\n",
+    "    return \"Baseline\", baseline_model\n",
+    "\n",
+    "def low_ttf_seq_builder() -> list[float]:\n",
+    "    \"\"\"Return ``num_steps`` TTFs, all fixed at 0.1.\"\"\"\n",
+    "    return [0.1 for _ in range(num_steps)]\n",
+    "\n",
+    "def mid_ttf_seq_builder() -> list[float]:\n",
+    "    \"\"\"Return ``num_steps`` TTFs, all fixed at 1.6.\"\"\"\n",
+    "    return [1.6 for _ in range(num_steps)]\n",
+    "\n",
+    "def high_ttf_seq_builder() -> list[float]:\n",
+    "    \"\"\"Return ``num_steps`` TTFs, all fixed at 3.0.\"\"\"\n",
+    "    return [3.0 for _ in range(num_steps)]\n",
+    "\n",
+    "# The same model setups are evaluated at every TTF level, so they are declared exactly once.\n",
+    "model_builders = [\n",
+    "    high_neuro(build_realistic_empirical),\n",
+    "    low_neuro(build_realistic_empirical),\n",
+    "    high_neuro(build_realistic_empirical_mean),\n",
+    "    low_neuro(build_realistic_empirical_mean),\n",
+    "    high_neuro(build_realistic_fitted),\n",
+    "    low_neuro(build_realistic_fitted),\n",
+    "    build_legacy,\n",
+    "    build_baseline,\n",
+    "]\n",
+    "\n",
+    "low_ttf_results = run_experiment(\n",
+    "    model_builders=model_builders,\n",
+    "    ttf_seq_builder=low_ttf_seq_builder,\n",
+    "    reps_per_setup=num_reps,\n",
+    ")\n",
+    "low_ttf_results[\"ttf_level\"] = \"low\"\n",
+    "\n",
+    "mid_ttf_results = run_experiment(\n",
+    "    model_builders=model_builders,\n",
+    "    ttf_seq_builder=mid_ttf_seq_builder,\n",
+    "    reps_per_setup=num_reps,\n",
+    ")\n",
+    "mid_ttf_results[\"ttf_level\"] = \"mid\"\n",
+    "\n",
+    "high_ttf_results = run_experiment(\n",
+    "    model_builders=model_builders,\n",
+    "    ttf_seq_builder=high_ttf_seq_builder,\n",
+    "    reps_per_setup=num_reps,\n",
+    ")\n",
+    "high_ttf_results[\"ttf_level\"] = \"high\"\n",
+    "\n",
+    "# Combine the three TTF levels; the id-like columns are stored as categoricals before writing to disk.\n",
+    "results = pd.concat((low_ttf_results, mid_ttf_results, high_ttf_results))\n",
+    "results[\"run_id\"] = results[\"run_id\"].astype(\"category\")\n",
+    "results[\"ttf_level\"] = results[\"ttf_level\"].astype(\"category\")\n",
+    "results.to_parquet(\"./fixed_ttf_results.parquet\")\n",
+    "results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [],
+   "metadata": {
+    "collapsed": false
+   },
+   "id": "a59804a3a036e64a"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/analysis_2023/fixed_ttf_results.parquet b/analysis_2023/fixed_ttf_results.parquet
diff --git a/analysis_2023/requirements.txt b/analysis_2023/requirements.txt
@@ -7,3 +7,4 @@ seaborn
 matplotlib
 pmdarima
 tqdm
+multiprocess