In [72]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Program, Task
import pandas as pd
from dataclasses import replace

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [73]:
# CHANGE ME!
# Identifiers shared by the rest of the notebook: TASK_ID is used as the
# task's id and as each program's task reference; AUTHOR is recorded on
# every program.
TASK_ID = "rolling_average"
AUTHOR = "scott"

In [192]:
# Task definition: for each data point, the average of the points whose time
# lies in the 7 units ending at that point. The sample output below matches
# the half-open window (time - 7, time].
# NOTE(review): the third plan step uses id "filter" although its description
# is about averaging -- confirm the id is intentional before renaming it.
task = Task(
    id=TASK_ID,
    name="rolling average",
    description="for each data point, compute average of data points within last 7 days",
    category='Time Series',
    plan=[
        {"id": step_id, "description": step_text}
        for step_id, step_text in [
            ("windows", "enumerate windows"),
            ("group", "map value to window sets containing it"),
            ("filter", "average each window set"),
        ]
    ],
    # Input rows are deliberately not sorted by time.
    sample_input={
        "data": [
            {"time": time, "x": x}
            for time, x in [
                (20, 14.5),
                (3, 3.0),
                (1, 1.0),
                (9, 7.0),
                (10, 11.0),
                (11, 12.0),
            ]
        ]
    },
    # Expected rows, listed in increasing end_time.
    sample_output=[
        {"end_time": end_time, "average": average}
        for end_time, average in [
            (1, 1.0),
            (3, 2.0),
            (9, 5.0),
            (10, 9.0),
            (11, 10.0),
            (20, 14.5),
        ]
    ],
)
task.save()

# Template program: each language-specific program below is derived from it
# with dataclasses.replace, overriding language and source.
prototype = Program(task=TASK_ID, author=AUTHOR, language='')

In [193]:
# TODO
# pandas solution. The embedded program copies the frame, rewrites the time
# column via pd.to_datetime(time * 10**9), sorts and indexes by time, and takes
# the mean over a '7s' rolling window; end_time comes from the untouched copy.
# Keep the source text byte-identical: it is stored as data on the Program.
pandas_source = '''def rolling_average(data):
    d = data.copy()
    
    data.time = pd.to_datetime(data.time * 10**9)
    data = (data.sort_values('time').set_index('time')
                .rolling(window='7s').mean())

    return pd.DataFrame.from_dict({
        'end_time': d.sort_values('time').reset_index().time,
        'average': data.reset_index().x})'''

pandas = replace(
    prototype,
    language='python-pandas',
    implementation='',
    source=pandas_source,
).load_plan()
pandas.execute(task)
pandas.save()

In [225]:
# SQL solution. The query self-joins `data`, pairing each row (`end`) with
# every row (`other`) whose time lies in (end.time - 7, end.time], then
# averages other.x per end.time.
# Keep the source text byte-identical: it is stored as data on the Program.
sql_source = '''SELECT
end.time as end_time,  AVG(other.x) as average
FROM data as end
JOIN data as other
ON other.time <= end.time and other.time > end.time - 7
GROUP BY end.time'''

sql = replace(
    prototype,
    language='sql',
    source=sql_source,
).load_plan()
sql.execute(task)
sql.save()

In [212]:
# Datalog solution. `window` relates each end_time to the times t with
# end_time - 7 < t <= end_time; `bucket` sums the values and counts the rows of
# each window; `rolling_average` divides the total by the count.
# Keep the source text byte-identical: it is stored as data on the Program.
datalog_source = '''.decl bucket(end_time: number, total: float, n: float)
.decl window(end_time: number, time: number)
window(end_time, t) :- data(end_time, _), data(t, _), t <= end_time, t > end_time - 7.
bucket(end_time, total, n) :- data(end_time, _),
  total = sum v : {data(t, v), window(end_time, t)},
  n     = sum z : {data(t, _), window(end_time, t), z = 1.0}.
rolling_average(end_time, v) :-
  bucket(end_time, total, n), v = total / n.'''

datalog = replace(
    prototype,
    language='datalog',
    source=datalog_source,
).load_plan()
datalog.execute(task, debug=False)
datalog.save()

In [226]:
# Imperative Python solution. The embedded program sorts `data` in place by
# time, then for each row walks backwards until it reaches a row at or before
# (end - 7), accumulating a running total and count.
# Keep the source text byte-identical: it is stored as data on the Program.
python_imp_source = '''def rolling_average(data):
    data.sort(key=lambda v: v["time"])
    result = []
    for i, value in enumerate(data):
        end = value["time"]
        total, count = 0.0, 0
        for j in range(i, -1, -1):
            if data[j]["time"] <= end - 7:
                break
            total += data[j]["x"]; count += 1
        result.append({"end_time": end, "average": total / count })
    return result'''

python_imp = replace(
    prototype,
    language='python-imperative',
    implementation='',
    source=python_imp_source,
).load_plan()
python_imp.execute(task)
python_imp.save()

In [223]:
# Functional Python solution. For every row x, the embedded comprehension
# collects the values of all rows with time in (x.time - 7, x.time] and emits
# their mean; rows come out in the same order as the input.
# Keep the source text byte-identical: it is stored as data on the Program.
python_fun_source = '''def rolling_average(data):
    return [{
        "end_time": x["time"],
        "average": sum(vs) / len(vs)
      }
      for x in data
      for vs in [
        [y["x"] for y in data
          if y["time"] <= x["time"] and y["time"] > x["time"] - 7]]]'''

python_fun = replace(
    prototype,
    language='python-functional',
    implementation='',
    source=python_fun_source,
).load_plan()
python_fun.execute(task)
python_fun.save()

In [111]:
# Pick the program to inspect (swap `sql` for any program defined above) and
# show its widget for the current task as this cell's output.
# NOTE(review): presumably the widget is used to edit plan annotations -- confirm.
program = sql
program.widget(task)

Output()

CodeWidget(program='{"task": "time_series", "language": "sql", "plan": {}, "source": " \\n    \\nSELECT AVG(x)…