In [4]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Program, Task
import pandas as pd
from dataclasses import replace

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [64]:
# Per-notebook settings: edit these two values when starting a new task.
TASK_ID = "rolling_average"  # id of the Task; every Program below refers to it
AUTHOR = "scott"  # author recorded on every Program below

In [65]:
# Task definition. The sample rows are listed as compact (time, x) and
# (start_time, average) pairs and expanded into the dict records the Task
# stores. The input is deliberately not in time order. Each expected average
# is the mean of x over the points whose time lies in
# [start_time, start_time + 7).
task = Task(
    id=TASK_ID,
    name="compute rolling average",
    description="compute average of each distinct set of data points with time-diameter 7",
    category="Time Series",
    plan=[],
    sample_input={
        "data": [
            {"time": time, "x": x}
            for time, x in [
                (20, 14.0),
                (3, 3.0),
                (1, 1.0),
                (9, 7.0),
                (10, 11.0),
                (11, 12.0),
            ]
        ]
    },
    sample_output=[
        {"start_time": start_time, "average": average}
        for start_time, average in [
            (1, 2.0),
            (3, 5.0),
            (9, 10.0),
            (10, 11.5),
            (11, 12.0),
            (20, 14.0),
        ]
    ],
)
task.save()

# Template Program shared by every solution cell; each cell copies it with
# dataclasses.replace() and fills in its own language and source.
prototype = Program(task=TASK_ID, author=AUTHOR, language="")

In [66]:
# SQL solution: self-join `data` so that each row (`start`) is paired with
# every row (`other`) whose time lies in [start.time, start.time + 7), then
# average other.x per start.time.
# The query text below is stored verbatim as the program source.
# NOTE(review): load_plan()/execute()/save() are Program methods defined
# outside this notebook; presumably they attach the plan, run the program
# against the task's sample data, and persist it. Confirm against Program.
sql = replace(prototype,
    language='sql',
    source='''SELECT
start.time as start_time,  AVG(other.x) as average
FROM data as start
JOIN data as other
ON other.time >= start.time and other.time < start.time + 7
GROUP BY start.time
  
''').load_plan()
sql.execute(task)
sql.save()

In [48]:
# Datalog solution. `bucket` holds, for every start_time present in `data`,
# the sum of the values (`total`) and the number of points (`n`) whose time
# lies in [start_time, start_time + 7). `n` is obtained by summing the
# constant 1.0 over the window, which makes it a float like `total`
# (presumably so that `total / n` is a float division; confirm).
# `rolling_average` then derives the mean from each bucket.
datalog = replace(prototype,
    language='datalog',
    source='''.decl bucket(start_time: number, total: float, n: float)
bucket(start_time, total, n) :- data(start_time, _),
  total = sum v : {data(t, v), t >= start_time, t < start_time + 7},
  n     = sum z : {data(t, v), t >= start_time, t < start_time + 7, z = 1.0}.
rolling_average(start_time, v) :-
  bucket(start_time, total, n), v = total / n.''').load_plan()
datalog.execute(task, debug=False)
datalog.save()

In [56]:
# Imperative-Python solution: sort the points by time, then for each point
# scan forward and accumulate x until a point falls outside
# [start, start + 7). The scan always includes the starting point itself, so
# `count` is at least 1 and the division is safe.
# The embedded function sorts a copy with sorted() rather than calling
# data.sort(), so the caller's list is not reordered as a side effect.
python_imp = replace(prototype,
    language='python-imperative',
    implementation='',
    source='''def rolling_average(data):
    data = sorted(data, key=lambda v: v["time"])
    result = []
    for i, value in enumerate(data):
        start = value["time"]
        total, count = 0.0, 0
        for j in range(i, len(data)):
            if data[j]["time"] >= start + 7:
                break
            total += data[j]["x"]; count += 1
        result.append({"start_time": start, "average": total / count })
    return result''').load_plan()
python_imp.execute(task)
python_imp.save()

In [67]:
# Functional-Python solution: one nested comprehension. For every point `x`
# it collects the values of all points `y` with time in
# [x.time, x.time + 7). The `for vs in [[...]]` clause iterates over a
# one-element list, which binds `vs` to that window once so it can be used
# by both sum() and len(). Rows come out in the order of `data`; no sorting
# is performed.
python_fun = replace(prototype,
    language='python-functional',
    implementation='',
    source='''def rolling_average(data):
    return [{
        "start_time": x["time"],
        "average": sum(vs) / len(vs)
      }
      for x in data
      for vs in [
        [y["x"] for y in data
          if y["time"] >= x["time"] and y["time"] < x["time"] + 7]]]''').load_plan()
python_fun.execute(task)
python_fun.save()

In [353]:
# Pandas solution. For every row, average x over the rows whose time lies in
# [time, time + 7), and return a frame with `start_time` and `average`
# columns sorted by start_time.
# This replaces a stale `average_adjacent` source that averaged neighbouring
# rows, dropped the last row, and kept the `time`/`x` column names. The
# function is now named `rolling_average`, like the other Python solutions.
# NOTE(review): assumes `data` arrives as a DataFrame with `time` and `x`
# columns, as the previous source did; confirm against the executor.
pandas = replace(prototype,
    language='python-pandas',
    implementation='',
    source='''def rolling_average(data):
    def window_mean(start):
        in_window = (data.time >= start) & (data.time < start + 7)
        return data.x[in_window].mean()
    data = data.sort_values("time")
    result = data.assign(
        start_time=data.time,
        average=data.time.apply(window_mean))
    return result[["start_time", "average"]].reset_index(drop=True)''').load_plan()
pandas.execute(task)
pandas.save()

In [111]:
# Choose which solution to inspect (any of the Program objects built in the
# cells above) and display its widget for this task as the cell output.
program = sql
program.widget(task)

Output()

CodeWidget(program='{"task": "time_series", "language": "sql", "plan": {}, "source": " \\n    \\nSELECT AVG(x)…