In [310]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Program, Task
import pandas as pd
from dataclasses import replace

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [311]:
# Per-task settings -- edit both values when adapting this notebook to a new task.
TASK_ID = "average_adjacent"
AUTHOR = "scott"

In [327]:
# Benchmark task definition. With rows ordered by "time", every row except the
# last yields one output row: its own time paired with the mean of its "x" and
# the "x" of the row that follows it. The sample input is not sorted by time,
# while the sample output is, so implementations have to order the rows.
task = Task(
    id=TASK_ID,
    name="compute adjacent averages",
    description="compute time-ordered averages of adjacent values in time-ordered data",
    category='Time Series',
    plan=[],
    sample_input={
        "data": [
            {"time": 20, "x": 14.0},
            {"time": 3, "x": 3.0},
            {"time": 1, "x": 1.0},
            {"time": 9, "x": 7.0},
            {"time": 10, "x": 9.0},
            {"time": 11, "x": 11.0},
        ]
    },
    sample_output=[
        {"time": 1, "x": 2.0},
        {"time": 3, "x": 5.0},
        {"time": 9, "x": 8.0},
        {"time": 10, "x": 10.0},
        {"time": 11, "x": 12.5}
    ]
)
# Persist the task via the project's Task.save() (destination is not visible here).
task.save()

# Template Program holding the shared task/author fields; the per-language
# cells derive their own copies from it with dataclasses.replace().
prototype = Program(task=TASK_ID, author=AUTHOR, language='')

In [328]:
# Imperative Python solution: sort by time, then walk the rows once, pairing
# each row's time with the mean of its x and the next row's x.
#
# Notes on the embedded program:
#   * It works on a sorted copy (sorted(...)) so the caller's list is not
#     reordered as a side effect.
#   * A single-row input has no adjacent pair, so the loop simply never runs
#     and the result is []. No special case is needed; only the empty input
#     needs a guard, because data[0] is read before the loop.
# Documentation is kept outside the string so the benchmarked source text stays
# limited to the program itself.
python_imp = replace(prototype,
    language='python-imperative',
    implementation='',
    source='''def average_adjacent(data):
    if len(data) == 0:
        return []
    data = sorted(data, key=lambda v: v["time"])
    current = data[0]["x"]
    time = data[0]["time"]
    result = []
    for value in data[1:]:
        previous, current = current, value["x"]
        result.append({"time": time, "x": (previous + current)/2})
        time = value["time"]
    return result''').load_plan()
# Run the program against the task, then persist it (both are project-defined).
python_imp.execute(task)
python_imp.save()

In [336]:
# Functional Python solution: sort a copy by time, then build the output with a
# single comprehension over consecutive pairs.
#
# zip(data, data[1:]) yields (row, following_row) pairs and stops at the shorter
# sequence, so the last row (which has no successor) is dropped automatically
# and empty / single-row inputs produce []. The loop variable is named `nxt`
# rather than `next` so the builtin next() is not shadowed.
python_fun = replace(prototype,
    language='python-functional',
    implementation='',
    source='''def average_adjacent(data):
    data = sorted(data, key=lambda v: v["time"])
    return [{
        "time": v["time"],
        "x": (v["x"] + nxt["x"]) / 2
    } for v, nxt in zip(data, data[1:])]''').load_plan()
# Run the program against the task, then persist it (both are project-defined).
python_fun.execute(task)
python_fun.save()

In [353]:
# Pandas solution: sort by time, build two frames offset by one row, realign
# their indexes with reset_index(drop=True), and average the x columns.
#
#   current   - every row except the last
#   following - every row except the first, i.e. each current row's successor
#
# The result is produced with DataFrame.assign, which returns a new frame with
# the x column replaced, instead of assigning through attribute access. The
# commented-out set_index experiment was removed from the embedded source.
pandas = replace(prototype,
    language='python-pandas',
    implementation='',
    source='''def average_adjacent(data):
    data = data.sort_values("time")
    following = data[1:].reset_index(drop=True)
    current = data[:-1].reset_index(drop=True)
    return current.assign(x=(current.x + following.x) / 2)''').load_plan()
# Run the program against the task, then persist it (both are project-defined).
pandas.execute(task)
pandas.save()

In [357]:
# Datalog solution. Relations in the embedded program:
#   less(x, y)          - x and y both occur as the first column of data, x < y
#   intermediate(x, y)  - some z satisfies less(x, z) and less(z, y)
#   immediate(x, y)     - less(x, y) holds and no such z exists, i.e. y is the
#                         direct successor of x
#   average_adjacent    - for each immediate pair (t1, t2), emits t1 together
#                         with (v1+v2)/2 of the matching data values
# data and average_adjacent have no .decl in this source; presumably the
# harness declares them -- TODO confirm.
datalog = replace(prototype,
    language='datalog',
    source='''
.decl less(x: number, y: number)
.decl immediate(x: number, y: number)
.decl intermediate(x: number, y: number)
less(x, y) :- data(x, _), data(y, _), x < y.
intermediate(x, y) :- less(x, z), less(z, y).
immediate(x, y) :- less(x, y), !intermediate(x, y).
average_adjacent(t1, v) :- immediate(t1, t2), data(t1, v1), data(t2, v2), v = (v1+v2)/2.''').load_plan()
# Run with debug output disabled, then persist (both are project-defined).
datalog.execute(task, debug=False)
datalog.save()

In [362]:
# SQL solution: the inner query attaches to each row the x of the next row in
# time order via LEAD(x, 1) OVER (ORDER BY time); the outer query averages the
# pair and keeps only rows whose `next` is not NULL, which drops the last row
# (it has no successor).
# NOTE(review): the outer SELECT has no ORDER BY, so SQL itself does not
# guarantee the row order of the result -- confirm the harness compares
# results order-insensitively.
sql = replace(prototype,
    language='sql',
    source='''SELECT
  time, (x + next) / 2 as x
FROM (SELECT
  time, x, LEAD(x, 1) OVER (ORDER BY time) as next
  FROM data)
WHERE next is not null''').load_plan()
# Run the program against the task, then persist it (both are project-defined).
sql.execute(task)
sql.save()

In [111]:
# Show the project's widget for one program (here the SQL one) against the task.
# NOTE(review): the saved output of this cell shows "task": "time_series"
# rather than this notebook's TASK_ID, so it looks stale -- re-run the cell
# before relying on what is displayed.
program = sql
program.widget(task)

Output()

CodeWidget(program='{"task": "time_series", "language": "sql", "plan": {}, "source": " \\n    \\nSELECT AVG(x)…