In [184]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Program, Task
import pandas as pd
from dataclasses import replace

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [185]:
# Per-task settings -- edit these two values when adapting the notebook to another task.
TASK_ID = "average_adjacent"  # passed as Task(id=...) and Program(task=...) further down
AUTHOR = "scott"  # passed as Program(author=...) further down

In [250]:
# The sample rows are listed out of time order. Sorted by "time" the x values
# are 1, 3, 7, 9, 11, 14, and the means of consecutive pairs give sample_output.
task = Task(
    id=TASK_ID,
    description="compute averages of adjacent values in time-ordering",
    category='Time Series',
    plan=[],
    sample_input={
        "data": [
            {"time": t, "x": x}
            for t, x in [
                (20, 14.0),
                (3, 3.0),
                (1, 1.0),
                (9, 7.0),
                (10, 9.0),
                (11, 11.0),
            ]
        ]
    },
    sample_output=[2.0, 5.0, 8.0, 10.0, 12.5],
)
task.save()

# Shared template: each implementation cell copies it with dataclasses.replace
# and supplies its own language and source.
prototype = Program(task=TASK_ID, author=AUTHOR, language='')

In [251]:
# Imperative Python implementation.
# A single row has no adjacent pair, so the stored program now returns [] for a
# one-row input (the loop over data[1:] simply does not run). That is what the
# functional, pandas, Datalog and SQL implementations in this notebook yield
# for the same input; the previous special case returned [x] instead.
# Only the empty input still needs a guard, because data[0] is read before the loop.
python_imp = replace(prototype,
    language='python-imperative',
    implementation='',
    source='''def average_adjacent(data):
    if len(data) == 0:
        return []
    data = sorted(data, key=lambda v: v["time"])
    a, b = None, data[0]["x"]
    result = []
    for value in data[1:]:
        a = b; b = value["x"]
        result.append((a+b)/2)
    return result
''').load_plan()
python_imp.execute(task)
python_imp.save()

In [252]:
# Functional Python implementation: sort by time, then average each pair of
# neighbouring rows obtained by zipping the list against itself shifted by one.
# zip is a builtin, so the stored source carries no import line (the earlier
# commented-out "from itertools import zip" named a function itertools does not have).
python_fun = replace(prototype,
    language='python-functional',
    implementation='',
    source='''def average_adjacent(data):
    data = sorted(data, key=lambda v: v["time"])
    return [(a["x"]+b["x"])/2 for a, b in zip(data[1:], data[:-1])]
''').load_plan()
python_fun.execute(task)
python_fun.save()

In [253]:
# pandas implementation: sort the frame by "time", take the frame without its
# first row (d1) and without its last row (d2), renumber both from 0 so they
# align row-by-row, and average their x columns.
# The stored source slices with .iloc so the shift-by-one is always positional.
# It no longer moves "time" into the index first: with a non-integer index a
# plain [1:] slice can be interpreted by label rather than by position
# (NOTE(review): depends on the pandas version -- confirm if that matters here).
# The unused "import math" line was dropped from the stored source.
pandas = replace(prototype,
    language='python-pandas',
    implementation='',
    source='''import pandas as pd
def average_adjacent(data):
    data = data.sort_values("time")
    d1 = data.iloc[1:].reset_index(drop=True)
    d2 = data.iloc[:-1].reset_index(drop=True)
    result = (d1.x + d2.x)/2
    return result.tolist()
''').load_plan()
pandas.execute(task)
pandas.save()

In [255]:
# Datalog implementation. The rules in the stored source read as:
#   less(x, y)          -- x and y are both first-column values of data, and x < y
#   intermediate(x, y)  -- some z satisfies less(x, z) and less(z, y)
#   immediate(x, y)     -- less(x, y) holds with no such z, i.e. x and y are neighbours
#   average_adjacent(v) -- v is the mean of the two data values at a neighbouring pair
datalog = replace(
    prototype,
    language='datalog',
    source='''
.decl less(x: number, y: number)
.decl immediate(x: number, y: number)
.decl intermediate(x: number, y: number)
less(x, y) :- data(x, _), data(y, _), x < y.
intermediate(x, y) :- less(x, z), less(z, y).
immediate(x, y) :- less(x, y), !intermediate(x, y).
average_adjacent(v) :- immediate(x, y), data(x, v1), data(y, v2), v = (v1+v2)/2.
''',
).load_plan()
# debug=True is presumably what prints the temp path and full program in this
# cell's output -- TODO confirm against Program.execute.
datalog.execute(task, debug=True)
datalog.save()

Path: /tmp/tmpw_oajhcz
.decl data(time:number, x:float)
.input data
.decl average_adjacent(x0:float)
.output average_adjacent

.decl less(x: number, y: number)
.decl immediate(x: number, y: number)
.decl intermediate(x: number, y: number)
less(x, y) :- data(x, _), data(y, _), x < y.
intermediate(x, y) :- less(x, z), less(z, y).
immediate(x, y) :- less(x, y), !intermediate(x, y).
average_adjacent(v) :- immediate(x, y), data(x, v1), data(y, v2), v = (v1+v2)/2.



In [306]:
# SQL implementation: the inner query pairs each row's x with the x of the row
# one step earlier in time order (LAG(x, 1) OVER (ORDER BY time)). The outer
# query averages the pair and drops rows whose `previous` is NULL.
sql = replace(
    prototype,
    language='sql',
    source='''SELECT
  (x + previous) / 2
FROM (SELECT
  x, LAG(x, 1) OVER (ORDER BY time) as previous
  FROM data)
WHERE previous is not null
''',
).load_plan()
sql.execute(task)
sql.save()

In [111]:
# Pick one implementation and call its widget() for the task; rebind `program`
# to another Program object defined in this notebook to inspect a different one.
program = sql
program.widget(task)

Output()

CodeWidget(program='{"task": "time_series", "language": "sql", "plan": {}, "source": " \\n    \\nSELECT AVG(x)…