In [23]:
%load_ext autoreload
%autoreload 2
from expressiveness_benchmark.types import Plan, Task, Language, SourceRange, Program
from code_widget.example import CodeWidget
from dataclasses import replace
import json
import pandas as pd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
# Notebook parameters: edit these two values when reusing this notebook
# for a different task or author.
AUTHOR = "will"
TASK_ID = "continent_by_population"

In [25]:
# Task definition: find the continent whose countries have the highest mean
# population. With the sample rows below, North America averages
# (328 + 37) / 2 = 182.5 and Africa averages (109 + 51) / 2 = 80, so the
# expected output is "North America".
task = Task(
    id=TASK_ID,
    category="Aggregation",
    description="Continent with the highest average population",
    # Plan steps mirror what each implementation in this notebook does:
    # group by continent, average the populations, pick the maximum.
    plan=[
        Plan(id="group", description="Group countries by continent"),
        Plan(id="average", description="Average population per continent"),
        Plan(id="max", description="Continent with the highest average"),
    ],
    sample_input={
        "countries": [
            {"name": "USA", "population": 328, "continent": "North America"},
            {"name": "Canada", "population": 37, "continent": "North America"},
            {"name": "Ethiopia", "population": 109, "continent": "Africa"},
            {"name": "Kenya", "population": 51, "continent": "Africa"},
        ]
    },
    sample_output="North America",
)
# Persist the task via the benchmark library's own save(); the storage
# location is decided there and is not visible from this notebook.
task.save()

# Template program: the per-language cells below copy it with
# dataclasses.replace, filling in `language` and `source`.
prototype = Program(task=TASK_ID, author=AUTHOR, language="")

In [26]:
# SQL implementation: group rows by continent, order the groups by
# AVG(population) descending, and keep only the first row.
sql = replace(
    prototype,
    language="sql",
    source="""SELECT continent 
FROM countries
GROUP BY continent
ORDER BY AVG(population) DESC
LIMIT 1
""",
)
# execute() and save() come from the benchmark library's Program type.
sql.execute(task)
sql.save()

In [27]:
# Datalog implementation: average_population derives total / count for each
# continent; continent_by_population keeps the continent whose average equals
# the maximum over all continents.
# NOTE(review): `countries` is matched positionally as (continent, _, population);
# the column order is presumably fixed by the execution harness -- confirm.
datalog = replace(
    prototype,
    language="datalog",
    source="""
.decl average_population(continent:symbol, avg:number)

average_population(continent, avg) :-
  countries(continent, _, _),
  total = sum p : countries(continent, _, p),
  num_countries = count : countries(continent, _, _),
  avg = total / num_countries.
  
continent_by_population(continent) :- 
  countries(continent, _, _), 
  average_population(continent, max_avg),
  max_avg = max a : { countries(c, _, _), average_population(c, a) }.
""",
)
datalog.execute(task)
datalog.save()

In [8]:
# Pandas implementation: compute the mean population per continent, then
# return the index label (continent name) of the largest mean.
# Series.idxmax() yields that label directly, replacing the manual
# index[argmax()] lookup.
pandas = replace(prototype,
    language='python-pandas',
    source='''
def continent_by_population(countries):
  mean_pop = countries.groupby('continent').population.mean()
  return mean_pop.idxmax()
''')
pandas.execute(task)
pandas.save()

In [13]:
# Imperative Python implementation: one pass accumulates [total, count] per
# continent, a second pass tracks the continent with the largest average.
# NOTE(review): the snippet uses defaultdict without importing it; presumably
# the execution harness supplies it -- confirm.
imperative = replace(
    prototype,
    language="python-imperative",
    source="""
def continent_by_population(countries):
  continent_stats = defaultdict(lambda: [0, 0])
  for country in countries:
    continent = country['continent']
    continent_stats[continent][0] += country['population']
    continent_stats[continent][1] += 1
     
  max_continent = None
  max_average = None
  for continent, [total, count] in continent_stats.items():
    average = total / count
    if max_average is None or max_average < average:
      max_average = average
      max_continent = continent
      
  return max_continent
""",
)
imperative.execute(task)
imperative.save()

In [20]:
# Functional Python implementation: collect the distinct continents, pair each
# with the list of its countries' populations, reduce each list to its mean,
# and return the continent with the largest mean.
functional = replace(
    prototype,
    language="python-functional",
    source="""
def continent_by_population(countries):
  continents = set([c['continent'] for c in countries])
  populations_by_continent = [
    (continent, [c['population'] for c in countries if c['continent'] == continent])
    for continent in continents
  ]
  averages = [
    (continent, sum(pops) / len(pops))
    for continent, pops in populations_by_continent
  ]
  return max(averages, key=lambda t: t[1])[0]
""",
)
functional.execute(task)
functional.save()