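### Notebook to demo H3 aggregation

Generates one million random lon/lat points with Spark, indexes each point to an H3 cell at resolution 9 using a pandas UDF, and averages the coordinates per cell.

Install the `h3-py` dependency first: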

```
conda install -c conda-forge h3-py
```

In [1]:
from spark_esri import spark_start, spark_stop

import h3
import pandas as pd
from pyspark.sql.functions import pandas_udf, avg
from pyspark.sql.types import StringType

In [2]:
# Call spark_stop() first so re-running this cell starts from a clean slate
# (presumably it tears down a previously started session — confirm in spark_esri).
spark_stop()

# Spark settings handed to spark_start: 16G each for the driver and the executors.
config = {}
config["spark.driver.memory"] = "16G"
config["spark.executor.memory"] = "16G"

spark = spark_start(config)

In [3]:
@pandas_udf(returnType=StringType())
def geo_to_h3(lat: pd.Series, lon: pd.Series) -> pd.Series:
    """Index each (lat, lon) pair to its H3 cell at resolution 9.

    Args:
        lat: Latitudes in degrees, one per row of the batch.
        lon: Longitudes in degrees, aligned element-wise with ``lat``.

    Returns:
        A series of H3 cell indexes as strings, one per input pair.
    """
    # h3-py 4.x renamed ``geo_to_h3`` to ``latlng_to_cell`` (same argument
    # order: lat, lng, resolution). Resolve whichever name the installed
    # version exposes so the UDF works with both the 3.x and 4.x APIs.
    to_cell = getattr(h3, "latlng_to_cell", None) or h3.geo_to_h3
    return pd.Series([to_cell(l1, l2, 9) for l1, l2 in zip(lat, lon)])

In [4]:
%%time

spark\
    .range(1_000_000)\
    .selectExpr("id", "rand()*360D-180D lon", "rand()*180D-90D lat")\
    .withColumn("h3", geo_to_h3("lat", "lon"))\
    .groupby("h3")\
    .agg(avg("lon").alias("a_lon"), avg("lat").alias("a_lat"))\
    .show()

+---------------+-------------------+-------------------+
|             h3|              a_lon|              a_lat|
+---------------+-------------------+-------------------+
|89cf15c4c3bffff|  -74.6647747954433|-52.693300866272196|
|89e174d50a3ffff| 58.531446088677086| -67.82375331375933|
|895b73d1a23ffff| 171.00574881945653|  7.919585098570835|
|893ce2043dbffff|  90.66817709650394| 24.984775028380795|
|8904057ab6bffff| 146.73747622017055|  75.67234637381009|
|89c56d8308fffff|-23.063688559739205| -34.22443333277086|
|8902ce9034fffff|-103.59266598948172|   70.4082288178719|
|89f29533553ffff|-161.66214924158498| -88.38420002814281|
|89ef3518297ffff| -48.33792136010945| -86.00040764997252|
|89de4661e53ffff| -57.48443605445647| -49.91041810329152|
|89ae204d427ffff|  93.12491177532615|-23.471222013891364|
|89af850c647ffff|  87.27259031136703|-27.165742048820135|
|8993922804fffff| -113.6801979879013| -6.730946183111428|
|89e5456e52bffff| 103.35911593001634|  -69.4653749367924|
|89df8672e8fff