In [1]:
import os
import h3
import googlemaps
from keplergl import KeplerGl
from datetime import datetime
from pyspark.sql import SparkSession, DataFrame, Row
from pyspark.sql import functions as F
from pyspark.sql import types as T
from pyspark.errors import AnalysisException
from dotenv import load_dotenv

# Load environment variables from ../.env-deploy. override=True lets values in
# that file replace variables that are already set in the process environment.
load_dotenv("../.env-deploy", override=True)

True

In [2]:
# Root directory holding the JDBC jars, the Spark warehouse and the input data.
# Set MLSGPT_DATA_HOME (in the environment or in the dotenv file loaded earlier
# in this notebook) to run on another machine; the original local path is kept
# as the fallback so existing runs behave exactly as before.
data_home = os.environ.get("MLSGPT_DATA_HOME", "/Users/kwesi/Desktop/ai/gpts/mlsgpt/data")

# JDBC driver jars (PostgreSQL, MySQL) handed to Spark as one comma-separated list.
jar_files = ["postgresql-42.7.3.jar", "mysql-connector-j-8.0.33.jar"]
jar_opts = ",".join(f"{data_home}/jars/{jar}" for jar in jar_files)
warehouse = f"{data_home}/warehouse"

# Build (or reuse) the Hive-enabled session used by the rest of the notebook.
spark: SparkSession = (
    SparkSession.builder
    .appName("MLSGPT")
    .config("spark.dynamicAllocation.enabled", "true")
    .config("spark.shuffle.service.enabled", "true")
    .config("spark.sql.warehouse.dir", warehouse)
    .config("spark.sql.session.timeZone", "UTC")
    .config("spark.jars", jar_opts)
    .enableHiveSupport()
    .getOrCreate()
)
# Only surface errors from here on; the startup WARN lines are printed before this applies.
spark.sparkContext.setLogLevel("ERROR")

24/06/04 23:44:33 WARN Utils: Your hostname, marley.local resolves to a loopback address: 127.0.0.1; using 10.0.0.135 instead (on interface en0)
24/06/04 23:44:33 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
24/06/04 23:44:34 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [3]:
def h3_index(lat:float, lon:float, resolution:int) -> str:
    """Return the H3 cell index that contains the point ``(lat, lon)``.

    Works with both h3-py 3.x (``geo_to_h3``) and h3-py 4.x, where the same
    function was renamed to ``latlng_to_cell``.

    Args:
        lat: Latitude of the point, in degrees.
        lon: Longitude of the point, in degrees.
        resolution: H3 resolution of the cell to return.

    Returns:
        The cell index produced by the installed ``h3`` package.
    """
    # Prefer the 4.x name; fall back to the 3.x name when it is absent.
    to_cell = getattr(h3, "latlng_to_cell", None)
    if to_cell is None:
        to_cell = h3.geo_to_h3
    return to_cell(lat, lon, resolution)
# Read the property Parquet dataset stored under data_home into a Spark DataFrame.
property_parquet = f"{data_home}/h3/property.parquet"
df = spark.read.parquet(property_parquet)

In [4]:
# H3 resolutions for which a map is produced further down.
h3_resolutions = [5, 6, 7, 8, 9, 10]

# The API key is read from the environment; a missing key raises KeyError here.
gmaps = googlemaps.Client(key=os.environ["GOOGLE_MAPS_API_KEY"])

# Geocode the reference address; its coordinates are used as the map centre.
geocode_query = "University of Toronto, Toronto, ON, Canada"
geocode_result = gmaps.geocode(geocode_query)
if not geocode_result:
    # An empty result would otherwise surface as an opaque IndexError below.
    raise ValueError(f"Geocoding returned no results for {geocode_query!r}")

location = geocode_result[0]["geometry"]["location"]
lat, lng = location["lat"], location["lng"]

In [11]:
# Write one Kepler.gl map per H3 resolution. Each map carries the number of
# rows of `df` per hexagon and is saved as kepler_<RR>.html.
for resolution in h3_resolutions:
    # Column names and output files use a zero-padded, two-character suffix.
    res = f"{resolution:02d}"

    # Give every row a weight of 1, then add the weights up per hexagon.
    hex_rows = df.select(
        F.col(f"H3IndexR{res}").alias("hex_id"),
        F.lit(1).alias("value"),
    )
    hex_totals = hex_rows.groupBy("hex_id").agg(F.sum("value").alias("value"))
    data = hex_totals.toPandas()

    # Initial viewport: centred on (lat, lng) at zoom level 5.
    map_state = {'latitude': lat, 'longitude': lng, 'zoom': 5}
    config = {'version': 'v1', 'config': {'mapState': map_state}}

    map_1 = KeplerGl(height=600, config=config)
    map_1.add_data(data=data, name="data_1")
    map_1.save_to_html(file_name=f"kepler_{res}.html")

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to kepler_05.html!
User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to kepler_06.html!
User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to kepler_07.html!
User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to kepler_08.html!
User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to kepler_09.html!
User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to kepler_10.html!
