# Visualising Overture data and breaking it down into sectors

In [1]:
import numpy as np
import json
# import shapely
from scalenav.plotting import cmap
import scalenav.oop as snoo
import matplotlib.pyplot as plt

import ibis as ib
from ibis import _
import ibis.selectors as s

# Interactive mode: expressions are evaluated when they are displayed, so the
# bare `.count()` / `.head()` cells below show results directly.
ib.options.interactive = True
# NOTE(review): presumably turns on graph rendering of expressions in the
# notebook repr - confirm against the ibis options documentation.
ib.options.graphviz_repr = True

import pydeck as pdk


## Params

In [2]:
# NOTE(review): names used later but never defined in this notebook
# (`overture_db_filename`, `agg_res`, `gdp_pal`) presumably come from this
# star import - confirm against `parameters.py`.
from parameters import *

# Named bounding boxes, keyed by area name.
# A context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
with open("data/bboxs.json", "r", encoding="utf-8") as bbox_file:
    bboxs = json.load(bbox_file)

In [None]:
# Reference documentation for the DuckDB extensions used in this notebook:
#   spatial: https://duckdb.org/docs/extensions/spatial/functions
#   h3:      https://github.com/isaacbrodsky/h3-duckdb?tab=readme-ov-file

# Open the Overture database, one directory above the notebook.
conn = snoo.sn_connect(
    "../" + overture_db_filename,
    interactive=True,
    memory_limit="100GB",
    threads=8,
)

## Guide on working with Overture
Smaller files are downloaded here

https://docs.overturemaps.org/guides/

https://github.com/OvertureMaps/data?tab=readme-ov-file 

## Selecting an area of interest

In [None]:
# Key of the area of interest in `bboxs`; also used in the output map filename.
bbox_name = 'global'
# NOTE(review): appears to be [xmin, ymin, xmax, ymax], judging by how its
# elements are compared with x / y further down - confirm.
bbox = bboxs[bbox_name]

In [None]:
# Initial viewport: centred on the bounding box.
# Odd-indexed bbox entries are averaged for the latitude, even-indexed ones
# for the longitude.
centre_lat = np.mean(bbox[1::2])
centre_lon = np.mean(bbox[0::2])
view_state = pdk.ViewState(
    latitude=centre_lat,
    longitude=centre_lon,
    zoom=3,
    bearing=0,
    pitch=30,
)

## Local places and landuse file

In [None]:
# Show which tables are available through the connection.
conn.list_tables()

In [None]:
# Alternative source, kept for reference: read the parquet file directly.
# overture_data = conn.read_parquet("../" + overture_places_landuses_filename)
# Work from the `overture_pois` table of the connected database.
overture_data = conn.table("overture_pois")


In [None]:
# Column names of the places table.
print(overture_data.columns)

In [None]:
# Number of rows per value of `section`, executed into a local result
# (columns `section` and `section_count`, as used by the plot below).
overture_sections = overture_data.section.value_counts().execute()

In [None]:
# Bar chart of the row count per ISIC section, saved as a PNG and displayed.
# The rows returned by `value_counts()` come in no guaranteed order, so sort
# by section first to get a stable, readable x axis across runs.
overture_sections.sort_values("section").plot.bar(
    x="section",
    y="section_count",
    title="ISIC Coverage",
    xlabel="ISIC section",
    ylabel="Count",
    legend=False,
    rot=0,
)
# Save before `plt.show()`, while the figure is still the current one.
plt.savefig("isic_count_overture.png")
plt.show()

In [None]:
# The ten most frequent `sec_cat` values, most frequent first.
# `value_counts()` gives no ordering guarantee, so `head(10)` alone would
# return ten arbitrary categories. Sort by the count column (ibis names it
# `<column>_count`) before taking the head.
overture_data_top_cat = (
    overture_data.sec_cat.value_counts()
    .order_by(_.sec_cat_count.desc())
    .head(10)
    .execute()
    .sec_cat.to_list()
)

In [None]:
# Display the list of selected categories.
overture_data_top_cat

In [None]:
# Total number of rows before any filtering.
overture_data.count()

In [None]:
# Number of rows left after keeping one row per `h3_id`,
# i.e. how many distinct H3 ids the table contains.
overture_data.distinct(on="h3_id").count()

## Economic classification of Overture places and landuses

In [None]:
# Preview the first rows as a local result.
overture_data.head().execute()

In [None]:
# Distinct `sec_cat` values among the rows that have a `dose` value.
has_dose = _.dose.notnull()
overture_data.filter(has_dose).select("sec_cat").distinct()

In [None]:
# Keep only the rows that have a `dose` value.
overture_data = overture_data.drop_null(subset=["dose"],how="any")

In [None]:
# Row count after dropping rows with a null `dose`.
overture_data.count()

In [None]:
# Keep only the rows whose `match_score` is strictly above the threshold.
min_match_score = .4
overture_data = overture_data.filter(_.match_score > min_match_score)

In [None]:
# Row count after the `match_score` filter.
overture_data.count()

### ISIC categories mapped

In [None]:
# Narrow the table to the identifier, the two classification columns and the
# x / y columns.
places_isic = overture_data.select("id", "dose", "sec_cat", "x", "y")

In [None]:
# Preview the selected columns.
places_isic.head()

## Plotting DOSE classifications

In [None]:
# Drop rows with a null `dose`.
# NOTE(review): `overture_data` already had null `dose` rows dropped before
# `places_isic` was derived from it, so this looks like a no-op safeguard.
places_isic = places_isic.drop_null(subset="dose")

In [None]:
# Preview again after the null drop.
places_isic.head()

In [None]:
# Number of rows per `dose` value.
places_isic.dose.value_counts()

In [None]:
# Rows classified as manufacturing, written with `.filter` like the other
# filters in this notebook.
places_isic.filter(_.dose == "manufacturing")

## Adding econ data

In [None]:
# (Re)create the `dose_wdi` table in the database from the parquet file.
# CREATE OR REPLACE makes this cell safe to re-run.
conn.raw_sql(
"""CREATE OR REPLACE TABLE dose_wdi as (
    SELECT * FROM '../datasets/local_data/dose-wdi/0_4/dose_wdi_geo.parquet');
""")

In [None]:
# Table expression for the `dose_wdi` table created above.
dose_wdi = conn.table("dose_wdi")

In [None]:
# Columns kept from the regional table: identifiers, the 2015 USD value
# columns, the geometry and the x / y columns.
dose_columns = (
    "gid_0",
    "country",
    "gid_1",
    "grp_usd_2015",
    "services_usd_2015",
    "manufacturing_usd_2015",
    "agriculture_usd_2015",
    "geometry",
    "x",
    "y",
)
# Strict inequalities: rows whose x / y lie exactly on the box edge are excluded.
inside_bbox = (
    _.x > bbox[0],
    _.x < bbox[2],
    _.y > bbox[1],
    _.y < bbox[3],
)
dose_wdi = dose_wdi.select(*dose_columns).filter(*inside_bbox)

In [None]:
# Number of regional rows left after the bounding-box filter, plus a preview.
print(dose_wdi.count())
dose_wdi.head()

## Spatial join v2

This approach first groups features spatially by H3 cell, then pivots the values to get a table with one row per H3 index and one column per sector holding the number of features in that cell.

In [None]:
# Preview the filtered places table before projecting it.
overture_data.head()

In [None]:
# Project the table with resolution `agg_res`.
# NOTE(review): `agg_res` is not defined in this notebook, presumably it comes
# from `parameters`; the `h3_id` column used below presumably reflects this
# resolution afterwards - confirm in `scalenav.oop.sn_project`.
overture_data = snoo.sn_project(overture_data,res=agg_res)

In [None]:
# overture_data.select(s.of_type(str) & ~s.matches("id"))

## Transforming the data

In [None]:
# One row per `h3_id`, one column per `dose` value. Each cell holds the count
# of `id`s for that combination; combinations with no rows are filled with 0.
dose_by_cell = overture_data.select("id", "h3_id", "dose")
overture_h3_dens = dose_by_cell.pivot_wider(
    id_cols="h3_id",
    names_from="dose",
    values_from="id",
    values_agg="count",
    values_fill=0,
)

In [None]:
# Number of distinct `h3_id` values in the pivoted table.
overture_h3_dens.distinct(on="h3_id").select("h3_id").count()

In [None]:
# NOTE(review): presumably adds a centroid geometry per H3 cell (the join
# below reads a `geom` column) - confirm in `scalenav.oop.sn_add_centr`.
overture_h3_geom = snoo.sn_add_centr(overture_h3_dens)
overture_h3_geom.head()

In [None]:
# Materialise the per-cell table in the database under the name
# `overture_h3_geom` and rebind the variable to the stored table.
# `overwrite=True` replaces a table left over from an earlier run, so the cell
# can be re-executed without a manual `conn.drop_table("overture_h3_geom")`.
overture_h3_geom = conn.create_table(
    name="overture_h3_geom",
    obj=overture_h3_geom,
    overwrite=True,
)

In [None]:
# Check that `overture_h3_geom` now shows up among the tables.
conn.list_tables()

In [None]:
# Attach the regional identifiers and 2015 USD columns to every H3 row.
# A row matches the region whose geometry contains its `geom`; the LEFT OUTER
# JOIN keeps rows that fall inside no region, with nulls in the joined columns.
# NOTE(review): the SQL refers to the database table `dose_wdi` by name, so the
# column selection / bounding-box filter applied to the Python expression
# `dose_wdi` is presumably not in effect here - confirm this is intended.
region_join_sql = """
SELECT t_geom.* EXCLUDE geom,
        dose_wdi.gid_1,
        dose_wdi.gid_0,
        dose_wdi.grp_usd_2015,
        dose_wdi.services_usd_2015,
        dose_wdi.manufacturing_usd_2015,
        dose_wdi.agriculture_usd_2015
    FROM t_geom 
    LEFT OUTER JOIN dose_wdi 
    ON ST_CONTAINS(dose_wdi.geometry,t_geom.geom::GEOMETRY);
"""
h3_cells = overture_h3_geom.alias("t_geom")
# Cache the join result so the cells below reuse it.
h3_gid_dens = h3_cells.sql(region_join_sql).cache()

In [None]:
# Row count of the joined table, plus a preview.
print(h3_gid_dens.count())
h3_gid_dens.head()

In [None]:
# List the tables again after the cached join.
conn.list_tables()

In [None]:
# Distribute each region's sector value over its H3 cells using window sums.

# Step 1: per sector, each cell's count divided by the total count of its
# `gid_1` group.
cell_shares = h3_gid_dens.mutate(
    services_dens=(_.services / _.services.sum().over(group_by=_.gid_1)),
    manufacturing_dens=(
        _.manufacturing / _.manufacturing.sum().over(group_by=_.gid_1)
    ),
)

# Step 2: multiply the share by the regional 2015 USD value and round.
cell_gdp = cell_shares.mutate(
    services_h3_gdp=(_.services_dens * _.services_usd_2015).round(),
    manufacturing_h3_gdp=(
        _.manufacturing_dens * _.manufacturing_usd_2015
    ).round(),
)

# Step 3: cells without a computable value get 0 instead of null.
grid_dens = cell_gdp.fill_null(
    {
        "services_h3_gdp": 0,
        "manufacturing_h3_gdp": 0,
    }
)

In [None]:
# Preview the downscaled per-cell values.
grid_dens.head()

In [None]:
# ib.to_sql(grid_dens)

In [None]:
# Drop every column whose name matches the pattern (regional `_usd_` values,
# intermediate `_dens` shares, anything `agriculture`) and execute the rest
# into a local dataframe.
dropped_columns = "(_usd_)|(_dens)|(agriculture)"
h3_grid = grid_dens.select(~s.matches(dropped_columns)).execute()

In [None]:
# Shape of the executed dataframe, plus a preview.
print(h3_grid.shape)
h3_grid.head()

In [None]:
# One colour column per sector, built from that sector's per-cell value with
# a log scale and the `gdp_pal` palette.
for sector in ("services", "manufacturing"):
    h3_grid[f"{sector}_cols"] = cmap(
        input=h3_grid[f"{sector}_h3_gdp"],
        log=True,
        palette=gdp_pal,
    )

### Formatted strings

In [None]:
# Values formatted with thousands separators, for the map tooltip.
thousands = "{:,}".format
h3_grid["serv_format"] = h3_grid.services_h3_gdp.map(thousands)
h3_grid["manuf_format"] = h3_grid.manufacturing_h3_gdp.map(thousands)

In [None]:
# h3_grid

## Map downscaled

In [None]:
# Hexagon layer for the services sector; cells with a zero value are left out.
services_cells = h3_grid[h3_grid.services_h3_gdp != 0]
layer = pdk.Layer(
    "H3HexagonLayer",
    services_cells,
    pickable=True,
    stroked=True,
    filled=True,
    opacity=.6,
    extruded=False,
    get_hexagon="h3_id",
    get_fill_color="services_cols",
    get_line_color=[255, 255, 255, 0],
    line_width_min_pixels=1,
    line_width_max_pixels=2,
)

# The tooltip shows both formatted sector values for the hovered cell.
tooltip = {"html": "<h3> Estimated GDP </h3> <p> Services sector : {serv_format} </p> <p> Manufacturing sector : {manuf_format} </p>"}
output_path = f"../deck_maps/{bbox_name}_h3_services_hexagon_layer.html"

# Build the deck and write it to a standalone HTML file.
r = pdk.Deck(layers=[layer], initial_view_state=view_state, tooltip=tooltip)
r.to_html(output_path, iframe_height=700)

In [None]:
# Define a layer to display on a map
# layer = pdk.Layer(
#     "H3HexagonLayer",
#     h3_grid[h3_grid.manufacturing_h3_gdp!=0],
#     pickable=True,
#     stroked=True,
#     filled=True,
#     opacity = .6,
#     extruded=False,
#     get_hexagon="h3_id",
#     get_fill_color= "manufacturing_cols",
#     get_line_color=[255, 255, 255, 0],
#     line_width_min_pixels=1,
#     line_width_max_pixels=2,
# )

# # Render
# r = pdk.Deck(layers=[layer], initial_view_state=view_state, tooltip={"html": "<h3> Estimated GDP </h3> <p> Services sector : {serv_format} </p> <p> Manufacturing sector : {manuf_format} </p>"})
# r.to_html(f"../deck_maps/{bbox_name}_h3_manufacturing_hexagon_layer.html", iframe_height=700)