In [None]:
import geopandas as gpd
import numpy as np
import pandas as pd

# import seaborn as sns
# import xgboost as xgb
from h3 import int_to_str, str_to_int
from h3ronpy import grid_disk_aggregate_k
from pytorch_lightning import seed_everything
from sklearn.cluster import KMeans

# from palettable.scientific.diverging import Roma_20
# from palettable.scientific.sequential import Hawaii_6, Hawaii_16
from srai.embedders import CountEmbedder
from srai.h3 import h3_to_shapely_geometry
from srai.joiners import IntersectionJoiner
from srai.loaders import OSMOnlineLoader
from srai.plotting import plot_numeric_data
from srai.regionalizers import H3Regionalizer, geocode_to_region_gdf

In [None]:
# TODO: finish this notebook, following https://github.com/kraina-ai/srai-tutorial/blob/osm-deep-dive/tutorial/06_use_osm_data_in_ml_model.ipynb

In [None]:
# Fixed seed: passed to pytorch_lightning's seed_everything here and reused as
# the KMeans random_state later in the notebook.
SEED = 71
seed_everything(SEED)

In [None]:
# Cities analysed in this notebook; the names are geocoded into a regions GeoDataFrame.
cities_names = ["Madrid", "Seville", "Valencia"]
regions = geocode_to_region_gdf(cities_names)
# Replace the index with the plain city names so rows can be selected by name later.
# NOTE(review): assumes the geocoder returns one row per query, in query order — confirm.
regions.index = cities_names

In [None]:
# Display the geocoded city regions.
regions

In [None]:
# Load OSM features tagged amenity=bicycle_rental for the city regions.
bicycle_stations = OSMOnlineLoader().load(area=regions, tags={"amenity": "bicycle_rental"})
bicycle_stations

In [None]:
# Intersect the stations with the city regions, keeping the geometries.
bicycle_stations_in_city = IntersectionJoiner().transform(
    regions,
    bicycle_stations,
    return_geom=True,
)

# One entry per city: the joined rows selected by that city's index label.
bicycle_stations_per_city = {
    city_name: bicycle_stations_in_city.loc[city_name] for city_name in cities_names
}

In [None]:
# Interactive map of the Madrid stations on a CartoDB Positron basemap.
bicycle_stations_per_city["Madrid"].explore(tiles="CartoDB Positron")

In [None]:
import pyarrow as pa

# H3 resolution used for the station cells.
H3_RESOLUTION = 11
# These two ring counts are summed to form the grid-disk radius used below.
H3_NEIGHBOURS = 5
H3_PREDICTION_BUFFER = 10

h3_regionalizer = H3Regionalizer(resolution=H3_RESOLUTION)
h3_regions_gdfs = []
for city_name, bicycle_stations_data in bicycle_stations_per_city.items():
    # H3 cells produced by the regionalizer for this city's stations.
    # NOTE: later cells read `city_h3_regions` after the loop (value of the last city).
    city_h3_regions = h3_regionalizer.transform(bicycle_stations_data)

    # Expand the station cells into grid disks, aggregating the ring number `k`
    # with "min"; the resulting `k` column is stored as `distance_to_station`.
    expanded_city_h3_regions = (
        pa.table(
            grid_disk_aggregate_k(
                city_h3_regions.index.map(str_to_int),
                H3_NEIGHBOURS + H3_PREDICTION_BUFFER,
                "min",
            )
        )
        .to_pandas()
        .rename(columns={"k": "distance_to_station", "cell": "region_id"})
    )
    # Convert the integer cell ids back to H3 strings and use them as the index.
    expanded_city_h3_regions["region_id"] = expanded_city_h3_regions["region_id"].map(int_to_str)
    expanded_city_h3_regions = expanded_city_h3_regions.set_index("region_id")
    expanded_city_h3_regions["city"] = city_name
    # Attach the cell polygons so the frame becomes a GeoDataFrame in EPSG:4326.
    expanded_city_h3_regions = gpd.GeoDataFrame(
        expanded_city_h3_regions,
        geometry=h3_to_shapely_geometry(expanded_city_h3_regions.index),
        crs=4326,
    )
    h3_regions_gdfs.append(expanded_city_h3_regions)

# Stack the per-city frames; pandas is called directly (it is imported at the top
# of the notebook) instead of being reached through geopandas' namespace.
h3_regions = pd.concat(h3_regions_gdfs)

# Min-max scale the distances to [0, 1]; only used by the disabled visualisation below.
min_bound = h3_regions["distance_to_station"].min()
max_bound = h3_regions["distance_to_station"].max()
normalized_distance_to_station = (h3_regions["distance_to_station"] - min_bound) / (
    max_bound - min_bound
)

# viz(
#     [h3_regions, bike_data],
#     polygon_kwargs=dict(
#         opacity=0.7,
#         stroked=False,
#         get_fill_color=apply_continuous_cmap(
#             normalized_distance_to_station,
#             Hawaii_16,
#         ),
#     ),
#     scatterplot_kwargs=dict(radius_min_pixels=2, get_fill_color=[0, 0, 0, 255]),
#     map_kwargs=dict(basemap_style=CartoBasemap.Voyager),
# )

In [None]:
# Display the combined H3 regions of all cities.
h3_regions

In [None]:
# Scratch check: raw output of grid_disk_aggregate_k for a single city.
# `city_h3_regions` is the variable left behind by the last iteration of the city loop.
grid_disk_aggregate_k(
    city_h3_regions.index.map(str_to_int).values,
    H3_NEIGHBOURS + H3_PREDICTION_BUFFER,
    "min",
)

In [None]:
import h3.api.numpy_int as h3
import pyarrow as pa
from h3ronpy import DEFAULT_CELL_COLUMN_NAME
from h3ronpy.pandas.vector import cells_dataframe_to_geodataframe

# Two sample H3 cells at resolution 7, stored as unsigned 64-bit integers.
# `latlng_to_cell` is the h3 v4 name of the former `geo_to_h3`. The notebook already
# depends on v4 (it imports `str_to_int` / `int_to_str`), where `geo_to_h3` is gone.
cells = np.array(
    [
        h3.latlng_to_cell(5.2, -5.2, 7),
        h3.latlng_to_cell(5.3, -5.1, 7),
    ],
    dtype=np.uint64,
)

In [None]:
# Scratch check: same aggregation with a hard-coded radius of 9, shown as a pandas DataFrame.
# `city_h3_regions` is the variable left behind by the last iteration of the city loop.
pa.table(grid_disk_aggregate_k(city_h3_regions.index.map(str_to_int).values, 9, "min")).to_pandas()

In [None]:
from h3ronpy import grid_disk

# Expand the sample cells with a grid disk of radius 9 (flattened), then plot them.
disk_cells = pa.array(grid_disk(cells, 9, flatten=True)).to_pandas()
disk_cells_df = pd.DataFrame({DEFAULT_CELL_COLUMN_NAME: disk_cells})
cells_dataframe_to_geodataframe(disk_cells_df).plot()

In [None]:
# we are limiting the scope for this example

# loader = OvertureMapsLoader(include_all_possible_columns=False, release="2024-12-18.0")

# features_gdf = loader.load(area_gdf)

# features_gdf

In [None]:
# regionalizer = H3Regionalizer(resolution=9)
# regions_gdf = regionalizer.transform(area_gdf)
# plot_regions(regions_gdf, tiles_style="CartoDB positron")

In [None]:
# Join regions with features using the intersection joiner.
# NOTE(review): `regions_gdf` and `features_gdf` are not defined anywhere in the visible
# notebook (the cells that would create them are commented out), so this cell raises
# NameError on a fresh kernel. TODO: define both before running this cell.
joiner = IntersectionJoiner()
joint_gdf = joiner.transform(regions_gdf, features_gdf)
joint_gdf

In [None]:
# Build count embeddings (sub-category counting disabled) from regions, features and their join.
# NOTE(review): `regions_gdf` and `features_gdf` are not defined anywhere in the visible
# notebook (the cells that would create them are commented out) — TODO: define them first.
embeddings = CountEmbedder(count_subcategories=False).transform(
    regions_gdf, features_gdf, joint_gdf
)
embeddings

In [None]:
# Cluster the embeddings into 5 groups with a seeded KMeans.
# The label column written below is excluded from the fit, so re-running this cell
# does not feed the previous run's labels back in as a feature.
feature_columns = embeddings.columns.drop("cluster", errors="ignore")

clusterizer = KMeans(n_clusters=5, random_state=SEED)
clusterizer.fit(embeddings[feature_columns])

# Store the labels next to the features; the plotting cell reads the "cluster" column.
embeddings["cluster"] = clusterizer.labels_
embeddings

In [None]:
# Plot the "cluster" column of the embeddings over the regions.
# NOTE(review): `regions_gdf` is not defined anywhere in the visible notebook (its
# definition is commented out) — TODO: define it before running this cell.
plot_numeric_data(regions_gdf, "cluster", embeddings)