<a href="https://colab.research.google.com/github/kavyajeetbora/foursquare_ai/blob/master/notebooks/17_h3_boundary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Generate H3 indexes from a country boundary

In [None]:
!pip install -q h3
!wget -q https://raw.githubusercontent.com/kavyajeetbora/foursquare_ai/master/docs/India_land.parquet -O india.parquet

from pathlib import Path

import geopandas as gpd
import h3
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from shapely.geometry import Polygon

In [None]:
# Load the country boundary downloaded by the setup cell. wget saved it as
# "india.parquet" in the working directory, so read it from there rather than
# from the Colab-only absolute path "/content/india.parquet".
gdf = gpd.read_parquet("india.parquet")

# Ensure geometry is in WGS84 (required by H3)
gdf = gdf.to_crs(epsg=4326)

# Keep only the India rows. Fail loudly when nothing matches: an empty
# selection would otherwise flow through and produce zero H3 cells silently.
gdf = gdf[gdf["Name"] == "India"].copy()
if gdf.empty:
    raise ValueError("No rows with Name == 'India' found in india.parquet")

# Dissolve the selected features into one geometry for the H3 fill.
geometry = gdf.geometry.union_all()

In [None]:
def polygon_to_h3(geom, resolution):
    """Collect the H3 cells that fill a Polygon or MultiPolygon.

    Args:
        geom: Geometry exposing ``is_empty`` and ``geom_type``. Ring
            coordinates are read as (x, y) pairs and swapped to (lat, lng)
            before being handed to h3.
        resolution: H3 resolution forwarded to ``h3.polygon_to_cells``.

    Returns:
        set: Cell ids gathered from every polygon part. Empty when ``geom``
        is empty or every part has an exterior with fewer than 3 coordinates.

    Raises:
        ValueError: If ``geom`` is neither a Polygon nor a MultiPolygon.
    """
    if geom.is_empty:
        return set()

    kind = geom.geom_type
    if kind == "Polygon":
        parts = [geom]
    elif kind == "MultiPolygon":
        parts = geom.geoms
    else:
        raise ValueError("Geometry must be Polygon or MultiPolygon")

    def _ring_to_latlng(ring):
        # Flip each (x, y) pair into the (lat, lng) order h3 expects.
        return [(lat, lng) for lng, lat in ring.coords]

    collected = set()
    for part in parts:
        outer = _ring_to_latlng(part.exterior)

        # A part whose exterior has fewer than 3 coordinates is skipped.
        if len(outer) < 3:
            continue

        # Interior rings with fewer than 3 coordinates are dropped.
        inner_rings = [
            latlng
            for latlng in map(_ring_to_latlng, part.interiors)
            if len(latlng) >= 3
        ]

        # LatLngPoly takes the holes as separate positional arguments.
        shape = h3.LatLngPoly(outer, *inner_rings)
        collected.update(h3.polygon_to_cells(shape, res=resolution))

    return collected

In [None]:
# H3 resolution used for the h3-py pass over the boundary.
resolution = 5

# Fill the boundary geometry with H3 cells at that resolution.
cells = polygon_to_h3(geom=geometry, resolution=resolution)

def h3_cell_to_polygon(h):
    """Build a Shapely polygon outlining the H3 cell ``h``.

    ``h3.cell_to_boundary`` yields (lat, lon) vertices, while Shapely expects
    (lon, lat), so every vertex is flipped before the polygon is constructed.
    """
    return Polygon([(lon, lat) for lat, lon in h3.cell_to_boundary(h)])

# Sort the cell ids so the row order (and therefore which score lands on which
# cell) is the same on every run; a set has no guaranteed iteration order.
df = pd.DataFrame({"h3_id": sorted(cells)})

# One Shapely hexagon per H3 cell.
df["geometry"] = df["h3_id"].apply(h3_cell_to_polygon)

gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# Placeholder scores for the demo map, drawn from a seeded generator so that
# re-running the notebook reproduces the same values.
rng = np.random.default_rng(42)
random_values = rng.random(len(df))

gdf['score'] = random_values

# Build the output path with pathlib so it resolves on every OS: the
# backslash-separated literal is only a directory path on Windows and becomes
# a single odd file name elsewhere. Create the folder if it is missing.
output_dir = Path("..") / "data" / "boundaries"
output_dir.mkdir(parents=True, exist_ok=True)
gdf.to_parquet(output_dir / f"India_h3_res{resolution}.parquet")

In [None]:
# Preview the first rows. A bare last expression gets the notebook's rich
# table rendering, which print() would flatten to plain text.
gdf.head()

In [None]:
# Map of the H3 hexagons, coloured by the "score" column.
fig, ax = plt.subplots(figsize=(10, 10))

hex_style = {
    "column": "score",      # values that drive the fill colour
    "cmap": "viridis",      # colormap ('plasma', 'inferno', 'magma', 'cividis' also work)
    "legend": True,         # draw the colorbar
    "edgecolor": "black",   # hexagon outline colour
    "linewidth": 0.5,       # hexagon outline thickness
}
gdf.plot(ax=ax, **hex_style)

ax.set_title("Hex Map Color-Coded by Score")
ax.set_axis_off()  # hide the axes for a cleaner map

plt.show()

## Generate H3 cell boundaries using `duckdb`

In [None]:
import duckdb


con = duckdb.connect()

# Extensions used below: spatial provides the ST_* functions and the
# community h3 extension provides the h3_* functions.
con.execute("INSTALL httpfs; LOAD httpfs;")
con.execute("INSTALL spatial; LOAD spatial;")
con.execute("INSTALL h3 FROM community;")
con.execute("LOAD h3;")

# Forward slashes keep this relative path valid on Linux/macOS/Colab as well
# as Windows; the backslash form only resolves as a path on Windows.
india_parquet = "../data/boundaries/India.parquet"

resolution = 7

# Pipeline of the query:
#   geom  - repair and union every India feature into one geometry
#   parts - split that geometry into its component polygons via ST_Dump
#   cells - fill each polygon with H3 cells at the chosen resolution
# The final SELECT attaches each cell's boundary as a geometry.
query = f"""
WITH geom AS (
  SELECT ST_Union_Agg(ST_MakeValid(geometry)) AS g
  FROM read_parquet('{india_parquet}')
  WHERE Name = 'India'
),
parts AS (
  SELECT (d.unnest).geom AS geom
  FROM geom,
  UNNEST(ST_Dump(g)) AS d(unnest)
),
cells AS (
  SELECT UNNEST(h3_polygon_wkt_to_cells(ST_AsText(geom), {resolution})) AS h3_id
  FROM parts
)
SELECT
  h3_id,
  ST_GeomFromText(h3_cell_to_boundary_wkt(h3_id)) AS geom
FROM cells
"""

df = con.execute(query).df()

In [None]:
# Preview the first rows returned by the DuckDB query.
df.head()