In [1]:
import numpy as np
import shapely
from arro3.core import Array, DataType, Table
from geoarrow.rust.io import GeoParquetDataset
from obstore.store import S3Store

from lonboard import Map, PolygonLayer, viz
from lonboard.basemap import CartoStyle, MaplibreBasemap

In [4]:
# Handle on the Overture Maps 2025-11-19.0 release prefix. No AWS credentials
# are passed; skip_signature=True asks for unsigned (anonymous) requests.
RELEASE_URL = "s3://overturemaps-us-west-2/release/2025-11-19.0/"
store = S3Store.from_url(RELEASE_URL, region="us-west-2", skip_signature=True)

In [5]:
# Inspect the top level of the release; each Overture theme shows up as its own prefix.
store.list_with_delimiter()

{'common_prefixes': ['theme=addresses',
  'theme=base',
  'theme=buildings',
  'theme=divisions',
  'theme=places',
  'theme=transportation'],
 'objects': []}

In [6]:
# Drill into the buildings theme to see which feature types it is split into.
store.list_with_delimiter("theme=buildings")

{'common_prefixes': ['theme=buildings/type=building',
  'theme=buildings/type=building_part'],
 'objects': []}

In [7]:
# Enumerate the Parquet files that make up the `building` feature type and
# keep only the object metadata entries (path, size, e_tag, ...).
building_listing = store.list_with_delimiter("theme=buildings/type=building")
objects = building_listing["objects"]
# Preview two entries rather than dumping the whole listing.
objects[:2]

[{'path': 'theme=buildings/type=building/part-00000-d0940975-810a-44e5-b2e9-d3b94cd70795-c000.zstd.parquet',
  'last_modified': datetime.datetime(2025, 11, 19, 18, 7, 18, tzinfo=datetime.timezone.utc),
  'size': 1005316607,
  'e_tag': '"183ac428cd6e7d899e4a894744ae6c66-192"',
  'version': None},
 {'path': 'theme=buildings/type=building/part-00001-d0940975-810a-44e5-b2e9-d3b94cd70795-c000.zstd.parquet',
  'last_modified': datetime.datetime(2025, 11, 19, 18, 7, 20, tzinfo=datetime.timezone.utc),
  'size': 1081003442,
  'e_tag': '"8658d9412ae5323a7df6db45fdfcf4c5-207"',
  'version': None}]

In [8]:
# Treat all listed building files as one logical GeoParquet dataset, read through `store`.
# NOTE(review): the recorded run emits a warning from this call — confirm it is expected.
dataset = GeoParquetDataset.open(objects, store=store)

  dataset = GeoParquetDataset.open(objects, store=store)


In [9]:
# Row-group count for the dataset (presumably summed over every file — TODO confirm).
dataset.num_row_groups

16869

In [10]:
# Row count for the dataset; roughly 2.5 billion in the recorded run.
dataset.num_rows

2541497985

In [11]:
# Peek at the first few fragments; each one is a GeoParquetFile handle.
dataset.fragments[:5]

[<geoarrow.rust.io.GeoParquetFile at 0xffff672f8090>,
 <geoarrow.rust.io.GeoParquetFile at 0xffff672f86f0>,
 <geoarrow.rust.io.GeoParquetFile at 0xffff672fa9d0>,
 <geoarrow.rust.io.GeoParquetFile at 0xffff672fa1f0>,
 <geoarrow.rust.io.GeoParquetFile at 0xffff672faa90>]

In [12]:
# One rectangle per file: take each fragment's file-level bounding box and
# turn it into a shapely polygon so it can be drawn on a map.
file_bboxes = (fragment.file_bbox() for fragment in dataset.fragments)
file_bounds = [shapely.box(*bbox) for bbox in file_bboxes]

# Visualizing file-based spatial partitioning

In [13]:
# Quick-look map of the per-file bounding boxes.
m = viz(file_bounds)

# Restyle every layer that viz() created: very low opacity (presumably so
# overlapping file extents stay distinguishable) and highlight-on-hover.
for bounds_layer in m.layers:
    bounds_layer.opacity = 0.05
    bounds_layer.auto_highlight = True

m

  warn(


<lonboard._map.Map object at 0xffff64ab2990>

In [14]:
# Build one table per file describing its row groups: the bounding box of each
# row group, the path of the file it belongs to, and its index within that file.
row_groups_bounds = []
for fragment in dataset.fragments:
    rg_bounds = fragment.row_groups_bounds()
    n_row_groups = len(rg_bounds)

    # Insertion order of this mapping defines the column order of the table.
    columns = {
        "bounds": rg_bounds,
        # Repeat the file path so every row group row carries its source file.
        "file_path": Array([fragment.path] * n_row_groups, DataType.string()),
        "row_group_idx": np.arange(n_row_groups),
    }
    row_groups_bounds.append(
        Table.from_arrays(list(columns.values()), names=list(columns)),
    )

In [15]:
# Inspect the first file's `bounds` column: one xmin/ymin/xmax/ymax struct per row group.
row_groups_bounds[0]["bounds"]

arro3.core.ChunkedArray<Struct(xmin Float64, ymin Float64, xmax Float64, ymax Float64)>
[
  [
    {xmin: -179.96853637695312, ymin: -84.29460906982422, xmax: -2.8229823112487793, ymax: -44.999996185302734},
    {xmin: -73.12483215332031, ymin: -54.84376525878906, xmax: -67.69155883789062, ymax: -51.32923126220703},
    {xmin: -75.62561798095703, ymin: -53.43510437011719, xmax: -68.67027282714844, ymax: -47.81304931640625},
    {xmin: -73.1250228881836, ymin: -50.62410354614258, xmax: -67.49979400634766, ymax: -46.40622329711914},
    {xmin: -73.125, ymin: -47.109153747558594, xmax: -67.49977111816406, ymax: -45.000179290771484},
    {xmin: -75.63844299316406, ymin: -47.81181335449219, xmax: -71.7187271118164, ymax: -42.18750762939453},
    {xmin: -74.77418518066406, ymin: -45.0, xmax: -67.50022888183594, ymax: -42.187477111816406},
    {xmin: -73.12507629394531, ymin: -43.593807220458984, xmax: -68.55471801757812, ymax: -41.4842529296875},
    {xmin: -73.1251220703125, ymin: -41.528400

In [16]:
# Fill colors for the per-file layers. The list is indexed modulo its length,
# so files beyond the eleventh wrap around and reuse colors from the start.
COLORS = [
    "#FC49A3",  # hot pink
    "#CC66FF",  # light purple
    "#66CCFF",  # sky blue
    "#66FFCC",  # aquamarine
    "#00FF00",  # pure green (CSS "lime")
    "#FFCC66",  # light orange
    "#FF6666",  # salmon
    "#FF0000",  # pure red
    "#FF8000",  # orange
    "#FFFF66",  # light yellow
    "#00FFFF",  # cyan
]

In [17]:
# One PolygonLayer per file, each drawing that file's row-group bounding boxes.
# Fill colors cycle through COLORS so consecutive files get different colors;
# the outline is a mostly opaque black.
layers = [
    PolygonLayer(
        file_table,
        get_fill_color=COLORS[file_idx % len(COLORS)],
        get_line_color=[0, 0, 0, 200],
        opacity=0.03,
        line_width_min_pixels=0.8,
        auto_highlight=True,
    )
    for file_idx, file_table in enumerate(row_groups_bounds)
]

In [21]:
# Dark Carto basemap, then a single map holding every per-file layer.
bmap = MaplibreBasemap(
    mode="reverse-controlled",
    style=CartoStyle.DarkMatter,
)
m = Map(layers, height=600, basemap=bmap)
m

<lonboard._map.Map object at 0xffff2cf38710>