# CLAUDE please update this notebook with a new branch where you change to meet the latest TODO, new branch:

⏺ Added the TODO. The short version: pull Classification, Intensity, ReturnNumber, NumberOfReturns into the worker Arrow table, then aggregate
  per-hex class counts, ground elevation, structure height, vegetation density, and canopy height. That gives you land-use coloring, true building
  heights above ground, suspended bridge hex, and vegetation density — all from the same EPT source you're already querying.


In [3]:
# --- USER PARAMETERS ---
EPT_URL = "https://s3-us-west-2.amazonaws.com/usgs-lidar-public/CA_SanFrancisco_1_B23/ept.json"
H3_RES = 13
DDB_TABLE = f"san_fran_rich_res_{H3_RES}"
SRC_CRS = 'EPSG:3857'
DST_CRS = 'EPSG:4326'
DB_PATH = 'duckdb/san_fran_ept_lpc.ddb'
TILE_ZOOM = 16
MAX_WORKERS = 14
SUB_RESOLUTION = None  # Example: 1.0 (meters) to thin the data as it downloads
BBOX = [-13638426.0, 4536715.0, -13617318.0, 4556481.0] # [min_x, min_y, max_x, max_y]

In [2]:
# # Install extensions globally (only needed once)
import duckdb
duckdb.sql("INSTALL h3 FROM community")
duckdb.sql("INSTALL httpfs")
duckdb.sql("INSTALL spatial")
# duckdb.sql("INSTALL pdal FROM community")
def get_con():
    """In-memory connection for workers. LOAD only, no INSTALL."""
    con = duckdb.connect()
    con.sql("""
        SET temp_directory = './tmp';
        SET memory_limit = '512MB';
     --   SET s3_region = 'us-west-2';
        LOAD h3;
        LOAD httpfs;
        LOAD spatial;
        SET enable_progress_bar = false;
    """)
    return con

In [3]:
import pdal
import pyarrow as pa
import duckdb
import mercantile
import concurrent.futures
import time
import os

def process_tile_to_h3(tile):
    """Worker: PDAL Points -> Rich H3 Aggregates -> Returns Arrow Table"""
    tb = mercantile.xy_bounds(tile)
    bounds = f"([{tb.left},{tb.right}],[{tb.bottom},{tb.top}])"
    
    reader_opts = {"filename": EPT_URL, "bounds": bounds}
    if SUB_RESOLUTION:
        reader_opts["resolution"] = SUB_RESOLUTION

    try:
        pipeline = pdal.Reader.ept(**reader_opts).pipeline()
        count = pipeline.execute()
        if count == 0 or len(pipeline.arrays) == 0:
            return None
        arr = pipeline.arrays[0]
        if len(arr) == 0: return None

        # Pull X, Y, Z + Classification, Intensity, ReturnNumber, NumberOfReturns
        arrow_tbl = pa.Table.from_arrays(
            [
                pa.array(arr['X']), pa.array(arr['Y']), pa.array(arr['Z']),
                pa.array(arr['Classification']),
                pa.array(arr['Intensity']),
                pa.array(arr['ReturnNumber']),
                pa.array(arr['NumberOfReturns']),
            ],
            names=['X', 'Y', 'Z', 'Classification', 'Intensity', 'ReturnNumber', 'NumberOfReturns']
        )
        
        con = get_con()
        con.register('tile_data', arrow_tbl)
        hex_summary = con.sql(f"""
            SELECT 
                h3_latlng_to_cell(
                    ST_Y(ST_Transform(ST_Point(X, Y), '{SRC_CRS}', '{DST_CRS}', always_xy := true)), 
                    ST_X(ST_Transform(ST_Point(X, Y), '{SRC_CRS}', '{DST_CRS}', always_xy := true)), 
                    {H3_RES}
                ) AS hex,
                -- Elevation stats
                AVG(Z) AS avg_z,
                MIN(Z) AS min_z,
                MAX(Z) AS max_z,
                MAX(Z) - MIN(Z) AS z_range,
                COUNT(*) AS cnt,
                -- Ground elevation (Classification 2)
                AVG(Z) FILTER (WHERE Classification = 2) AS ground_z,
                COUNT(*) FILTER (WHERE Classification = 2) AS ground_cnt,
                -- Building (Classification 6)
                COUNT(*) FILTER (WHERE Classification = 6) AS building_cnt,
                -- Bridge (Classification 17)
                COUNT(*) FILTER (WHERE Classification = 17) AS bridge_cnt,
                -- Vegetation (Classification 3,4,5)
                COUNT(*) FILTER (WHERE Classification IN (3,4,5)) AS veg_cnt,
                -- Water (Classification 9)
                COUNT(*) FILTER (WHERE Classification = 9) AS water_cnt,
                -- Intensity
                AVG(Intensity) AS avg_intensity,
                -- Vegetation density: ratio of multi-return points
                COUNT(*) FILTER (WHERE NumberOfReturns > 1)::DOUBLE / NULLIF(COUNT(*), 0) AS multi_return_ratio,
                -- Canopy height: first return Z - last return Z
                AVG(Z) FILTER (WHERE ReturnNumber = 1) - AVG(Z) FILTER (WHERE ReturnNumber = NumberOfReturns AND NumberOfReturns > 1) AS canopy_height
            FROM tile_data
            GROUP BY 1
        """).fetch_arrow_table()
        con.unregister('tile_data')
        con.close()
        return hex_summary
    except Exception as e:
        print(f"  TILE FAILED {tile}: {e}")
        return None

In [4]:
def run_pipeline():
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    
    # 1. Generate Tile List
    sw = mercantile.lnglat(BBOX[0], BBOX[1])
    ne = mercantile.lnglat(BBOX[2], BBOX[3])
    tiles = list(mercantile.tiles(sw.lng, sw.lat, ne.lng, ne.lat, zooms=[TILE_ZOOM]))
    
    # 2. Initialize Persistent Storage — open briefly, then close
    con = duckdb.connect(DB_PATH)
    con.sql("""
        CREATE OR REPLACE TABLE raw_hex_batches (
            hex UBIGINT,
            avg_z DOUBLE, min_z DOUBLE, max_z DOUBLE, z_range DOUBLE, cnt BIGINT,
            ground_z DOUBLE, ground_cnt BIGINT,
            building_cnt BIGINT, bridge_cnt BIGINT, veg_cnt BIGINT, water_cnt BIGINT,
            avg_intensity DOUBLE, multi_return_ratio DOUBLE, canopy_height DOUBLE
        )
    """)
    con.close()

    print(f"Processing {len(tiles)} tiles with {MAX_WORKERS} workers...")
    start = time.time()

    # 3. Parallel Process — accumulate Arrow tables in memory, flush in batches
    FLUSH_EVERY = 50
    pending_tables = []

    def flush_to_db(tables):
        if not tables:
            return
        combined = pa.concat_tables(tables)
        con = duckdb.connect(DB_PATH)
        con.sql("INSERT INTO raw_hex_batches SELECT * FROM combined")
        con.close()

    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = {executor.submit(process_tile_to_h3, t): t for t in tiles}
        for i, future in enumerate(concurrent.futures.as_completed(futures)):
            pa_temp_table = future.result()
            if pa_temp_table:
                pending_tables.append(pa_temp_table)
            
            if len(pending_tables) >= FLUSH_EVERY:
                flush_to_db(pending_tables)
                pending_tables = []
            
            if i % 100 == 0:
                print(f"Batch {i}/{len(tiles)} | Time: {time.time()-start:.1f}s")

    # Flush remaining
    flush_to_db(pending_tables)

    # 4. Final Global Reduction — merge hex spanning tile boundaries
    print("Finalizing global reduction...")
    con = duckdb.connect(DB_PATH)
    con.sql("LOAD h3")
    con.sql(f"""
        CREATE OR REPLACE TABLE {DDB_TABLE} AS
        SELECT 
            hex,
            h3_cell_to_lat(hex) AS lat,
            h3_cell_to_lng(hex) AS lng,
            -- Elevation: weighted avg, global min/max
            SUM(avg_z * cnt) / SUM(cnt) AS avg_z,
            MIN(min_z) AS min_z,
            MAX(max_z) AS max_z,
            MAX(max_z) - MIN(min_z) AS z_range,
            SUM(cnt) AS cnt,
            -- Ground: weighted avg across tiles
            SUM(ground_z * ground_cnt) / NULLIF(SUM(ground_cnt), 0) AS ground_z,
            SUM(ground_cnt) AS ground_cnt,
            -- Structure height above ground
            MAX(max_z) - (SUM(ground_z * ground_cnt) / NULLIF(SUM(ground_cnt), 0)) AS structure_height,
            -- Classification counts
            SUM(building_cnt) AS building_cnt,
            SUM(bridge_cnt) AS bridge_cnt,
            SUM(veg_cnt) AS veg_cnt,
            SUM(water_cnt) AS water_cnt,
            -- Intensity: weighted avg
            SUM(avg_intensity * cnt) / SUM(cnt) AS avg_intensity,
            -- Multi-return ratio: weighted avg
            SUM(multi_return_ratio * cnt) / SUM(cnt) AS multi_return_ratio,
            -- Canopy height: weighted avg (only where we have data)
            SUM(canopy_height * veg_cnt) / NULLIF(SUM(veg_cnt), 0) AS canopy_height,
            -- Dominant class
            CASE 
                WHEN SUM(building_cnt) >= SUM(ground_cnt) AND SUM(building_cnt) >= SUM(veg_cnt) THEN 'building'
                WHEN SUM(veg_cnt) >= SUM(ground_cnt) THEN 'vegetation'
                WHEN SUM(water_cnt) > SUM(cnt) * 0.5 THEN 'water'
                WHEN SUM(bridge_cnt) > 0 THEN 'bridge'
                ELSE 'ground'
            END AS dominant_class
        FROM raw_hex_batches
        GROUP BY 1
    """)
    
    elapsed = time.time() - start
    con.sql(f"SELECT COUNT(*) as total_hex FROM {DDB_TABLE}").show()
    con.sql(f"""
        SELECT dominant_class, COUNT(*) as hex_count 
        FROM {DDB_TABLE} 
        GROUP BY 1 ORDER BY 2 DESC
    """).show()
    print(f"Elapsed: {elapsed/60:.1f} min")
    con.close()

run_pipeline()

Processing 1188 tiles with 14 workers...
Batch 0/1188 | Time: 1.1s
Batch 100/1188 | Time: 78.1s
Batch 200/1188 | Time: 404.7s
Batch 300/1188 | Time: 842.3s
Batch 400/1188 | Time: 1318.8s
Batch 500/1188 | Time: 1776.8s


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

Batch 600/1188 | Time: 2168.7s
Batch 700/1188 | Time: 2539.3s
Batch 800/1188 | Time: 2829.5s
Batch 900/1188 | Time: 2973.1s
Batch 1000/1188 | Time: 3021.9s
Batch 1100/1188 | Time: 3031.9s
Finalizing global reduction...
┌───────────┐
│ total_hex │
│   int64   │
├───────────┤
│   2958715 │
└───────────┘

┌────────────────┬───────────┐
│ dominant_class │ hex_count │
│    varchar     │   int64   │
├────────────────┼───────────┤
│ ground         │   2490172 │
│ building       │    452836 │
│ bridge         │     14660 │
│ water          │       949 │
│ vegetation     │        98 │
└────────────────┴───────────┘

Elapsed: 50.7 min


In [None]:
# Re-run ONLY the final reduction with fixed classification + structure_height
# No PDAL needed — just re-aggregates raw_hex_batches (seconds)
import duckdb

con = duckdb.connect(DB_PATH)
con.sql("LOAD h3")
con.sql(f"""
    CREATE OR REPLACE TABLE {DDB_TABLE} AS
    SELECT 
        hex,
        h3_cell_to_lat(hex) AS lat,
        h3_cell_to_lng(hex) AS lng,
        -- Elevation: weighted avg, global min/max
        SUM(avg_z * cnt) / SUM(cnt) AS avg_z,
        MIN(min_z) AS min_z,
        MAX(max_z) AS max_z,
        MAX(max_z) - MIN(min_z) AS z_range,
        SUM(cnt) AS cnt,
        -- Ground: weighted avg across tiles
        SUM(ground_z * ground_cnt) / NULLIF(SUM(ground_cnt), 0) AS ground_z,
        SUM(ground_cnt) AS ground_cnt,
        -- Structure height: avg_z - ground_z (robust to max_z outliers)
        (SUM(avg_z * cnt) / SUM(cnt)) - (SUM(ground_z * ground_cnt) / NULLIF(SUM(ground_cnt), 0)) AS structure_height,
        -- Classification counts
        SUM(building_cnt) AS building_cnt,
        SUM(bridge_cnt) AS bridge_cnt,
        SUM(veg_cnt) AS veg_cnt,
        SUM(water_cnt) AS water_cnt,
        -- Intensity: weighted avg
        SUM(avg_intensity * cnt) / SUM(cnt) AS avg_intensity,
        -- Multi-return ratio: weighted avg
        SUM(multi_return_ratio * cnt) / SUM(cnt) AS multi_return_ratio,
        -- Canopy height: weighted avg (only where we have data)
        SUM(canopy_height * veg_cnt) / NULLIF(SUM(veg_cnt), 0) AS canopy_height,
        -- Dominant class: argmax with minimum 10% threshold for non-ground
        CASE
            WHEN SUM(building_cnt) >= GREATEST(SUM(bridge_cnt), SUM(veg_cnt), SUM(water_cnt))
                 AND SUM(building_cnt)::DOUBLE / SUM(cnt) > 0.1 THEN 'building'
            WHEN SUM(bridge_cnt) >= GREATEST(SUM(building_cnt), SUM(veg_cnt), SUM(water_cnt))
                 AND SUM(bridge_cnt)::DOUBLE / SUM(cnt) > 0.1 THEN 'bridge'
            WHEN SUM(veg_cnt) >= GREATEST(SUM(building_cnt), SUM(bridge_cnt), SUM(water_cnt))
                 AND SUM(veg_cnt)::DOUBLE / SUM(cnt) > 0.1 THEN 'vegetation'
            WHEN SUM(water_cnt)::DOUBLE / SUM(cnt) > 0.3 THEN 'water'
            ELSE 'ground'
        END AS dominant_class
    FROM raw_hex_batches
    GROUP BY 1
""")

con.sql(f"SELECT COUNT(*) as total_hex FROM {DDB_TABLE}").show()
con.sql(f"""
    SELECT dominant_class, COUNT(*) as hex_count 
    FROM {DDB_TABLE} 
    GROUP BY 1 ORDER BY 2 DESC
""").show()
# Sanity: ground hex should have near-zero structure_height
con.sql(f"""
    SELECT dominant_class,
           ROUND(AVG(structure_height), 1) AS avg_struct_h,
           ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY structure_height), 1) AS p99_struct_h,
           ROUND(MAX(structure_height), 1) AS max_struct_h
    FROM {DDB_TABLE}
    GROUP BY 1 ORDER BY 2 DESC
""").show()
con.close()

In [None]:
# 3D map — two layers: ground (Inferno by elevation) + structures (Viridis by height above ground)
from lonboard import Map, H3HexagonLayer
from arro3.core import Table
from lonboard.colormap import apply_continuous_cmap
from palettable.matplotlib import Viridis_20, Inferno_20
from matplotlib.colors import Normalize
import numpy as np
import pyarrow.compute as pc

con = duckdb.connect(DB_PATH)
con.sql("LOAD h3")

ground_df = con.sql(f"""
    SELECT h3_h3_to_string(hex) AS hex, max_z, cnt
    FROM {DDB_TABLE}
    WHERE dominant_class = 'ground' AND cnt > 10
""").fetch_arrow_table()

struct_df = con.sql(f"""
    SELECT h3_h3_to_string(hex) AS hex, max_z, structure_height, dominant_class, cnt
    FROM {DDB_TABLE}
    WHERE dominant_class != 'ground' AND cnt > 10
""").fetch_arrow_table()
con.close()

# --- Ground layer: Inferno by max_z, extruded by max_z ---
ground_tbl = Table.from_arrow(ground_df)
ground_z = np.nan_to_num(np.array(pc.fill_null(ground_df["max_z"], 0)), nan=0)
norm_elev = Normalize(vmin=ground_z.min(), vmax=np.percentile(ground_z, 99), clip=True)
ground_colors = apply_continuous_cmap(norm_elev(ground_z), Inferno_20)

ground_layer = H3HexagonLayer(
    ground_tbl,
    get_hexagon=ground_tbl["hex"],
    get_fill_color=ground_colors,
    extruded=True,
    get_elevation=ground_z,
    elevation_scale=3,
    stroked=False,
    opacity=1,
    coverage=1,
)

# --- Structure layer: Viridis by structure_height, extruded by structure_height ---
struct_tbl = Table.from_arrow(struct_df)
struct_h = np.array(pc.fill_null(struct_df["structure_height"], 0))
struct_h = np.clip(struct_h, 0, None)
struct_p99 = np.percentile(struct_h[struct_h > 0], 99) if (struct_h > 0).any() else 1
norm_struct = Normalize(vmin=0, vmax=struct_p99, clip=True)
struct_colors = apply_continuous_cmap(norm_struct(struct_h), Viridis_20)

struct_layer = H3HexagonLayer(
    struct_tbl,
    get_hexagon=struct_tbl["hex"],
    get_fill_color=struct_colors,
    extruded=True,
    get_elevation=struct_h,
    elevation_scale=3,
    stroked=False,
    opacity=1,
    coverage=1,
)

Map(
    layers=[ground_layer, struct_layer],
    view_state={"longitude": -122.44, "latitude": 37.76, "zoom": 12, "pitch": 60, "bearing": 30},
)

## Dual-layer ground/structure map (WIP)
The cell above splits hex into ground (Inferno by terrain elevation) and structures (Viridis by height above ground). Still confusing visually — H3HexagonLayer extrudes from z=0, so structures don't float above terrain. Needs deck.gl ColumnLayer with `getPosition=[lng, lat, ground_z]` to actually suspend buildings/bridges. Parking this for now.

In [None]:
# 3D elevation map — all hex, Inferno by max_z
from lonboard import Map, H3HexagonLayer
from arro3.core import Table
from lonboard.colormap import apply_continuous_cmap
from palettable.matplotlib import Inferno_20
from matplotlib.colors import Normalize
import numpy as np
import pyarrow.compute as pc

con = duckdb.connect(DB_PATH)
con.sql("LOAD h3")
df = con.sql(f"""
    SELECT h3_h3_to_string(hex) AS hex, max_z, cnt
    FROM {DDB_TABLE}
    WHERE cnt > 10
""").fetch_arrow_table()
con.close()

table = Table.from_arrow(df)
max_z = np.nan_to_num(np.array(pc.fill_null(df["max_z"], 0)), nan=0)

norm = Normalize(vmin=max_z.min(), vmax=np.percentile(max_z, 99), clip=True)
colors = apply_continuous_cmap(norm(max_z), Inferno_20)

layer = H3HexagonLayer(
    table,
    get_hexagon=table["hex"],
    get_fill_color=colors,
    extruded=True,
    get_elevation=max_z,
    elevation_scale=3,
    stroked=False,
    opacity=1,
    coverage=1,
)

Map(
    layers=[layer],
    view_state={"longitude": -122.44, "latitude": 37.76, "zoom": 12, "pitch": 60, "bearing": 30},
)

## Classification map — ASPRS standard colors
Standard LiDAR classification colors per hex: ground (brown), building (red), vegetation (green), water (blue), bridge (gray). Extruded by `max_z` for 3D terrain.

In [12]:
# 3D classification map — ASPRS standard colors, extruded by max_z
from lonboard import Map, H3HexagonLayer
from arro3.core import Table
import numpy as np
import pyarrow.compute as pc

con = duckdb.connect(DB_PATH)
con.sql("LOAD h3")
df = con.sql(f"""
    SELECT h3_h3_to_string(hex) AS hex, max_z, dominant_class, cnt
    FROM {DDB_TABLE}
    WHERE cnt > 10
""").fetch_arrow_table()
con.close()

table = Table.from_arrow(df)
max_z = np.nan_to_num(np.array(pc.fill_null(df["max_z"], 0)), nan=0)
classes = np.array(df.column("dominant_class"))

# ASPRS standard classification colors [R, G, B, A]
CLASS_COLORS = {
    'ground':     [186, 148, 86, 255],   # brown
    'building':   [255, 0, 0, 255],      # red
    'vegetation': [0, 160, 0, 255],      # green
    'water':      [0, 100, 255, 255],    # blue
    'bridge':     [160, 160, 160, 255],  # gray
}
DEFAULT_COLOR = [200, 200, 200, 255]

colors = np.array([CLASS_COLORS.get(c, DEFAULT_COLOR) for c in classes], dtype=np.uint8)

layer = H3HexagonLayer(
    table,
    get_hexagon=table["hex"],
    get_fill_color=colors,
    extruded=True,
    get_elevation=max_z,
    elevation_scale=3,
    stroked=False,
    opacity=1,
    coverage=1,
)

Map(
    layers=[layer],
    view_state={"longitude": -122.44, "latitude": 37.76, "zoom": 12, "pitch": 60, "bearing": 30},
)

VBox(children=(<lonboard._map.Map object at 0x11fda63d0>, VBox(children=(ErrorOutput(), ErrorOutput()), layout…

## Classification + height — per-class sequential colormaps, alpha by density
Each class gets its own perceptually uniform gradient mapped by height. Ground = Viridis by `max_z`, Building = Bilbao (warm amber) by `structure_height`, Vegetation = Algae (teal-green), Bridge = Davos (cool blue-gray), Water = Deep (ocean blue) flat. Alpha = `log(cnt)` normalized to [80,255] — sparse hex fade out, dense hex stay opaque. All colorblind-safe.

In [9]:
# Classification + height — per-class sequential colormaps, alpha by density
from lonboard import Map, H3HexagonLayer
from arro3.core import Table
from lonboard.colormap import apply_continuous_cmap
from palettable.cmocean.sequential import Algae_20, Deep_20
from palettable.scientific.sequential import Bilbao_20, Davos_20
from palettable.matplotlib import Viridis_20
from matplotlib.colors import Normalize
import numpy as np
import pyarrow.compute as pc
import duckdb
DB_PATH = 'duckdb/san_fran_ept_lpc.ddb'
duckdb.sql("INSTALL h3 FROM community;")
con = duckdb.connect('duckdb/san_fran_ept_lpc.ddb')
con.sql("LOAD h3")
df = con.sql(f"""
    SELECT h3_h3_to_string(hex) AS hex, avg_z, ground_z, structure_height, dominant_class, cnt
    FROM san_fran_rich_res_13
    WHERE cnt > 10
""").fetch_arrow_table()
con.close()

table = Table.from_arrow(df)

In [None]:

n = len(df)
avg_z = np.nan_to_num(np.array(pc.fill_null(df["avg_z"], 0)), nan=0)
ground_z = np.nan_to_num(np.array(pc.fill_null(df["ground_z"], 0)), nan=0)
struct_h = np.clip(np.nan_to_num(np.array(pc.fill_null(df["structure_height"], 0)), nan=0), 0, None)
classes = np.array(df.column("dominant_class"))
cnt = np.array(df.column("cnt"))

# Extrusion: ground hex by ground_z, structure hex by avg_z
elev = np.where(classes == 'ground', ground_z, avg_z)

# Normalizers — color ground by ground_z, structures by structure_height
norm_elev = Normalize(vmin=np.percentile(ground_z[ground_z > 0], 1) if (ground_z > 0).any() else 0,
                      vmax=np.percentile(ground_z[ground_z > 0], 99) if (ground_z > 0).any() else 1, clip=True)
struct_p99 = np.percentile(struct_h[struct_h > 0], 99) if (struct_h > 0).any() else 1
norm_struct = Normalize(vmin=0, vmax=struct_p99, clip=True)

elev_vals = norm_elev(ground_z)
struct_vals = norm_struct(struct_h)

# apply_continuous_cmap returns RGB (3ch) — build RGB array, add alpha after
colors_rgb = np.zeros((n, 3), dtype=np.uint8)

# Ground — Viridis by ground_z
m = classes == 'ground'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(elev_vals[m], Viridis_20)

# Building — Bilbao (warm amber) by structure_height
m = classes == 'building'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Bilbao_20)

# Vegetation — Algae (teal-green) by structure_height
m = classes == 'vegetation'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Algae_20)

# Bridge — Davos (cool blue-gray) by structure_height
m = classes == 'bridge'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Davos_20)

# Water — Deep (ocean blue) flat mid-tone
m = classes == 'water'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(np.full(m.sum(), 0.5), Deep_20)

# Alpha by point density — log(cnt) normalized to [80, 255]
log_cnt = np.log1p(cnt.astype(np.float64))
alpha_min, alpha_max = 80, 255
lc_min, lc_max = log_cnt.min(), log_cnt.max()
if lc_max > lc_min:
    alpha = alpha_min + (log_cnt - lc_min) / (lc_max - lc_min) * (alpha_max - alpha_min)
else:
    alpha = np.full(n, alpha_max, dtype=np.float64)
alpha = alpha.astype(np.uint8)

# Combine RGB + alpha
colors = np.concatenate([colors_rgb, alpha.reshape(-1, 1)], axis=1)

layer = H3HexagonLayer(
    table,
    get_hexagon=table["hex"],
    get_fill_color=colors,
    extruded=True,
    get_elevation=elev,
    elevation_scale=3,
    stroked=False,

    coverage=1,
)

Map(
    layers=[layer],
    view_state={"longitude": -122.44, "latitude": 37.76, "zoom": 12, "pitch": 60, "bearing": 30},
)


In [16]:
print(colors.shape, colors.dtype)
print(colors[:5])
# How many of each class?
print(np.unique(classes, return_counts=True))

# A few random indices per class
for cls in ["ground", "building", "vegetation", "bridge", "water"]:
    idx = np.where(classes == cls)[0][:5]
    print(cls, "indices:", idx)
    if len(idx) > 0:
        print(colors[idx])



(2952320, 4) uint8
[[ 62 188 115 209]
 [ 62 188 115 210]
 [101 203  94 206]
 [ 36 169 130 206]
 [ 36 168 130 210]]
(array(['bridge', 'building', 'ground', 'vegetation', 'water'],
      dtype=object), array([  14660,  446595, 2490018,      98,     949]))
ground indices: [0 1 2 3 4]
[[ 62 188 115 209]
 [ 62 188 115 210]
 [101 203  94 206]
 [ 36 169 130 206]
 [ 36 168 130 210]]
building indices: [18 19 20 27 47]
[[255 255 255 206]
 [255 255 255 199]
 [255 255 255 199]
 [255 255 255 205]
 [255 255 255 206]]
vegetation indices: [ 6796  6848  6862 25864 98496]
[[215 249 208 197]
 [215 249 208 209]
 [215 249 208 206]
 [104 182 101 206]
 [215 249 208 205]]
bridge indices: [71 73 74 78 87]
[[ 45  87 148 205]
 [ 50  94 152 208]
 [ 17  43 112 206]
 [ 74 117 157 206]
 [ 28  63 131 205]]
water indices: [ 2394  8167 17244 17245 17278]
[[ 71 142 157 184]
 [ 71 142 157 188]
 [ 71 142 157 182]
 [ 71 142 157 184]
 [ 71 142 157 186]]


In [None]:

n = len(df)
avg_z = np.nan_to_num(np.array(pc.fill_null(df["avg_z"], 0)), nan=0)
ground_z = np.nan_to_num(np.array(pc.fill_null(df["ground_z"], 0)), nan=0)
struct_h = np.clip(np.nan_to_num(np.array(pc.fill_null(df["structure_height"], 0)), nan=0), 0, None)
classes = np.array(df.column("dominant_class"))
cnt = np.array(df.column("cnt"))

# Extrusion: ground hex by ground_z, structure hex by avg_z
elev = np.where(classes == 'ground', ground_z, avg_z)

# Normalizers — color ground by ground_z, structures by structure_height
norm_elev = Normalize(vmin=np.percentile(ground_z[ground_z > 0], 1) if (ground_z > 0).any() else 0,
                      vmax=np.percentile(ground_z[ground_z > 0], 99) if (ground_z > 0).any() else 1, clip=True)
struct_p99 = np.percentile(struct_h[struct_h > 0], 99) if (struct_h > 0).any() else 1
norm_struct = Normalize(vmin=0, vmax=struct_p99, clip=True)

elev_vals = norm_elev(ground_z)
struct_vals = norm_struct(struct_h)

# apply_continuous_cmap returns RGB (3ch) — build RGB array, add alpha after
colors_rgb = np.zeros((n, 3), dtype=np.uint8)
ground = apply_continuous_cmap(elev_vals, Viridis_20)
building = apply_continuous_cmap(struct_vals, Bilbao_20)
vegetation = apply_continuous_cmap(struct_vals, Algae_20)
bridge = apply_continuous_cmap(struct_vals[m], Davos_20)
water= apply_continuous_cmap(np.full(m.sum(), 0.5), Deep_20)
# Ground — Viridis by ground_z
m = classes == 'ground'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(elev_vals[m], Viridis_20)

# Building — Bilbao (warm amber) by structure_height
m = classes == 'building'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Bilbao_20)

# Vegetation — Algae (teal-green) by structure_height
m = classes == 'vegetation'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Algae_20)

# Bridge — Davos (cool blue-gray) by structure_height
m = classes == 'bridge'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Davos_20)

# Water — Deep (ocean blue) flat mid-tone
m = classes == 'water'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(np.full(m.sum(), 0.5), Deep_20)

# Alpha by point density — log(cnt) normalized to [80, 255]
log_cnt = np.log1p(cnt.astype(np.float64))
alpha_min, alpha_max = 80, 255
lc_min, lc_max = log_cnt.min(), log_cnt.max()
if lc_max > lc_min:
    alpha = alpha_min + (log_cnt - lc_min) / (lc_max - lc_min) * (alpha_max - alpha_min)
else:
    alpha = np.full(n, alpha_max, dtype=np.float64)
alpha = alpha.astype(np.uint8)

# Combine RGB + alpha
colors = np.concatenate([colors_rgb, alpha.reshape(-1, 1)], axis=1)

layer = H3HexagonLayer(
    table,
    get_hexagon=table["hex"],
    get_fill_color=colors,
    extruded=True,
    get_elevation=elev,
    elevation_scale=3,
    stroked=False,

    coverage=1,
)

Map(
    layers=[layer],
    view_state={"longitude": -122.44, "latitude": 37.76, "zoom": 12, "pitch": 60, "bearing": 30},
)


In [12]:
print(m)

[False False False ... False False False]


In [None]:
# # Vegetation density — color by multi_return_ratio, extrude by canopy_height
# from matplotlib.colors import Normalize
# from lonboard.colormap import apply_continuous_cmap
# from palettable.cartocolors.sequential import BurgYl_7  # yellow-to-dark-green

# veg_ratio = np.array(table["multi_return_ratio"])
# veg_ratio = np.nan_to_num(veg_ratio, nan=0)
# norm = Normalize(vmin=0, vmax=np.percentile(veg_ratio[veg_ratio > 0], 95) if (veg_ratio > 0).any() else 1, clip=True)
# veg_colors = apply_continuous_cmap(norm(veg_ratio), BurgYl_7)

# canopy = np.array(table["canopy_height"])
# canopy = np.nan_to_num(canopy, nan=0)
# canopy = np.clip(canopy, 0, None)

# veg_layer = H3HexagonLayer(
#     table,
#     get_hexagon=table["hex"],
#     get_fill_color=veg_colors,
#     extruded=True,
#     get_elevation=canopy,
#     elevation_scale=5,
#     stroked=False,
#     opacity=0.9,
#     coverage=1,
# )

# Map(
#     layers=[veg_layer],
#     view_state={"longitude": -122.44, "latitude": 37.76, "zoom": 12, "pitch": 60, "bearing": 30},
# )

In [13]:
print("Kernel is responsive")

Kernel is responsive


In [17]:
classes = np.char.lower(np.char.strip(classes.astype(str)))

m = classes == 'ground'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(elev_vals[m], Viridis_20)

m = classes == 'building'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Bilbao_20)

m = classes == 'vegetation'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Algae_20)

m = classes == 'bridge'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Davos_20)

m = classes == 'water'
if m.any():
    colors_rgb[m] = apply_continuous_cmap(np.full(m.sum(), 0.5), Deep_20)


In [18]:
print("ground:", (classes == "ground").sum())
print("building:", (classes == "building").sum())
print("vegetation:", (classes == "vegetation").sum())
print("bridge:", (classes == "bridge").sum())
print("water:", (classes == "water").sum())


ground: 2490018
building: 446595
vegetation: 98
bridge: 14660
water: 949


In [14]:
import duckdb
DB_PATH = 'duckdb/san_fran_ept_lpc.ddb'
try:
    con = duckdb.connect(DB_PATH, read_only=True)
    con.sql("LOAD h3")
    con.sql("SELECT COUNT(*) as total_hex FROM san_fran_rich_res_13").show()
    con.sql("SELECT dominant_class, COUNT(*) as hex_count FROM san_fran_rich_res_13 GROUP BY 1 ORDER BY 2 DESC").show()
    con.close()
except Exception as e:
    print(f"Error: {e}")


Error: Connection Error: Can't open a connection to same database file with a different configuration than existing connections


In [27]:
# Classification + height — per-class sequential colormaps, alpha by density
from lonboard import Map, H3HexagonLayer
from arro3.core import Table
from lonboard.colormap import apply_continuous_cmap
from palettable.cmocean.sequential import Algae_20, Deep_20
from palettable.scientific.sequential import Bilbao_20, Davos_20
from palettable.matplotlib import Viridis_20
from matplotlib.colors import Normalize
import numpy as np
import pyarrow.compute as pc
import duckdb
DB_PATH = 'duckdb/san_fran_ept_lpc.ddb'
duckdb.sql("INSTALL h3 FROM community;")
con = duckdb.connect('duckdb/san_fran_ept_lpc.ddb')
con.sql("LOAD h3")
df = con.sql(f"""
    SELECT h3_h3_to_string(hex) AS hex, avg_z, ground_z, structure_height, dominant_class, cnt
    FROM san_fran_rich_res_13
    WHERE cnt > 10
""").fetch_arrow_table()
con.close()

table = Table.from_arrow(df)

In [30]:

n = len(df)
avg_z = np.nan_to_num(np.array(pc.fill_null(df["avg_z"], 0)), nan=0)
ground_z = np.nan_to_num(np.array(pc.fill_null(df["ground_z"], 0)), nan=0)
struct_h = np.clip(np.nan_to_num(np.array(pc.fill_null(df["structure_height"], 0)), nan=0), 0, None)
# classes = np.array(df.column("dominant_class"))
cnt = np.array(df.column("cnt"))

# Extrusion: ground hex by ground_z, structure hex by avg_z
elev = pc.if_else(pc.is_valid(classes), ground_z, avg_z)

# Normalizers — color ground by ground_z, structures by structure_height
norm_elev = Normalize(vmin=np.percentile(ground_z[ground_z > 0], 1) if (ground_z > 0).any() else 0,
                      vmax=np.percentile(ground_z[ground_z > 0], 99) if (ground_z > 0).any() else 1, clip=True)
# struct_p99 = np.percentile(struct_h[struct_h > 0], 99) if (struct_h > 0).any() else 1
# norm_struct = Normalize(vmin=0, vmax=struct_p99, clip=True)

elev_vals = norm_elev(ground_z)
# struct_vals = norm_struct(struct_h)

# apply_continuous_cmap returns RGB (3ch) — build RGB array, add alpha after
colors_rgb = np.zeros((n, 3), dtype=np.uint8)
m
if m.any():
    colors_rgb = apply_continuous_cmap(elev_vals[m], Viridis_20)
# # Ground — Viridis by ground_z
# classes = np.asarray(df.column("dominant_class")).astype(str)
# classes = np.char.lower(np.char.strip(classes))


# m = classes == 'ground'
# if m.any():
#     colors_rgb[m] = apply_continuous_cmap(elev_vals[m], Viridis_20)

# m = classes == 'building'
# if m.any():
#     colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Bilbao_20)

# m = classes == 'vegetation'
# if m.any():
#     colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Algae_20)

# m = classes == 'bridge'
# if m.any():
#     colors_rgb[m] = apply_continuous_cmap(struct_vals[m], Davos_20)

# m = classes == 'water'
# if m.any():
#     colors_rgb[m] = apply_continuous_cmap(np.full(m.sum(), 0.5), Deep_20)

# colors =
# # Alpha by point density — log(cnt) normalized to [80, 255]
# log_cnt = np.log1p(cnt.astype(np.float64))
# alpha_min, alpha_max = 80, 255
# lc_min, lc_max = log_cnt.min(), log_cnt.max()
# if lc_max > lc_min:
#     alpha = alpha_min + (log_cnt - lc_min) / (lc_max - lc_min) * (alpha_max - alpha_min)
# else:
#     alpha = np.full(n, alpha_max, dtype=np.float64)
# alpha = alpha.astype(np.uint8)

# Combine RGB + alpha
# colors = np.concatenate([colors_rgb, alpha.reshape(-1, 1)], axis=1)

layer = H3HexagonLayer(
    table,
    get_hexagon=table["hex"],
    get_fill_color=colors_rgb,
    extruded=True,
    get_elevation=elev,
    elevation_scale=3,
    stroked=False,

    coverage=1,
)

Map(
    layers=[layer],
    view_state={"longitude": -122.44, "latitude": 37.76, "zoom": 12, "pitch": 60, "bearing": 30},
)


TraitError: accessor must have same length as table