
# H3 Dominance Hexes — Dual Metrics (Deals & Price Volume)

This notebook builds **H3 hexagon aggregations** for **agents** and **brokerages** and exports GeoJSON files for use in Mapbox.
It **always includes both metrics** (deal count and price volume) in every output file, so the front-end can display both
regardless of which metric toggle is selected.


In [1]:

# ==== Configuration ====
# Input table (CSV or Parquet). Required columns, after applying COLS below:
#   latitude, longitude, price, Agent_Name, Brokerage_Firm
INPUT_PATH = "Adam F. - 2025 Miami Brokerage and Broker Rankings - Agents_Brokers.csv"   # <-- point this at your own file
OUTPUT_PREFIX = "."        # directory that receives the GeoJSON files

# One set of output files is produced per H3 resolution listed here
RES_LIST = [7, 8, 9]

# Hexes holding fewer deals than this are left out of the export
MIN_SAMPLE = 10

# Optional clip rectangle in lon/lat order: (minx, miny, maxx, maxy).
# Use None to keep every row regardless of location.
MIAMI_DADE_BBOX = (-80.9, 25.1, -80.0, 26.1)  # rough bounds; adjust if needed

# Logical name -> column name as it appears in the input file.
# Edit the values (not the keys) if your file uses different headers.
COLS = dict(
    lat="latitude",
    lon="longitude",
    price="price",
    agent="Agent_Name",
    broker="Brokerage_Firm",
)


In [2]:

import pandas as pd
import numpy as np
from pathlib import Path
import json
import os

# h3 v4+
import h3

def _to_py_num(x):
    if isinstance(x, (np.integer,)):
        return int(x)
    if isinstance(x, (np.floating,)):
        return float(x)
    return x

def _in_bbox(lon, lat, bbox):
    minx, miny, maxx, maxy = bbox
    return (lon >= minx) & (lon <= maxx) & (lat >= miny) & (lat <= maxy)


In [3]:

# ==== Load & clean ====
# Read the input table; any suffix other than .parquet/.pq is read as CSV.
p = Path(INPUT_PATH)
if not p.exists():
    raise FileNotFoundError(f"Input file not found: {INPUT_PATH}")

if p.suffix.lower() in (".parquet", ".pq"):
    df = pd.read_parquet(p)
else:
    df = pd.read_csv(p)

# Normalize expected columns
df = df.rename(columns={
    COLS["lat"]: "latitude",
    COLS["lon"]: "longitude",
    COLS["price"]: "price",
    COLS["agent"]: "Agent_Name",
    COLS["broker"]: "Brokerage_Firm",
})

# Fail early, and readably, when a required column is absent; otherwise the
# problem only surfaces as a bare KeyError further down.
missing_cols = [
    c for c in ["latitude","longitude","price","Agent_Name","Brokerage_Firm"]
    if c not in df.columns
]
if missing_cols:
    raise KeyError(f"Input is missing required column(s) after renaming via COLS: {missing_cols}")

# Basic cleaning
# Coerce coordinates BEFORE dropping NaNs: unparseable values become NaN and
# must be removed too (previously they slipped through when no bbox was set).
df["latitude"] = pd.to_numeric(df["latitude"], errors="coerce")
df["longitude"] = pd.to_numeric(df["longitude"], errors="coerce")
df = df.dropna(subset=["latitude","longitude","price"]).copy()
# Rows with a missing price were dropped above; a present-but-unparseable
# price is kept as a deal with zero volume.
df["price"] = pd.to_numeric(df["price"], errors="coerce").fillna(0)

# Optional geographic clip
if MIAMI_DADE_BBOX is not None:
    mask = _in_bbox(df["longitude"].values, df["latitude"].values, MIAMI_DADE_BBOX)
    df = df.loc[mask].copy()

# Standardize string columns (missing names become the empty string)
for c in ["Agent_Name","Brokerage_Firm"]:
    df[c] = df[c].fillna("").astype(str).str.strip()

print("Loaded rows:", len(df))
print(df[["latitude","longitude","price","Agent_Name","Brokerage_Firm"]].head(3))


Loaded rows: 26844
    latitude  longitude      price             Agent_Name  \
0  25.846485 -80.130144  122125121      ZACHARY VICHINSKY   
1  25.846485 -80.130144  122125121  THE JILLS ZEDER GROUP   
2  25.848982 -80.126524   74250000   NELSON GONZALEZ TEAM   

                Brokerage_Firm  
0  BESPOKE REAL ESTATE FLORIDA  
1       COLDWELL BANKER REALTY  
2              BHHS EWM REALTY  


In [8]:
def _hex_ring(cell):
    """Return the closed GeoJSON ring ([lng, lat] pairs) outlining one H3 cell."""
    try:
        # Some h3-py builds accept a positional geo_json flag and then
        # return positions already in [lng, lat] order.
        raw = h3.cell_to_boundary(cell, True)
    except TypeError:
        # Builds without that flag return (lat, lng) pairs — swap to GeoJSON order.
        raw = [(lng, lat) for lat, lng in h3.cell_to_boundary(cell)]

    ring = [[lng, lat] for lng, lat in raw]
    # A GeoJSON linear ring must end on the position it starts with; the
    # flag-less call does not repeat the first vertex, so close it here.
    if ring and ring[0] != ring[-1]:
        ring.append(list(ring[0]))
    return ring


def compute_hex(df_in: pd.DataFrame, res: int, field: str, min_sample: int = 10):
    """
    Build a FeatureCollection of H3 hexes with both metrics (count & volume) per name.

    Parameters
    ----------
    df_in : DataFrame with "latitude", "longitude", "price" and the `field`
        column. Rows with a missing latitude or longitude are ignored.
    res : H3 resolution used to bin the rows.
    field : 'Agent_Name' or 'Brokerage_Firm' — the column whose values are ranked.
    min_sample : hexes containing fewer rows than this are omitted.

    Returns
    -------
    dict
        GeoJSON FeatureCollection with one Polygon feature per kept hex. Each
        feature's properties hold the cell id, row count, total price, the
        top-3 names with their counts and volumes (as JSON-encoded strings),
        and the leading name with its share of the hex's rows. Empty input
        yields a FeatureCollection with no features.
    """
    df = df_in.dropna(subset=["latitude","longitude"]).copy()
    res = int(res)
    # Plain list comprehension instead of DataFrame.apply(axis=1): apply on an
    # empty frame hands back a DataFrame, which cannot be assigned to a single
    # column; it is also far slower row by row.
    df["h3"] = [
        h3.latlng_to_cell(float(lat), float(lng), res)
        for lat, lng in zip(df["latitude"], df["longitude"])
    ]

    features = []
    for h, g in df.groupby("h3", sort=False):
        n = len(g)
        if n < min_sample:
            continue

        # aggregate counts & volume per name
        # ("count" tallies rows with a non-null price; "sum" is the price total)
        agg = g.groupby(field, dropna=False)["price"].agg(count="count", sum="sum")

        # Deterministic order:
        #   count desc → volume desc → name A→Z (case-insensitive)
        df_rank = agg.copy()
        df_rank["name"] = df_rank.index.astype(str)
        df_rank["name_lower"] = df_rank["name"].str.lower()
        df_rank = df_rank.sort_values(
            by=["count", "sum", "name_lower", "name"],
            ascending=[False, False, True, True],
        )

        top3 = df_rank.head(3)
        top3_names  = top3["name"].tolist()
        top3_counts = [int(v) for v in top3["count"].tolist()]
        top3_volume = [float(v) for v in top3["sum"].tolist()]

        total_price = float(g["price"].sum())
        top_count   = top3_counts[0] if top3_counts else 0
        top_name    = top3_names[0] if top3_names else ""

        props = {
            "h3": h,
            "sample_size": int(n),
            "sum_price": total_price,
            # lists are stored as JSON strings inside the feature properties
            f"{field}_top3_names": json.dumps(top3_names, ensure_ascii=False),
            f"{field}_top3_counts": json.dumps(top3_counts),
            f"{field}_top3_volume": json.dumps(top3_volume),
            # used by the map’s color ramp/label:
            f"top_{field}_share": float(top_count / n) if n else 0.0,
            f"top_{field}_name": top_name,
        }

        features.append({
            "type": "Feature",
            "geometry": {"type": "Polygon", "coordinates": [_hex_ring(h)]},
            "properties": props,
        })

    return {"type": "FeatureCollection", "features": features}


In [9]:

# ==== Export ====
# For every resolution and mode (agent / broker) build the FeatureCollection
# once and write it under both metric file names.
out_dir = Path(OUTPUT_PREFIX)
out_dir.mkdir(parents=True, exist_ok=True)
out_files = []

for res in RES_LIST:
    print(f"Building res={res}…")

    for field in ["Agent_Name","Brokerage_Firm"]:
        fc = compute_hex(df, res=res, field=field, min_sample=MIN_SAMPLE)
        mode = "agent" if field == "Agent_Name" else "broker"

        # Serialize once; the count and volume files share an identical payload
        payload = json.dumps(fc, ensure_ascii=False)

        # Write two files per mode (count & volume) with identical payload (both metrics included)
        for metric in ["count","volume"]:
            fname = out_dir / f"hex_{mode}_{metric}_res{res}.geojson"
            # ensure_ascii=False leaves non-ASCII names unescaped, so the file
            # encoding must be pinned to UTF-8 rather than the platform default.
            fname.write_text(payload, encoding="utf-8")
            out_files.append(str(fname))

print("Wrote:")
for f in out_files:
    print("  -", f)


Building res=7…
Building res=8…
Building res=9…
Wrote:
  - hex_agent_count_res7.geojson
  - hex_agent_volume_res7.geojson
  - hex_broker_count_res7.geojson
  - hex_broker_volume_res7.geojson
  - hex_agent_count_res8.geojson
  - hex_agent_volume_res8.geojson
  - hex_broker_count_res8.geojson
  - hex_broker_volume_res8.geojson
  - hex_agent_count_res9.geojson
  - hex_agent_volume_res9.geojson
  - hex_broker_count_res9.geojson
  - hex_broker_volume_res9.geojson


In [10]:

# ==== Quick sanity check on one file ====
# Re-read the first exported file and print the top-3 lists of its first feature.
# (json and Path come from the imports cell; no re-import needed here.)
sample_file = out_files[0] if out_files else None
print("Sample file:", sample_file)
if sample_file:
    gj = json.loads(Path(sample_file).read_text())
    features = gj.get("features", [])
    print("Features:", len(features))
    if features:
        # named `props` so the input Path bound to `p` in the load cell is not clobbered
        props = features[0]["properties"]
        # The property prefix depends on which field the file was built for;
        # take whichever is present, or an empty JSON list if neither is.
        names, counts, volume = (
            json.loads(
                props.get(f"Agent_Name_top3_{key}")
                or props.get(f"Brokerage_Firm_top3_{key}")
                or "[]"
            )
            for key in ("names", "counts", "volume")
        )
        print("Example names:", names)
        print("Example counts:", counts)
        print("Example volume:", volume[:3])


Sample file: hex_agent_count_res7.geojson
Features: 259
Example names: ['THE JILLS ZEDER GROUP', 'KENLEY CERVERA', 'VANESA CARPIGNANO']
Example counts: [10, 9, 8]
Example volume: [215176084.0, 15187000.0, 17080000.0]
