# H3 Dominance Hexes (v4-compatible)
Aggregate listings into **H3 hex bins** and compute dominance (market share) for **Agents** and **Brokerages**, by **count** and **price volume**. Exports **GeoJSON** layers for Mapbox.

**Required columns**: `latitude, longitude, Agent_Name, Brokerage_Firm`. `price` is optional; it is used for the price-volume layers and the per-hex `sum_price` / `avg_price` properties.


## 1) Setup

In [1]:
# If needed:
# %pip install pandas h3
import json
from pathlib import Path
import pandas as pd
try:
    import h3
except ImportError as e:
    # Only a genuinely missing package gets the install hint; any other
    # import-time failure propagates unchanged so its real cause stays visible.
    raise RuntimeError("Please install the 'h3' package: pip install h3") from e
print("h3 version:", getattr(h3, "__version__", "unknown"))

h3 version: 4.3.1


## 2) H3 compatibility wrappers

In [2]:
def h3_cell(lat, lng, res):
    """Return the H3 cell for a latitude/longitude pair at resolution *res*.

    Uses whichever conversion function the installed ``h3`` module exposes,
    checking the v3.x name (``geo_to_h3``) before the v4.x name
    (``latlng_to_cell``).

    Raises:
        AttributeError: if the ``h3`` module exposes neither function.
    """
    # Order matters: the v3.x name is tried first, then the v4.x name.
    for api_name in ("geo_to_h3", "latlng_to_cell"):
        if hasattr(h3, api_name):
            convert = getattr(h3, api_name)
            return convert(lat, lng, res)
    raise AttributeError("No H3 lat/lng→cell function found.")

def h3_boundary(cell):
    """Return the boundary vertices of an H3 cell as (lat, lng) pairs.

    Both supported API generations are called so that they yield the same
    vertex order, (lat, lng). The v3.x function is deliberately called
    WITHOUT ``geo_json=True``: that flag makes it return (lng, lat) pairs,
    which would disagree with the v4.x branch and lead any caller that swaps
    the pair into longitude/latitude order to emit transposed coordinates.

    The returned ring is not guaranteed to repeat its first vertex at the
    end; callers that need a closed ring must close it themselves.

    Raises:
        AttributeError: if the ``h3`` module exposes neither boundary function.
    """
    if hasattr(h3, "h3_to_geo_boundary"):      # v3.x
        return h3.h3_to_geo_boundary(cell)
    elif hasattr(h3, "cell_to_boundary"):      # v4.x
        return h3.cell_to_boundary(cell)
    raise AttributeError("No H3 cell→boundary function found.")

## 3) Load your data

In [3]:
# Set csv_path to load listings from disk (the CSV must include the required
# columns). Set it to '' or None to reuse a DataFrame named `df` that already
# exists in memory; the read below only runs when csv_path is truthy.
csv_path = 'Adam F. - 2025 Miami Brokerage and Broker Rankings - Agents_Brokers.csv'  # e.g., 'data/sofla_listings.csv'
if csv_path:
    df = pd.read_csv(csv_path)

# Fail fast if any column the later cells depend on is absent.
required = ['latitude','longitude','Agent_Name','Brokerage_Firm']
missing = [c for c in required if c not in df.columns]
if missing:
    raise ValueError(f"Missing required columns: {missing}")

# Coerce coordinates to numbers (unparseable values become NaN), then drop
# rows without a usable coordinate or with one outside the valid lat/lng range.
df = df.copy()
df['latitude'] = pd.to_numeric(df['latitude'], errors='coerce')
df['longitude'] = pd.to_numeric(df['longitude'], errors='coerce')
df = df.dropna(subset=['latitude','longitude'])
df = df[(df['latitude'].between(-90,90)) & (df['longitude'].between(-180,180))].copy()

# price is optional. Keep it numeric so that later sums/means operate on
# numbers: a missing column becomes all-NaN, and values that cannot be parsed
# become NaN (reported below rather than dropped silently).
if 'price' not in df.columns:
    df['price'] = float('nan')
raw_price = df['price']
df['price'] = pd.to_numeric(raw_price, errors='coerce')
n_bad_price = int((df['price'].isna() & raw_price.notna()).sum())
if n_bad_price:
    print("Warning: non-numeric price values set to NaN:", n_bad_price)

print("Rows after cleaning:", len(df))
df.head(2)

Rows after cleaning: 26844


Unnamed: 0,Unique_ID,mlsId,Property_ID,Address,price,Side,Brokerage_Firm,zipcode,Agent_Name,TRD_note,...,longitude,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21
0,A11529138-ZACHARY VICHINSKY-Buy,A11529138,42771732.0,18 La Gorce Cir,122125121,Buy,BESPOKE REAL ESTATE FLORIDA,33141.0,ZACHARY VICHINSKY,,...,-80.130144,,,,,,,,,
1,A11529138-THE JILLS ZEDER GROUP-List,A11529138,42771732.0,18 La Gorce Cir,122125121,List,COLDWELL BANKER REALTY,33141.0,THE JILLS ZEDER GROUP,,...,-80.130144,,,,,,,,,


## 4) Configuration

In [4]:
# --- Export settings -------------------------------------------------------
# One set of GeoJSON layers is produced per H3 resolution listed here.
RES_LIST = [7, 8, 9]

# Hexes with fewer rows than this are flagged as not meeting the minimum sample.
MIN_SAMPLE = 10

# --- Optional date window (only applied when a date column is available) ---
USE_DATE_FILTER = False
DATE_COLUMN = 'CloseDate'
DATE_START = '2024-01-01'  # inclusive
DATE_END = None            # exclusive

if USE_DATE_FILTER and DATE_COLUMN in df.columns:
    # Parse as UTC, then strip the timezone so the comparisons below are
    # against naive timestamps; unparseable dates become NaT.
    parsed_dates = pd.to_datetime(df[DATE_COLUMN], errors='coerce', utc=True)
    df = df.assign(_date=parsed_dates.dt.tz_localize(None))
    window_start = pd.to_datetime(DATE_START) if DATE_START else None
    window_end = pd.to_datetime(DATE_END) if DATE_END else None
    if window_start is not None:
        df = df[df['_date'] >= window_start]
    if window_end is not None:
        df = df[df['_date'] < window_end]
    print("Rows after date filter:", len(df))

## 5) Helper functions

In [5]:
def _safe_json(v):
    try:
        return json.dumps(v, ensure_ascii=False)
    except Exception:
        return json.dumps([str(x) for x in v], ensure_ascii=False)

def summarize_counts(series, topk=3):
    """Summarize which values occur most often in *series*.

    Args:
        series: pandas Series of labels to tally with ``value_counts``.
        topk: how many of the most frequent labels to report.

    Returns:
        dict with the leading label (``top_name``), its count (``top_value``),
        its share of all counted rows rounded to 4 places (``top_share``),
        the ``topk`` labels and counts (``topk_names`` / ``topk_values``),
        and the number of counted rows (``total``). When nothing is counted,
        the name is None and the numeric fields are zero.
    """
    frequencies = series.value_counts()
    leaders = frequencies.head(topk)
    leader_names = leaders.index.tolist()
    leader_counts = [int(c) for c in leaders.tolist()]
    total = int(frequencies.sum())

    if leader_names:
        top_name, top_val = leader_names[0], leader_counts[0]
    else:
        top_name, top_val = None, 0

    # Guard against dividing by zero when no rows were counted.
    share = top_val / total if total else 0.0
    return {
        "top_name": top_name,
        "top_value": top_val,
        "top_share": round(share, 4),
        "topk_names": leader_names,
        "topk_values": leader_counts,
        "total": total,
    }

def summarize_volume(group, field_name='Agent_Name', weight_col='price', topk=3):
    """Summarize which *field_name* values carry the most summed *weight_col*.

    Args:
        group: pandas DataFrame containing *field_name* and *weight_col*.
        field_name: column whose values are ranked.
        weight_col: column summed per value of *field_name*.
        topk: how many of the largest totals to report.

    Returns:
        dict with the leading value (``top_name``), its summed weight rounded
        to 2 places (``top_value``), its share of the overall sum rounded to
        4 places (``top_share``), the ``topk`` names and unrounded sums
        (``topk_names`` / ``topk_values``), and the overall sum rounded to
        2 places (``total``).
    """
    per_name = group.groupby(field_name)[weight_col].sum()
    ranked = per_name.sort_values(ascending=False)
    leaders = ranked.head(topk)
    leader_names = leaders.index.tolist()
    leader_sums = [float(v) for v in leaders.tolist()]

    grand_total = float(ranked.sum()) if len(ranked) else 0.0
    if leader_names:
        top_name, top_val = leader_names[0], leader_sums[0]
    else:
        top_name, top_val = None, 0.0

    # A zero overall sum yields a zero share instead of a division error.
    share = top_val / grand_total if grand_total else 0.0
    return {
        "top_name": top_name,
        "top_value": round(top_val, 2),
        "top_share": round(share, 4),
        "topk_names": leader_names,
        "topk_values": leader_sums,
        "total": round(grand_total, 2),
    }

def hex_boundary_lonlat(h):
    """Return the boundary of cell *h* with each vertex pair swapped.

    Every pair produced by ``h3_boundary`` is unpacked as (lat, lng) and
    re-emitted as a (lng, lat) tuple, in the same vertex order.
    """
    swapped = []
    for lat, lng in h3_boundary(h):
        swapped.append((lng, lat))
    return swapped

def geojson_feature_polygon(hex_id, props):
    """Build a GeoJSON Polygon Feature for one hex.

    Args:
        hex_id: cell identifier passed to ``hex_boundary_lonlat``.
        props: dict stored unchanged as the feature's ``properties``.

    Returns:
        dict shaped as a GeoJSON Feature whose geometry has a single ring.
    """
    ring = hex_boundary_lonlat(hex_id)
    # Close the ring by repeating the first vertex when it is not already closed.
    if ring[0] != ring[-1]:
        ring.append(ring[0])
    geometry = {"type": "Polygon", "coordinates": [ring]}
    return {"type": "Feature", "properties": props, "geometry": geometry}

## 6) Aggregate & export GeoJSON

In [6]:
def _json_number(value):
    """Return *value* as a float, or None when it is NaN or infinite.

    ``json.dumps`` writes NaN/Infinity as bare tokens that are not valid
    JSON, so such values are mapped to None (serialized as ``null``).
    """
    number = float(value)
    if number != number or number in (float('inf'), float('-inf')):
        return None
    return number


def compute_hex(df_in, res, field, metric='count', min_sample=MIN_SAMPLE):
    """Aggregate rows into H3 hexes and describe the dominant *field* value.

    Args:
        df_in: DataFrame with ``latitude``, ``longitude`` and *field* columns,
            plus ``price`` when available. It is copied, not modified.
        res: H3 resolution used to bin the rows.
        field: column whose dominant value is reported for each hex.
        metric: ``'count'`` ranks *field* values by number of rows; any other
            value ranks them by summed ``price``.
        min_sample: row count at or above which a hex is flagged with
            ``meets_min_sample`` = True.

    Returns:
        dict: a GeoJSON FeatureCollection with one Polygon feature per hex.
        ``sum_price`` / ``avg_price`` are None when there is no ``price``
        column or when the value is not a finite number.
    """
    df = df_in.copy()
    has_price = 'price' in df.columns
    if has_price:
        # Make sure sums/means operate on numbers; unparseable prices become NaN.
        df['price'] = pd.to_numeric(df['price'], errors='coerce')
    # Iterate the two coordinate columns directly instead of building a row
    # object per record with DataFrame.apply(axis=1).
    df['h3'] = [
        h3_cell(lat, lng, res)
        for lat, lng in zip(df['latitude'], df['longitude'])
    ]

    feats = []
    for h, g in df.groupby('h3'):
        sample_size = int(len(g))
        base = {
            "hex_id": h, "h3_res": res, "sample_size": sample_size,
            "sum_price": _json_number(g['price'].sum()) if has_price else None,
            "avg_price": _json_number(g['price'].mean()) if has_price else None,
            "dominance_metric": metric, "group_field": field,
            "meets_min_sample": bool(sample_size >= min_sample),
        }
        if metric == 'count':
            s = summarize_counts(g[field])
            props = {**base,
                f"top_{field}_name": s["top_name"],
                f"top_{field}_count": s["top_value"],
                f"top_{field}_share": s["top_share"],
                f"{field}_top3_names": _safe_json(s["topk_names"]),
                f"{field}_top3_counts": _safe_json(s["topk_values"]),
            }
        else:
            s = summarize_volume(g, field_name=field, weight_col='price')
            props = {**base,
                f"top_{field}_name": s["top_name"],
                f"top_{field}_volume": s["top_value"],
                f"top_{field}_share": s["top_share"],
                f"{field}_top3_names": _safe_json(s["topk_names"]),
                f"{field}_top3_volume": _safe_json(s["topk_values"]),
                # Overrides the base value with the rounded per-field total.
                "sum_price": s["total"],
            }
        feats.append(geojson_feature_polygon(h, props))
    return {"type": "FeatureCollection", "features": feats}

# Write one GeoJSON file per (resolution, grouping field, metric) combination
# into the current working directory and collect the file names.
out_files = []
for res in RES_LIST:
    for field in ['Agent_Name','Brokerage_Firm']:
        for metric in ['count','volume']:
            fc = compute_hex(df, res=res, field=field, metric=metric, min_sample=MIN_SAMPLE)
            name = f"hex_{'agent' if field=='Agent_Name' else 'broker'}_{metric}_res{res}.geojson"
            # ensure_ascii=False emits non-ASCII characters verbatim, so the
            # file encoding is pinned to UTF-8 rather than left to the
            # platform default.
            Path(name).write_text(json.dumps(fc, ensure_ascii=False), encoding="utf-8")
            out_files.append(name)
            print("Wrote", name, "hexes:", len(fc["features"]))
out_files

Wrote hex_agent_count_res7.geojson hexes: 307
Wrote hex_agent_volume_res7.geojson hexes: 307
Wrote hex_broker_count_res7.geojson hexes: 307
Wrote hex_broker_volume_res7.geojson hexes: 307
Wrote hex_agent_count_res8.geojson hexes: 1463
Wrote hex_agent_volume_res8.geojson hexes: 1463
Wrote hex_broker_count_res8.geojson hexes: 1463
Wrote hex_broker_volume_res8.geojson hexes: 1463
Wrote hex_agent_count_res9.geojson hexes: 4954
Wrote hex_agent_volume_res9.geojson hexes: 4954
Wrote hex_broker_count_res9.geojson hexes: 4954
Wrote hex_broker_volume_res9.geojson hexes: 4954


['hex_agent_count_res7.geojson',
 'hex_agent_volume_res7.geojson',
 'hex_broker_count_res7.geojson',
 'hex_broker_volume_res7.geojson',
 'hex_agent_count_res8.geojson',
 'hex_agent_volume_res8.geojson',
 'hex_broker_count_res8.geojson',
 'hex_broker_volume_res8.geojson',
 'hex_agent_count_res9.geojson',
 'hex_agent_volume_res9.geojson',
 'hex_broker_count_res9.geojson',
 'hex_broker_volume_res9.geojson']

## 7) Mapbox GL example (paste into your app)

In [None]:
# Mapbox GL JS snippet for the resolution-8 agent-count layer. It colors each
# hex by its "top_Agent_Name_share" property and dims hexes whose
# "sample_size" is below 10 (the threshold is hard-coded in the snippet).
mapbox_example_js = '''map.addSource("hex-agent-count-res8", { type: "geojson", data: "hex_agent_count_res8.geojson" });
map.addLayer({
  id: "hex-agent-count-fill", type: "fill", source: "hex-agent-count-res8",
  paint: {
    "fill-color": [
      "interpolate", ["linear"], ["get", "top_Agent_Name_share"],
      0.0, "#f2f2f2",
      0.25, "#c7d9ff",
      0.5, "#7fb3ff",
      0.75, "#2a7dff",
      0.9, "#0047b3"
    ],
    "fill-opacity": ["case", [">=", ["get","sample_size"], 10], 0.65, 0.2]
  }
});
map.addLayer({ id:"hex-agent-count-outline", type:"line", source:"hex-agent-count-res8",
  paint: { "line-color":"#fff", "line-width":0.5, "line-opacity":0.6 }
});'''
print(mapbox_example_js)