In [13]:
import os, re, time, html
import numpy as np
import pandas as pd
import folium
from folium import Popup

# -------------------- CONFIG --------------------
# Tunable settings for this notebook; everything below is read by the code that follows.

# Input spreadsheet, loaded with pd.read_csv.
CSV_PATH = "WestPalmBeachCondoMap2025 - Sheet1.csv"  # <-- change if needed
# Name of the column holding the address to geocode; loading fails if it is absent.
ADDRESS_COL = "Address"
# CSV file used to persist geocoding results between runs.
CACHE_FILE = "geocode_cache.csv"     # normalized address -> lat, lon
# Upper bound on geocoding calls per second; values below 1 are treated as 1 by the throttle.
REQUESTS_PER_SECOND = 5              # throttle to respect quotas
# When True, every address is passed through ensure_wpb_fl before geocoding.
APPEND_CITY_STATE = True             # append ", West Palm Beach, FL" if missing
# Mapbox style id interpolated into the tile URL of the folium map.
MAPBOX_STYLE = "mapbox/streets-v12"    # swap styles: streets-v12, outdoors-v12, satellite-streets-v12, etc.
# ------------------------------------------------

# ---- Load keys: try `%store` first, then env fallbacks ----
# Ask IPython's %store magic to restore each key. Outside IPython, or when the
# magic fails, the error is deliberately ignored so the environment-variable
# fallbacks below get their turn.
for _restore_args in ("-r google_maps_API_Key", "-r map_box_api_key"):
    try:
        get_ipython().run_line_magic("store", _restore_args)
    except Exception:
        pass
del _restore_args

import googlemaps

# Google key: a restored, non-empty value wins; otherwise read the environment.
if not globals().get('google_maps_API_Key'):
    google_maps_API_Key = os.getenv("GOOGLE_MAPS_API_KEY", "")
if not google_maps_API_Key:
    raise ValueError("No Google Maps API key found. Set `%store google_maps_API_Key` or env var GOOGLE_MAPS_API_KEY.")

# Mapbox token: same precedence, with three accepted environment variable names
# tried in order.
if not globals().get('map_box_api_key'):
    map_box_api_key = (
        os.getenv("MAPBOX_TOKEN")
        or os.getenv("MAPBOX_ACCESS_TOKEN")
        or os.getenv("MAPBOX_API_KEY", "")
    )
if not map_box_api_key:
    raise ValueError("No Mapbox token found. Set `%store map_box_api_key` or env var MAPBOX_TOKEN / MAPBOX_ACCESS_TOKEN.")

# Client object used by geocode() for all Google geocoding requests.
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

# ---- Load CSV ----
# Read the whole spreadsheet into `df`; later steps add geocoding columns to it.
df = pd.read_csv(CSV_PATH)
# Fail fast, listing the columns actually present, so a renamed or misspelled
# address header is easy to spot.
if ADDRESS_COL not in df.columns:
    raise ValueError(f"Expected an '{ADDRESS_COL}' column in your CSV. Got: {df.columns.tolist()}")

# ---- Normalize address & ensure city/state only if missing ----
def ensure_wpb_fl(addr: str) -> str:
    """Return `addr` trimmed, with ", West Palm Beach, FL" appended if it lacks a city/state.

    Args:
        addr: Raw address text. Non-string values are treated as missing.

    Returns:
        "" when `addr` is not a string, is blank, or is the text "nan"/"none"
        (what stringifying a missing cell produces). Otherwise the trimmed
        address, unchanged if it already names West Palm Beach with "FL" or
        "Florida" (comma optional, case-insensitive), or with
        ", West Palm Beach, FL" appended.
    """
    if not isinstance(addr, str):
        return ""
    s = addr.strip()
    # A blank address must stay blank: appending the city would create a
    # geocodable string and drop a bogus pin on the city centre.
    if not s or s.lower() in {"nan", "none"}:
        return ""
    # Accept "West Palm Beach, FL", "West Palm Beach FL" and "... Florida" so the
    # suffix is never appended twice.
    if re.search(r'\bwest\s+palm\s+beach\s*,?\s*(?:fl|florida)\b', s, flags=re.I):
        return s
    return s + ", West Palm Beach, FL"

# Trim every address, keeping missing cells as "" rather than the text "nan"
# that a bare astype(str) would produce. An empty string is what geocode()
# reports as "EMPTY", so rows without an address never reach the API.
df[ADDRESS_COL] = df[ADDRESS_COL].map(lambda v: "" if pd.isna(v) else str(v).strip())
if APPEND_CITY_STATE:
    # Only real addresses get the city/state suffix; blank ones stay blank so
    # they are not geocoded to the city centre.
    df[ADDRESS_COL] = df[ADDRESS_COL].map(lambda a: ensure_wpb_fl(a) if a else a)

# ---- Cache helpers ----
def _norm_addr(s: str) -> str:
    return " ".join(str(s).strip().lower().split())

# Start from an empty cache unless a cache file from an earlier run exists.
if not os.path.exists(CACHE_FILE):
    geocode_cache = {}
else:
    # Rows without a key are unusable; for duplicate keys the first row is kept.
    cache_df = (
        pd.read_csv(CACHE_FILE)
        .dropna(subset=["norm_address"])
        .drop_duplicates("norm_address")
    )
    # norm_address -> (lat, lon)
    geocode_cache = {
        key: (lat, lon)
        for key, lat, lon in zip(cache_df["norm_address"], cache_df["lat"], cache_df["lon"])
    }

def _save_cache(d: dict):
    """Write the cache mapping to CACHE_FILE as CSV with columns norm_address, lat, lon.

    `d` maps a normalized address to a (lat, lon) pair. Nothing is written
    when `d` is empty or None.
    """
    if d:
        records = [(key, coords[0], coords[1]) for key, coords in d.items()]
        table = pd.DataFrame(records, columns=["norm_address", "lat", "lon"])
        table.to_csv(CACHE_FILE, index=False)

# ---- Geocode (googlemaps client) with retry + "bad-cache" bypass ----
def geocode(addr: str, retry=3, backoff=1.6, force_regenerate=False):
    """Geocode one address through the Google client, consulting the in-memory cache first.

    Args:
        addr: Address text. Non-strings and blank strings are not looked up.
        retry: Maximum number of API attempts.
        backoff: Base of the exponential pause between attempts
            (``backoff ** attempt`` seconds, attempt counted from 0).
        force_regenerate: When True, ignore any cached entry and call the API.

    Returns:
        ``(lat, lon, status)``. `status` is "OK" for a fresh API hit, "CACHE"
        for a valid cached hit, "EMPTY" for unusable input, or a failure string
        ("ZERO_RESULTS", "API_ERROR:<status>", "TRANSPORT", "EXCEPTION",
        "UNKNOWN"); on failure both coordinates are NaN.

    Side effects:
        Updates the module-level `geocode_cache` with the result, or with
        NaN coordinates after all attempts fail.
    """
    if not isinstance(addr, str) or not addr.strip():
        return (np.nan, np.nan, "EMPTY")

    na = _norm_addr(addr)

    # A cached entry is trusted only when both coordinates are real numbers;
    # a cached NaN marks an earlier failure and is retried against the API.
    if not force_regenerate and na in geocode_cache:
        lat, lon = geocode_cache[na]
        if not (pd.isna(lat) or pd.isna(lon)):
            return (lat, lon, "CACHE")

    last_status = "UNKNOWN"
    for attempt in range(retry):
        try:
            g = gmaps_key.geocode(addr)
            if g:
                location = g[0]["geometry"]["location"]
                lat, lon = location["lat"], location["lng"]
                geocode_cache[na] = (lat, lon)   # store valid result
                return (lat, lon, "OK")
            last_status = "ZERO_RESULTS"
        except googlemaps.exceptions.ApiError as e:
            last_status = f"API_ERROR:{getattr(e, 'status', 'UNKNOWN')}"
        except googlemaps.exceptions.TransportError:
            last_status = "TRANSPORT"
        except Exception:
            # Also covers a malformed response (missing keys in the result).
            last_status = "EXCEPTION"
        # Back off only when another attempt follows; pausing after the final
        # failure would just delay the caller.
        if attempt < retry - 1:
            time.sleep(backoff**attempt)

    # Record the failure so it is visible in the saved cache. NaN entries are
    # never served from the cache, so this address is retried on the next call.
    geocode_cache[na] = (np.nan, np.nan)
    return (np.nan, np.nan, last_status)

# ---- Quick API smoke test ----
# One lookup up front so a bad key or network problem shows before the full run.
test_lat, test_lon, test_status = geocode("West Palm Beach, FL", force_regenerate=False)
print(f"API test: {test_status} (lat={test_lat}, lon={test_lon})")

# ---- Geocode all rows (throttled) ----
# Minimum spacing between API calls; REQUESTS_PER_SECOND below 1 is treated as 1.
min_interval = 1.0 / max(1, REQUESTS_PER_SECOND)
lats, lons, statuses = [], [], []
last = 0.0  # time of the most recent lookup that actually went to the API
ok_count = cache_count = fail_count = 0

addresses = df[ADDRESS_COL].astype(str).tolist()
try:
    for i, addr in enumerate(addresses, start=1):
        wait = last + min_interval - time.time()
        if wait > 0:
            time.sleep(wait)
        lat, lon, status = geocode(addr)
        if status == "OK":
            ok_count += 1
        elif status == "CACHE":
            cache_count += 1
        else:
            fail_count += 1
        if i % 5 == 0 or i == len(addresses):
            print(f"[{i}/{len(addresses)}] OK:{ok_count} CACHE:{cache_count} FAIL:{fail_count} (last={status})")
        lats.append(lat); lons.append(lon); statuses.append(status)
        # Only lookups that reached the API count against the rate limit;
        # cache hits and empty rows should not slow down the rows after them.
        if status not in ("CACHE", "EMPTY"):
            last = time.time()
finally:
    # Persist whatever was geocoded even if the loop is interrupted, so the
    # results already obtained are not requested again on the next run.
    _save_cache(geocode_cache)

# Attach results to the input frame; anything non-numeric becomes NaN.
df["__lat"] = pd.to_numeric(lats, errors="coerce")
df["__lon"] = pd.to_numeric(lons, errors="coerce")
df["__geocode_status"] = statuses

# Only rows with both coordinates can be placed on the map.
mapped = df.dropna(subset=["__lat", "__lon"]).copy()
print(f"Rows with coordinates: {len(mapped)} / {len(df)}")
if mapped.empty:
    print("No points mapped. Status breakdown (all rows):")
    print(pd.Series(statuses).value_counts(dropna=False))
    raise RuntimeError("Geocoding produced no coordinates—see logs above.")

# ---- Limit popup fields to specific columns (in order) ----
# Labels shown in each marker popup, in display order. A label whose column
# cannot be resolved is dropped from the popup later on.
DESIRED_FIELDS = ["Address", "Project name", "Developer", "Unit Count", "Stories"]

# Alternative column headers, keyed by the lowercased desired label.
# resolve_column tries them in list order, case-insensitively, when the label
# itself is not a column of the dataframe.
ALIASES = {
    "project name": ["Project Name", "Project", "Name"],
    "unit count": ["Units", "Unit_Count", "UnitCount", "# Units", "Total Units"],
    "stories": ["Floors", "# Stories", "Height (stories)"]
}

# Case-insensitive index of the dataframe's columns: lowercased name -> actual
# name. If two columns differ only by case, the later one wins.
lower_cols = {c.lower(): c for c in mapped.columns}

def resolve_column(desired_label: str):
    """Find the dataframe column that corresponds to a popup label.

    The lowercased label is tried first, then each of its ALIASES in order,
    all matched case-insensitively through `lower_cols`.

    Returns:
        The actual column name, or None when neither the label nor any alias
        is present.
    """
    key = desired_label.lower()
    candidates = [key] + [alt.lower() for alt in ALIASES.get(key, [])]
    return next((lower_cols[name] for name in candidates if name in lower_cols), None)

# (label, actual column) pairs for the popup, in DESIRED_FIELDS order; labels
# with no matching column are left out.
DISPLAY_MAP = [
    (label, col)
    for label, col in zip(DESIRED_FIELDS, map(resolve_column, DESIRED_FIELDS))
    if col is not None
]

if len(DISPLAY_MAP) == 0:
    raise ValueError(
        "None of the desired popup columns were found in your data. "
        f"Available columns: {list(mapped.columns)}"
    )

# Optional tooltip: the column resolved for "Project name", or None if absent
tooltip_col = next(
    (col for label, col in DISPLAY_MAP if "project name" == label.lower()),
    None,
)

def make_popup_html(row):
    """Render one data row as the HTML shown in a marker popup.

    Args:
        row: Mapping-like record (for example a DataFrame row) supporting
            ``.get(column)``.

    Returns:
        A two-column ``<table>`` with one line per DISPLAY_MAP entry that has
        a value, or ``<i>No details</i>`` when every field is missing. Missing
        means NaN/None, blank, or the text "nan"/"none". Values starting with
        http:// or https:// become links that open in a new tab. All values
        and labels are HTML-escaped.
    """
    rows_html = []
    for label, col in DISPLAY_MAP:
        val = row.get(col)
        if pd.isna(val):
            continue
        s = str(val).strip()
        if not s or s.lower() in {"nan", "none"}:
            continue
        safe = html.escape(s)  # escapes &, <, > and both quote characters
        if s.lower().startswith(("http://", "https://")):
            # The URL sits inside a double-quoted attribute, so it must be
            # escaped too; a raw '"' or '&' would break out of, or corrupt, href.
            v = f'<a href="{safe}" target="_blank" rel="noopener">{safe}</a>'
        else:
            v = safe
        rows_html.append(
            f"<tr><th style='text-align:left;padding-right:8px;white-space:nowrap'>{html.escape(label)}</th><td>{v}</td></tr>"
        )
    return "<table>" + "".join(rows_html) + "</table>" if rows_html else "<i>No details</i>"

# ---- Build map (Mapbox tiles) ----
# Open the map centred on the mean coordinate of all geocoded rows.
center = [float(mapped[axis].mean()) for axis in ("__lat", "__lon")]
# NOTE(review): the Mapbox token is part of the tile URL, so it presumably ends
# up in the saved HTML — confirm the token is meant to be public.
m = folium.Map(
    tiles=(
        f"https://api.mapbox.com/styles/v1/{MAPBOX_STYLE}/tiles/256/{{z}}/{{x}}/{{y}}@2x"
        f"?access_token={map_box_api_key}"
    ),
    attr="© Mapbox © OpenStreetMap",
    name="Base",
    location=center,
    zoom_start=12,
    control_scale=True,
)

# ---- Add markers (pin-style with red icon) ----
for _, r in mapped.iterrows():
    # Hover tooltip only when a project-name column was resolved and this row
    # has a value in it.
    if tooltip_col and pd.notna(r.get(tooltip_col)):
        tooltip_text = str(r.get(tooltip_col))
    else:
        tooltip_text = None
    folium.Marker(
        location=(float(r["__lat"]), float(r["__lon"])),
        popup=Popup(make_popup_html(r), max_width=450),
        tooltip=tooltip_text,
        icon=folium.Icon(color="red", icon="info-sign"),
    ).add_to(m)

# Optional: Layer control (handy if you add more layers later)
folium.LayerControl(collapsed=True).add_to(m)

# ---- Save ----
m.save("index.html")
print("✅ Saved map to index.html")

m  # show in notebook (if running in Jupyter)


API test: CACHE (lat=26.7144532, lon=-80.0549456)
[5/13] OK:0 CACHE:5 FAIL:0 (last=CACHE)
[10/13] OK:0 CACHE:10 FAIL:0 (last=CACHE)
[13/13] OK:0 CACHE:13 FAIL:0 (last=CACHE)
Rows with coordinates: 13 / 13
✅ Saved map to index.html


In [11]:
# Inspect the dataframe's columns; once the geocoding cell has run they include
# the helper columns __lat, __lon and __geocode_status.
df.columns

Index(['Address', 'Project name', 'Developer', 'Description', 'URL',
       'Outreach', 'Unit Count', 'Stories', 'Loan', 'Sales Launch Year',
       'Brokerage', '% Sold', 'Completion Date', '__lat', '__lon',
       '__geocode_status'],
      dtype='object')

In [2]:
# Build a URL from a fixed base plus the name of the current working folder.
# NOTE(review): presumably the GitHub Pages address this map is published at — confirm.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

cwd = os.getcwd()

# os.path.basename honours the platform's path separator, unlike splitting on
# a hard-coded '/', and leaves `cwd` as the path string instead of rebinding it
# to a list.
final_name = base_name + os.path.basename(cwd)
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/miami_map_09_02_25
