In [1]:
import os, re, time, html
import numpy as np
import pandas as pd
import folium
from folium import Popup, Tooltip

# -------------------- CONFIG --------------------
# Tunable settings for this notebook, gathered in one place.
CSV_PATH = "sofla_foreclosure_devsites_11_26_2025 - Sheet1.csv"  # input CSV read with pandas; change if needed
# Name of the CSV column holding the address string that is sent to the geocoder.
ADDRESS_COL = "Address"
CACHE_FILE = "geocode_cache.csv"     # on-disk cache: normalized address -> lat, lon
REQUESTS_PER_SECOND = 5              # upper bound on geocoding calls per second (throttle to respect quotas)
APPEND_CITY_STATE = False             # NOTE: only referenced by the commented-out city/state block further down, so it currently has no effect
MAPBOX_STYLE = "mapbox/streets-v11"    # "<owner>/<style-id>" inserted into the tile URL; e.g. streets-v12, outdoors-v12, satellite-streets-v12
# ------------------------------------------------

# ---- Load keys: try `%store` first, then env fallbacks ----
# Ask IPython's %store magic to restore each key. This is best-effort: when
# get_ipython() is unavailable or the magic raises, we fall through to the
# environment-variable lookups below.
for _stored_name in ("google_maps_API_Key", "map_box_api_key"):
    try:
        get_ipython().run_line_magic("store", f"-r {_stored_name}")
    except Exception:
        pass

import googlemaps

# Google key: keep a restored (truthy) value, otherwise read the environment.
google_maps_API_Key = globals().get("google_maps_API_Key") or os.getenv("GOOGLE_MAPS_API_KEY", "")
if not google_maps_API_Key:
    raise ValueError("No Google Maps API key found. Set `%store google_maps_API_Key` or env var GOOGLE_MAPS_API_KEY.")

# Mapbox token: same precedence, with three accepted environment variable names.
map_box_api_key = globals().get("map_box_api_key") or (
    os.getenv("MAPBOX_TOKEN")
    or os.getenv("MAPBOX_ACCESS_TOKEN")
    or os.getenv("MAPBOX_API_KEY", "")
)
if not map_box_api_key:
    raise ValueError("No Mapbox token found. Set `%store map_box_api_key` or env var MAPBOX_TOKEN / MAPBOX_ACCESS_TOKEN.")

# Client object used by geocode() for every lookup.
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

# ---- Load CSV ----
# Read the input file and fail fast if the address column is absent.
df = pd.read_csv(CSV_PATH)
csv_columns = df.columns.tolist()
if ADDRESS_COL not in csv_columns:
    raise ValueError(f"Expected an '{ADDRESS_COL}' column in your CSV. Got: {csv_columns}")

# ---- Normalize address & ensure city/state only if missing ----

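### Not needed for this map: the CSV already contains full addresses, so the block below stays disabled.
### NOTE: if it is re-enabled, the regex looks for "West Palm Beach, FL" but the fallback appends ", Riviera Beach, FL" — make the two agree first.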

# def ensure_wpb_fl(addr: str) -> str:
#     if not isinstance(addr, str): 
#         return ""
#     s = addr.strip()
#     if re.search(r',?\s*west\s+palm\s+beach\s*,\s*fl\b', s, flags=re.I):
#         return s
#     return s + ", Riviera Beach, FL"

# df[ADDRESS_COL] = df[ADDRESS_COL].astype(str).str.strip()
# if APPEND_CITY_STATE:
#     df[ADDRESS_COL] = df[ADDRESS_COL].map(ensure_wpb_fl)

# ---- Cache helpers ----
def _norm_addr(s: str) -> str:
    return " ".join(str(s).strip().lower().split())

# In-memory cache: normalized address -> (lat, lon). Starts empty and is
# filled from CACHE_FILE when that file exists.
geocode_cache = {}
if os.path.exists(CACHE_FILE):
    cache_df = (
        pd.read_csv(CACHE_FILE)
        .dropna(subset=["norm_address"])
        .drop_duplicates("norm_address")
    )
    geocode_cache = {
        key: (cached_lat, cached_lon)
        for key, cached_lat, cached_lon in zip(
            cache_df["norm_address"], cache_df["lat"], cache_df["lon"]
        )
    }

def _save_cache(d: dict):
    if not d: 
        return
    out = pd.DataFrame([{"norm_address": k, "lat": v[0], "lon": v[1]} for k, v in d.items()])
    out.to_csv(CACHE_FILE, index=False)

# ---- Geocode with retry & cache ----
def geocode(addr: str, retry=3, backoff=1.6, force_regenerate=False):
    """Geocode one address through the Google client, using the in-memory cache.

    Parameters
    ----------
    addr : str
        Address text. Non-strings, blank strings and the placeholder strings
        "nan" / "none" (what a missing cell becomes after ``astype(str)``)
        are treated as missing and never sent to the API.
    retry : int
        Maximum number of API attempts.
    backoff : float
        Base of the exponential pause between attempts (``backoff**attempt`` seconds).
    force_regenerate : bool
        When True, skip the cache lookup and always call the API.

    Returns
    -------
    tuple
        ``(lat, lon, status)``. ``status`` is one of "EMPTY", "CACHE", "OK",
        "ZERO_RESULTS", "API_ERROR:<status>", "TRANSPORT", "EXCEPTION", or
        "UNKNOWN" (only when ``retry`` is 0). On any non-success status both
        coordinates are NaN.
    """
    # Same "missing" markers the tooltip/popup helpers use; without this a
    # stringified NaN would be geocoded as the literal query "nan".
    if not isinstance(addr, str) or addr.strip().lower() in {"", "nan", "none"}:
        return (np.nan, np.nan, "EMPTY")
    na = _norm_addr(addr)
    if not force_regenerate and na in geocode_cache:
        lat, lon = geocode_cache[na]
        # A cached NaN pair records an earlier failure; fall through and retry it.
        if not (pd.isna(lat) or pd.isna(lon)):
            return (lat, lon, "CACHE")
    last_status = "UNKNOWN"
    for attempt in range(retry):
        try:
            g = gmaps_key.geocode(addr)
            if g:
                lat = g[0]["geometry"]["location"]["lat"]
                lon = g[0]["geometry"]["location"]["lng"]
                geocode_cache[na] = (lat, lon)
                return (lat, lon, "OK")
            # An empty result list is a definitive answer, not a transient
            # failure, so repeating the same query only burns quota.
            last_status = "ZERO_RESULTS"
            break
        except googlemaps.exceptions.ApiError as e:
            last_status = f"API_ERROR:{getattr(e, 'status', 'UNKNOWN')}"
        except googlemaps.exceptions.TransportError:
            last_status = "TRANSPORT"
        except Exception:
            last_status = "EXCEPTION"
        # Back off only when another attempt will follow.
        if attempt < retry - 1:
            time.sleep(backoff**attempt)
    geocode_cache[na] = (np.nan, np.nan)
    return (np.nan, np.nan, last_status)

# ---- Smoke test ----
# Single lookup ahead of the bulk run; presumably meant to surface key or
# connectivity problems early.
# NOTE(review): with force_regenerate=False, geocode() returns status "CACHE"
# without calling the API when this address is already cached with valid
# coordinates, so only a printed "OK" shows the API was actually reached.
test_lat, test_lon, test_status = geocode("West Palm Beach, FL", force_regenerate=False)
print(f"API test: {test_status} (lat={test_lat}, lon={test_lon})")

# ---- Geocode all rows (throttled) ----
# Minimum spacing between API calls, derived from REQUESTS_PER_SECOND.
min_interval = 1.0 / max(1, REQUESTS_PER_SECOND)
lats, lons, statuses = [], [], []
# Monotonic clock: immune to wall-clock adjustments. Start "one interval ago"
# so the first request is never delayed.
last = time.monotonic() - min_interval
ok_count = cache_count = fail_count = 0
total_rows = len(df)

try:
    for i, raw_addr in enumerate(df[ADDRESS_COL], start=1):
        # Keep missing cells missing: astype(str) would turn NaN into the
        # literal text "nan", which would then be geocoded as a real query.
        if isinstance(raw_addr, str):
            addr = raw_addr
        elif pd.isna(raw_addr):
            addr = ""
        else:
            addr = str(raw_addr)

        wait = last + min_interval - time.monotonic()
        if wait > 0:
            time.sleep(wait)
        lat, lon, status = geocode(addr)
        if status == "OK": ok_count += 1
        elif status == "CACHE": cache_count += 1
        else: fail_count += 1
        if i % 5 == 0 or i == total_rows:
            print(f"[{i}/{total_rows}] OK:{ok_count} CACHE:{cache_count} FAIL:{fail_count} (last={status})")
        lats.append(lat); lons.append(lon); statuses.append(status)
        # Only rows that actually reached the API advance the throttle clock;
        # cache hits and empty addresses cost no quota.
        if status not in ("CACHE", "EMPTY"):
            last = time.monotonic()
finally:
    # Persist whatever was resolved even if the loop is interrupted, so a
    # re-run does not pay for the same lookups again.
    _save_cache(geocode_cache)

# Attach results to the frame; rows without coordinates are excluded from the map.
df["__lat"] = pd.to_numeric(lats, errors="coerce")
df["__lon"] = pd.to_numeric(lons, errors="coerce")
df["__geocode_status"] = statuses
mapped = df.dropna(subset=["__lat", "__lon"]).copy()
print(f"Rows with coordinates: {len(mapped)} / {len(df)}")
if mapped.empty:
    raise RuntimeError("No coordinates produced — check logs above.")

# ---- Tooltip: exact 5 columns ----
TOOLTIP_FIELDS = ["Address", "Project name", "Developer", "Unit Count", "Stories"]

def make_tooltip_html(row) -> str:
    """Build the hover-tooltip HTML for one row.

    Emits one table row per TOOLTIP_FIELDS entry that is present in ``row``
    and has a usable value; labels and values are HTML-escaped. Returns
    "<i>No details</i>" when no field qualifies.
    """
    def _display_text(field):
        # Cleaned text for a field, or None when it is absent, NaN, blank,
        # or a stringified "nan"/"none".
        if field not in row.index:
            return None
        raw = row.get(field)
        if pd.isna(raw):
            return None
        text = str(raw).strip()
        if text == "" or text.lower() in {"nan", "none"}:
            return None
        return text

    cells = []
    for field in TOOLTIP_FIELDS:
        text = _display_text(field)
        if text is not None:
            cells.append(
                f"<tr><th style='text-align:left;padding-right:6px;white-space:nowrap'>{html.escape(field)}</th>"
                f"<td>{html.escape(text)}</td></tr>"
            )

    if cells:
        return ("<div style='font:12px/1.2 Arial, sans-serif'>"
                "<table style='border-collapse:collapse'>" +
                "".join(cells) +
                "</table></div>")
    return "<i>No details</i>"

# ---- Popup (fuller table: all columns except coords) ----
# Helper columns added during geocoding; these are kept out of the popup.
coord_cols = {"__lat", "__lon", "__geocode_status"}
# Remaining columns, in their original order.
DISPLAY_COLUMNS = [
    column_name
    for column_name in mapped.columns
    if column_name not in coord_cols
]

def _clean(v):
    if pd.isna(v): return None
    s = str(v).strip()
    return s if s and s.lower() not in {"nan","none"} else None

def make_popup_html(row):
    """Build the click-popup HTML table for one row.

    Every column in DISPLAY_COLUMNS with a usable value (per ``_clean``)
    becomes a table row. Values starting with http:// or https:// are
    rendered as links opening in a new tab. Returns "<i>No details</i>"
    when nothing is displayable.
    """
    rows = []
    for col in DISPLAY_COLUMNS:
        val = _clean(row.get(col))
        if val is None:
            continue
        # html.escape defaults to quote=True, so the same escaped text is safe
        # both inside the href="..." attribute and as the link text. The raw
        # value must never be placed in the attribute: a double quote in the
        # CSV cell would close it and let arbitrary markup through.
        safe_val = html.escape(val)
        if val.lower().startswith(("http://", "https://")):
            v = f'<a href="{safe_val}" target="_blank" rel="noopener">{safe_val}</a>'
        else:
            v = safe_val
        # str() guards against non-string column labels (e.g. numeric headers).
        rows.append(f"<tr><th style='text-align:left;padding-right:8px'>{html.escape(str(col))}</th><td>{v}</td></tr>")
    return "<table>" + "".join(rows) + "</table>" if rows else "<i>No details</i>"

# ---- Build map ----
# Centre the view on the mean coordinate of all geocoded rows.
mean_lat = float(mapped["__lat"].mean())
mean_lon = float(mapped["__lon"].mean())
center = [mean_lat, mean_lon]

# Mapbox raster tile template; the doubled braces leave {z}/{x}/{y} as literal
# placeholders in the resulting string.
# NOTE(review): the token is part of the URL handed to folium, so presumably it
# ends up in the saved HTML — confirm a public, URL-restricted token is used.
tile_url = (
    f"https://api.mapbox.com/styles/v1/{MAPBOX_STYLE}/tiles/256/{{z}}/{{x}}/{{y}}@2x"
    f"?access_token={map_box_api_key}"
)

m = folium.Map(
    location=center,
    tiles=tile_url,
    attr="© Mapbox © OpenStreetMap",
    name="Base",
    zoom_start=12,
    control_scale=True,
)

# ---- Add markers ----
# One red circle per geocoded row: hover shows the short tooltip table,
# click opens the fuller popup table.
for _, site in mapped.iterrows():
    marker = folium.CircleMarker(
        location=(float(site["__lat"]), float(site["__lon"])),
        radius=7,
        color="red",        # outline colour
        weight=2,           # outline thickness
        fill=True,
        fill_color="red",   # interior colour
        popup=Popup(make_popup_html(site), max_width=450),
        tooltip=Tooltip(make_tooltip_html(site), sticky=True, direction="top"),
    )
    marker.add_to(m)

# ---- Save ----
# Write the map to a standalone HTML file in the working directory.
output_path = "index.html"
m.save(output_path)
print("✅ Saved map to index.html")

m  # show in notebook (if Jupyter)


API test: OK (lat=26.7144532, lon=-80.0549456)
[5/13] OK:5 CACHE:0 FAIL:0 (last=OK)
[10/13] OK:10 CACHE:0 FAIL:0 (last=OK)
[13/13] OK:13 CACHE:0 FAIL:0 (last=OK)
Rows with coordinates: 13 / 13
✅ Saved map to index.html


In [2]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

# The published URL is the base plus the name of the folder this notebook runs in.
# Split on the platform's own separator after normalising the path: a
# hard-coded '/' leaves a Windows path unsplit, so the entire path would be
# appended to the URL.
cwd = os.path.normpath(os.getcwd())

cwd = cwd.split(os.sep)

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/sofla_foreclosure_devsites_11_26_2025
