In [1]:
# # %store -r map_box_api_key
# %store -r google_maps_API_Key
# GOOGLE_MAPS_API_KEY = googlemaps.Client(key=google_maps_API_Key)

In [11]:
# %pip install googlemaps folium

import os, time, html, re
import numpy as np
import pandas as pd
import folium
from folium import Popup, Element

# -------------------- CONFIG --------------------
# Everything a reader might need to tune for a new dataset lives in this block.
CSV_PATH = "Miami Urban Core Projects Under Construction - Miami apartments _overbuilding_ map data.csv"  # input CSV read with pd.read_csv; change if needed
ADDRESS_COL = "Address"                 # optional; only read when rows lack coordinates and must be geocoded
CANDIDATE_COORD_COLS = [                # coordinate column names to auto-detect (case-insensitive, first match wins)
    "Google coordinates", "Coordinates", "Coord", "LatLon", "Lat/Lon", "Geo", "Location"
]
CACHE_FILE = "geocode_cache.csv"        # on-disk geocode cache with columns norm_address, lat, lon
REQUESTS_PER_SECOND = 5                 # geocoding throttle: calls are spaced at least 1 / REQUESTS_PER_SECOND seconds apart
APPEND_CITY_STATE = True                # only used when geocoding: append CITY_STATE_SUFFIX to addresses lacking city/state
CITY_STATE_SUFFIX = ", Miami, FL"       # customize per dataset
# ------------------------------------------------

# ---- Load Google Maps API key (only if needed later) ----
def _load_gmaps_client():
    """Build a Google Maps client, importing `googlemaps` lazily.

    The API key is taken from the notebook global `google_maps_API_Key`
    (restored via `%store -r` when running under IPython) and, failing that,
    from the GOOGLE_MAPS_API_KEY environment variable.

    Returns:
        (client, googlemaps_module) -- the module is returned so callers can
        reference `googlemaps.exceptions` without importing it themselves.

    Raises:
        ValueError: if no key is found in either location.
    """
    # Best effort: outside IPython `get_ipython` does not exist, and the stored
    # variable may be absent; either way we fall through to the lookups below.
    try:
        get_ipython().run_line_magic("store", "-r google_maps_API_Key")
    except Exception:
        pass
    import googlemaps

    api_key = globals().get("google_maps_API_Key")
    if not api_key:
        api_key = os.getenv("GOOGLE_MAPS_API_KEY", "")
    if api_key:
        return googlemaps.Client(key=api_key), googlemaps
    raise ValueError("No Google Maps API key found. Set `%store google_maps_API_Key` or env var GOOGLE_MAPS_API_KEY.")

# ---- Load CSV ----
# Read the whole dataset into `df`; later steps add the helper columns
# __lat, __lon and __geocode_status to this same frame.
df = pd.read_csv(CSV_PATH)

# ---- Try to detect a coordinates column ----
def _find_coords_column(columns, candidates):
    cols_lower = {c.lower(): c for c in columns}
    for cand in candidates:
        if cand.lower() in cols_lower:
            return cols_lower[cand.lower()]
    # fuzzy: any column that contains "coord" or "latlon" or "lat/lon" etc.
    for c in columns:
        lc = c.lower()
        if any(k in lc for k in ["coord", "latlon", "lat/lon", "geo", "location"]):
            return c
    return None

# Name of the detected coordinates column in `df`, or None when nothing matched.
coords_col = _find_coords_column(df.columns, CANDIDATE_COORD_COLS)

# ---- Parse coordinates from a string (e.g., "25.774, -80.19" or Google Maps URL) ----
_num_pat = r"(-?\d+(?:\.\d+)?)"  # capture floats/ints with optional sign

# Coordinate forms embedded in map URLs, tried in this order before the generic
# number scan. NOTE(review): "!3d<lat>!4d<lng>" is assumed to be the place pin and
# "@lat,lng" the viewport centre in Google Maps links -- confirm against real data.
_url_coord_pats = (
    r"!3d" + _num_pat + r"!4d" + _num_pat,
    r"[?&](?:q|ll|query|center|destination)=" + _num_pat + r"(?:,|%2C)" + _num_pat,
    r"@" + _num_pat + r"," + _num_pat,
)


def _valid_lat_lon(lat, lon):
    """Rough sanity check: latitude within [-90, 90], longitude within [-180, 180]."""
    return -90 <= lat <= 90 and -180 <= lon <= 180


def parse_coords(val):
    """Extract a (lat, lon) pair from a cell value.

    Handles plain pairs such as "25.774, -80.19" / "25.774 -80.19" and URLs that
    embed the pair (e.g. ".../@25.774,-80.19,17z" or "...?q=25.774,-80.19").
    For URLs the embedded pair is looked up explicitly, so unrelated digits that
    appear earlier in the link (street numbers, ids) do not shadow it.

    Args:
        val: any scalar; missing values (NaN/None) are accepted.

    Returns:
        (lat, lon) as floats, or (nan, nan) when no in-range pair is found.
    """
    if pd.isna(val):
        return (np.nan, np.nan)
    s = str(val).strip()

    # URL-specific forms first; only attempted when the value looks like a URL.
    if "://" in s:
        for pat in _url_coord_pats:
            hit = re.search(pat, s, flags=re.IGNORECASE)
            if hit:
                lat, lon = float(hit.group(1)), float(hit.group(2))
                if _valid_lat_lon(lat, lon):
                    return (lat, lon)

    # Generic form: the first two numbers in the string are taken as lat, lon.
    m = re.findall(_num_pat, s)
    if len(m) >= 2:
        lat, lon = float(m[0]), float(m[1])
        if _valid_lat_lon(lat, lon):
            return (lat, lon)
    return (np.nan, np.nan)

# ---- Extract lat/lon: coordinates column first, explicit lat/lon columns as fallback ----
# Capture any explicit lat/lon columns BEFORE the helper columns are (re)written, so a
# pre-existing "__lat"/"__lon" in the CSV can still serve as a source.
lat_candidates = [c for c in df.columns if str(c).lower() in ("lat","latitude","y","__lat")]
lon_candidates = [c for c in df.columns if str(c).lower() in ("lon","lng","longitude","x","__lon")]
if lat_candidates and lon_candidates:
    explicit_lat = pd.to_numeric(df[lat_candidates[0]], errors="coerce").copy()
    explicit_lon = pd.to_numeric(df[lon_candidates[0]], errors="coerce").copy()
else:
    explicit_lat = explicit_lon = None

if coords_col:
    # Built from a list rather than zip(*...) so an empty frame does not raise.
    parsed = [parse_coords(v) for v in df[coords_col]]
    df["__lat"] = pd.Series([p[0] for p in parsed], index=df.index, dtype="float64")
    df["__lon"] = pd.Series([p[1] for p in parsed], index=df.index, dtype="float64")
else:
    # Initialize empty; filled from explicit columns below and/or via geocoding
    df["__lat"] = np.nan
    df["__lon"] = np.nan

# Fill rows that are still incomplete from the explicit lat/lon columns. This also
# covers the case where the detected "coordinates" column (e.g. a free-text
# "Location") yielded nothing even though real lat/lon columns exist.
if explicit_lat is not None:
    gaps = df["__lat"].isna() | df["__lon"].isna()
    df.loc[gaps, "__lat"] = explicit_lat[gaps]
    df.loc[gaps, "__lon"] = explicit_lon[gaps]

# ---- Determine which rows still need geocoding ----
needs_geo = df["__lat"].isna() | df["__lon"].isna()

# ---- Address normalization (only if we will geocode any rows) ----
def _norm_addr(s: str) -> str:
    return " ".join(str(s).strip().lower().split())

if needs_geo.any():
    if ADDRESS_COL not in df.columns:
        print("ℹ️ No usable coordinates found for some rows AND no address column to geocode from. "
              "Those rows will remain unmapped.")
        df["__geocode_status"] = np.where(needs_geo, "NO_COORDS_NO_ADDRESS", "COORDS")
    else:
        # Normalise the address column to stripped strings. Missing values become ""
        # (not the literal "nan"), so they are reported as EMPTY below instead of
        # being sent to the geocoder as "nan, Miami, FL".
        raw_addr = df[ADDRESS_COL]
        df[ADDRESS_COL] = raw_addr.astype(str).str.strip().where(raw_addr.notna(), "")

        if APPEND_CITY_STATE:
            # Whole-word matches only, so street names such as "Flagler" are not
            # mistaken for the state abbreviation "FL".
            city_pat = re.compile(r"\bmiami\b")
            state_pat = re.compile(r"\b(?:fl|florida)\b")

            def _ensure_city_state(s):
                """Append CITY_STATE_SUFFIX unless the address already names city and state."""
                if not s:
                    return s  # nothing to qualify; stays empty
                sl = s.lower()
                if city_pat.search(sl) and state_pat.search(sl):
                    return s
                return s + CITY_STATE_SUFFIX

            # Only rows that will actually be geocoded are rewritten.
            df.loc[needs_geo, ADDRESS_COL] = df.loc[needs_geo, ADDRESS_COL].apply(_ensure_city_state)

        # ---- Cache helpers ----
        geocode_cache = {}
        if os.path.exists(CACHE_FILE):
            cache_df = pd.read_csv(CACHE_FILE)
            if {"norm_address", "lat", "lon"}.issubset(cache_df.columns):
                cache_df = cache_df.dropna(subset=["norm_address"]).drop_duplicates("norm_address")
                geocode_cache = dict(zip(cache_df["norm_address"], zip(cache_df["lat"], cache_df["lon"])))
            else:
                print(f"Ignoring {CACHE_FILE}: expected columns norm_address, lat, lon.")

        def _save_cache(d: dict):
            """Write the whole cache dict to CACHE_FILE (no-op when empty)."""
            if not d: return
            out = pd.DataFrame([{"norm_address": k, "lat": v[0], "lon": v[1]} for k, v in d.items()])
            out.to_csv(CACHE_FILE, index=False)

        # ---- Geocoding (only for rows missing coords) ----
        gmaps_client, googlemaps = _load_gmaps_client()

        def geocode(addr: str, retry=3, backoff=1.6):
            """Geocode one address, consulting the in-memory cache first.

            Args:
                addr: address string to look up.
                retry: maximum number of API attempts on errors.
                backoff: base of the exponential pause (backoff**attempt seconds)
                    between failed attempts.

            Returns:
                (lat, lon, status). status is "CACHE", "OK", "EMPTY" (blank input)
                or the last failure label ("ZERO_RESULTS", "API_ERROR:<status>",
                "TRANSPORT", "EXCEPTION"); lat/lon are NaN on failure.
            """
            if not isinstance(addr, str) or not addr.strip():
                return (np.nan, np.nan, "EMPTY")
            na = _norm_addr(addr)
            if na in geocode_cache:
                lat, lon = geocode_cache[na]
                # Cached failures (NaN) are not trusted; they are retried.
                if not (pd.isna(lat) or pd.isna(lon)):
                    return (lat, lon, "CACHE")

            last_status = "UNKNOWN"
            for attempt in range(retry):
                try:
                    g = gmaps_client.geocode(addr)
                    if g:
                        lat = g[0]["geometry"]["location"]["lat"]
                        lon = g[0]["geometry"]["location"]["lng"]
                        geocode_cache[na] = (lat, lon)
                        return (lat, lon, "OK")
                    # An empty result is a successful "no match" answer, not a
                    # transient error, so asking again cannot help.
                    last_status = "ZERO_RESULTS"
                    break
                except googlemaps.exceptions.ApiError as e:
                    last_status = f"API_ERROR:{getattr(e, 'status', 'UNKNOWN')}"
                except googlemaps.exceptions.TransportError:
                    last_status = "TRANSPORT"
                except Exception:
                    last_status = "EXCEPTION"
                # Back off only when another attempt will follow.
                if attempt < retry - 1:
                    time.sleep(backoff**attempt)

            geocode_cache[na] = (np.nan, np.nan)
            return (np.nan, np.nan, last_status)

        # Optional: quick smoke test (skip if you like)
        tlat, tlon, tstat = geocode("233 S Wacker Dr, Chicago, IL 60606")
        print(f"API test: {tstat} (lat={tlat}, lon={tlon})")

        # ---- Geocode throttle loop only for missing coords ----
        min_interval = 1.0 / max(1, REQUESTS_PER_SECOND)
        last = 0.0
        statuses = df.get("__geocode_status", pd.Series(index=df.index, dtype=object)).copy()

        idx_to_geo = df.index[needs_geo].tolist()
        ok_count = cache_count = fail_count = 0

        try:
            for i, idx in enumerate(idx_to_geo, start=1):
                addr = df.at[idx, ADDRESS_COL]
                wait = last + min_interval - time.time()
                if wait > 0: time.sleep(wait)
                lat, lon, status = geocode(addr)
                if status == "OK": ok_count += 1
                elif status == "CACHE": cache_count += 1
                else: fail_count += 1

                df.at[idx, "__lat"] = lat
                df.at[idx, "__lon"] = lon
                statuses.at[idx] = status

                if i % 5 == 0 or i == len(idx_to_geo):
                    print(f"[{i}/{len(idx_to_geo)}] OK:{ok_count} CACHE:{cache_count} FAIL:{fail_count} (last={status})")
                # Only real API traffic counts against the throttle; cache hits and
                # blank addresses made no request.
                if status not in ("CACHE", "EMPTY"):
                    last = time.time()

            # For rows that already had coords, mark them as COORDS
            statuses = statuses.fillna("COORDS")
            df["__geocode_status"] = statuses
        finally:
            # Persist whatever was resolved even if the loop was interrupted, so paid
            # lookups are not repeated on the next run.
            _save_cache(geocode_cache)
else:
    # No geocoding needed; everything came from coordinates
    df["__geocode_status"] = "COORDS"

# ---- Build mapped subset ----
# Keep only rows where both coordinates are present; work on a copy so `df` stays intact.
has_coords = df["__lat"].notna() & df["__lon"].notna()
mapped = df.loc[has_coords].copy()
print(f"Rows with coordinates: {len(mapped)} / {len(df)}")
if len(mapped) == 0:
    # Nothing to draw: show why each row failed, then stop the cell.
    print("No points mapped. Status breakdown (all rows):")
    print(df["__geocode_status"].value_counts(dropna=False))
    raise RuntimeError("No coordinates available—either provide a coordinates column or an address column to geocode.")

# ---- Color by Status (your existing 'Status' field if present) ----
# These values are used as CircleMarker fill colours and as CSS `background` in the
# legend items, so every entry must be a valid CSS colour. folium.Icon-only names
# ("lightred", "darkpurple") are not CSS colours and have been replaced with close
# valid equivalents.
palette = [
    "blue", "red", "green", "purple", "orange",
    "darkred", "lightcoral", "beige", "darkblue", "darkgreen",
    "cadetblue", "indigo", "white", "pink", "lightblue",
    "lightgreen", "gray", "black", "lightgray"
]
if "Status" in mapped.columns:
    # Sorted so the status -> colour assignment is stable between runs.
    statuses_unique = pd.Index(sorted(mapped["Status"].dropna().astype(str).unique()))
else:
    statuses_unique = pd.Index([])
# Colours are reused cyclically when there are more statuses than palette entries.
status_to_color = {s: palette[i % len(palette)] for i, s in enumerate(statuses_unique)}

def color_for(s):
    """Return the fill colour for a status value; "yellow" for missing or unknown statuses."""
    if pd.isna(s): return "yellow"
    return status_to_color.get(str(s), "yellow")

# ---- Popups: ALL non-empty CSV fields (no tooltip) ----
# Internal helper columns added by this notebook are excluded from popups.
coord_cols = {"__geocode_status", "__lon", "__lat"}
DISPLAY_COLUMNS = [name for name in mapped.columns if name not in coord_cols]

def _clean(v):
    if pd.isna(v): return None
    s = str(v).strip()
    return s if s and s.lower() not in {"nan","none"} else None

def make_popup_html(row):
    """Render one data row as an HTML table for a map popup.

    Each column in DISPLAY_COLUMNS with a non-empty value becomes a
    `<tr><th>label</th><td>value</td></tr>` row. Values starting with
    http:// or https:// are rendered as links that open in a new tab.
    All text taken from the data -- including the URL placed in the `href`
    attribute -- is HTML-escaped, so quotes or markup in the CSV cannot break
    out of the attribute or inject elements into the page.

    Args:
        row: mapping-like row (e.g. a pandas Series) supporting `.get(col)`.

    Returns:
        An HTML string; "<i>No details</i>" when the row has nothing to show.
    """
    rows = []
    for col in DISPLAY_COLUMNS:
        val = _clean(row.get(col))
        if val is None:
            continue
        # html.escape also escapes both quote characters by default, which makes
        # the result safe inside the double-quoted href attribute as well.
        safe_val = html.escape(str(val))
        if isinstance(val, str) and val.lower().startswith(("http://","https://")):
            v = f'<a href="{safe_val}" target="_blank" rel="noopener">{safe_val}</a>'
        else:
            v = safe_val
        col_label = html.escape(str(col))
        rows.append(
            f"<tr><th style='text-align:left;padding-right:8px'>{col_label}</th><td>{v}</td></tr>"
        )
    return "<table>" + "".join(rows) + "</table>" if rows else "<i>No details</i>"

# ---- Build map ----
# Centre the initial view on the mean latitude/longitude of the mapped rows.
center = [float(mapped[axis].mean()) for axis in ("__lat", "__lon")]
m = folium.Map(
    location=center,
    zoom_start=10.5,
    control_scale=True,
    tiles="CartoDB positron",
)

# One circle per row: thin black outline, fill colour keyed on Status, and a
# popup listing the row's non-empty fields.
for _, r in mapped.iterrows():
    marker_popup = Popup(make_popup_html(r), max_width=450)
    marker = folium.CircleMarker(
        location=(float(r["__lat"]), float(r["__lon"])),
        radius=7,
        color="black",
        weight=1,
        fill=True,
        fill_color=color_for(r.get("Status")),
        fill_opacity=0.7,
        popup=marker_popup,
    )
    marker.add_to(m)

# ---- Legend ----
# One <li> per status: a small colour swatch followed by the escaped status text.
# NOTE(review): `legend_items` is built but not rendered -- the block that injects
# the legend into the page is commented out below. Re-enable it to show the legend,
# or drop `legend_items` if the legend is intentionally disabled.
legend_items = [
    f"<li><span style='display:inline-block;width:12px;height:12px;background:{color};margin-right:6px;border:1px solid #333'></span>{html.escape(status)}</li>"
    for status, color in status_to_color.items()
]
# legend_html = f"""
# <div style="position:fixed;bottom:20px;left:20px;z-index:9999;background:white;padding:10px 12px;border:1px solid #bbb;border-radius:6px;box-shadow:0 1px 4px rgba(0,0,0,0.2);font-size:13px;max-width:260px;">
#   <div style="font-weight:600;margin-bottom:6px;">Status legend</div>
#   <ul style="list-style:none;padding:0;margin:0;">{''.join(legend_items) if legend_items else '<li>None</li>'}</ul>
# </div>
# """
# m.get_root().html.add_child(folium.Element(legend_html))

# ---- Title: browser tab title plus a floating banner over the map ----
title_text = "Miami Urban Core Projects"
root = m.get_root()

# <title> element goes into the document head.
title_tag = Element(f"<title>{html.escape(title_text)}</title>")
root.header.add_child(title_tag)

# Fixed-position banner, horizontally centred at the top of the page.
title_html = f"""
<div style="
  position: fixed;
  top: 12px; left: 50%; transform: translateX(-50%);
  z-index: 9999;
  background: rgba(255,255,255,0.92);
  padding: 8px 12px;
  border: 1px solid #bbb; border-radius: 6px;
  box-shadow: 0 1px 4px rgba(0,0,0,0.18);
  font: 600 16px/1.25 system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
  max-width: 92%;
  text-align: center;
">
  {html.escape(title_text)}
</div>
"""
banner = Element(title_html)
root.html.add_child(banner)

# ---- Save ----
# Write the standalone HTML page, then end the cell with `m` so the map renders inline.
m.save("index.html")
print("✅ Saved map to index.html")
m


Rows with coordinates: 15 / 15
✅ Saved map to index.html


In [12]:
# Public URL of the published map: the GitHub Pages base plus the name of the
# current working directory.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

# Split on the platform's own separator (after normalising the path) instead of a
# hard-coded '/', so that only the last directory name -- never a whole Windows
# path -- ends up in the URL. `cwd` remains a list of path components.
cwd = os.path.normpath(os.getcwd()).split(os.sep)

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/MiamiUrbanCoreProjectsPipeline
