In [1]:
# # %store -r map_box_api_key
# %store -r google_maps_API_Key
# GOOGLE_MAPS_API_KEY = googlemaps.Client(key=google_maps_API_Key)

In [7]:
# %pip install googlemaps folium

import os, time, html
import numpy as np
import pandas as pd
import folium
from folium import Popup, Element

# -------------------- CONFIG --------------------
CSV_PATH = "Downtown Dallas map - Sheet1.csv"  # <-- change if needed
ADDRESS_COL = "Address"
CACHE_FILE = "geocode_cache.csv"     # normalized address -> lat, lon
REQUESTS_PER_SECOND = 5              # throttle to respect quotas
APPEND_CITY_STATE = True            # set True to append ", Chicago, IL" if missing
# ------------------------------------------------

# ---- Load Google Maps API key: your %store approach, with env fallback ----
try:
    get_ipython().run_line_magic("store", "-r google_maps_API_Key")
except Exception:
    pass

import googlemaps

if 'google_maps_API_Key' not in globals() or not google_maps_API_Key:
    google_maps_API_Key = os.getenv("GOOGLE_MAPS_API_KEY", "")

if not google_maps_API_Key:
    raise ValueError("No Google Maps API key found. Set `%store google_maps_API_Key` or env var GOOGLE_MAPS_API_KEY.")

gmaps_key = googlemaps.Client(key=google_maps_API_Key)

# ---- Load CSV ----
df = pd.read_csv(CSV_PATH)
if ADDRESS_COL not in df.columns:
    raise ValueError(f"Expected an '{ADDRESS_COL}' column in your CSV. Got: {df.columns.tolist()}")

# Normalize addresses
df[ADDRESS_COL] = df[ADDRESS_COL].astype(str).str.strip()
if APPEND_CITY_STATE:
    df[ADDRESS_COL] = df[ADDRESS_COL].apply(
        lambda s: s if ("dallas" in s.lower() and "tx" in s.lower()) else f"{s}, Dallas, TX"
    )

# ---- Cache helpers ----
def _norm_addr(s: str) -> str:
    return " ".join(str(s).strip().lower().split())

if os.path.exists(CACHE_FILE):
    cache_df = pd.read_csv(CACHE_FILE)
    cache_df = cache_df.dropna(subset=["norm_address"]).drop_duplicates("norm_address")
    geocode_cache = dict(zip(cache_df["norm_address"], zip(cache_df["lat"], cache_df["lon"])))
else:
    geocode_cache = {}

def _save_cache(d: dict):
    if not d: return
    out = pd.DataFrame([{"norm_address": k, "lat": v[0], "lon": v[1]} for k, v in d.items()])
    out.to_csv(CACHE_FILE, index=False)

# ---- Geocode (googlemaps client) with retry + "bad-cache" bypass ----
def geocode(addr: str, retry=3, backoff=1.6, force_regenerate=False):
    """
    Returns (lat, lon, status) where status is "OK", "CACHE", or a failure string.
    Uses cache only if present AND valid (non-NaN). Set force_regenerate=True to ignore cache entirely.
    """
    if not isinstance(addr, str) or not addr.strip():
        return (np.nan, np.nan, "EMPTY")

    na = _norm_addr(addr)

    # Use cache if valid and not forcing regeneration
    if not force_regenerate and na in geocode_cache:
        lat, lon = geocode_cache[na]
        if not (pd.isna(lat) or pd.isna(lon)):
            return (lat, lon, "CACHE")
        # else: cached NaN -> try again with API

    last_status = "UNKNOWN"
    for attempt in range(retry):
        try:
            g = gmaps_key.geocode(addr)
            if g:
                lat = g[0]["geometry"]["location"]["lat"]
                lon = g[0]["geometry"]["location"]["lng"]
                geocode_cache[na] = (lat, lon)   # store valid result
                return (lat, lon, "OK")
            else:
                last_status = "ZERO_RESULTS"
        except googlemaps.exceptions.ApiError as e:
            last_status = f"API_ERROR:{getattr(e, 'status', 'UNKNOWN')}"
        except googlemaps.exceptions.TransportError:
            last_status = "TRANSPORT"
        except Exception:
            last_status = "EXCEPTION"
        time.sleep(backoff**attempt)

    # Only after failing now, record NaN to avoid re-hammering
    geocode_cache[na] = (np.nan, np.nan)
    return (np.nan, np.nan, last_status)

# ---- Quick API smoke test ----
test_lat, test_lon, test_status = geocode("233 S Wacker Dr, Chicago, IL 60606", force_regenerate=False)
print(f"API test: {test_status} (lat={test_lat}, lon={test_lon})")

# ---- Geocode all rows (throttled) ----
min_interval = 1.0 / max(1, REQUESTS_PER_SECOND)
lats, lons, statuses = [], [], []
last = 0.0
ok_count = cache_count = fail_count = 0

addresses = df[ADDRESS_COL].astype(str).tolist()
for i, addr in enumerate(addresses, start=1):
    wait = last + min_interval - time.time()
    if wait > 0: time.sleep(wait)
    lat, lon, status = geocode(addr)
    if status == "OK": ok_count += 1
    elif status == "CACHE": cache_count += 1
    else: fail_count += 1
    if i % 5 == 0 or i == len(addresses):
        print(f"[{i}/{len(addresses)}] OK:{ok_count} CACHE:{cache_count} FAIL:{fail_count} (last={status})")
    lats.append(lat); lons.append(lon); statuses.append(status)
    last = time.time()

_save_cache(geocode_cache)

df["__lat"] = pd.to_numeric(lats, errors="coerce")
df["__lon"] = pd.to_numeric(lons, errors="coerce")
df["__geocode_status"] = statuses

mapped = df.dropna(subset=["__lat", "__lon"]).copy()
print(f"Rows with coordinates: {len(mapped)} / {len(df)}")
if mapped.empty:
    print("No points mapped. Status breakdown (all rows):")
    print(pd.Series(statuses).value_counts(dropna=False))
    raise RuntimeError("Geocoding produced no coordinates—see logs above.")

# ---- Color by Status ----
palette = [
    "blue", "red", "green", "purple", "orange",
    "darkred", "lightred", "beige", "darkblue", "darkgreen",
    "cadetblue", "darkpurple", "white", "pink", "lightblue",
    "lightgreen", "gray", "black", "lightgray"
]
if "Activity" in mapped.columns:
    statuses_unique = pd.Index(sorted(mapped["Activity"].dropna().astype(str).unique()))
else:
    statuses_unique = pd.Index([])
status_to_color = {s: palette[i % len(palette)] for i, s in enumerate(statuses_unique)}

def color_for(s):
    if pd.isna(s): return "gray"
    return status_to_color.get(str(s), "gray")

# ---- Popups: ALL non-empty CSV fields (no tooltip) ----
coord_cols = {"__lat","__lon","__geocode_status"}
DISPLAY_COLUMNS = [c for c in mapped.columns if c not in coord_cols]

def _clean(v):
    if pd.isna(v): return None
    s = str(v).strip()
    return s if s and s.lower() not in {"nan","none"} else None

def make_popup_html(row):
    rows = []
    for col in DISPLAY_COLUMNS:
        val = _clean(row.get(col))
        if val is None:
            continue
        if isinstance(val, str) and val.lower().startswith(("http://","https://")):
            v = f'<a href="{val}" target="_blank" rel="noopener">{html.escape(val)}</a>'
        else:
            v = html.escape(str(val))
        col_label = html.escape(str(col))
        rows.append(
            f"<tr><th style='text-align:left;padding-right:8px'>{col_label}</th><td>{v}</td></tr>"
        )
    return "<table>" + "".join(rows) + "</table>" if rows else "<i>No details</i>"

# ---- Build map ----
center = [float(mapped["__lat"].mean()), float(mapped["__lon"].mean())]
m = folium.Map(location=center, zoom_start=14, control_scale=True, tiles="CartoDB positron")

for _, r in mapped.iterrows():
    folium.CircleMarker(
        location=(float(r["__lat"]), float(r["__lon"])),
        radius=7,
        color="black", 
        fill_color=color_for(r.get("Status")),
        fill=True,
        fill_opacity=0.7,
        weight=1,
        popup=Popup(make_popup_html(r), max_width=450)
        # (no tooltip)
    ).add_to(m)

# ---- Legend ----
legend_items = [
    f"<li><span style='display:inline-block;width:12px;height:12px;background:{color};margin-right:6px;border:1px solid #333'></span>{html.escape(status)}</li>"
    for status, color in status_to_color.items()
]
# legend_html = f"""
# <div style="position:fixed;bottom:20px;left:20px;z-index:9999;background:white;padding:10px 12px;border:1px solid #bbb;border-radius:6px;box-shadow:0 1px 4px rgba(0,0,0,0.2);font-size:13px;max-width:260px;">
#   <div style="font-weight:600;margin-bottom:6px;">Status legend</div>
#   <ul style="list-style:none;padding:0;margin:0;">{''.join(legend_items) if legend_items else '<li>None</li>'}</ul>
# </div>
# """
# m.get_root().html.add_child(folium.Element(legend_html))


title_text = "Jess's Map"

# Set the browser tab <title>
m.get_root().header.add_child(Element(f"<title>{html.escape(title_text)}</title>"))

# Add an on-map title banner (top-center, responsive)
title_html = f"""
<div style="
  position: fixed;
  top: 12px; left: 50%; transform: translateX(-50%);
  z-index: 9999;
  background: rgba(255,255,255,0.92);
  padding: 8px 12px;
  border: 1px solid #bbb; border-radius: 6px;
  box-shadow: 0 1px 4px rgba(0,0,0,0.18);
  font: 600 16px/1.25 system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;
  max-width: 92%;
  text-align: center;
">
  {html.escape(title_text)}
</div>
"""
m.get_root().html.add_child(Element(title_html))


# ---- Save ----
m.save("index.html")
print("✅ Saved map to index.html")
m

API test: CACHE (lat=41.878474, lon=-87.6363853)
[5/9] OK:0 CACHE:5 FAIL:0 (last=CACHE)
[9/9] OK:0 CACHE:9 FAIL:0 (last=CACHE)
Rows with coordinates: 9 / 9
✅ Saved map to index.html


In [2]:
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

cwd = os.getcwd()

cwd = cwd.split('/')

final_name = base_name + cwd[-1]
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/DowntownDallas10_21_25
