In [1]:
# # %store -r map_box_api_key
# %store -r google_maps_API_Key
# GOOGLE_MAPS_API_KEY = googlemaps.Client(key=google_maps_API_Key)

In [33]:
# %pip install googlemaps folium

import os, time, html, json
import numpy as np
import pandas as pd
import folium
from folium import Popup
import googlemaps

# ==================== SETTINGS ====================
# Everything a reader is likely to tune lives here.
CSV_PATH = "paramount - Sheet1.csv"   # input spreadsheet
ADDRESS_COL = "Address"               # name of the street-address column
CACHE_FILE = "geocode_cache.csv"      # on-disk cache: normalized address -> lat, lon
REQUESTS_PER_SECOND = 5               # upper bound on geocoding call rate
APPEND_CITY_STATE = False             # when True, ", Chicago, IL" is appended to addresses lacking it
# ==================================================

# ---- Resolve the Google Maps API key: IPython %store first, then the environment ----
try:
    # Only works inside IPython with a previously stored value.
    get_ipython().run_line_magic("store", "-r google_maps_API_Key")
except Exception:
    # Best effort: outside IPython, or nothing stored; fall through to the env var.
    pass

# Keep a restored, non-empty key; otherwise read GOOGLE_MAPS_API_KEY.
google_maps_API_Key = globals().get("google_maps_API_Key") or os.getenv("GOOGLE_MAPS_API_KEY", "")

if not google_maps_API_Key:
    raise ValueError("No Google Maps API key found. Set `%store google_maps_API_Key` or env var GOOGLE_MAPS_API_KEY.")

# Despite the name, this is the googlemaps client object used by geocode().
gmaps_key = googlemaps.Client(key=google_maps_API_Key)

# ---- Load CSV ----
df = pd.read_csv(CSV_PATH)
if ADDRESS_COL not in df.columns:
    raise ValueError(f"Expected an '{ADDRESS_COL}' column in your CSV. Got: {df.columns.tolist()}")

# Normalize addresses.
# Missing cells are blanked BEFORE the string cast: a plain astype(str) turns
# NaN into the literal text "nan", which would then be geocoded as if it were
# a real address instead of being reported as EMPTY.
df[ADDRESS_COL] = df[ADDRESS_COL].fillna("").astype(str).str.strip()
if APPEND_CITY_STATE:
    # Blank addresses stay blank (otherwise they would become ", Chicago, IL"
    # and resolve to the city itself).
    # NOTE(review): these are substring checks, so "il" also matches e.g. "Mill St".
    df[ADDRESS_COL] = df[ADDRESS_COL].apply(
        lambda s: s if (not s or ("chicago" in s.lower() and "il" in s.lower())) else f"{s}, Chicago, IL"
    )

# ---- Cache helpers ----
def _norm_addr(s: str) -> str:
    return " ".join(str(s).strip().lower().split())

# Seed the in-memory cache (normalized address -> (lat, lon)) from disk when a cache file exists.
if not os.path.exists(CACHE_FILE):
    geocode_cache = {}
else:
    # Rows without a key are useless; for duplicate keys the first row wins.
    cache_df = (
        pd.read_csv(CACHE_FILE)
        .dropna(subset=["norm_address"])
        .drop_duplicates("norm_address")
    )
    geocode_cache = {
        key: (cached_lat, cached_lon)
        for key, cached_lat, cached_lon in zip(
            cache_df["norm_address"], cache_df["lat"], cache_df["lon"]
        )
    }

def _save_cache(d: dict):
    if not d: return
    out = pd.DataFrame([{"norm_address": k, "lat": v[0], "lon": v[1]} for k, v in d.items()])
    out.to_csv(CACHE_FILE, index=False)

# ---- Geocode (with retry) ----
def geocode(addr: str, retry=3, backoff=1.6, force_regenerate=False):
    """
    Geocode one address through the Google Maps client, consulting the in-memory cache first.

    Parameters
    ----------
    addr : address text. Non-string or blank input short-circuits with "EMPTY".
    retry : maximum number of API attempts.
    backoff : base of the delay between failed attempts (backoff**attempt seconds).
    force_regenerate : when True, ignore any cached entry and call the API.

    Returns
    -------
    (lat, lon, status) where status is "OK", "CACHE", "EMPTY", or a failure
    string ("ZERO_RESULTS", "API_ERROR:<status>", "TRANSPORT", "EXCEPTION",
    "UNKNOWN"). Failures return NaN coordinates and are recorded in the cache
    as NaN; NaN cache entries are never served as hits, so they are retried
    on the next run.
    """
    if not isinstance(addr, str) or not addr.strip():
        return (np.nan, np.nan, "EMPTY")

    na = _norm_addr(addr)

    # Serve from cache only when the cached pair is a real coordinate.
    if not force_regenerate and na in geocode_cache:
        lat, lon = geocode_cache[na]
        if not (pd.isna(lat) or pd.isna(lon)):
            return (lat, lon, "CACHE")

    last_status = "UNKNOWN"
    for attempt in range(retry):
        try:
            g = gmaps_key.geocode(addr)
            if g:
                location = g[0]["geometry"]["location"]
                lat, lon = location["lat"], location["lng"]
                geocode_cache[na] = (lat, lon)
                return (lat, lon, "OK")
            # An empty result list is a definitive answer, not a transient
            # failure: retrying only burns quota and time.
            last_status = "ZERO_RESULTS"
            break
        except googlemaps.exceptions.ApiError as e:
            last_status = f"API_ERROR:{getattr(e, 'status', 'UNKNOWN')}"
        except googlemaps.exceptions.TransportError:
            last_status = "TRANSPORT"
        except Exception:
            # Deliberate catch-all so one bad row cannot abort a batch run.
            last_status = "EXCEPTION"
        # Back off only when another attempt will follow.
        if attempt < retry - 1:
            time.sleep(backoff**attempt)

    geocode_cache[na] = (np.nan, np.nan)
    return (np.nan, np.nan, last_status)

# ---- Quick API smoke test ----
# One fixed address confirms the key and client work before the batch run.
test_lat, test_lon, test_status = geocode("233 S Wacker Dr, Chicago, IL 60606")
print(f"API test: {test_status} (lat={test_lat}, lon={test_lon})")

# ---- Geocode all rows (throttled) ----
min_interval = 1.0 / max(1, REQUESTS_PER_SECOND)
lats, lons, statuses = [], [], []
last = 0.0  # time of the most recent lookup that actually reached the API
ok_count = cache_count = fail_count = 0

addresses = df[ADDRESS_COL].astype(str).tolist()
try:
    for i, addr in enumerate(addresses, start=1):
        wait = last + min_interval - time.time()
        if wait > 0: time.sleep(wait)
        lat, lon, status = geocode(addr)
        if status == "OK": ok_count += 1
        elif status == "CACHE": cache_count += 1
        else: fail_count += 1
        if i % 5 == 0 or i == len(addresses):
            print(f"[{i}/{len(addresses)}] OK:{ok_count} CACHE:{cache_count} FAIL:{fail_count} (last={status})")
        lats.append(lat); lons.append(lon); statuses.append(status)
        # Only real API traffic counts against the rate limit; cache hits and
        # blank rows must not make the following rows wait.
        if status not in ("CACHE", "EMPTY"):
            last = time.time()
finally:
    # Persist whatever was resolved even if the loop is interrupted or raises,
    # so completed lookups are not paid for twice.
    _save_cache(geocode_cache)

# Attach the geocoding results to the source frame; anything non-numeric becomes NaN.
df["__lat"], df["__lon"] = (
    pd.to_numeric(values, errors="coerce") for values in (lats, lons)
)
df["__geocode_status"] = statuses

# Only rows with both coordinates can be placed on the map.
has_coords = df["__lat"].notna() & df["__lon"].notna()
mapped = df[has_coords].copy()
print(f"Rows with coordinates: {len(mapped)} / {len(df)}")
if mapped.shape[0] == 0:
    # Nothing to draw: show why each row failed, then stop the cell.
    print("No points mapped. Status breakdown (all rows):")
    print(pd.Series(statuses).value_counts(dropna=False))
    raise RuntimeError("Geocoding produced no coordinates—see logs above.")

# ---- Color by Status (optional) ----
# These values are used as CircleMarker fill colours, which the browser reads
# as CSS colours. The list originally mirrored folium.Icon's colour keys, two
# of which ("lightred", "darkpurple") are not CSS colour names and so would not
# render as intended; they are replaced by close CSS equivalents
# ("lightcoral", "indigo").
palette = [
    "blue", "red", "green", "purple", "orange",
    "darkred", "lightcoral", "beige", "darkblue", "darkgreen",
    "cadetblue", "indigo", "white", "pink", "lightblue",
    "lightgreen", "gray", "black", "lightgray"
]
# Sorted so that each distinct Status gets the same colour on every run.
if "Status" in mapped.columns:
    statuses_unique = pd.Index(sorted(mapped["Status"].dropna().astype(str).unique()))
else:
    statuses_unique = pd.Index([])
# The palette wraps around when there are more statuses than colours.
status_to_color = {s: palette[i % len(palette)] for i, s in enumerate(statuses_unique)}

def color_for(s):
    """Return the fill colour for a Status value; missing or unrecognised values fall back to "gray"."""
    return "gray" if pd.isna(s) else status_to_color.get(str(s), "gray")

# ---- Popups: include all non-empty CSV fields ----
# Helper columns added by the geocoding step are kept out of the popup table.
coord_cols = {"__geocode_status", "__lon", "__lat"}
DISPLAY_COLUMNS = list(filter(lambda name: name not in coord_cols, mapped.columns))

def _clean(v):
    if pd.isna(v): return None
    s = str(v).strip()
    return s if s and s.lower() not in {"nan","none"} else None

def make_popup_html(row):
    """Render one data row as an HTML table for a map popup.

    Every column in DISPLAY_COLUMNS whose cleaned value is non-empty becomes a
    label/value table row. Values starting with http:// or https:// are
    rendered as links that open in a new tab. All CSV-supplied text is
    HTML-escaped, including the link target: the value is interpolated into a
    double-quoted href attribute, so an unescaped quote in the data would
    otherwise break out of the attribute.

    `row` only needs a dict-style .get(column) accessor.
    Returns the table markup, or "<i>No details</i>" when nothing is displayable.
    """
    rows = []
    for col in DISPLAY_COLUMNS:
        val = _clean(row.get(col))
        if val is None:
            continue
        # html.escape defaults to quote=True, so the result is safe both as
        # element text and inside a quoted attribute.
        safe = html.escape(str(val))
        if isinstance(val, str) and val.lower().startswith(("http://","https://")):
            v = f'<a href="{safe}" target="_blank" rel="noopener">{safe}</a>'
        else:
            v = safe
        col_label = html.escape(str(col))
        rows.append(f"<tr><th style='text-align:left;padding-right:8px'>{col_label}</th><td>{v}</td></tr>")
    return "<table>" + "".join(rows) + "</table>" if rows else "<i>No details</i>"

# ---- City split + bounds helpers ----
def compute_bounds(frame):
    """Return the bounding box of a frame's points as [[min_lat, min_lon], [max_lat, max_lon]] (plain floats)."""
    lat_col, lon_col = frame["__lat"], frame["__lon"]
    lower_corner = [float(lat_col.min()), float(lon_col.min())]
    upper_corner = [float(lat_col.max()), float(lon_col.max())]
    return [lower_corner, upper_corner]

# Split points into two cities by longitude alone: anything west of -100
# is labelled San Francisco (~ -122), everything else New York City (~ -74).
mapped["__city"] = np.where(mapped["__lon"] < -100, "San Francisco", "New York City")
sf_df = mapped.loc[mapped["__city"].eq("San Francisco")].copy()
nyc_df = mapped.loc[mapped["__city"].eq("New York City")].copy()

# Initial view: the first non-empty frame in the order NYC, SF; the full set
# of mapped points is the last-resort fallback.
start_bounds, start_center = next(
    (compute_bounds(view), [view["__lat"].mean(), view["__lon"].mean()])
    for view in (nyc_df, sf_df, mapped)
    if view is mapped or not view.empty
)

# ---- Map ----
m = folium.Map(
    location=[float(coord) for coord in start_center],
    zoom_start=12,
    control_scale=True,
    tiles="CartoDB positron",
)

# One FeatureGroup per city so each can be toggled in the layer control;
# a group with no points starts hidden.
fg_nyc = folium.FeatureGroup(name="New York City", show=len(nyc_df) > 0).add_to(m)
fg_sf = folium.FeatureGroup(name="San Francisco", show=len(sf_df) > 0).add_to(m)

def add_markers(frame, feature_group):
    """Add one circle marker per row of `frame` to `feature_group`.

    Each marker sits at the row's (__lat, __lon), is filled according to the
    row's Status value, and carries a popup built from the row's fields.
    """
    for _, record in frame.iterrows():
        point = (float(record["__lat"]), float(record["__lon"]))
        marker = folium.CircleMarker(
            location=point,
            radius=7,
            color="black",
            fill_color=color_for(record.get("Status")),
            fill=True,
            fill_opacity=0.7,
            weight=1,
            popup=Popup(make_popup_html(record), max_width=450),
        )
        marker.add_to(feature_group)

# Place the pins, one feature group per city.
if len(nyc_df) > 0:
    add_markers(nyc_df, fg_nyc)
if len(sf_df) > 0:
    add_markers(sf_df, fg_sf)

# ---- City toggle control (Leaflet, bottom-left; large pill buttons) ----
# Bounds are None for a city with no points; its button is then omitted.
nyc_bounds = None if nyc_df.empty else compute_bounds(nyc_df)
sf_bounds = None if sf_df.empty else compute_bounds(sf_df)

print("NYC pts:", len(nyc_df), "SF pts:", len(sf_df))
print("nyc_bounds:", nyc_bounds, "sf_bounds:", sf_bounds)

# JavaScript literals for the bounds: json.dumps(None) is the text "null",
# and real bounds are always a non-empty list.
nyc_bounds_js = json.dumps(nyc_bounds)
sf_bounds_js = json.dumps(sf_bounds)

# Name of the JavaScript variable folium uses for this map in the rendered page.
map_var = m.get_name()
# Script injected into the page. It polls every 50 ms until the map variable
# exists and has its control corners, then adds a bottom-left Leaflet control
# with one pill button per city that has bounds; clicking a button fits the
# map to that city's bounds. Doubled braces are literal braces in the f-string.
toggle_control_js = f"""
<script>
(function waitForMap() {{
  if (typeof {map_var} === 'undefined' || !{map_var}._controlCorners) {{
    return setTimeout(waitForMap, 50);
  }}
  var map = {map_var};
  var nycBounds = {nyc_bounds_js};
  var sfBounds  = {sf_bounds_js};

  function fly(bounds) {{
    if (bounds) map.fitBounds(bounds, {{ padding: [30, 30] }});
  }}

  var CityToggle = L.Control.extend({{
    options: {{ position: 'bottomleft' }},  // where the legend used to be
    onAdd: function() {{
      var wrap = L.DomUtil.create('div', 'leaflet-bar leaflet-control trd-city-toggle');
      wrap.style.background = 'transparent';
      wrap.style.border = 'none';
      wrap.style.boxShadow = 'none';
      wrap.style.margin = '6px';

      var group = L.DomUtil.create('div', '', wrap);
      group.style.display = 'flex';
      group.style.gap = '8px';
      group.style.flexWrap = 'wrap';

      function mkBtn(label, onClick) {{
        var btn = L.DomUtil.create('button', '', group);
        btn.type = 'button';
        btn.textContent = label;
        btn.style.padding = '10px 16px';
        btn.style.fontSize = '15px';
        btn.style.fontWeight = '600';
        btn.style.lineHeight = '1.1';
        btn.style.background = '#ffffff';
        btn.style.border = '1px solid #888';
        btn.style.borderRadius = '9999px';   // pill
        btn.style.boxShadow = '0 1px 4px rgba(0,0,0,0.15)';
        btn.style.cursor = 'pointer';
        btn.style.userSelect = 'none';
        btn.style.display = 'inline-flex';
        btn.style.alignItems = 'center';
        btn.style.justifyContent = 'center';
        btn.style.minWidth = '160px';        // gives the label room
        btn.style.textAlign = 'center';

        btn.onmouseenter = function() {{ btn.style.background = '#f3f3f3'; }};
        btn.onmouseleave = function() {{ btn.style.background = '#ffffff'; }};
        btn.onfocus = function() {{ btn.style.outline = '2px solid #2b7cff'; btn.style.outlineOffset = '2px'; }};
        btn.onblur  = function() {{ btn.style.outline = 'none'; }};

        // prevent map drag/zoom on click
        L.DomEvent.on(btn, 'click', function(e) {{
          L.DomEvent.stop(e);
          onClick();
        }});
        return btn;
      }}

      if (nycBounds) mkBtn('New York City', function() {{ fly(nycBounds); }});
      if (sfBounds)  mkBtn('San Francisco', function() {{ fly(sfBounds); }});

      // Ensure map gestures don't steal focus while hovering the control
      L.DomEvent.disableClickPropagation(wrap);
      L.DomEvent.disableScrollPropagation(wrap);
      return wrap;
    }},
    onRemove: function() {{}}
  }});

  (new CityToggle()).addTo(map);
  console.log('CityToggle added. NYC?', !!nycBounds, 'SF?', !!sfBounds);
}})();
</script>
"""
# Attach the script to the HTML part of the page that folium renders for the map.
m.get_root().html.add_child(folium.Element(toggle_control_js))

# ---- Layer toggle & initial fit ----
# Expanded layer control first, then fit the view to the starting bounds.
layer_control = folium.LayerControl(collapsed=False)
layer_control.add_to(m)
m.fit_bounds(start_bounds, padding=(30, 30))

# ---- Save ----
# Write the finished map next to the notebook.
m.save("index.html")
print("✅ Saved map to index.html with NYC/SF toggle")


API test: CACHE (lat=41.8785831, lon=-87.6363986)
[5/12] OK:0 CACHE:5 FAIL:0 (last=CACHE)
[10/12] OK:0 CACHE:10 FAIL:0 (last=CACHE)
[12/12] OK:0 CACHE:12 FAIL:0 (last=CACHE)
Rows with coordinates: 12 / 12
NYC pts: 8 SF pts: 4
nyc_bounds: [[40.7062255, -74.00839119999999], [40.7628672, -73.9689131]] sf_bounds: [[37.7887443, -122.4004663], [37.7937188, -122.3942152]]
✅ Saved map to index.html with NYC/SF toggle


In [34]:
# Bare last expression: shows the folium map inline as this cell's output.
m

In [35]:
# Root URL under which the maps are published.
base_name = 'https://trd-digital.github.io/trd-news-interactive-maps/'

# The page URL is the root plus the name of the notebook's working directory.
cwd = os.getcwd()

# os.path.basename respects the platform's path separator; splitting on a
# literal '/' returns the whole path on Windows. `cwd` also stays a path
# string rather than being rebound to a list of components.
final_name = base_name + os.path.basename(cwd)
print(final_name)

https://trd-digital.github.io/trd-news-interactive-maps/paramount_09_17_25
