In [19]:
# =========================
# Pennsylvania Sugar & Salt Trail — Complete Map Script
# =========================
# 1) Install
!pip install -q folium ipywidgets

# 2) Imports & constants
import os, re, zipfile, shutil, base64
import numpy as np
import pandas as pd
import folium
from folium.features import CustomIcon, DivIcon
from folium.plugins import MarkerCluster
import ipywidgets as widgets
from IPython.display import display
from folium import IFrame

CSV_PATH = "pa_snack_companies_complete_with_new_brands.csv"
ZIP_PATH = "pa_snack_brand_icons.zip"
EXTRACT_DIR = "icons_extracted"
FLAT_DIR = "icons_flat"

# 3) Unzip icons & flatten all PNGs into FLAT_DIR/
# Remove leftovers from any previous run first, then recreate both work folders empty.
for stale_dir in (EXTRACT_DIR, FLAT_DIR):
    if os.path.exists(stale_dir):
        shutil.rmtree(stale_dir)
for fresh_dir in (EXTRACT_DIR, FLAT_DIR):
    os.makedirs(fresh_dir, exist_ok=True)

with zipfile.ZipFile(ZIP_PATH, 'r') as archive:
    archive.extractall(EXTRACT_DIR)

# Gather every PNG under the extraction folder, skipping anything below a
# "__MACOSX" directory and files whose name starts with "._".
png_paths = [
    os.path.join(root, fname)
    for root, _subdirs, fnames in os.walk(EXTRACT_DIR)
    if "__MACOSX" not in root
    for fname in fnames
    if fname.lower().endswith(".png") and not fname.startswith("._")
]

def safe_stem(path: str) -> str:
    """Return a filesystem-friendly, lowercase stem for the file at ``path``.

    The directory part and the extension are dropped, surrounding whitespace
    is trimmed, ``&`` is spelled out as ``and``, and every run of characters
    outside ``a-z0-9`` collapses to a single underscore (none at the ends).
    """
    filename = os.path.basename(path)
    base, _ext = os.path.splitext(filename)
    base = base.strip().lower().replace("&", "and")
    return re.sub(r'[^a-z0-9]+', '_', base).strip('_')

# Copy each PNG into FLAT_DIR under its normalized name. Two different source
# files can normalize to the same name; the later copy still wins, but the
# clash is reported so a silently replaced logo can be tracked down.
flat_sources = {}
for p in png_paths:
    flat_name = safe_stem(p) + ".png"
    if flat_name in flat_sources:
        print(f"⚠️ Icon name collision: {p} overwrites {flat_sources[flat_name]} as {flat_name}")
    flat_sources[flat_name] = p
    shutil.copy2(p, os.path.join(FLAT_DIR, flat_name))

print(f"✅ Found & flattened {len(os.listdir(FLAT_DIR))} PNGs into {FLAT_DIR}/")

# 4) Load company data
# The rest of this script reads the columns listed below: lat/lon for marker
# placement, company for categorizing, logo lookup and filtering, and the
# remaining fields for the popup text.
df = pd.read_csv(CSV_PATH)  # expects: city, lat, lon, company, year, products, production

# 5) Categorize companies (simple heuristics; tweak as you like)
# Checked in order: the first category with a keyword found in the name wins.
_CATEGORY_KEYWORDS = (
    ("Salty Snacks", ("pretzel", "snyder", "utz", "herr", "middleswarth", "martin", "wise")),
    ("Candy & Chocolate", ("chocolate", "candy", "confection", "hershey", "whitman", "clark",
                           "boyer", "just born", "frankford", "gertrude", "éclat", "eclat",
                           "wilbur", "shane", "sarris")),
    ("Sweet Treats", ("ice cream", "turkey hill", "basset", "musselman", "tasty", "bazzini", "planters")),
)


def categorize_company(name: str) -> str:
    """Assign a coarse category to a company from keywords in its name.

    Matching is a case-insensitive substring test against each keyword list in
    ``_CATEGORY_KEYWORDS``, in order. Names that match nothing, and values that
    are not strings (e.g. a missing CSV cell read as NaN), map to ``"Other"``.
    """
    if not isinstance(name, str):
        return "Other"
    lowered = name.lower()
    for category, keywords in _CATEGORY_KEYWORDS:
        if any(keyword in lowered for keyword in keywords):
            return category
    return "Other"

# Tag every row with its heuristic category; the Category dropdown filters on this column.
df["Category"] = df["company"].apply(categorize_company)

# 6) Deterministic fan-out for overlapping HQ coordinates (so all logos are visible statewide)
def spread_overlaps(df_in: pd.DataFrame, radius_km=10.0, lat_col='lat', lon_col='lon') -> pd.DataFrame:
    """Return a copy of ``df_in`` with ``spread_lat`` / ``spread_lon`` columns added.

    Rows whose (lat, lon) pair is unique keep their coordinates. Rows sharing
    an identical pair are placed evenly on a circle of ``radius_km`` around
    that point, in row order, so the result is deterministic.

    Parameters
    ----------
    df_in : pd.DataFrame
        Input frame; it is not modified.
    radius_km : float
        Fan-out radius in kilometres.
    lat_col, lon_col : str
        Names of the latitude / longitude columns (degrees).

    Returns
    -------
    pd.DataFrame
        Copy of ``df_in`` (same index) with float ``spread_lat`` and
        ``spread_lon`` columns.
    """
    df_out = df_in.copy()
    lat = df_out[lat_col].to_numpy(dtype=float)
    lon = df_out[lon_col].to_numpy(dtype=float)
    spread_lat = lat.copy()
    spread_lon = lon.copy()

    # Bucket row *positions* by exact coordinate pair. Working positionally
    # keeps this correct for filtered frames and non-unique index labels.
    buckets = {}
    for pos, (la, lo) in enumerate(zip(lat.tolist(), lon.tolist())):
        if not (np.isfinite(la) and np.isfinite(lo)):
            continue  # a missing coordinate cannot overlap anything; leave it as-is
        buckets.setdefault((la, lo), []).append(pos)

    # ~1 deg lat ~ 110.574 km; ~1 deg lon ~ 111.320*cos(lat) km
    deg_lat = radius_km / 110.574
    for (lat0, lon0), positions in buckets.items():
        n = len(positions)
        if n < 2:
            continue
        # The floor on cos(lat) avoids dividing by ~0 close to the poles.
        deg_lon = (radius_km / 111.320) / max(np.cos(np.radians(lat0)), 1e-6)
        angles = np.linspace(0, 2 * np.pi, n, endpoint=False)
        spread_lat[positions] = lat0 + deg_lat * np.sin(angles)
        spread_lon[positions] = lon0 + deg_lon * np.cos(angles)

    df_out['spread_lat'] = spread_lat
    df_out['spread_lon'] = spread_lon
    return df_out

# Default fan-out radius (km) — adjustable via widget below
DEFAULT_SPREAD_KM = 10.0
# Fan-out precomputed once at the default radius; the logo column is attached to this frame further down.
df_spread = spread_overlaps(df, radius_km=DEFAULT_SPREAD_KM)

# 7) Build data-URI icons (most reliable rendering)
def norm_key(s: str) -> str:
    """Normalize a display name into a lowercase, underscore-separated lookup key.

    ``&`` becomes ``and``; every other run of characters outside ``a-z0-9``
    collapses to one ``_``; leading and trailing underscores are removed.
    """
    lowered = s.lower()
    spelled_out = lowered.replace("&", "and")
    collapsed = re.sub(r'[^a-z0-9]+', '_', spelled_out)
    return collapsed.strip('_')

def img_to_data_uri(png_path: str) -> str:
    """Read the file at ``png_path`` and return it as a base64 PNG ``data:`` URI."""
    with open(png_path, "rb") as fh:
        raw = fh.read()
    encoded = base64.b64encode(raw).decode("ascii")
    return "data:image/png;base64," + encoded

# Map each flattened icon's normalized stem to its embedded data URI.
stem_to_datauri = {
    norm_key(os.path.splitext(icon_file)[0]): img_to_data_uri(os.path.join(FLAT_DIR, icon_file))
    for icon_file in os.listdir(FLAT_DIR)
    if icon_file.lower().endswith(".png")
}

# Hand-picked hints for companies whose normalized name may not equal an icon
# stem. Keys are exact company names as they appear in the data; values are
# substrings tried in order against the available icon stems.
preferred_tokens = {
    "The Hershey Company":            ["hershey"],
    "Boyer Candy Company":            ["boyer"],
    "D. L. Clark Company":            ["clark"],
    "Gertrude Hawk Chocolates":       ["gertrude", "hawk"],
    "Sarris Candies":                 ["sarris"],
    "Wilbur Chocolate Company":       ["wilbur"],
    "Utz Brands":                     ["utz"],
    "Herr's Snacks":                  ["herr"],
    "Martin's Potato Chips":          ["martin", "martins"],
    "Wise Foods, Inc.":               ["wise"],
    "Ira Middleswarth & Son, Inc.":   ["middleswarth", "middlesworth"],
    "Shane Confectionery":            ["shane"],
    "Tastykake":                      ["tasty", "tastykake"],
    "Philly Pretzel Factory":         ["philly", "pretzel"],
    "Bassett's Ice Cream":            ["basset", "bassetts"],
}

# Snapshot of the icon stems, scanned in this order by the substring fallbacks.
available_stems = list(stem_to_datauri.keys())

def resolve_icon_datauri(company: str) -> str | None:
    k = norm_key(company)
    if k in stem_to_datauri:
        return stem_to_datauri[k]
    # token assist for the tricky names
    for t in preferred_tokens.get(company, []):
        for stem in available_stems:
            if t in stem:
                return stem_to_datauri[stem]
    # drop prefixes (e.g., "the_hershey_company" -> "hershey_company" -> ...)
    parts = k.split("_")
    for i in range(len(parts)-1):
        cand = "_".join(parts[i+1:])
        if cand in stem_to_datauri:
            return stem_to_datauri[cand]
    # last-token loose match
    for t in reversed([p for p in parts if len(p) > 3]):
        for stem in available_stems:
            if t in stem:
                return stem_to_datauri[stem]
    return None

# Attach a logo (or None) to every row, then report companies left without one.
df_spread["icon_datauri"] = df_spread["company"].apply(resolve_icon_datauri)

unresolved_rows = df_spread["icon_datauri"].isna()
missing = df_spread.loc[unresolved_rows, "company"].unique().tolist()
if not missing:
    print("✅ Logos resolved for all companies.")
else:
    print("⚠️ No logo matched for:", missing)

# 8) UI controls
# dropna() keeps sorted() from failing on a missing value mixed in with strings.
company_opts = ["All"] + sorted(df_spread["company"].dropna().unique().tolist())
category_opts = ["All"] + sorted(df_spread["Category"].dropna().unique().tolist())

company_dd  = widgets.Dropdown(options=company_opts, value="All", description="Company:")
category_dd = widgets.Dropdown(options=category_opts, value="All", description="Category:")
# continuous_update=False: report the value once the handle is released, so the
# map is rebuilt once per adjustment instead of on every intermediate tick.
icon_size   = widgets.IntSlider(value=40, min=18, max=56, step=2, description="Icon size:",
                                continuous_update=False)
spread_km   = widgets.FloatSlider(value=DEFAULT_SPREAD_KM, min=2.0, max=12.0, step=0.5, readout_format=".1f",
                                  description="Spread (km):", continuous_update=False)
view_mode   = widgets.ToggleButtons(options=["Statewide (no cluster)", "Clustered"], description="View:")

# Title banner markup for the map.
TITLE_HTML = """
<h3 align="center" style="font-size:20px; margin:10px 0;">
  <b>Pennsylvania’s Sugar & Salt Trail: Mapping Confectionery & Snack Headquarters</b>
</h3>
"""




# (reuse WEBSITE_MAP, website_link, directions_link, LEGEND_HTML, spread_overlaps, df, df_spread from earlier)

def render_map(company_sel="All", category_sel="All", icon_px=40, spread=10.0, mode="Statewide (no cluster)"):
    """Build the folium map for one combination of filter and display settings.

    Parameters
    ----------
    company_sel : str
        Exact company name to show, or "All" for no company filter.
    category_sel : str
        Category value to show, or "All" for no category filter.
    icon_px : int
        Width and height, in pixels, of each logo marker.
    spread : float
        Fan-out radius in km for companies that share identical coordinates.
    mode : str
        "Clustered" groups markers in a MarkerCluster; any other value adds
        them directly to the map.

    Returns
    -------
    folium.Map

    Notes
    -----
    Reads the module-level ``df``, ``df_spread``, ``DEFAULT_SPREAD_KM`` and
    ``TITLE_HTML``, plus ``WEBSITE_MAP`` and ``LEGEND_HTML``, which are expected
    to have been defined earlier in the notebook.
    """
    # Imported here so the function does not depend on an earlier cell having
    # brought these names into scope.
    import urllib.parse
    from html import escape
    from folium.plugins import Fullscreen, MarkerCluster, MeasureControl, MiniMap, MousePosition

    # Reuse the fan-out precomputed at the default radius; recompute only when
    # the user picked a different one. Fan-out always happens before filtering,
    # so a marker stays put regardless of the active filters.
    if abs(spread - DEFAULT_SPREAD_KM) < 1e-6:
        dfx = df_spread.copy()
    else:
        dfx = spread_overlaps(df, radius_km=spread)
    # keep resolved category/logos from previous step (aligned on the index)
    if "Category" not in dfx.columns:
        dfx["Category"] = df_spread["Category"]
    if "icon_datauri" not in dfx.columns:
        dfx["icon_datauri"] = df_spread["icon_datauri"]

    if company_sel != "All":
        dfx = dfx[dfx["company"] == company_sel]
    if category_sel != "All":
        dfx = dfx[dfx["Category"] == category_sel]

    # folium refuses NaN locations, so rows without a usable position are skipped.
    dfx = dfx.dropna(subset=["spread_lat", "spread_lon"])

    # Fixed statewide view with CartoDB positron as the built-in basemap.
    m = folium.Map(location=[40.8, -77.8], zoom_start=7, tiles="CartoDB positron", control_scale=True)

    # Basemap layers WITH attribution (prevents "Custom tiles must have an attribution.")
    osm_attr = "&copy; OpenStreetMap contributors"
    carto_attr = "&copy; OpenStreetMap contributors &copy; CARTO"
    stamen_attr = "Map tiles by Stamen Design, CC BY 3.0 — Map data © OpenStreetMap"
    # NOTE(review): the Fastly-hosted Stamen endpoints were reportedly retired in
    # 2023 in favour of Stadia Maps — confirm the two Stamen layers still load.
    basemaps = [
        ("OpenStreetMap", "https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png", osm_attr),
        ("Carto Dark", "https://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}{r}.png", carto_attr),
        ("Stamen Toner", "https://stamen-tiles.a.ssl.fastly.net/toner/{z}/{x}/{y}.png", stamen_attr),
        ("Stamen Terrain", "https://stamen-tiles.a.ssl.fastly.net/terrain/{z}/{x}/{y}.jpg", stamen_attr),
        ("Carto Light", "https://{s}.basemaps.cartocdn.com/light_all/{z}/{x}/{y}{r}.png", carto_attr),
    ]
    for layer_name, tile_url, attribution in basemaps:
        folium.TileLayer(
            tiles=tile_url,
            attr=attribution,
            name=layer_name,
            control=True,
            overlay=False,
        ).add_to(m)

    # Title + legend
    m.get_root().html.add_child(folium.Element(TITLE_HTML))
    m.get_root().html.add_child(folium.Element(LEGEND_HTML))

    # Plugins
    Fullscreen().add_to(m)
    MiniMap(toggle_display=True, position="topleft").add_to(m)
    MousePosition(position='topright', prefix="Lat/Lon").add_to(m)
    MeasureControl(position='topright', primary_length_unit='kilometers').add_to(m)

    # Layer for markers
    layer = MarkerCluster().add_to(m) if mode == "Clustered" else m

    side = int(icon_px)
    half = side // 2

    # markers
    for _, r in dfx.iterrows():
        company = str(r["company"])
        # Known website if we have one, otherwise a web search for the company.
        web = WEBSITE_MAP.get(r["company"]) or (
            "https://www.google.com/search?q=" + urllib.parse.quote(company + " Pennsylvania")
        )
        dirs = f"https://www.google.com/maps?q={r['lat']:.6f},{r['lon']:.6f}"

        # CSV text goes into HTML, so escape it; otherwise "&" or "<" in a
        # name or product list would be parsed as markup.
        info = {col: escape(str(r[col])) for col in ("city", "year", "products", "production", "Category")}
        popup_html = f"""
        <div style='font-size:13px;'>
          <b>{escape(company)}</b><br>
          📍 {info['city']}<br>
          📅 Founded: {info['year']}<br>
          🍬 Products: {info['products']}<br>
          📦 Production: {info['production']}<br>
          🗂️ Category: {info['Category']}<br><br>
          <a href="{escape(str(web), quote=True)}" target="_blank">🌐 Website</a> &nbsp;|&nbsp;
          <a href="{dirs}" target="_blank">🧭 Directions</a>
        </div>
        """
        popup = folium.Popup(IFrame(popup_html, width=280, height=200), max_width=320)

        if pd.notna(r.get("icon_datauri")):
            icon_html = f"<img src='{r['icon_datauri']}' style='width:{icon_px}px;height:{icon_px}px;border-radius:6px;box-shadow:0 1px 4px rgba(0,0,0,.25);'>"
            # Explicit size + centred anchor: without them the logo's top-left
            # corner, not its centre, sits on the coordinate.
            icon = DivIcon(html=icon_html, icon_size=(side, side), icon_anchor=(half, half))
        else:
            icon = folium.Icon(color="blue", icon="info-sign")

        folium.Marker(
            location=[r["spread_lat"], r["spread_lon"]],
            icon=icon,
            popup=popup,
            tooltip=company,
        ).add_to(layer)

    folium.LayerControl(collapsed=False).add_to(m)
    return m


out = widgets.Output()


def update_map(*args):
    """Re-render the map into ``out`` from the current widget values.

    Any positional arguments (the change notification passed by ``observe``)
    are ignored, so this works both as a widget callback and as a direct call.
    """
    with out:
        # wait=True keeps the previous map on screen until the new one is
        # ready, instead of blanking the output while the map is rebuilt.
        out.clear_output(wait=True)
        display(render_map(
            company_sel=company_dd.value,
            category_sel=category_dd.value,
            icon_px=icon_size.value,
            spread=spread_km.value,
            mode=view_mode.value
        ))

# Wire controls: a value change on any of them redraws the map.
controls = (company_dd, category_dd, icon_size, spread_km, view_mode)
for control in controls:
    control.observe(update_map, names="value")

# Two rows of controls, then the map output area, then the initial draw.
for control_row in ([company_dd, category_dd], [icon_size, spread_km, view_mode]):
    display(widgets.HBox(control_row))
display(out)
update_map()

# 9) (Optional) Save for embedding
final_map = render_map(mode="Statewide (no cluster)", icon_px=28, spread=10.0)
final_map.save("pa_sugar_salt_trail_map.html")
# The browser-download helper exists only on Google Colab; elsewhere the HTML
# file written above is already the result, so a failed import is not an error.
try:
    from google.colab import files
except ImportError:
    print("Saved pa_sugar_salt_trail_map.html (not running in Colab; skipping browser download).")
else:
    files.download("pa_sugar_salt_trail_map.html")


✅ Found & flattened 25 PNGs into icons_flat/
✅ Logos resolved for all companies.


HBox(children=(Dropdown(description='Company:', options=('All', "Auntie Anne's", "Bassett's Ice Cream", 'Bazzi…

HBox(children=(IntSlider(value=40, description='Icon size:', max=56, min=18, step=2), FloatSlider(value=10.0, …

Output()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>