In [None]:
!pip install geopandas


In [6]:
# ----------------------------
# Geo visualization: Zurich Stadtkreise choropleth (more readable)
# ----------------------------

import os
import re
import pandas as pd
import geopandas as gpd
import folium
from branca.colormap import linear

# ----------------------------
# 0) Paths (adjust if needed)
# ----------------------------
CSV_PATH = "zurich_enriched_taxonomy.csv"
GEO_PATH = "mapdata/stzh.adm_stadtkreise_a.json"
OUT_HTML = "outputs/maps/zurich_stadtkreise_choropleth_readable.html"

# Derive the output folder from OUT_HTML so that editing the path above still
# creates the right directory before the map is saved. The "." fallback covers
# an OUT_HTML without a directory component (dirname would be "").
os.makedirs(os.path.dirname(OUT_HTML) or ".", exist_ok=True)

# ----------------------------
# 1) Load data
# ----------------------------
# Restaurant table (CSV) and district polygons (GeoJSON).
df = pd.read_csv(CSV_PATH)
gdf = gpd.read_file(GEO_PATH)

# Columns present in the geo file:
#   objid, entstehung, bezeichnung, name, objectid, geometry
# "name" is cast to int and used as the join key below
# (presumably it holds the district number -- verify against the file).
gdf = gdf.assign(kreis_int=gdf["name"].astype(int))

# ----------------------------
# 2) Assign district from restaurant name (heuristic, subset only)
# ----------------------------
# Regex hints per district, matched against the lower-cased restaurant name.
# Dict order is the match priority: the first district with a hit wins.
# Districts 5 and 7 only carry the explicit "kreis N" hint so that names
# mentioning them are not silently dropped.
PATTERNS = {
    1:  [r"\bhb\b", r"hauptbahnhof", r"stadelhofen", r"\baltstadt\b", r"\bkreis\s*1\b"],
    2:  [r"\benge\b", r"\bkreis\s*2\b"],
    3:  [r"wiedikon", r"\bkreis\s*3\b"],
    4:  [r"\baussersihl\b", r"\blangstrasse\b", r"\bkreis\s*4\b"],
    5:  [r"\bkreis\s*5\b"],
    6:  [r"unterstrass", r"\bkreis\s*6\b"],
    7:  [r"\bkreis\s*7\b"],
    8:  [r"seefeld", r"riesbach", r"\bkreis\s*8\b"],
    9:  [r"altstetten", r"letzigrund", r"\bkreis\s*9\b"],
    10: [r"wipkingen", r"\bkreis\s*10\b"],
    11: [r"oerlikon", r"seebach", r"affoltern", r"\bkreis\s*11\b"],
    12: [r"schwamendingen", r"\bkreis\s*12\b"],
}


def name_to_kreis(name: str):
    """Guess the district number from location hints in a restaurant name.

    Args:
        name: Restaurant name. Non-string values (e.g. None or NaN from a
            missing CSV cell) are treated as "no hint".

    Returns:
        The key of the first PATTERNS entry with a matching regex, or None
        when nothing matches.
    """
    # Missing names carry no location hint; do not match against "nan"/"none".
    if not isinstance(name, str):
        return None

    s = name.lower()
    for kreis, pats in PATTERNS.items():
        if any(re.search(p, s) for p in pats):
            return kreis
    return None

# Tag every restaurant with a district guess; rows without a hint stay missing.
df["kreis"] = df["name"].map(name_to_kreis)

# Keep only the rows that received a district and store it as an integer key.
df_k = df.loc[df["kreis"].notna()].copy()
df_k["kreis_int"] = df_k["kreis"].astype(int)

print("Total restaurants:", len(df))
print("Subset with kreis assignment:", len(df_k))
print(df_k["kreis_int"].value_counts().sort_index())

# ----------------------------
# 3) Aggregate by district (subset only)
# ----------------------------
# One row per district: number of restaurants, mean rating and mean of
# "minimum_clean". as_index=False keeps "kreis_int" as a regular column.
summary = df_k.groupby("kreis_int", as_index=False).agg(
    n=("name", "count"),
    avg_rating=("rating", "mean"),
    avg_minimum=("minimum_clean", "mean"),
)

display(summary.sort_values("kreis_int").round(2))

# Left join onto the polygons so districts without restaurants are kept
# (their aggregate columns are missing).
gdf2 = gdf.merge(summary, how="left", on="kreis_int")

# ----------------------------
# 4) Build a more readable map (custom colormap + clearer borders)
# ----------------------------
m = folium.Map(
    location=[47.3769, 8.5417],
    zoom_start=11,
    tiles="CartoDB positron",
)

# The color scale spans the observed district averages; districts without a
# rating are left out of the range. With no ratings at all, use a fixed range.
vals = gdf2["avg_rating"].dropna()
if vals.empty:
    vmin, vmax = 3.5, 5.0  # fallback
else:
    vmin = float(vals.min())
    vmax = float(vals.max())

colormap = linear.YlGnBu_09.scale(vmin, vmax)
colormap.caption = "Average rating (subset with location hints)"
colormap.add_to(m)

def style_fn(feature):
    """Return the style dict for one district feature.

    Args:
        feature: GeoJSON feature dict; its "properties" may carry an
            "avg_rating" value.

    Returns:
        A light-gray style when the district has no rating (missing key,
        None, or float NaN); otherwise a style whose fill color comes from
        the module-level ``colormap``.
    """
    from math import isnan

    val = feature["properties"].get("avg_rating")

    # Districts without data: light gray. Missing ratings can arrive as None
    # (JSON null) or as float NaN depending on how the features were built;
    # NaN must not be handed to the colormap.
    if val is None or (isinstance(val, float) and isnan(val)):
        return {
            "fillColor": "#eeeeee",
            "color": "#666666",
            "weight": 1,
            "fillOpacity": 0.25,
        }

    return {
        "fillColor": colormap(val),
        "color": "#2b2b2b",   # border color (dark gray)
        "weight": 1.2,        # thin border
        "fillOpacity": 0.65,  # enough contrast while the base map stays visible
    }

def highlight_fn(feature):
    """Return the hover style: thick black outline and a denser fill.

    The feature argument is not inspected; every district gets the same
    highlight.
    """
    return dict(weight=3, color="#000000", fillOpacity=0.75)

# Hover tooltip: district label plus the aggregated subset figures.
tooltip = folium.features.GeoJsonTooltip(
    fields=["bezeichnung", "n", "avg_rating", "avg_minimum"],
    aliases=["District", "n (subset)", "Avg rating", "Avg minimum (CHF)"],
    sticky=True,
    localize=True,
)

# District polygons, styled per feature and highlighted on hover.
geo_layer = folium.GeoJson(
    gdf2,
    name="Zurich districts",
    tooltip=tooltip,
    style_function=style_fn,
    highlight_function=highlight_fn,
)
geo_layer.add_to(m)

folium.LayerControl(collapsed=True).add_to(m)

# Write the standalone HTML file; the trailing expression echoes its path
# as the cell output.
m.save(OUT_HTML)
OUT_HTML


Total restaurants: 195
Subset with kreis assignment: 24
kreis_int
1     6
2     2
3     1
4     4
8     1
9     1
10    1
11    8
Name: count, dtype: int64


Unnamed: 0,kreis_int,n,avg_rating,avg_minimum
0,1,6,4.25,17.83
1,2,2,4.05,32.5
2,3,1,3.6,35.0
3,4,4,3.42,20.0
4,8,1,4.2,20.0
5,9,1,2.3,30.0
6,10,1,4.2,20.0
7,11,8,4.21,28.12


'outputs/maps/zurich_stadtkreise_choropleth_readable.html'

## Interpretation 

Geographical visualization (Stadtkreise).
Because the dataset contains no addresses or coordinates, restaurants could only be assigned to Zurich’s administrative districts when their names included explicit location references (e.g., “Oerlikon”, “Altstetten”, “Enge”). This applies to only 24 of the 195 restaurants, and several districts are represented by a single restaurant. The choropleth therefore represents aggregated average ratings for this subset and provides spatial context at the district level rather than exact restaurant locations. Districts without assigned restaurants are drawn in light gray (no data), and conclusions should be interpreted as indicative patterns for the subset rather than for the full sample.