### 🏕️ 3. Feature Distribution Sankey + map
* Issue [#248](https://github.com/salgo60/Stockholm_Archipelago_Trail/issues/248)
* this [Notebook](https://github.com/salgo60/Stockholm_Archipelago_Trail/blob/main/Notebook/SAT248_Sankey.ipynb)
* About [plotly.com/python/sankey-diagram](https://plotly.com/python/sankey-diagram/)

In [1]:
import plotly.express as px


In [2]:
import json
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import plotly.graph_objects as go

# -----------------------
# 1️⃣ Load data
# -----------------------
print("📦 Loading data...")

sat = gpd.read_file("SAT_full.geojson")

# Normalize section name
if "Labelsv" in sat.columns:
    sat["section_name"] = sat["Labelsv"]
elif "Labelen" in sat.columns:
    sat["section_name"] = sat["Labelen"]
else:
    sat["section_name"] = [f"SAT_{i}" for i in range(len(sat))]

with open("osm_filtered_cache.json") as f:
    osm_data = json.load(f)

elements = [el for el in osm_data["elements"] if el["type"] == "node"]
amenities = pd.DataFrame([{
    "id": el["id"],
    "lat": el["lat"],
    "lon": el["lon"],
    **el.get("tags", {})
} for el in elements])

gdf = gpd.GeoDataFrame(
    amenities,
    geometry=gpd.points_from_xy(amenities.lon, amenities.lat),
    crs="EPSG:4326"
)

# Ensure same CRS
if gdf.crs != sat.crs:
    sat = sat.to_crs(gdf.crs)

print(f"✅ Loaded {len(gdf)} OSM amenities and {len(sat)} SAT sections.")

# -----------------------
# 2️⃣ Classify amenities
# -----------------------
def classify_amenity(tags):
    """Return high-level category for an OSM amenity/tourism/shop tag."""
    if pd.isna(tags):
        return "Other"
    tags = str(tags).lower()
    if tags in ["toilets", "toilet", "drinking_water", "waste_disposal", "shower"]:
        return "Facilities"
    if tags in ["restaurant", "cafe", "fast_food", "bar", "pub"]:
        return "Food"
    if tags in ["supermarket", "convenience", "shop"]:
        return "Shop"
    if tags in ["hotel", "hostel", "guest_house", "motel", "camp_site"]:
        return "Accommodation"
    if tags in ["nature_reserve", "protected_area"]:
        return "Nature"
    return "Other"

# Assign categories
gdf["category"] = gdf["amenity"].fillna(gdf["tourism"].fillna(gdf["shop"]))
gdf["group"] = gdf["category"].apply(classify_amenity)

# -----------------------
# 3️⃣ Normalize wheelchair tag
# -----------------------
def normalize_wheelchair(x):
    x = str(x).strip().lower()
    if x in ["yes", "true"]:
        return "Wheelchair=yes"
    elif x in ["no", "false"]:
        return "Wheelchair=no"
    elif x in ["limited", "partial"]:
        return "Wheelchair=limited"
    else:
        return "Wheelchair=missing"

gdf["wheelchair_status"] = gdf["wheelchair"].apply(normalize_wheelchair)

# -----------------------
# 4️⃣ Match amenities to nearest SAT section (in meters)
# -----------------------
print("🗺️ Matching amenities to nearest SAT section (EPSG:3006)...")

# Project both to SWEREF99 TM (meters)
sat_proj = sat.to_crs(3006)
gdf_proj = gdf.to_crs(3006)

# Spatial join for nearest match
joined = gpd.sjoin_nearest(
    gdf_proj,
    sat_proj[["section_name", "geometry"]],
    how="left",
    distance_col="distance_m"
)

# Keep only amenities within 200 m of trail
joined = joined[joined["distance_m"] <= 200].copy()

# ✅ Use joined dataset for subsequent analysis
gdf = joined.to_crs(4326)
print(f"✅ {len(gdf)} amenities matched to nearest SAT section (≤200 m).")

# -----------------------
# 5️⃣ Summarize counts
# -----------------------
summary = (
    gdf.groupby(["section_name", "group", "category", "wheelchair_status"])
    .size()
    .reset_index(name="count")
)

# -----------------------
# 6️⃣ Build Sankey structure
# -----------------------
sections = summary["section_name"].unique().tolist()
groups = summary["group"].unique().tolist()
categories = summary["category"].unique().tolist()
accessibility_nodes = [
    "Wheelchair=yes",
    "Wheelchair=no",
    "Wheelchair=limited",
    "Wheelchair=missing",
]

nodes = accessibility_nodes + categories + groups + sections
index = {n: i for i, n in enumerate(nodes)}

group_colors = {
    "Facilities": "#1f77b4",
    "Food": "#ff7f0e",
    "Shop": "#2ca02c",
    "Accommodation": "#9467bd",
    "Nature": "#17becf",
    "Other": "#7f7f7f",
    "Wheelchair=yes": "#2ca02c",
    "Wheelchair=no": "#d62728",
    "Wheelchair=limited": "#ff7f0e",
    "Wheelchair=missing": "#b0b0b0",
}

# Build links
links = []
for _, row in summary.iterrows():
    cat = row["category"]
    grp = row["group"]
    sec = row["section_name"]
    val = row["count"]
    wheel = row["wheelchair_status"]

    # Accessibility → Group
    links.append((index[wheel], index[grp], val, wheel))
    # Group → Section
    links.append((index[grp], index[sec], val, grp))

# -----------------------
# 7️⃣ Plot Sankey
# -----------------------
fig = go.Figure(
    data=[
        go.Sankey(
            arrangement="snap",
            node=dict(
                label=nodes,
                pad=15,
                thickness=18,
                color=[group_colors.get(n, "#ccc") for n in nodes],
            ),
            link=dict(
                source=[s for s, t, v, g in links],
                target=[t for s, t, v, g in links],
                value=[v for s, t, v, g in links],
                color=[group_colors.get(g, "#aaa") for s, t, v, g in links],
                hovertemplate="%{source.label} → %{target.label}<br>Count: %{value}<extra></extra>",
            ),
        )
    ]
)

fig.update_layout(
    title_text="Stockholm Archipelago Trail — Amenities & Wheelchair Accessibility",
    font_size=11,
    height=850,
)

fig.write_html("SAT_Sankey_Wheel_map.html", include_plotlyjs="cdn")
print("✅ Generated SAT_Sankey_Wheel_map.html with 4-way accessibility layer.")


📦 Loading data...
✅ Loaded 922 OSM amenities and 790 SAT sections.
🗺️ Matching amenities to nearest SAT section (EPSG:3006)...
✅ 982 amenities matched to nearest SAT section (≤200 m).
✅ Generated SAT_Sankey_Whee_map.html with 4-way accessibility layer.


In [4]:
# -----------------------
# 8️⃣ Add accessibility summary
# -----------------------
import plotly.express as px

wheelchair_summary = (
    gdf["wheelchair_status"].value_counts(normalize=False)
    .rename_axis("wheelchair_status")
    .reset_index(name="count")
)
wheelchair_summary["percent"] = (
    100 * wheelchair_summary["count"] / wheelchair_summary["count"].sum()
).round(1)

# Create pie chart
pie = px.pie(
    wheelchair_summary,
    values="count",
    names="wheelchair_status",
    title="Wheelchair Accessibility Summary",
    color="wheelchair_status",
    color_discrete_map={
        "Wheelchair=yes": "#2ca02c",
        "Wheelchair=no": "#d62728",
        "Wheelchair=limited": "#ff7f0e",
        "Wheelchair=missing": "#b0b0b0",
    },
)

# Combine Sankey + Pie Chart into one HTML
from plotly.subplots import make_subplots

fig_combined = make_subplots(
    rows=2, cols=1,
    specs=[[{"type": "sankey"}], [{"type": "domain"}]],
    row_heights=[0.75, 0.25],
    subplot_titles=[
        "Stockholm Archipelago Trail — Amenities & Wheelchair Accessibility",
        "Accessibility Distribution (%)"
    ],
)

fig_combined.add_trace(fig.data[0], row=1, col=1)
fig_combined.add_trace(pie.data[0], row=2, col=1)

fig_combined.update_layout(height=1100, font_size=11)
fig_combined.write_html("SAT_Sankey_Wheel_map.html", include_plotlyjs="cdn")

print("✅ Added pie chart summary below Sankey.")


✅ Added pie chart summary below Sankey.
