### 🏕️ 3. Feature Distribution Sankey

In [1]:
import plotly.express as px


In [28]:
import geopandas as gpd
import pandas as pd
import json
import folium
import plotly.graph_objects as go
from datetime import datetime
from shapely.ops import nearest_points

# -----------------------
# 1️⃣ Load data
# -----------------------
sat = gpd.read_file("SAT_full.geojson")

# Normalize the section name
if "Labelsv" in sat.columns:
    sat["section_name"] = sat["Labelsv"]
elif "Labelen" in sat.columns:
    sat["section_name"] = sat["Labelen"]
else:
    sat["section_name"] = [f"SAT_{i}" for i in range(len(sat))]

with open("osm_filtered_cache.json") as f:
    osm_data = json.load(f)

elements = [el for el in osm_data["elements"] if el["type"] == "node"]
amenities = pd.DataFrame([{
    "id": el["id"],
    "lat": el["lat"],
    "lon": el["lon"],
    **el.get("tags", {})
} for el in elements])
gdf = gpd.GeoDataFrame(
    amenities,
    geometry=gpd.points_from_xy(amenities.lon, amenities.lat),
    crs="EPSG:4326"
)

# -----------------------
# 2️⃣ Classify amenities
# -----------------------
def classify_amenity(tags):
    tags = str(tags)
    if any(x in tags for x in ["toilet", "toilets", "drinking_water", "waste_disposal"]):
        return "Facilities"
    if any(x in tags for x in ["restaurant", "cafe", "bar", "pub", "fast_food"]):
        return "Food"
    if any(x in tags for x in ["shop", "supermarket", "convenience"]):
        return "Shop"
    if any(x in tags for x in ["hotel", "hostel", "guest_house", "motel", "camp_site"]):
        return "Accommodation"
    if any(x in tags for x in ["nature_reserve", "protected_area"]):
        return "Nature"
    return "Other"

gdf["group"] = gdf["amenity"].fillna(gdf["tourism"].fillna(gdf["shop"])).apply(lambda x: classify_amenity(x))
gdf["category"] = gdf["amenity"].fillna(gdf["tourism"].fillna(gdf["shop"]))

# -----------------------
# 3️⃣ Match to nearest SAT section (200 m buffer)
# -----------------------
sat_proj = sat.to_crs(3006)
gdf_proj = gdf.to_crs(3006)

# Create a buffer around each SAT section (200m)
sat_proj["geometry_buffered"] = sat_proj.buffer(200)

# ✅ Set geometry BEFORE doing the join
sat_proj = sat_proj.set_geometry("geometry_buffered")

# ✅ Now the spatial join will work
joined = gpd.sjoin_nearest(
    gdf_proj, sat_proj[["section_name", "geometry_buffered"]],
    how="left", distance_col="distance_m"
)

# Keep only nearby amenities
joined = joined[joined["distance_m"] <= 200]

# -----------------------
# 4️⃣ Sankey data
# -----------------------
summary = (
    joined.groupby(["section_name", "group", "category"], as_index=False)
    .agg(count=("id", "size"))
)

sections = summary["section_name"].unique().tolist()
groups = summary["group"].unique().tolist()
categories = summary["category"].unique().tolist()
nodes = sections + groups + categories
index = {n: i for i, n in enumerate(nodes)}

group_colors = {
    "Facilities": "#1f77b4",
    "Food": "#ff7f0e",
    "Shop": "#2ca02c",
    "Accommodation": "#9467bd",
    "Nature": "#17becf",
    "Other": "#7f7f7f"
}

links = []
for _, row in summary.iterrows():
    links.append((index[row["section_name"]], index[row["group"]], row["count"], row["group"]))
    links.append((index[row["group"]], index[row["category"]], row["count"], row["group"]))

fig = go.Figure(data=[go.Sankey(
    node=dict(
        label=nodes,
        pad=15,
        thickness=20,
        color=[group_colors.get(n, "#ccc") for n in nodes]
    ),
    link=dict(
        source=[s for s, t, v, g in links],
        target=[t for s, t, v, g in links],
        value=[v for s, t, v, g in links],
        color=[group_colors.get(g, "#aaa") for s, t, v, g in links],
        hovertemplate="%{source.label} → %{target.label}<br>Count: %{value}<extra></extra>"
    )
)])
fig.update_layout(title_text="Stockholm Archipelago Trail — Amenities by Section", font_size=11)
fig.write_html("SAT_Sankey.html", include_plotlyjs="cdn")

print("✅ Created SAT_Sankey.html")

# -----------------------
# 5️⃣ Folium map
# -----------------------
center = [sat.geometry.centroid.y.mean(), sat.geometry.centroid.x.mean()]
m = folium.Map(location=center, zoom_start=9, tiles="OpenStreetMap")

# Trail line
folium.GeoJson(
    sat.to_json(),
    name="SAT Trail",
    style_function=lambda x: {"color": "red", "weight": 3},
    tooltip=folium.GeoJsonTooltip(fields=["section_name"])
).add_to(m)

# Amenity markers
for group in joined["group"].unique():
    fg = folium.FeatureGroup(name=group)
    sub = joined[joined["group"] == group]
    for _, row in sub.iterrows():
        folium.CircleMarker(
            location=[row.geometry.y, row.geometry.x],
            radius=4,
            color=group_colors.get(row["group"], "gray"),
            fill=True,
            fill_opacity=0.7,
            popup=f"<b>{row['category']}</b><br>{row['section_name']}<br>{row['distance_m']:.0f} m"
        ).add_to(fg)
    fg.add_to(m)

folium.LayerControl().add_to(m)
m.save("SAT_Sankey_Map.html")
print("✅ Created SAT_Sankey_Map.html")

# -----------------------
# 6️⃣ Last updated footer
# -----------------------
timestamp = osm_data.get("osm3s", {}).get("timestamp_osm_base", None)
if timestamp:
    updated_date = datetime.fromisoformat(timestamp).strftime("%Y-%m-%d")
else:
    updated_date = datetime.now().strftime("%Y-%m-%d")

print("🕒 Last updated:", updated_date)


✅ Created SAT_Sankey.html



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




✅ Created SAT_Sankey_Map.html
🕒 Last updated: 2025-10-26


In [29]:
def classify_amenity(tags):
    if "toilets" in tags or tags == "toilet":
        return "Facilities"
    if tags in ["restaurant", "cafe", "fast_food", "bar"]:
        return "Food"
    if tags in ["supermarket", "convenience", "shop"]:
        return "Shop"
    if tags in ["hotel", "hostel", "guest_house", "motel", "camp_site"]:
        return "Accommodation"
    if tags in ["nature_reserve", "protected_area"]:
        return "Nature"
    return "Other"

gdf["group"] = gdf["amenity"].fillna(gdf["tourism"].fillna(gdf["shop"])).apply(lambda x: classify_amenity(str(x)))
gdf["category"] = gdf["amenity"].fillna(gdf["tourism"].fillna(gdf["shop"]))


In [30]:
# Project both to SWEREF99 TM (EPSG:3006) for Sweden
sat_proj = sat.to_crs(3006)
gdf_proj = gdf.to_crs(3006)

# Compute nearest SAT section for each amenity
from shapely.ops import nearest_points

def nearest_section(point, sections):
    distances = sections.geometry.distance(point)
    idx = distances.idxmin()
    return sections.loc[idx, "section_name"] if "section_name" in sections.columns else sections.loc[idx].get("name", f"SAT_{idx}")

gdf_proj["section_name"] = gdf_proj.geometry.apply(lambda p: nearest_section(p, sat_proj))
gdf_proj["distance_m"] = gdf_proj.geometry.apply(lambda p: sat_proj.distance(p).min())

print("✅ Amenities matched to nearest SAT section.")


✅ Amenities matched to nearest SAT section.


In [31]:
summary = (
    gdf_proj.groupby(["section_name", "group", "category"], as_index=False)
    .agg(count=("id", "size"))
)
summary.head()


Unnamed: 0,section_name,group,category,count
0,SAT Arholma,Accommodation,camp_site,1
1,SAT Arholma,Accommodation,hostel,2
2,SAT Arholma,Facilities,toilets,5
3,SAT Arholma,Food,cafe,2
4,SAT Arholma,Food,restaurant,2


In [32]:
summary

Unnamed: 0,section_name,group,category,count
0,SAT Arholma,Accommodation,camp_site,1
1,SAT Arholma,Accommodation,hostel,2
2,SAT Arholma,Facilities,toilets,5
3,SAT Arholma,Food,cafe,2
4,SAT Arholma,Food,restaurant,2
...,...,...,...,...
260,SAT Ålö,Other,ferry_terminal,1
261,SAT Ålö,Other,information,7
262,SAT Ålö,Other,picnic_site,1
263,SAT Ålö,Other,waste_basket,2


In [34]:
# Skip other
summary = summary[summary["group"] != "Other"]

In [38]:
import plotly.graph_objects as go

sections = summary["section_name"].unique().tolist()
groups = summary["group"].unique().tolist()
categories = summary["category"].unique().tolist()
nodes = sections + groups + categories
index = {n: i for i, n in enumerate(nodes)}

group_colors = {
    "Facilities": "#1f77b4",
    "Food": "#ff7f0e",
    "Shop": "#2ca02c",
    "Accommodation": "#9467bd",
    "Nature": "#17becf",
    "Other": "#7f7f7f"
}


links = []
for _, row in summary.iterrows():
    # Category → Group → Section  (reverse direction)
    links.append((index[row["category"]], index[row["group"]], row["count"], row["group"]))
    links.append((index[row["group"]], index[row["section_name"]], row["count"], row["group"]))

fig = go.Figure(data=[go.Sankey(
    node=dict(
        label=nodes,
        pad=15,
        thickness=20,
        color=[group_colors.get(n, "#ccc") for n in nodes]
    ),
    link=dict(
        source=[s for s, t, v, g in links],
        target=[t for s, t, v, g in links],
        value=[v for s, t, v, g in links],
        color=[group_colors.get(g, "#aaa") for s, t, v, g in links],
        hovertemplate="%{source.label} → %{target.label}<br>Count: %{value}<extra></extra>"
    )
)])
fig.update_layout(title_text="Stockholm Archipelago Trail — Amenities by Section", font_size=11)
fig.write_html("SAT_Sankey.html", include_plotlyjs="cdn")

print("✅ Generated SAT_Sankey.html with real data.")


✅ Generated SAT_Sankey.html with real data.


In [39]:
import folium

center = [sat.geometry.centroid.y.mean(), sat.geometry.centroid.x.mean()]
m = folium.Map(location=center, zoom_start=9, tiles="OpenStreetMap")

# Add SAT trail
folium.GeoJson(
    sat.to_json(),
    name="SAT Trail",
    style_function=lambda x: {"color": "red", "weight": 3},
    tooltip=folium.GeoJsonTooltip(fields=["section_name"] if "section_name" in sat.columns else [])
).add_to(m)

# Add amenities by group
for group in gdf_proj["group"].unique():
    fg = folium.FeatureGroup(name=group)
    sub = gdf_proj[gdf_proj["group"] == group]
    for _, row in sub.iterrows():
        folium.CircleMarker(
            location=[row.geometry.y, row.geometry.x],
            radius=4,
            color=group_colors.get(row["group"], "gray"),
            fill=True,
            fill_opacity=0.7,
            popup=f"<b>{row['category']}</b><br>{row.get('name','')}<br>{row['section_name']}<br>Dist: {row['distance_m']:.0f} m"
        ).add_to(fg)
    fg.add_to(m)

folium.LayerControl().add_to(m)
m.save("SAT_Sankey_Map.html")
print("✅ Generated SAT_Sankey_Map.html with real data.")



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




✅ Generated SAT_Sankey_Map.html with real data.


In [40]:
sat = gpd.read_file("SAT_full.geojson")
print(sat.columns.tolist())
sat.head(2)

['OSM_REL', 'QID', 'Labelsv', 'Labelen', 'website_sv', 'website_en', 'P1401', 'P968', 'P373', 'geometry']


Unnamed: 0,OSM_REL,QID,Labelsv,Labelen,website_sv,website_en,P1401,P968,P373,geometry
0,19012436,Q133374147,SAT Arholma,SAT Arholma,https://stockholmarchipelagotrail.com/sv/secti...,https://stockholmarchipelagotrail.com/section/...,mailto:ProblemsOnSAT@gmail.com,mailto:GeneralinfoSAT@gmail.com,SAT Arholma,"LINESTRING (19.1085 59.85118, 19.10836 59.8512..."
1,19012436,Q133374147,SAT Arholma,SAT Arholma,https://stockholmarchipelagotrail.com/sv/secti...,https://stockholmarchipelagotrail.com/section/...,mailto:ProblemsOnSAT@gmail.com,mailto:GeneralinfoSAT@gmail.com,SAT Arholma,"LINESTRING (19.12108 59.84514, 19.12088 59.84538)"


In [46]:
import geopandas as gpd
import pandas as pd
import json
import folium
import plotly.graph_objects as go
from datetime import datetime

# --------------------------------------------------------
# 1️⃣ Load data
# --------------------------------------------------------
sat = gpd.read_file("SAT_full.geojson")

# Ensure section name
if "Labelsv" in sat.columns:
    sat["section_name"] = sat["Labelsv"]
elif "Labelen" in sat.columns:
    sat["section_name"] = sat["Labelen"]
else:
    sat["section_name"] = [f"SAT_{i}" for i in range(len(sat))]

with open("osm_filtered_cache.json") as f:
    osm_data = json.load(f)

elements = [el for el in osm_data["elements"] if el["type"] == "node"]
amenities = pd.DataFrame([{
    "id": el["id"],
    "lat": el["lat"],
    "lon": el["lon"],
    **el.get("tags", {})
} for el in elements])

gdf = gpd.GeoDataFrame(
    amenities,
    geometry=gpd.points_from_xy(amenities.lon, amenities.lat),
    crs="EPSG:4326"
)

# --------------------------------------------------------
# 2️⃣ Classify amenities
# --------------------------------------------------------
def classify_amenity(tags):
    tags = str(tags)
    if any(x in tags for x in ["toilet", "toilets", "drinking_water", "waste_disposal"]):
        return "Facilities"
    if any(x in tags for x in ["restaurant", "cafe", "bar", "pub", "fast_food"]):
        return "Food"
    if any(x in tags for x in ["shop", "supermarket", "convenience"]):
        return "Shop"
    if any(x in tags for x in ["hotel", "hostel", "guest_house", "motel", "camp_site"]):
        return "Accommodation"
    if any(x in tags for x in ["nature_reserve", "protected_area"]):
        return "Nature"
    return "Other"

gdf["group"] = gdf["amenity"].fillna(gdf["tourism"].fillna(gdf["shop"])).apply(lambda x: classify_amenity(x))
gdf["category"] = gdf["amenity"].fillna(gdf["tourism"].fillna(gdf["shop"]))

# --------------------------------------------------------
# 3️⃣ Match amenities to nearest SAT section (200 m)
# --------------------------------------------------------
sat = sat.set_geometry("geometry")
sat_proj = sat.to_crs(3006)
gdf_proj = gdf.to_crs(3006)

sat_proj["geometry_buffered"] = sat_proj.buffer(200)
sat_proj = sat_proj.set_geometry("geometry_buffered")

joined = gpd.sjoin_nearest(
    gdf_proj, sat_proj[["section_name", "geometry_buffered"]],
    how="left", distance_col="distance_m"
)

joined = joined[joined["distance_m"] <= 200]

# --------------------------------------------------------
# 4️⃣ Sankey diagram
# --------------------------------------------------------
summary = (
    joined.groupby(["section_name", "group", "category"], as_index=False)
    .agg(count=("id", "size"))
)

sections = summary["section_name"].unique().tolist()
groups = summary["group"].unique().tolist()
categories = summary["category"].unique().tolist()
nodes = categories + groups + sections
index = {n: i for i, n in enumerate(nodes)}

group_colors = {
    "Facilities": "#1f77b4",
    "Food": "#ff7f0e",
    "Shop": "#2ca02c",
    "Accommodation": "#9467bd",
    "Nature": "#17becf",
    "Other": "#7f7f7f"
}

links = []
for _, row in summary.iterrows():
    links.append((index[row["category"]], index[row["group"]], row["count"], row["group"]))
    links.append((index[row["group"]], index[row["section_name"]], row["count"], row["group"]))

fig = go.Figure(data=[go.Sankey(
    node=dict(
        label=nodes,
        pad=15,
        thickness=20,
        color=[group_colors.get(n, "#ccc") for n in nodes]
    ),
    link=dict(
        source=[s for s, t, v, g in links],
        target=[t for s, t, v, g in links],
        value=[v for s, t, v, g in links],
        color=[group_colors.get(g, "#aaa") for s, t, v, g in links],
    )
)])
fig.update_layout(title_text="Stockholm Archipelago Trail — Amenities by Section", font_size=11)

# Add JS to send messages on click
fig.write_html(
    "SAT_Sankey_interactive.html",
    include_plotlyjs="cdn",
    post_script="""
    <script>
    document.querySelectorAll('.plotly').forEach((el) => {
      el.on('plotly_click', (data) => {
        const node = data.points[0].label;
        if (node && node.startsWith('SAT')) {
          window.parent.postMessage({section: node}, '*');
        }
      });
    });
    </script>
    """
)
print("✅ Created SAT_Sankey_interactive.html")

# --------------------------------------------------------
# 5️⃣ Folium map with listener — FIXED for marker visibility
# --------------------------------------------------------
center = [sat.geometry.centroid.y.mean(), sat.geometry.centroid.x.mean()]
m = folium.Map(location=center, zoom_start=9, tiles="OpenStreetMap")

# Amenity markers FIRST so they are visible
for group in joined["group"].unique():
    fg = folium.FeatureGroup(name=group)
    sub = joined[joined["group"] == group]
    for _, row in sub.iterrows():
        folium.CircleMarker(
            location=[row.geometry.y, row.geometry.x],
            radius=4,
            color=group_colors.get(row["group"], "gray"),
            fill=True,
            fill_opacity=0.8,
            popup=f"<b>{row['category']}</b><br>{row['section_name']}<br>{row['distance_m']:.0f} m"
        ).add_to(fg)
    fg.add_to(m)

# Trail line LAST (so red lines go behind markers)
folium.GeoJson(
    sat.to_json(),
    name="SAT Trail",
    style_function=lambda x: {"color": "red", "weight": 3},
    tooltip=folium.GeoJsonTooltip(fields=["section_name"])
).add_to(m)

folium.LayerControl().add_to(m)


# Inject JS listener for Sankey → Map
m.get_root().html.add_child(folium.Element("""
<script>
let map = window.map;
if (!map) map = window._last_map;
let highlighted;

window.addEventListener('message', (event) => {
  const section = event.data.section;
  if (!section) return;
  if (highlighted) {
    highlighted.setStyle({color:'red', weight:3});
  }
  map.eachLayer(layer => {
    if (layer.feature && layer.feature.properties.section_name === section) {
      layer.setStyle({color:'yellow', weight:6});
      highlighted = layer;
      map.fitBounds(layer.getBounds());
    }
  });
});
</script>
"""))

m.save("SAT_Sankey_Map_interactive.html")
print("✅ Created SAT_Sankey_Map_interactive.html")

# --------------------------------------------------------
# 6️⃣ Combine into final dashboard
# --------------------------------------------------------
timestamp = osm_data.get("osm3s", {}).get("timestamp_osm_base", None)
updated_date = datetime.fromisoformat(timestamp).strftime("%Y-%m-%d") if timestamp else datetime.now().strftime("%Y-%m-%d")

combined_html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Stockholm Archipelago Trail — Linked Amenities Explorer</title>
<style>
body {{
  font-family: "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
  margin: 0;
  background-color: #f8f9fa;
  color: #222;
}}
header {{
  background: linear-gradient(90deg, #004B87, #0077b6);
  color: white;
  padding: 1em;
  text-align: center;
  box-shadow: 0 2px 6px rgba(0,0,0,0.2);
}}
h1 {{ margin: 0; font-size: 1.8em; }}
.container {{ max-width: 1200px; margin: auto; padding: 1em; }}
iframe {{ width: 100%; border: none; border-radius: 12px; margin-top: 10px; }}
.legend {{
  display: flex; flex-wrap: wrap; justify-content: center; gap: 10px; margin: 10px 0;
}}
.legend-item {{ display: flex; align-items: center; background:#f1f1f1; border-radius:8px; padding:4px 8px; }}
.color-box {{ width:16px; height:16px; border-radius:3px; margin-right:6px; border:1px solid #ccc; }}
footer {{ text-align:center; font-size:0.85em; color:#666; padding:1em; }}
</style>
</head>
<body>
<header>
  <h1>Stockholm Archipelago Trail — Linked Amenities Explorer</h1>
  <p>Click a SAT section in the Sankey to zoom to it on the map.</p>
</header>

<div class="container">
  <section>
    <h2>1️⃣ SAT Sections → Amenity Flow</h2>
    <iframe src="SAT_Sankey_interactive.html" height="500"></iframe>

    <div class="legend">
      <div class="legend-item"><div class="color-box" style="background:#1f77b4;"></div>Facilities</div>
      <div class="legend-item"><div class="color-box" style="background:#ff7f0e;"></div>Food</div>
      <div class="legend-item"><div class="color-box" style="background:#2ca02c;"></div>Shop</div>
      <div class="legend-item"><div class="color-box" style="background:#9467bd;"></div>Accommodation</div>
      <div class="legend-item"><div class="color-box" style="background:#17becf;"></div>Nature</div>
      <div class="legend-item"><div class="color-box" style="background:#7f7f7f;"></div>Other</div>
    </div>
  </section>

  <section>
    <h2>2️⃣ SAT Trail Map with Amenities</h2>
    <iframe src="SAT_Sankey_Map_interactive.html" height="600"></iframe>
  </section>
</div>

<footer>
  Data from OpenStreetMap & Wikidata — Last updated: {updated_date} |
  <a href="https://github.com/salgo60/Stockholm_Archipelago_Trail" target="_blank">GitHub project</a>
</footer>
</body>
</html>
"""

with open("SAT_Sankey_Linked.html", "w", encoding="utf-8") as f:
    f.write(combined_html)

print("🎯 Created SAT_Sankey_Linked.html — open it in your browser!")


✅ Created SAT_Sankey_interactive.html



Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




✅ Created SAT_Sankey_Map_interactive.html
🎯 Created SAT_Sankey_Linked.html — open it in your browser!


DataSourceError: 'SAT_Sankey_Map_interactive.html' not recognized as being in a supported file format.; It might help to specify the correct driver explicitly by prefixing the file path with '<DRIVER>:', e.g. 'CSV:path'.

In [45]:
print("🔎 Amenities joined to trail sections:", joined.groupby("group").size())


🔎 Amenities joined to trail sections: group
Accommodation      68
Facilities        680
Food              235
Other            3227
Shop               49
dtype: int64
