## Skapa en GeoJSON av SAT-leden
* issue [#181](https://github.com/salgo60/Stockholm_Archipelago_Trail/issues/181)
* denna [Notebook](https://github.com/salgo60/Stockholm_Archipelago_Trail/tree/main/notebook/181_geojson_SAT_trail)
* version 1.1 added more properties from Wikidata 

In [11]:
import time
import datetime  
# Remember when the run began so the final cell can report the elapsed time.
start_time = time.time()
start_str = f"{datetime.datetime.now():%Y-%m-%d %H:%M}"
print(f"Started: {start_str}")


Started: 2025-09-28 11:02


In [6]:
import requests, json
from lxml import etree
from shapely.geometry import LineString
from shapely.ops import unary_union
from collections import Counter, defaultdict
from requests.adapters import HTTPAdapter, Retry
from shapely.geometry import mapping
from pathlib import Path
from SPARQLWrapper import SPARQLWrapper, JSON

OSM_REL = 19012437  # SAT superrelation
# Member roles that member_way_ids() leaves out by default.
EXCLUDE_ROLES = {"alternate", "detour", "connection"}
# Wikidata QID; same value as the default argument of fetch_wikidata_sections().
trail_qid = "Q131318799"

# ---------- Network: session with retry + no-cache ----------
def make_session():
    """Build a ``requests.Session`` with a retry policy mounted for HTTPS URLs.

    The policy is ``Retry(total=5, backoff_factor=1.2)`` and is triggered by
    the status codes 429, 500, 502, 503 and 504.

    Returns:
        The configured ``requests.Session``.
    """
    retry_policy = Retry(
        total=5,
        backoff_factor=1.2,
        status_forcelist=[429, 500, 502, 503, 504],
    )
    session = requests.Session()
    session.mount("https://", HTTPAdapter(max_retries=retry_policy))
    return session

# Shared session; used as the default by fetch_relation_full_with_headers().
SESSION = make_session()

def fetch_relation_full_with_headers(rel_id: int, timeout=60, session=SESSION):
    """Download ``relation/{rel_id}/full`` from the OSM API and parse the XML.

    Args:
        rel_id: ID of the OSM relation to download.
        timeout: Timeout value handed to ``session.get``.
        session: Session used for the request; defaults to the module-level
            ``SESSION``.

    Returns:
        A tuple ``(root, headers)``: the parsed XML root element and the
        headers of the HTTP response.

    Raises:
        requests.HTTPError: From ``raise_for_status()`` when the response has
            an error status.
    """
    # Ask for an uncached copy and identify the client.
    request_headers = {
        "Cache-Control": "no-cache",
        "Pragma": "no-cache",
        "User-Agent": "sat-debug/0.1 (+contact)",
    }
    response = session.get(
        f"https://api.openstreetmap.org/api/0.6/relation/{rel_id}/full",
        headers=request_headers,
        timeout=timeout,
    )
    response.raise_for_status()
    return etree.fromstring(response.content), response.headers

# ---------- Member helpers ----------
def relation_members(xml_root, rel_id: int):
    """Group the members of relation ``rel_id`` by their ``type`` attribute.

    Args:
        xml_root: Root element whose direct ``relation`` children are searched.
        rel_id: ID of the relation whose members are collected.

    Returns:
        A ``defaultdict(list)`` mapping member type to a list of
        ``(ref as int, role)`` tuples in document order. A missing ``role``
        attribute becomes ``""``. The mapping is empty when no relation with
        that ID is found.
    """
    wanted_id = str(rel_id)
    # Only the first relation element with a matching id is considered.
    target = next(
        (rel for rel in xml_root.findall("relation") if rel.attrib["id"] == wanted_id),
        None,
    )

    members = defaultdict(list)
    if target is not None:
        for member in target.findall("member"):
            attrs = member.attrib
            members[attrs.get("type")].append(
                (int(attrs.get("ref")), attrs.get("role", ""))
            )
    return members

def summarize_members(members, title=""):
    """Print a per-type count of members together with a tally of their roles.

    Args:
        members: Mapping from member type to a list of ``(id, role)`` tuples,
            as produced by ``relation_members``.
        title: Optional heading printed before the summary lines.

    Types are reported in the order relation, way, node; a type with no
    members produces no line.
    """
    if title:
        print(f"== {title} ==")
    for kind in ("relation", "way", "node"):
        entries = members.get(kind, [])
        if entries:
            role_counts = Counter(role for _, role in entries)
            print(f"{kind}: {len(entries)} st | roller: {dict(role_counts)}")

def member_relation_ids(xml_root, rel_id: int):
    """Return the IDs of all relation-type members of relation ``rel_id``.

    Args:
        xml_root: Root element whose direct ``relation`` children are searched.
        rel_id: ID of the parent relation.

    Returns:
        A set of ints; empty when the relation is not found or has no
        relation members.
    """
    wanted_id = str(rel_id)
    for rel in xml_root.findall("relation"):
        if rel.attrib["id"] != wanted_id:
            continue
        # First match wins; later relations are not inspected.
        return {
            int(member.attrib["ref"])
            for member in rel.findall("member")
            if member.attrib.get("type") == "relation"
        }
    return set()

def member_way_ids(xml_root, rel_id: int, exclude_roles=EXCLUDE_ROLES):
    """Return the refs of way-type members of ``rel_id`` whose role is allowed.

    Args:
        xml_root: Root element whose direct ``relation`` children are searched.
        rel_id: ID of the parent relation.
        exclude_roles: Roles to leave out; a missing role counts as ``""``.

    Returns:
        A set of way refs kept as strings (the raw ``ref`` attribute values);
        empty when the relation is not found.
    """
    wanted_id = str(rel_id)
    for rel in xml_root.findall("relation"):
        if rel.attrib["id"] != wanted_id:
            continue
        # First match wins; later relations are not inspected.
        return {
            member.attrib["ref"]
            for member in rel.findall("member")
            if member.attrib.get("type") == "way"
            and member.attrib.get("role", "") not in exclude_roles
        }
    return set()

# ---------- Ways -> coordinates ----------
def ways_by_id_from_xml(xml_root, allowed_way_ids: set[str]):
    """Resolve the selected ways into lists of ``(lon, lat)`` coordinates.

    Args:
        xml_root: Root element holding ``node`` and ``way`` children.
        allowed_way_ids: Way IDs (as strings) to include.

    Returns:
        Dict mapping way ID (string) to its coordinate list. Node refs that
        have no ``node`` element in ``xml_root`` are skipped, and ways that
        end up with fewer than two coordinates are left out.
    """
    node_lonlat = {}
    for node in xml_root.findall("node"):
        attrs = node.attrib
        node_id = attrs["id"]
        node_lonlat[node_id] = (float(attrs["lon"]), float(attrs["lat"]))

    ways = {}
    for way in xml_root.findall("way"):
        way_id = way.attrib["id"]
        if way_id in allowed_way_ids:
            coords = [
                node_lonlat[nd.attrib["ref"]]
                for nd in way.findall("nd")
                if nd.attrib["ref"] in node_lonlat
            ]
            if len(coords) >= 2:
                ways[way_id] = coords
    return ways

# ---------- Wikidata sections with extended props ----------
def fetch_wikidata_sections(trail_qid="Q131318799"):
    """Fetch metadata from Wikidata for every part (P527) of a trail item.

    Args:
        trail_qid: Wikidata QID of the trail whose parts are queried.

    Returns:
        Dict keyed by OSM relation ID (int, taken from P402). Each value is a
        dict with the keys ``QID``, ``Labelsv``, ``Labelen``, ``website_sv``,
        ``website_en``, ``P1401``, ``P968`` and ``P373``; a value is ``None``
        when the query returned nothing for it. Sections without P402 are
        omitted. If the query yields several rows for one section, the last
        row wins.
    """
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    # The labels are OPTIONAL so that a section lacking an English or Swedish
    # label still comes back (with a None label) instead of vanishing from the
    # result and leaving its GeoJSON features without any metadata.
    sparql.setQuery(f"""
    SELECT ?section ?sectionLabel_sv ?sectionLabel_en 
           ?website_sv ?website_en 
           ?osmRel ?code ?email ?commonscat
    WHERE {{
      wd:{trail_qid} wdt:P527 ?section .

      OPTIONAL {{
        ?section p:P856 ?stmt_sv.
        ?stmt_sv ps:P856 ?website_sv.
        ?stmt_sv pq:P407 wd:Q9027.
      }}

      OPTIONAL {{
        ?section p:P856 ?stmt_en.
        ?stmt_en ps:P856 ?website_en.
        ?stmt_en pq:P407 wd:Q1860.
      }}

      OPTIONAL {{ ?section wdt:P402 ?osmRel }}
      OPTIONAL {{ ?section wdt:P1401 ?code }}
      OPTIONAL {{ ?section wdt:P968 ?email }}
      OPTIONAL {{ ?section wdt:P373 ?commonscat }}

      OPTIONAL {{ ?section rdfs:label ?sectionLabel_en. FILTER(lang(?sectionLabel_en)="en") }}
      OPTIONAL {{ ?section rdfs:label ?sectionLabel_sv. FILTER(lang(?sectionLabel_sv)="sv") }}
    }}
    """)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    def value_of(row, var):
        # Unbound variables are absent from a result row; report them as None.
        return row.get(var, {}).get("value")

    # Named `sections` rather than `mapping` so the imported
    # shapely.geometry.mapping is not shadowed inside this function.
    sections = {}
    for row in results["results"]["bindings"]:
        osm_rel = value_of(row, "osmRel")
        if not osm_rel:
            continue
        sections[int(osm_rel)] = {
            "QID": row["section"]["value"].split("/")[-1],
            "Labelsv": value_of(row, "sectionLabel_sv"),
            "Labelen": value_of(row, "sectionLabel_en"),
            "website_sv": value_of(row, "website_sv"),
            "website_en": value_of(row, "website_en"),
            "P1401": value_of(row, "code"),
            "P968": value_of(row, "email"),
            "P373": value_of(row, "commonscat"),
        }
    return sections

# ---------- Run ----------
section_meta = fetch_wikidata_sections()

# 1) Fetch the super-relation and show the response headers
root_super, hdr = fetch_relation_full_with_headers(OSM_REL)
for label, header_name in (
    ("HTTP Date:", "Date"),
    ("ETag:", "ETag"),
    ("Last-Modified:", "Last-Modified"),
):
    print(label, hdr.get(header_name))

# Version and timestamp of the super-relation element itself (None if absent).
super_rel = next(
    (r for r in root_super.findall("relation") if r.attrib["id"] == str(OSM_REL)),
    None,
)
if super_rel is None:
    rel_version = rel_ts = None
else:
    rel_version = super_rel.attrib.get("version")
    rel_ts = super_rel.attrib.get("timestamp")
print(f"Relation {OSM_REL} version: {rel_version} | timestamp: {rel_ts}")

# 2) Member overview
members_super = relation_members(root_super, OSM_REL)
summarize_members(members_super, title=f"Relation {OSM_REL}")

# 3) Child relations: IDs referenced as members versus <relation> elements
#    present in the /full response (the super-relation itself excluded).
ref_child_ids = member_relation_ids(root_super, OSM_REL)

present_child_ids = set()
for child in root_super.findall("relation"):
    child_id = child.attrib["id"]
    if child_id != str(OSM_REL):
        present_child_ids.add(int(child_id))

missing_ids = sorted(ref_child_ids.difference(present_child_ids))
extra_ids = sorted(present_child_ids.difference(ref_child_ids))

print(f"Delrelationer (referenser): {len(ref_child_ids)}")
print(f"Delrelationer (hämtade element): {len(present_child_ids)}")
print("Saknas i /full:", missing_ids)
print("Överflödiga i /full:", extra_ids)

# 4) Geometry per child relation
failed_children = []
relation_geoms = {}

for rid in sorted(ref_child_ids):
    try:
        rxml, _ = fetch_relation_full_with_headers(rid)
    except requests.RequestException as e:
        # Catch the whole requests exception family, not just HTTPError:
        # exhausted retries, timeouts and connection failures are reported as
        # other RequestException subclasses, and one unreachable child should
        # be recorded here rather than abort the entire run.
        failed_children.append((rid, str(e)))
        continue

    sub_way_ids = member_way_ids(rxml, rid)
    sub_ways = ways_by_id_from_xml(rxml, sub_way_ids)
    lines = [LineString(coords) for coords in sub_ways.values() if len(coords) > 1]
    if not lines:
        # No usable way geometry: this relation gets no entry in relation_geoms.
        continue
    relation_geoms[rid] = unary_union(lines)

if failed_children:
    print("Varning: kunde inte hämta följande delrelationer:")
    for rid, err in failed_children:
        print(f"  - {rid}: {err}")

# 5) Build GeoJSON features with metadata
features = []
for rid, geom in relation_geoms.items():
    # Wikidata metadata (when available) is merged on top of the relation ID.
    props = {"OSM_REL": rid, **section_meta.get(rid, {})}

    # One feature per line; other geometry types contribute nothing.
    if geom.geom_type == "LineString":
        segments = [geom]
    elif geom.geom_type == "MultiLineString":
        segments = list(geom.geoms)
    else:
        segments = []

    features.extend(
        {"type": "Feature", "geometry": mapping(seg), "properties": props}
        for seg in segments
    )

gj = {"type": "FeatureCollection", "features": features}
geojson_text = json.dumps(gj, ensure_ascii=False, indent=2)
Path("SAT_full.geojson").write_text(geojson_text, encoding="utf-8")
print("Sparade: SAT_full.geojson med metadata")


HTTP Date: Sun, 28 Sep 2025 01:10:54 GMT
ETag: None
Last-Modified: None
Relation 19012437 version: 31 | timestamp: 2025-09-22T05:04:08Z
== Relation 19012437 ==
relation: 20 st | roller: {'': 20}
Delrelationer (referenser): 20
Delrelationer (hämtade element): 20
Saknas i /full: []
Överflödiga i /full: []
Sparade: SAT_full.geojson med metadata


In [7]:
# Fresh fetch every time (do not reuse the earlier root_super).
root_super, hdr = fetch_relation_full_with_headers(OSM_REL)
for label, header_name in (
    ("HTTP Date:", "Date"),
    ("ETag:", "ETag"),
    ("Last-Modified:", "Last-Modified"),
):
    print(label, hdr.get(header_name))



HTTP Date: Sun, 28 Sep 2025 01:10:59 GMT
ETag: None
Last-Modified: None


In [8]:
# IDs that the super-relation REFERENCES (its relation-type members)
super_rel = next(
    (rel for rel in root_super.findall("relation") if rel.attrib["id"] == str(OSM_REL)),
    None,
)
if super_rel is None:
    refs = []
else:
    refs = [
        int(member.attrib["ref"])
        for member in super_rel.findall("member")
        if member.attrib.get("type") == "relation"
    ]
refs_set = set(refs)

# IDs that actually exist as <relation> ELEMENTS in the /full response
# (excluding the super-relation itself)
present_set = set()
for element in root_super.findall("relation"):
    element_id = element.attrib["id"]
    if element_id != str(OSM_REL):
        present_set.add(int(element_id))

print("Antal referenser:", len(refs_set))
print("Antal relation-element:", len(present_set))
print("Saknas som element (finns som referens):", sorted(refs_set.difference(present_set)))
print("Överflödiga element (inte listade som medlem):", sorted(present_set.difference(refs_set)))


Antal referenser: 20
Antal relation-element: 20
Saknas som element (finns som referens): []
Överflödiga element (inte listade som medlem): []


In [9]:
# Show version and timestamp of the super-relation element, if it is present.
super_rel = next(
    (rel for rel in root_super.findall("relation") if rel.attrib["id"] == str(OSM_REL)),
    None,
)
if super_rel is not None:
    print("Relation version:", super_rel.attrib.get("version"), "timestamp:", super_rel.attrib.get("timestamp"))


Relation version: 31 timestamp: 2025-09-22T05:04:08Z


In [10]:
    # Report the wall-clock time elapsed since start_time was recorded.
    end_time = time.time()
    duration = end_time - start_time
    print(f"Finished in {duration:.2f} seconds.")


Finished in 1719.92 seconds.
