## 195 PoC: Koppla SAT-leden till Naturvårdsverkets “Skyddad natur” + maskinöversatta föreskrifter

* [Issue 195](ss=

[Reservatsnamn]  
📜 Föreskrifter: [SV]  
🌍 Maskinöversatt: [DA] [NN] [EN] [FR] [ZH] [AR]


In [14]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import json, sys, time, re
from io import BytesIO, StringIO
from urllib.parse import urlencode
import requests

# --- (valfritt) geopandas för "svårare" P3896-källor (WFS/ZIP) ---
try:
    import geopandas as gpd        # pip install geopandas
except Exception:
    gpd = None

WIKIDATA_SPARQL = "https://query.wikidata.org/sparql"
UA = {"User-Agent": "SAT-pipeline/1.0 (contact: your-email@example.com)"}

# Fil att skriva
OUTFILE = "output/SAT_reserves_translations.geojson"

# Språklistor
RESIDENT_LANGS = ["sv","en","ar","fi","so","fa","ckb","ti","pl","tr","es"]
TOURIST_LANGS  = ["nb","nn","da","fi","de","nl","en","fr","es","it","zh","ja","pl","ru"]

def google_translate_url(src_sv_url: str, tl: str) -> str:
    base = "https://translate.google.com/translate"
    q = {"hl": "sv", "sl": "sv", "tl": tl, "u": src_sv_url}
    return f"{base}?{urlencode(q)}"

def fetch_sparql():
    q = """
    SELECT ?res ?resLabel ?foreskrift ?shape ?coord WHERE {
      wd:Q131318799 wdt:P3018 ?res .
      OPTIONAL { ?res wdt:P856 ?foreskrift }
      OPTIONAL { ?res wdt:P3896 ?shape }
      OPTIONAL { ?res wdt:P625 ?coord }
      SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
    }
    """
    r = requests.get(WIKIDATA_SPARQL, params={"query": q, "format": "json"}, headers=UA, timeout=60)
    r.raise_for_status()
    return r.json()["results"]["bindings"]

def is_commons_map(url: str) -> bool:
    # Commons Data namespace .map (GeoJSON)
    return ("commons.wikimedia.org" in url) and ("/data/main/Data:" in url or "action=raw" in url) and url.endswith(".map")

def fetch_geojson_from_url(url: str):
    """
    Försök hämta GeoJSON från P3896:
    - Commons .map (GeoJSON) → direkt
    - URL som slutar på .geojson/.json → direkt
    - Annars: försök geopandas.read_file(url) om geopandas finns
    Returnerar FeatureCollection-dict eller None.
    """
    try:
        r = requests.get(url, headers=UA, timeout=60)
        r.raise_for_status()
        gj = r.json()
        
        if gj.get("type") == "FeatureCollection":
            return gj
        if gj.get("type") == "Feature":
            return {"type": "FeatureCollection", "features": [gj]}
        # vissa Commons .map kan vara en lista av features:
        if isinstance(gj, list):
            return {"type": "FeatureCollection", "features": gj}

        if gpd is not None:
            # geopandas kan ofta läsa WFS/ZIP/SHP via URL
            gdf = gpd.read_file(url)
            if gdf is None or gdf.empty:
                return None
            return json.loads(gdf.to_json())
    except Exception:
        return None
    return None

def parse_point_wkt(wkt: str):
    # "Point(lon lat)" → (lon, lat)
    try:
        s = wkt.strip()
        s = s.replace("POINT", "Point")
        s = s[s.index("(")+1:s.rindex(")")]
        lon, lat = [float(x) for x in s.split()]
        return lon, lat
    except Exception:
        return None

def build_feature_from_point(lon: float, lat: float, props: dict) -> dict:
    return {
        "type": "Feature",
        "geometry": {"type": "Point", "coordinates": [lon, lat]},
        "properties": props,
    }

def attach_properties_to_all_features(gj: dict, props: dict) -> dict:
    out = {"type": "FeatureCollection", "features": []}
    for f in gj.get("features", []):
        g = f.get("geometry")
        if not g:
            continue
        p = f.get("properties", {}).copy()
        p.update(props)
        out["features"].append({"type": "Feature", "geometry": g, "properties": p})
    return out
    
import urllib.parse
from urllib.parse import urlparse, unquote

def commons_map_url(in_url: str) -> str:
    # Parse input URL
    parsed = urlparse(in_url)
    
    # Extract path after /data/main/
    path = parsed.path.split("/data/main/")[-1]
    
    # Decode percent-encodings (e.g. %C3%B6 → ö)
    path = unquote(path)
    
    # Replace + with _ (Commons convention)
    path = path.replace("+", "_")
    
    # Build output URL
    out_url = f"https://commons.wikimedia.org/w/index.php?title={path}&action=raw"
    return out_url



def main():
    rows = fetch_sparql()
    features = []

    for b in rows:
        label = b.get("resLabel", {}).get("value") or "(namnlös)"
        fores = b.get("foreskrift", {}).get("value")   # SV original
        shape = b.get("shape", {}).get("value")
        coord = b.get("coord", {}).get("value")

        
        #print("Shapeout",commons_map_url(shape))
        shape = commons_map_url(shape)
        # http://commons.wikimedia.org/data/main/Data:/Sweden/Nature+reserves/2020/Fjärdlång/2002299.map
        # https://commons.wikimedia.org/w/index.php?title=Data:/Sweden/Nature_reserves/2020/Fj%C3%A4rdl%C3%A5ng/2002299.map&action=raw
        props = {
            "name": label,
            "foreskrift_sv": fores,
            "translations": {tl: google_translate_url(fores, tl) for tl in sorted(set(RESIDENT_LANGS + TOURIST_LANGS))}
                        if fores else {},
            "source": "Wikidata P3018/P856/P3896; Commons/GeoJSON där tillgängligt",
        }

        # 1) Försök polygon via P3896
        
        added = False
        if shape:
            gj = fetch_geojson_from_url(shape)
            
            if gj:
                gj_props = attach_properties_to_all_features(gj, props)
                features.extend(gj_props["features"])
                added = True
        
        # 2) Fallback: punkt från P625
        if not added:
            if coord:
                pt = parse_point_wkt(coord)
                if pt:
                    lon, lat = pt
                else:
                    lon, lat = 18.06, 59.33  # stockholm fallback
            else:
                lon, lat = 18.06, 59.33
            features.append(build_feature_from_point(lon, lat, props))

        # Hövlig paus så vi inte spammar endpoints
        time.sleep(0.2)

    fc = {"type": "FeatureCollection", "features": features}
    # skriv
    with open(OUTFILE, "w", encoding="utf-8") as f:
        json.dump(fc, f, ensure_ascii=False, indent=2)

    print(f"✅ Skrev {OUTFILE} med {len(features)} features.")

if __name__ == "__main__":
    sys.exit(main())


✅ Skrev output/SAT_reserves_translations.geojson med 12 features.


SystemExit: 

In [13]:
%tb 


SystemExit: 