### 237_SAT_images
* Issue [#237](https://github.com/salgo60/Stockholm_Archipelago_Trail/issues/237)
* Denna [notebook](237_SAT_images.ipynb)

In [1]:
import time
import datetime  
# Wall-clock start (epoch seconds); read again by the last cell to report the run duration.
start_time = time.time()
# Human-readable local timestamp, minute resolution, for the run log.
start_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
print(f"Started: {start_str}")


Started: 2025-10-21 04:32


In [2]:
import os
import json
import time
import random
from pathlib import Path
from urllib.parse import quote
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
import requests_cache
from SPARQLWrapper import SPARQLWrapper, JSON
import folium
from folium.plugins import MarkerCluster

# ===============================
# Configuration
# ===============================
# Root Commons category that is crawled (including its subcategories).
CATEGORY = "Category:Stockholm Archipelago Trail"
# Optional GeoJSON of the trail; drawn on the map only if the file exists.
SAT_GEOJSON_PATH = "SAT_full.geojson"
# JSON caches written next to the notebook: processed points, category file list, license labels.
POINTS_CACHE_FILE = "points_cache.json"
CATEGORY_CACHE_FILE = "category_cache.json"
LICENSE_CACHE_FILE = "license_cache.json"
# Directory holding the per-file caches (info_*.json and sdc_*.json).
CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(exist_ok=True)

# User-Agent header sent on every request made through the shared session.
USER_AGENT = "SAT-Folium-Mapper/1.3 (contact: salgo60@msn.com)"
# Commons Action API endpoint and the base URL for per-entity JSON (MediaInfo id + ".json" is appended).
API = "https://commons.wikimedia.org/w/api.php"
ENTITY_DATA = "https://commons.wikimedia.org/wiki/Special:EntityData/"

# License QID -> color used for markers, cluster bubbles and the legend.
LICENSE_COLORS = {
    "Q6938433": "green",       # CC0 1.0
    "Q18199165": "blue",       # CC BY-SA 4.0
    "Q7257361": "gray",        # Public Domain Mark
    "Q88088423": "purple",     # CC BY-ND 4.0
    "Q19125117": "teal",       # CC BY-SA 3.0
    "Q20007257": "red",        # CC BY 4.0
    "Q14946043": "orange",     # CC BY 3.0
}

# License QID -> (display label, Swedish deed URL). QIDs listed here are never looked up via SPARQL.
KNOWN_LICENSES = {
    "Q6938433": ("CC0 1.0", "https://creativecommons.org/publicdomain/zero/1.0/deed.sv"),
    "Q18199165": ("CC BY-SA 4.0", "https://creativecommons.org/licenses/by-sa/4.0/deed.sv"),
    "Q7257361": ("Public Domain Mark", "https://creativecommons.org/publicdomain/mark/1.0/deed.sv"),
    "Q88088423": ("CC BY-ND 4.0", "https://creativecommons.org/licenses/by-nd/4.0/deed.sv"),
    "Q19125117": ("CC BY-SA 3.0", "https://creativecommons.org/licenses/by-sa/3.0/deed.sv"),
    "Q20007257": ("CC BY 4.0", "https://creativecommons.org/licenses/by/4.0/deed.sv"),
    "Q14946043": ("CC BY 3.0", "https://creativecommons.org/licenses/by/3.0/deed.sv"),
}


# ===============================
# HTTP caching
# ===============================
print("🧠 Initierar HTTP-cache (7 dagar)...")
# Install a global requests cache named "http_cache"; entries expire after 7 days
# and both GET and POST responses are eligible for caching.
requests_cache.install_cache("http_cache", expire_after=7 * 24 * 3600, allowable_methods=("GET", "POST"))
# Shared session used by safe_get(); created after install_cache() and given the project User-Agent.
S = requests.Session()
S.headers.update({"User-Agent": USER_AGENT})

# ===============================
# Helper functions
# ===============================
def load_json(path, default):
    """Read a UTF-8 JSON file and return its parsed content.

    Args:
        path: Location of the JSON file (string or path-like).
        default: Value handed back unchanged when the file does not exist.

    Returns:
        The decoded JSON document, or ``default`` if ``path`` is missing.
    """
    source = Path(path)
    if not source.exists():
        return default
    with source.open("r", encoding="utf-8") as handle:
        return json.load(handle)

def save_json(path, obj):
    """Serialize ``obj`` to ``path`` as pretty-printed UTF-8 JSON, atomically.

    The document is first written to a sibling ``<name>.tmp`` file and then
    moved over the target with ``os.replace``. An interrupted run or a
    serialization error therefore never leaves a truncated cache file behind
    (which would make the next ``load_json`` call fail).

    Args:
        path: Destination file (string or path-like).
        obj: JSON-serializable object.

    Raises:
        TypeError / ValueError: If ``obj`` cannot be serialized.
        OSError: If the file cannot be written or replaced.
    """
    target = Path(path)
    tmp = target.with_name(target.name + ".tmp")
    try:
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(obj, f, indent=2, ensure_ascii=False)
        os.replace(tmp, target)
    except BaseException:
        # Also covers KeyboardInterrupt from a stopped notebook cell: drop the
        # partial temp file and keep whatever was stored before.
        tmp.unlink(missing_ok=True)
        raise

def cached_json(path: Path, func):
    """Return the JSON stored at ``path``, computing and storing it on a miss.

    Empty results (``{}``, ``[]``, ``None``) are treated as "no data": they are
    neither written to disk nor trusted when read back. The fetchers in this
    notebook return an empty dict when a request fails, so persisting it would
    turn a transient network error into a permanent cache entry.

    Args:
        path: Cache file for this value.
        func: Zero-argument callable producing the JSON-serializable value.

    Returns:
        The cached value if a non-empty one could be read, otherwise ``func()``.
    """
    if path.exists():
        try:
            with open(path, "r", encoding="utf-8") as f:
                cached = json.load(f)
            if cached:
                return cached
        except (OSError, ValueError) as e:
            # Unreadable or corrupt cache file: report it and fall through to a refetch.
            print(f"⚠️ Oläsbar cache {path} ({e}); hämtar på nytt.")
    data = func()
    if data:
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
    return data

def safe_get(url, params=None, max_retries=5, base_wait=1.5, timeout=30):
    """GET ``url`` through the shared session with retries and exponential backoff.

    Network errors and retryable HTTP statuses (408, 425, 429 and 5xx) are
    retried after ``base_wait * 2**attempt`` seconds plus up to one second of
    jitter; a numeric ``Retry-After`` header (capped at 60 s) lengthens the
    wait. Other non-200 statuses (e.g. 404) are not retried because repeating
    the request cannot change the outcome. No sleep happens after the final
    attempt.

    Args:
        url: Request URL.
        params: Optional query parameters.
        max_retries: Maximum number of attempts.
        base_wait: Base delay in seconds for the backoff.
        timeout: Per-request timeout in seconds.

    Returns:
        The response object on HTTP 200, otherwise ``None``.
    """
    for attempt in range(max_retries):
        is_last = attempt == max_retries - 1
        wait = base_wait * (2 ** attempt) + random.random()
        try:
            r = S.get(url, params=params, timeout=timeout)
        except requests.exceptions.RequestException as e:
            if is_last:
                print(f"⚠️ Nätverksproblem ({e})")
                break
            print(f"⚠️ Nätverksproblem ({e}); väntar {wait:.1f}s...")
            time.sleep(wait)
            continue

        if r.status_code == 200:
            return r
        print(f"⚠️ HTTP {r.status_code} på {url}")

        retryable = r.status_code in (408, 425, 429) or r.status_code >= 500
        if not retryable or is_last:
            break
        # Respect the server's back-off hint when it is given in seconds.
        retry_after = r.headers.get("Retry-After", "")
        if retry_after.isdigit():
            wait = max(wait, min(float(retry_after), 60.0))
        time.sleep(wait)
    print("❌ Permanent fel:", url)
    return None

# ===============================
# Category crawl
# ===============================
def get_all_category_files_recursive(category):
    """List every ``File:`` title in ``category`` and all of its subcategories.

    A non-empty list in ``CATEGORY_CACHE_FILE`` is returned as-is. Otherwise the
    category tree is crawled through the ``categorymembers`` API, following
    continuation tokens and visiting each category once. The result is written
    to the cache only when every request succeeded, so an interrupted crawl is
    not mistaken for the complete file list on later runs.

    Args:
        category: Root category title, including the ``Category:`` prefix.

    Returns:
        Sorted list of file titles (possibly partial if requests failed).
    """
    print(f"📂 Startar hämtning av filer från {category}...")
    files = load_json(CATEGORY_CACHE_FILE, [])
    if files:
        print(f"✅ Laddade {len(files)} filer från cache.")
        return files

    seen_cats = set()
    all_files = set()
    failed_cats = []  # categories whose listing was cut short by a failed request

    def crawl(cat):
        # Guard against cycles and categories reachable through several parents.
        if cat in seen_cats:
            return
        seen_cats.add(cat)

        cont = {}
        while True:
            params = {
                "action": "query", "format": "json",
                "list": "categorymembers",
                "cmtitle": cat,
                "cmnamespace": "6|14",
                "cmtype": "file|subcat",
                "cmlimit": "max",
            }
            # Continuation tokens from the previous page, empty on the first request.
            params.update(cont)
            r = safe_get(API, params=params)
            if not r:
                failed_cats.append(cat)
                break
            data = r.json()
            for m in data.get("query", {}).get("categorymembers", []):
                title = m["title"]
                if title.startswith("File:"):
                    all_files.add(title)
                elif title.startswith("Category:"):
                    crawl(title)
            cont = data.get("continue", {})
            if not cont:
                break
            time.sleep(0.2)

    crawl(category)
    files_sorted = sorted(all_files)
    print(f"✅ Hittade {len(files_sorted)} filer i {len(seen_cats)} kategorier.")
    if failed_cats:
        print(f"⚠️ Ofullständig hämtning ({len(failed_cats)} kategorier misslyckades); sparar inte cache.")
    else:
        save_json(CATEGORY_CACHE_FILE, files_sorted)
    return files_sorted

# ===============================
# Commons helpers
# ===============================
def fetch_pageinfo_and_ext(title):
    """Look up thumbnail URL, original URL and entity id for a Commons file.

    The answer is cached per title in ``CACHE_DIR`` as ``info_<quoted title>.json``.

    Args:
        title: Full page title, e.g. ``"File:Example.jpg"``.

    Returns:
        Dict with keys ``thumb``, ``url`` and ``mid``; an empty dict when the
        request fails or the API returns no page.
    """
    cache_path = CACHE_DIR / f"info_{quote(title, safe='')}.json"

    def query_commons():
        print(f"🔍 Hämtar metadata för {title}...")
        query = {
            "action": "query", "format": "json",
            "titles": title,
            "prop": "imageinfo|pageprops",
            "iiprop": "extmetadata|url",
            "iiurlwidth": 600,
        }
        response = safe_get(API, params=query)
        if not response:
            return {}

        pages = response.json().get("query", {}).get("pages", {})
        # Only the first page of the result is considered.
        page = next(iter(pages.values()), None)
        if page is None:
            return {}

        image_info = (page.get("imageinfo") or [{}])[0]
        thumb_url = image_info.get("thumburl")
        original_url = image_info.get("url")

        props = page.get("pageprops") or {}
        entity_id = props.get("wikibase_item") if isinstance(props, dict) else None
        if not entity_id:
            # No id in the page props: resolve it through wbgetentities instead.
            entity_id = filetitle_to_mediainfo_id(title)
        return {"thumb": thumb_url, "url": original_url, "mid": entity_id}

    return cached_json(cache_path, query_commons)

def filetitle_to_mediainfo_id(title):
    """Resolve a Commons file title to its ``M…`` entity id via ``wbgetentities``.

    Args:
        title: Full page title, e.g. ``"File:Example.jpg"``.

    Returns:
        The first entity id starting with ``"M"``, or ``None`` when the request
        fails or no such id is present in the response.
    """
    response = safe_get(API, params={
        "action": "wbgetentities", "format": "json",
        "sites": "commonswiki", "titles": title, "props": "info",
    })
    if not response:
        return None
    entity_ids = response.json().get("entities", {}).keys()
    return next((entity_id for entity_id in entity_ids if entity_id.startswith("M")), None)

def get_sdc(mid):
    """Download the entity JSON for ``mid`` from ``Special:EntityData``.

    Returns:
        The decoded JSON document, or ``None`` if the request did not succeed.
    """
    entity_url = f"{ENTITY_DATA}{mid}.json"
    print(f"📡 Hämtar SDC-data för {mid}...")
    response = safe_get(entity_url)
    return response.json() if response else None

def get_sdc_cached(mid):
    """Disk-cached variant of ``get_sdc``; a failed download yields ``{}``.

    The document is stored in ``CACHE_DIR`` as ``sdc_<mid>.json``.
    """
    def fetch_or_empty():
        return get_sdc(mid) or {}

    return cached_json(CACHE_DIR / f"sdc_{mid}.json", fetch_or_empty)

def extract_coords(sdc):
    """Pull a ``(latitude, longitude)`` pair out of an entity JSON document.

    Statements are read from the first entity in ``sdc["entities"]``. Property
    P1259 is checked before P625, and the first statement value carrying a
    ``latitude`` key wins.

    Returns:
        Tuple of two floats, or ``None`` when no coordinate is found or the
        document does not have the expected structure.
    """
    try:
        first_entity = next(iter(sdc["entities"].values()))
        statements = first_entity.get("statements", {})
        for prop in ("P1259", "P625"):
            if prop not in statements:
                continue
            for statement in statements[prop]:
                value = statement.get("mainsnak", {}).get("datavalue", {}).get("value")
                if not value or "latitude" not in value:
                    continue
                return float(value["latitude"]), float(value["longitude"])
    except Exception:
        # Best effort: any structural surprise is treated as "no coordinates".
        pass
    return None

def extract_license_qid(sdc):
    """Collect the license QIDs (property P275) from an entity JSON document.

    Only statements whose datavalue type is ``wikibase-entityid`` contribute;
    each id is rebuilt as ``"Q" + numeric-id``.

    Returns:
        List of QID strings in statement order; an empty list when P275 is
        absent or the document does not have the expected structure.
    """
    try:
        first_entity = next(iter(sdc["entities"].values()))
        statements = first_entity.get("statements", {})
        if "P275" not in statements:
            return []
        datavalues = (
            statement.get("mainsnak", {}).get("datavalue", {})
            for statement in statements["P275"]
        )
        return [
            "Q" + str(datavalue["value"]["numeric-id"])
            for datavalue in datavalues
            if datavalue.get("type") == "wikibase-entityid"
        ]
    except Exception:
        # Best effort: malformed documents yield no licenses at all.
        return []

# Labels/URLs for licenses outside KNOWN_LICENSES, persisted between runs.
license_cache = load_json(LICENSE_CACHE_FILE, {})

def get_license_info(qid, max_retries=5):
    """Return ``(label, url)`` for a license QID.

    Lookup order: ``KNOWN_LICENSES``, then ``license_cache``, then a SPARQL
    query against the Wikidata Query Service for the Swedish label and the
    optional P856 URL. Successful lookups are stored in ``license_cache`` and
    written to ``LICENSE_CACHE_FILE``.

    Args:
        qid: License item id, e.g. ``"Q18199165"``.
        max_retries: Number of SPARQL attempts; the delay doubles between them.

    Returns:
        Tuple ``(label, url)``. The label falls back to ``qid`` and the URL to
        the item's Wikidata page when nothing better is available.
    """
    fallback_url = f"https://www.wikidata.org/wiki/{qid}"
    if qid in KNOWN_LICENSES:
        return KNOWN_LICENSES[qid]
    if qid in license_cache:
        d = license_cache[qid]
        return d.get("label", qid), d.get("url", fallback_url)

    print(f"🔖 Hämtar licensinfo för {qid} från Wikidata...")
    # Same identifying User-Agent (with contact address) as the other Wikimedia requests.
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql", agent=USER_AGENT)
    query = f"""
    SELECT ?label ?url WHERE {{
      wd:{qid} rdfs:label ?label.
      OPTIONAL {{ wd:{qid} wdt:P856 ?url. }}
      FILTER(LANG(?label)='sv')
    }}
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    delay = 1
    for attempt in range(max_retries):
        try:
            results = sparql.query().convert()
            bindings = results.get("results", {}).get("bindings", [])
            if bindings:
                # Never hand back None as the label; it would be rendered literally in the popup.
                label = bindings[0].get("label", {}).get("value") or qid
                url = bindings[0].get("url", {}).get("value", fallback_url)
            else:
                label = qid
                url = fallback_url
            license_cache[qid] = {"label": label, "url": url}
            save_json(LICENSE_CACHE_FILE, license_cache)
            return label, url
        except Exception as e:
            print(f"⚠️ Fel vid hämtning av {qid}: {e}")
            # Back off only when another attempt follows.
            if attempt < max_retries - 1:
                time.sleep(delay)
                delay *= 2
    return qid, fallback_url

def make_popup(p):
    """Build the popup HTML for one map point.

    Title, license labels and all URLs come from Commons/Wikidata, so they are
    HTML-escaped before being placed in the markup.

    Args:
        p: Point dict with keys ``title``, ``page``, ``thumb`` (may be falsy)
           and ``licenses`` (list of license QIDs).

    Returns:
        HTML string with the title, a link to the Commons page, the thumbnail
        (if any) and one link per license, or ``okänd`` when there is none.
    """
    from html import escape  # local import keeps this cell self-contained

    license_links = []
    for qid in p["licenses"]:
        label, url = get_license_info(qid)
        license_links.append(
            f'<a href="{escape(str(url), quote=True)}" target="_blank">{escape(str(label))}</a>'
        )
    lic_html = "<br>".join(license_links) if license_links else "okänd"

    thumb = p["thumb"]
    img_html = (
        f'<img src="{escape(str(thumb), quote=True)}" style="max-width:300px;height:auto"/>'
        if thumb else ""
    )
    title_html = escape(str(p["title"]))
    page_href = escape(str(p["page"]), quote=True)
    return f"""
    <div style="font-size:14px">
    <b>{title_html}</b><br>
    <a href="{page_href}" target="_blank">Commons-sida</a><br>
    {img_html}<br>
    <b>Licens:</b> {lic_html}
    </div>
    """

def process_title(title):
    """Turn one Commons file title into a map point, or ``None`` if it has no coordinates.

    A title already present in ``points_cache`` is answered from there.
    Otherwise the page info and the entity JSON are fetched (both disk-cached)
    and the coordinates and license QIDs are extracted.

    Args:
        title: Full page title including the ``File:`` prefix.

    Returns:
        Dict with ``title`` (prefix removed), ``page`` (Commons URL), ``thumb``,
        ``lat``, ``lon`` and ``licenses``; ``None`` when the entity id or the
        coordinates are missing.
    """
    if title in points_cache:
        return points_cache[title]
    info = fetch_pageinfo_and_ext(title)
    mid = info.get("mid")
    if not mid:
        return None
    sdc = get_sdc_cached(mid)
    coords = extract_coords(sdc)
    if not coords:
        return None
    licenses = extract_license_qid(sdc)
    print(f"📍 {title} → {coords}")
    return {
        # Strip only the namespace prefix; "File:" may legitimately occur later in a title.
        "title": title.removeprefix("File:"),
        "page": "https://commons.wikimedia.org/wiki/" + quote(title.replace(" ", "_")),
        "thumb": info.get("thumb"),
        "lat": coords[0],
        "lon": coords[1],
        "licenses": licenses,
    }

print("🚀 Startar körning med cache och ETA...")
points_cache = load_json(POINTS_CACHE_FILE, {})
import atexit
# Persist whatever has been collected even if the kernel exits mid-run.
atexit.register(lambda: save_json(POINTS_CACHE_FILE, points_cache))
files = get_all_category_files_recursive(CATEGORY)
print(f"📊 Totalt {len(files)} filer att behandla.")

max_workers = 3
progress_every = 25
processed = 0
# Separate timer for the ETA: the notebook-level `start_time` set in the first
# cell must stay untouched so the final cell reports the whole run.
crawl_start = time.time()

with ThreadPoolExecutor(max_workers=max_workers) as ex:
    futures = {ex.submit(process_title, t): t for t in files}
    for fut in as_completed(futures):
        title = futures[fut]
        try:
            res = fut.result()
            if res:
                points_cache[title] = res
        except Exception as e:
            print(f"❌ Fel för {title}: {e}")
        processed += 1
        if processed % progress_every == 0:
            # ETA from the average time per finished file so far.
            elapsed = time.time() - crawl_start
            avg_time = elapsed / processed
            remaining = (len(files) - processed) * avg_time
            eta = time.strftime("%H:%M:%S", time.gmtime(remaining))
            print(f"⏳ {processed}/{len(files)} klara (ETA ~{eta})...")
            # Checkpoint so an interrupted run keeps its progress.
            save_json(POINTS_CACHE_FILE, points_cache)

save_json(POINTS_CACHE_FILE, points_cache)
points = list(points_cache.values())
print(f"✅ Färdig! Punkter med koordinater: {len(points)}")


# ===============================
# Create the map and the license layers with colored clusters
# ===============================
# Base map with OpenStreetMap tiles, centered on [59.5, 18.8] at zoom level 8.
m = folium.Map(location=[59.5, 18.8], zoom_start=8, tiles="OpenStreetMap")

# Add the SAT trail if the GeoJSON file exists
if Path(SAT_GEOJSON_PATH).exists():
    folium.GeoJson(
        SAT_GEOJSON_PATH,
        name="SAT-led",
        style_function=lambda x: {"color": "#ff6600", "weight": 3}
    ).add_to(m)

# Factory for a MarkerCluster whose cluster bubbles are drawn in one color
def make_cluster(color):
    """Create a ``MarkerCluster`` with a custom JavaScript icon function.

    The icon is a 40x40 round ``L.DivIcon`` filled with ``color`` that shows
    the number of child markers in white text.

    Args:
        color: CSS color inserted verbatim into the icon's inline style.
    """
    icon_js = f"""
        function (cluster) {{
            var count = cluster.getChildCount();
            return new L.DivIcon({{
                html: '<div style="background-color:{color}; border-radius:50%; color:white; text-align:center; line-height:40px; width:40px; height:40px;">' + count + '</div>',
                className: 'custom-cluster-icon',
                iconSize: [40, 40]
            }});
        }}
        """
    return MarkerCluster(icon_create_function=icon_js)

# Create one FeatureGroup with its own colored MarkerCluster per known license
license_layers = {}
for qid, (label, url) in KNOWN_LICENSES.items():
    fg = folium.FeatureGroup(name=f"{label}", show=True)
    color = LICENSE_COLORS.get(qid, "gray")
    cluster = make_cluster(color)
    cluster.add_to(fg)
    fg.add_to(m)
    # Markers are added to the cluster (not the group), so the cluster is what gets stored.
    license_layers[qid] = cluster

# Layer for other/unknown licenses
other_fg = folium.FeatureGroup(name="Andra/okända licenser", show=True)
other_cluster = make_cluster("black")
other_cluster.add_to(other_fg)
other_fg.add_to(m)

# ===============================
# Add each point to the layer matching its license
# ===============================
for p in points:
    popup = folium.Popup(make_popup(p), max_width=320)
    # Only the first license decides layer and color; the popup lists all of them.
    qid = p["licenses"][0] if p["licenses"] else None
    cluster = license_layers.get(qid, other_cluster)
    # Licenses without a dedicated layer land in the "other" cluster, so they get
    # its black color too (gray is already taken by Public Domain Mark).
    color = LICENSE_COLORS.get(qid, "black")

    folium.CircleMarker(
        [p["lat"], p["lon"]],
        radius=5,
        color=color,
        fill=True,
        fill_opacity=0.9,
        tooltip=p["title"],
        popup=popup,
    ).add_to(cluster)

# ===============================
# Add the layer control
# ===============================
# Expanded layer switcher listing the named layers added to the map.
folium.LayerControl(collapsed=False).add_to(m)

print("🗺️ Karta med färgade MarkerCluster per licenstyp klar! Använd display(m) i notebook eller m.save('sat_commons_map.html') för att spara.")


🧠 Initierar HTTP-cache (7 dagar)...
🚀 Startar körning med cache och ETA...
📂 Startar hämtning av filer från Category:Stockholm Archipelago Trail...
✅ Laddade 6741 filer från cache.
📊 Totalt 6741 filer att behandla.
⏳ 25/6741 klara (ETA ~00:00:15)...
⏳ 50/6741 klara (ETA ~00:00:13)...
⏳ 75/6741 klara (ETA ~00:00:13)...
⏳ 100/6741 klara (ETA ~00:00:13)...
⏳ 125/6741 klara (ETA ~00:00:13)...
⏳ 150/6741 klara (ETA ~00:00:12)...
⏳ 175/6741 klara (ETA ~00:00:12)...
⏳ 200/6741 klara (ETA ~00:00:12)...
⏳ 225/6741 klara (ETA ~00:00:12)...
⏳ 250/6741 klara (ETA ~00:00:11)...
⏳ 275/6741 klara (ETA ~00:00:11)...
⏳ 300/6741 klara (ETA ~00:00:11)...
⏳ 325/6741 klara (ETA ~00:00:11)...
⏳ 350/6741 klara (ETA ~00:00:11)...
⏳ 375/6741 klara (ETA ~00:00:11)...
⏳ 400/6741 klara (ETA ~00:00:11)...
⏳ 425/6741 klara (ETA ~00:00:10)...
⏳ 450/6741 klara (ETA ~00:00:10)...
⏳ 475/6741 klara (ETA ~00:00:10)...
⏳ 500/6741 klara (ETA ~00:00:10)...
⏳ 525/6741 klara (ETA ~00:00:10)...
⏳ 550/6741 klara (ETA ~00:00:10)

In [3]:
from string import Template 
import html as _html 
from datetime import datetime
# =========================
# ABOUT BOX
# =========================
def add_about_box(
    m,
    issue_number: int,
    map_name: str,
    created_date: str | None = None,
    repo: str = "salgo60/Stockholm_Archipelago_Trail",
    collapsed: bool = False,
):
    """Attach a collapsible "About" box with project links to a folium map.

    The box shows the GitHub issue, the map name, the creation time and a fixed
    list of project links. Clicking the header toggles the body.

    Args:
        m: The folium map; its ``get_name()`` makes the element ids unique.
        issue_number: GitHub issue number shown and linked in the box.
        map_name: Display name of the map (HTML-escaped).
        created_date: Creation timestamp; defaults to now as ``YYYY-MM-DD HH:MM``.
        repo: ``owner/name`` of the GitHub repository used for the issue link.
        collapsed: When true the box starts with its body hidden.
    """
    if created_date is None:
        created_date = datetime.now().strftime("%Y-%m-%d %H:%M")
    map_dom_id = m.get_name()
    box_id     = f"sat-about-{map_dom_id}"
    header_id  = f"{box_id}-hdr"
    issue_url  = f"https://github.com/{repo}/issues/{issue_number}"
    # Extra CSS class that hides the body; the header click handler toggles it.
    collapsed_class = " sat-about-collapsed" if collapsed else ""

    links = [
        ("SAT Dashboard", "https://raw.githack.com/salgo60/Stockholm_Archipelago_Trail/main/notebook/output/SAT_ALL_IN_ONE_142_3_dashboard_latest.html"),
        ("Project repo issues", "https://github.com/salgo60/Stockholm_Archipelago_Trail/issues?q=is%3Aissue"),
        ("Trail on OSM (rel 19012437)", "https://www.openstreetmap.org/relation/19012437"),
        ("Trail on Wikicommons", "https://commons.wikimedia.org/wiki/Category:Stockholm_Archipelago_Trail"),
        ("Official page", "https://stockholmarchipelagotrail.com/"),
        ("Unofficial FB group", "https://www.facebook.com/groups/2875020699552247"),
        ("Visit Sweden", "https://traveltrade.visitsweden.com/plan/news-sweden/Stockholm-Archipelago-Trail/"),
    ]
    links_html = "".join(
        f'<div><a href="{_html.escape(u)}" target="_blank">🔗 {_html.escape(t)}</a></div>'
        for t, u in links
    )
    # NOTE(review): .sat-about is position: fixed without top/left/bottom/right
    # offsets — confirm the intended placement on the page.
    tpl = Template(r"""
<style>
  .sat-about { position: fixed; z-index: 10000; background: rgba(255,255,255,0.97);
    border: 2px solid #666; border-radius: 10px; box-shadow: 0 2px 6px rgba(0,0,0,0.25);
    font: 12px/1.35 system-ui, sans-serif; pointer-events: auto;
    min-width: 240px; max-width: 320px; }
  .sat-about-header { cursor: pointer; padding: 8px 10px; font-weight: 700;
    display: flex; align-items: center; gap: 6px; user-select: none;
    background: rgba(248,248,248,.9); border-bottom: 1px solid #e5e7eb; }
  .sat-about-body { padding: 8px 10px 10px 10px; }
  .sat-about-collapsed .sat-about-body { display: none; }
  .sat-chevron { margin-left: auto; transition: transform .15s ease-in-out; }
  .sat-about-collapsed .sat-chevron { transform: rotate(-90deg); }
</style>

<div id="$box_id" class="sat-about${collapsed_class}">
  <div id="$header_id" class="sat-about-header">
    <span>ℹ️ About</span><span class="sat-chevron">▸</span>
  </div>
  <div class="sat-about-body">
    <div style="font-weight:700;margin-bottom:4px;">Stockholm Archipelago Trail Map</div>
    <div>Issue: <a href="$issue_url" target="_blank">#$issue_number</a>&nbsp;&nbsp; Map: $map_name</div>
    <div>Created: $created_date</div>
    <div class="sat-links">$links_html</div>
  </div>
</div>

<script>
  (function () {
    var box = document.getElementById("$box_id");
    var header = document.getElementById("$header_id");
    if (!box || !header) { return; }
    header.addEventListener("click", function () {
      box.classList.toggle("sat-about-collapsed");
    });
  })();
</script>
""")
    html_snippet = tpl.substitute(
        box_id=box_id, header_id=header_id,
        collapsed_class=collapsed_class,
        issue_url=_html.escape(issue_url), issue_number=str(issue_number),
        map_name=_html.escape(map_name), created_date=_html.escape(created_date),
        links_html=links_html,
    )
    m.get_root().html.add_child(folium.Element(html_snippet))

In [4]:
# Attach the About box for issue #237 to the map; the creation time defaults to now.
add_about_box(m, issue_number=237, map_name="SAT images with license")


In [5]:
# ===============================
# Collapsible legend (starts expanded)
# ===============================
# Opening part: styles, the box container and the introductory text.
legend_html = """
<style>
  .legend-box {
      position: fixed;
      bottom: 20px;
      right: 20px;
      z-index: 9999;
      background-color: rgba(255, 255, 255, 0.95);
      border: 2px solid #ccc;
      border-radius: 14px;
      font-size: 13px;
      box-shadow: 0 2px 8px rgba(0,0,0,0.25);
      max-width: 270px;
      overflow: hidden;
  }
  .legend-header {
      padding: 6px 10px;
      background-color: #f2f2f2;
      cursor: pointer;
      font-weight: bold;
      border-bottom: 1px solid #ccc;
      text-align: center;
  }
  .legend-content {
      display: block; /* 👈 Startar expanderad */
      padding: 10px 14px;
      line-height: 1.6;
  }
  .legend-content small {
      display: block;
      margin-bottom: 6px;
  }
  .legend-item {
      margin-top: 4px;
  }
  .legend-color {
      display: inline-block;
      width: 14px;
      height: 14px;
      border-radius: 3px;
      margin-right: 6px;
      vertical-align: middle;
  }
</style>

<div class="legend-box" id="license-legend">
  <div class="legend-header" onclick="toggleLegend()">📸 Dölj licenser</div>
  <div class="legend-content">
    <small>
      Bilderna kommer från
      <a href="https://commons.wikimedia.org" target="_blank" style="color:#0645AD;">Wikimedia Commons</a>.<br>
      Färgerna visar bildens Creative Commons-licens. Klicka för att läsa mer.
    </small>
    <hr style="margin:6px 0;border:none;border-top:1px solid #ddd;">
"""

# Dynamic content: one colored row per known license, linking to its deed
for qid, (label, url) in KNOWN_LICENSES.items():
    color = LICENSE_COLORS.get(qid, "gray")
    legend_html += f"""
    <div class="legend-item">
      <span class="legend-color" style="background-color:{color};"></span>
      <a href="{url}" target="_blank" style="text-decoration:none;color:#0645AD;">{label}</a>
    </div>
    """

# Closing part: the "other/unknown" row, a note, and the script that toggles the content.
legend_html += """
    <div class="legend-item">
      <span class="legend-color" style="background-color:black;"></span>
      <span style="color:#333;">Andra/okända licenser</span>
    </div>
    <hr style="margin:6px 0;border:none;border-top:1px solid #ddd;">
    <small>
      💡 Vissa bilder (t.ex. från
      <a href="http://www.arkivdigital.net" target="_blank" style="color:#0645AD;">Arkiv Digital</a>)
      kan ha egna villkor. Se bildens Commons-sida.
    </small>
  </div>
</div>

<script>
  function toggleLegend() {
      var content = document.querySelector('.legend-content');
      var header = document.querySelector('.legend-header');
      if (content.style.display === 'block' || content.style.display === '') {
          content.style.display = 'none';
          header.innerHTML = '📸 Visa licenser';
      } else {
          content.style.display = 'block';
          header.innerHTML = '📸 Dölj licenser';
      }
  }
</script>
"""

# Inject the finished legend markup into the map's root HTML.
m.get_root().html.add_child(folium.Element(legend_html))


<branca.element.Element at 0x10903e0c0>

In [6]:
# Write the finished map; create the target directory first so a fresh
# checkout without an output/ folder does not fail with FileNotFoundError.
OUTPUT_HTML = "output/237_SAT_images.html"
Path(OUTPUT_HTML).parent.mkdir(parents=True, exist_ok=True)
m.save(OUTPUT_HTML)

In [7]:
# Report the wall-clock seconds elapsed since `start_time` was last assigned.
end_time = time.time()
duration = end_time - start_time
print(f"Finished in {duration:.2f} seconds.")


Finished in 12.07 seconds.
