* [#4](https://github.com/salgo60/Naturreservat-Sverige/issues/4) 
* denna Notebook [4_Automatch_WD_OSM](https://github.com/salgo60/Naturreservat-Sverige/blob/main/Notebook/4_Automatch_WD_OSM.ipynb)


In [42]:
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

# Send a descriptive User-Agent: the Wikidata Query Service asks clients to
# identify themselves and may throttle generic agents.
sparql = SPARQLWrapper(
    "https://query.wikidata.org/sparql",
    agent="Naturreservat-Sverige notebook (https://github.com/salgo60/Naturreservat-Sverige)",
)

# Items in Sweden (P17 = Q34) that are instances of Q179049, have a
# coordinate (P625) and an ?nrvid value (P3613), and do NOT yet have
#   * a truthy OSM relation (P402) or OSM way/object (P10689) statement,
#   * a dissolved date (P576),
#   * a P402 / P10689 statement of any rank whose reference URL points at an
#     openstreetmap.org note.
# ?url is only ever bound inside the MINUS groups, so it is not selected.
query = """
SELECT ?item ?itemLabel ?nrvid ?lat ?lon WHERE {
  ?item wdt:P31 wd:Q179049;
        wdt:P17 wd:Q34;
        wdt:P625 ?coord;
        wdt:P3613 ?nrvid.

  MINUS { ?item wdt:P402 ?rel. }
  MINUS { ?item wdt:P10689 ?way. }
  MINUS { ?item wdt:P576 ?dissolved. }

  BIND(geof:latitude(?coord) AS ?lat)
  BIND(geof:longitude(?coord) AS ?lon)
  MINUS {
    ?item p:P10689 ?stmt.
    ?stmt prov:wasDerivedFrom ?ref.
    ?ref pr:P854 ?url.
    FILTER(CONTAINS(STR(?url), "openstreetmap.org/note"))
  }
  MINUS {
    ?item p:P402 ?stmt.
    ?stmt prov:wasDerivedFrom ?ref.
    ?ref pr:P854 ?url.
    FILTER(CONTAINS(STR(?url), "openstreetmap.org/note"))
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
"""

sparql.setQuery(query)
sparql.setReturnFormat(JSON)
data = sparql.query().convert()

# One record per result binding. A comprehension keeps the loop variable out
# of the notebook namespace (later cells use `r` for an HTTP response).
rows = [
    {
        "wd": binding["item"]["value"],
        "label": binding["itemLabel"]["value"],
        "nrvid": binding["nrvid"]["value"],
        "lat": float(binding["lat"]["value"]),
        "lon": float(binding["lon"]["value"]),
    }
    for binding in data["results"]["bindings"]
]

wd_df = pd.DataFrame(rows)
wd_df.head()


Unnamed: 0,wd,label,nrvid,lat,lon
0,http://www.wikidata.org/entity/Q10509731,Grängshytteforsarna,2000253,59.71651,14.782602
1,http://www.wikidata.org/entity/Q10524585,Hissö,2002397,56.957118,14.799846
2,http://www.wikidata.org/entity/Q11872000,Klusåbergets naturreservat,2044059,66.156619,20.827351
3,http://www.wikidata.org/entity/Q18291412,Kronoberg,2002396,56.932458,14.782011
4,http://www.wikidata.org/entity/Q18331194,Kycklingkullen,2027491,57.895173,13.451222


In [43]:
wd_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201 entries, 0 to 200
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   wd      201 non-null    object 
 1   label   201 non-null    object 
 2   nrvid   201 non-null    object 
 3   lat     201 non-null    float64
 4   lon     201 non-null    float64
dtypes: float64(2), object(3)
memory usage: 8.0+ KB


In [14]:
import requests

# Each filter is limited to the bounding box (south, west, north, east) that
# the protected-area query further down also uses. Without a spatial limit
# the tag-only filters (especially "wikidata") run against the whole planet,
# which the 60 s server-side timeout cannot realistically satisfy.
overpass_query = """
[out:json][timeout:60];
(
  nwr["ref:NVRID"](55,11,69,24);
  nwr["leisure"="nature_reserve"](55,11,69,24);
  nwr["wikidata"](55,11,69,24);
);
out center tags;
"""

url = "https://overpass-api.de/api/interpreter"
# requests never times out on its own; allow a margin above the server limit.
resp = requests.post(url, data={"data": overpass_query}, timeout=90)
# Fail here on an HTTP error instead of with an opaque JSON decode error below.
resp.raise_for_status()
osm = resp.json()


In [15]:
# Flatten the Overpass elements into one record per element that has a
# usable position.
osm_rows = []

for el in osm["elements"]:
    tags = el.get("tags", {})
    center = el.get("center", {})

    # Prefer the element's own lat/lon and fall back to the "center" object
    # requested via `out center`. Compare against None rather than relying on
    # truthiness so a legitimate 0.0 coordinate is not treated as missing.
    lat = el["lat"] if el.get("lat") is not None else center.get("lat")
    lon = el["lon"] if el.get("lon") is not None else center.get("lon")
    if lat is None or lon is None:
        continue

    osm_rows.append({
        "type": el["type"],
        "id": el["id"],
        "lat": lat,
        "lon": lon,
        "wikidata": tags.get("wikidata"),
        "nrvid": tags.get("ref:NVRID"),
        "leisure": tags.get("leisure")
    })

osm_df = pd.DataFrame(osm_rows)
osm_df.head()


In [16]:
wd_df

Unnamed: 0,wd,label,nrvid,lat,lon
0,http://www.wikidata.org/entity/Q10501019,Fårhagsberget,2002288,57.249529,16.250354
1,http://www.wikidata.org/entity/Q10509731,Grängshytteforsarna,2000253,59.716510,14.782602
2,http://www.wikidata.org/entity/Q10524585,Hissö,2002397,56.957118,14.799846
3,http://www.wikidata.org/entity/Q10531425,Hällingsåfallet,2001199,64.350681,14.390908
4,http://www.wikidata.org/entity/Q10531844,Hästbergs klack,2002084,60.329444,15.066944
...,...,...,...,...,...
1276,http://www.wikidata.org/entity/Q133794797,Junkerkölens naturreservat,2055859,65.904140,20.906590
1277,http://www.wikidata.org/entity/Q133794802,Stora Njakajaure naturreservat,2062481,65.930270,18.185490
1278,http://www.wikidata.org/entity/Q133794803,Naulajärvi-Kirvesvuoma naturreservat,2062781,66.794940,21.244600
1279,http://www.wikidata.org/entity/Q133794800,Stor-Getarbergets naturreservat,2063681,65.613910,18.389610


In [17]:
import geopandas as gpd

# Build one point per Wikidata row (x = longitude, y = latitude) and attach it
# as the geometry column, declared as EPSG:4326.
wd_points = gpd.points_from_xy(x=wd_df["lon"], y=wd_df["lat"])
wd_gdf = gpd.GeoDataFrame(wd_df, geometry=wd_points, crs="EPSG:4326")


In [18]:
wd_gdf.head()

Unnamed: 0,wd,label,nrvid,lat,lon,geometry
0,http://www.wikidata.org/entity/Q10501019,Fårhagsberget,2002288,57.249529,16.250354,POINT (16.25035 57.24953)
1,http://www.wikidata.org/entity/Q10509731,Grängshytteforsarna,2000253,59.71651,14.782602,POINT (14.7826 59.71651)
2,http://www.wikidata.org/entity/Q10524585,Hissö,2002397,56.957118,14.799846,POINT (14.79985 56.95712)
3,http://www.wikidata.org/entity/Q10531425,Hällingsåfallet,2001199,64.350681,14.390908,POINT (14.39091 64.35068)
4,http://www.wikidata.org/entity/Q10531844,Hästbergs klack,2002084,60.329444,15.066944,POINT (15.06694 60.32944)


In [20]:
import requests
import osm2geojson

# Ways and relations tagged as protected areas or nature reserves inside the
# bounding box (south, west, north, east) = (55, 11, 69, 24), returned with
# full geometry (`out geom`).
overpass_query = """
[out:json][timeout:120];
(
  relation["boundary"="protected_area"](55,11,69,24);
  way["boundary"="protected_area"](55,11,69,24);
  relation["leisure"="nature_reserve"](55,11,69,24);
  way["leisure"="nature_reserve"](55,11,69,24);
);
out geom;
"""

url = "https://overpass-api.de/api/interpreter"
# requests never times out on its own; without a client-side limit a stalled
# connection would hang the kernel. Allow a margin above the 120 s server limit.
r = requests.post(url, data={"data": overpass_query}, timeout=180)
# Show the status and the start of the body so an error page is easy to spot.
print(r.status_code)
print(r.text[:500])


200
{
  "version": 0.6,
  "generator": "Overpass API 0.7.62.10 2d4cfc48",
  "osm3s": {
    "timestamp_osm_base": "2026-02-17T21:41:00Z",
    "copyright": "The data included in this document is from www.openstreetmap.org. The data is made available under ODbL."
  },
  "elements": [

{
  "type": "way",
  "id": 4157140,
  "bounds": {
    "minlat": 59.2178868,
    "minlon": 10.5385463,
    "maxlat": 59.2198617,
    "maxlon": 10.5404155
  },
  "nodes": [
    23666848,
    4776161962,
    4776161963,
    


In [21]:
import json
import geopandas as gpd

# --- 1) Safe JSON parse ---
# Stop early with the start of the response body if it does not look like a
# JSON object.
if not r.text.strip().startswith("{"):
    raise ValueError("Overpass returnerade inte JSON:\n" + r.text[:1000])

osm_json = r.json()

# --- 2) Convert to GeoJSON ---
geojson = osm2geojson.json2geojson(osm_json)

# --- 3) Build the GeoDataFrame ---
osm_gdf = gpd.GeoDataFrame.from_features(geojson["features"], crs="EPSG:4326")

print("Antal features:", len(osm_gdf))

# --- 4) Clean up geometries ---
# .copy() makes the filtered frame independent, so the geometry assignment
# below does not write into a slice of the unfiltered frame.
osm_gdf = osm_gdf[osm_gdf.geometry.notnull()].copy()

try:
    osm_gdf["geometry"] = osm_gdf["geometry"].make_valid()
except (AttributeError, NotImplementedError):
    # make_valid is unavailable on older GeoPandas/Shapely; fall back to the
    # buffer(0) repair. Only these two errors are caught so that any other
    # failure is not silently hidden.
    osm_gdf["geometry"] = osm_gdf["geometry"].buffer(0)

# Drop geometries that are still invalid
osm_gdf = osm_gdf[osm_gdf.is_valid]

print("Efter cleanup:", len(osm_gdf))

# --- 5) Reproject to EPSG:3857, the CRS the spatial-join cell works in ---
osm_gdf = osm_gdf.to_crs(3857)




Failed to convert relation to shape: 
 GEOSException('TopologyException: side location conflict at 11.675698990196006 58.121017776299496. This can occur if the input geometry is invalid.') {'bounds': {'maxlat': 58.1220316,
            'maxlon': 11.707381,
            'minlat': 58.0991892,
            'minlon': 11.6512585},
 'id': 1026566,
 'members': [{'geometry': [{'lat': 58.1114279, 'lon': 11.6652596},
                           {'lat': 58.1112097, 'lon': 11.6650665},
                           {'lat': 58.1055387, 'lon': 11.6552496},
                           {'lat': 58.0997448, 'lon': 11.6512585}],
              'ref': 66106125,
              'role': 'outer',
              'type': 'way',
              'used': 1026566},
             {'geometry': [{'lat': 58.1215448, 'lon': 11.6762727},
                           {'lat': 58.1214831, 'lon': 11.6766882},
                           {'lat': 58.1214292, 'lon': 11.6769663},
                           {'lat': 58.1214033, 'lon': 11.6772751},

Antal features: 15320
Efter cleanup: 15320


Unnamed: 0,geometry,type,id,tags,nodes
0,"LINESTRING (1173145.608 8227752.908, 1173151.7...",way,4157140,"{'access:conditional': 'no @ Apr 15 - Jul 15',...","[23666848, 4776161962, 4776161963, 4776161970,..."
1,"LINESTRING (1174791.789 8224760.89, 1174787.31...",way,4157244,"{'access:conditional': 'no@Apr 15 - Jul 15', '...","[23669848, 12560503967, 12560503940, 125605039..."
2,"LINESTRING (1175383.508 8224501.954, 1175410.9...",way,4157247,"{'access:conditional': 'no @ Apr 15 - Jul 15',...","[23669883, 12560503906, 12560503949, 23669880,..."
3,"LINESTRING (1175131.881 8224178.846, 1175156.4...",way,4157249,"{'access:conditional': 'no @ Apr 15 - Jul 15',...","[23669946, 23669943, 12560503958, 12560503914,..."
4,"LINESTRING (1174739.647 8223743.201, 1174712.1...",way,4157258,"{'access:conditional': 'no @ Apr 15 - Jul 15',...","[23670193, 12560517011, 23670191, 23670189, 23..."


In [26]:
osm_gdf.head()

Unnamed: 0,geometry,type,id,tags,nodes
0,"LINESTRING (1173145.608 8227752.908, 1173151.7...",way,4157140,"{'access:conditional': 'no @ Apr 15 - Jul 15',...","[23666848, 4776161962, 4776161963, 4776161970,..."
1,"LINESTRING (1174791.789 8224760.89, 1174787.31...",way,4157244,"{'access:conditional': 'no@Apr 15 - Jul 15', '...","[23669848, 12560503967, 12560503940, 125605039..."
2,"LINESTRING (1175383.508 8224501.954, 1175410.9...",way,4157247,"{'access:conditional': 'no @ Apr 15 - Jul 15',...","[23669883, 12560503906, 12560503949, 23669880,..."
3,"LINESTRING (1175131.881 8224178.846, 1175156.4...",way,4157249,"{'access:conditional': 'no @ Apr 15 - Jul 15',...","[23669946, 23669943, 12560503958, 12560503914,..."
4,"LINESTRING (1174739.647 8223743.201, 1174712.1...",way,4157258,"{'access:conditional': 'no @ Apr 15 - Jul 15',...","[23670193, 12560517011, 23670191, 23670189, 23..."


In [27]:
osm_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 15320 entries, 0 to 15319
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   geometry  15320 non-null  geometry
 1   type      15320 non-null  object  
 2   id        15320 non-null  int64   
 3   tags      15320 non-null  object  
 4   nodes     12272 non-null  object  
dtypes: geometry(1), int64(1), object(3)
memory usage: 598.6+ KB


In [30]:
# Bring both layers into EPSG:3857 so the join compares like with like.
wd_gdf = wd_gdf.to_crs(epsg=3857)
osm_gdf = osm_gdf.to_crs(epsg=3857)

import geopandas as gpd

# Left join: every Wikidata point is kept; the OSM columns are filled only
# where the point lies within an OSM geometry.
osm_join_cols = osm_gdf[["geometry", "id", "type", "tags"]]
matches = gpd.sjoin(
    wd_gdf, osm_join_cols, how="left", predicate="within"
).reset_index(drop=True)


In [31]:
matches


Unnamed: 0,wd,label,nrvid,lat,lon,geometry,index_right,id,type,tags
0,http://www.wikidata.org/entity/Q10501019,Fårhagsberget,2002288,57.249529,16.250354,POINT (1808981.15 7811292.275),2139.0,100610138.0,way,"{'leisure': 'nature_reserve', 'lst:area_ha': '..."
1,http://www.wikidata.org/entity/Q10509731,Grängshytteforsarna,2000253,59.716510,14.782602,POINT (1645591.71 8336890.679),,,,
2,http://www.wikidata.org/entity/Q10524585,Hissö,2002397,56.957118,14.799846,POINT (1647511.325 7751359.082),,,,
3,http://www.wikidata.org/entity/Q10531425,Hällingsåfallet,2001199,64.350681,14.390908,POINT (1601988.559 9439379.672),480.0,43002332.0,way,"{'leisure': 'nature_reserve', 'lst:area_12': '..."
4,http://www.wikidata.org/entity/Q10531844,Hästbergs klack,2002084,60.329444,15.066944,POINT (1677244.583 8473453.153),623.0,43189852.0,way,"{'leisure': 'nature_reserve', 'lst:ajour': '20..."
...,...,...,...,...,...,...,...,...,...,...
1282,http://www.wikidata.org/entity/Q133794797,Junkerkölens naturreservat,2055859,65.904140,20.906590,POINT (2327310.953 9850659.193),,,,
1283,http://www.wikidata.org/entity/Q133794802,Stora Njakajaure naturreservat,2062481,65.930270,18.185490,POINT (2024399.487 9857787.568),,,,
1284,http://www.wikidata.org/entity/Q133794803,Naulajärvi-Kirvesvuoma naturreservat,2062781,66.794940,21.244600,POINT (2364938.054 10097881.821),,,,
1285,http://www.wikidata.org/entity/Q133794800,Stor-Getarbergets naturreservat,2063681,65.613910,18.389610,POINT (2047122.021 9771968.057),,,,


In [32]:
# Rows that picked up an OSM "id" in the spatial join.
has_osm_id = matches["id"].notna()
matched = matches.loc[has_osm_id]
print("Matchade:", matched.shape[0])

Matchade: 1078


In [33]:
# Rows for which the spatial join found no OSM feature ("id" is null).
lacks_osm_id = matches["id"].isna()
missing = matches.loc[lacks_osm_id]
print("Saknas:", missing.shape[0])

Saknas: 209


In [34]:
# Export the unmatched points as GeoJSON, reprojected to EPSG:4326 first.
missing_wgs84 = missing.to_crs(epsg=4326)
missing_wgs84.to_file("wd_missing_in_osm.geojson", driver="GeoJSON")

In [35]:
# status is True where the join produced an OSM id, False otherwise.
matches["status"] = ~matches["id"].isna()
matches["status"].value_counts()

status
True     1078
False     209
Name: count, dtype: int64

In [None]:
matches.info()

In [38]:
import html

import folium

# QID = last path segment of the Wikidata entity URI
matches["qid"] = matches["wd"].str.split("/").str[-1]

# OSM id as a nullable integer. Assigning the result of dropna().astype(int)
# back to the frame re-aligns on the index and silently turns the column back
# into float with NaN; "Int64" keeps real integers and <NA> for unmatched rows.
matches["osm_id"] = matches["id"].astype("Int64")

# WGS84 for the map
map_gdf = matches.to_crs(4326)
m = folium.Map(location=[62, 15], zoom_start=5)

layer_match = folium.FeatureGroup(name="Match")
layer_missing = folium.FeatureGroup(name="Saknas")

# The loop variable is deliberately not called `r`: that name holds the
# Overpass HTTP response that an earlier cell reads again.
for _, row in map_gdf.iterrows():

    # Values come from external sources (Wikidata / OSM); escape them before
    # interpolating into popup HTML.
    qid = html.escape(str(row["qid"]))
    label = html.escape(str(row["label"]))
    nrvid = html.escape(str(row["nrvid"]))
    wd_link = f"https://www.wikidata.org/wiki/{qid}"

    if row["status"]:
        # row["type"] is "way" or "relation"
        osm_ref = f"{row['type']}/{int(row['osm_id'])}"
        osm_html = (
            f'<a href="https://www.openstreetmap.org/{osm_ref}" '
            f'target="_blank">{osm_ref}</a>'
        )
    else:
        # No OSM match: show a plain dash instead of a "nan/—" link with an
        # empty href.
        osm_html = "—"

    popup = folium.Popup(f"""
        <b>{label}</b><br><br>
        WD: <a href="{wd_link}" target="_blank">{qid}</a><br>
        OSM: {osm_html}<br>
        NVRID: {nrvid}<br>
        Status: {row["status"]}
    """, max_width=250)

    marker = folium.CircleMarker(
        [row.geometry.y, row.geometry.x],
        radius=6,
        color="green" if row["status"] else "red",
        fill=True,
        popup=popup
    )

    marker.add_to(layer_match if row["status"] else layer_missing)

layer_match.add_to(m)
layer_missing.add_to(m)

folium.LayerControl().add_to(m)
m.save("wd_vs_osm_QA_map.html")

In [39]:
m

## Create QuickStatements file

In [41]:
matches = matches.reset_index(drop=True)

# QID = last path segment of the Wikidata entity URI
matches["qid"] = matches["wd"].str.split("/").str[-1]

# Only rows with an OSM match
qs_df = matches[matches["status"]].copy()

# The join left "id" as float (NaN for unmatched rows); matched rows can be
# cast to plain integers.
qs_df["osm_id"] = qs_df["id"].astype(int)

lines = []

# zip over the columns instead of iterrows() with a variable named `r`, which
# would overwrite the Overpass HTTP response that an earlier cell reads.
for qid, osm_type, osm_id in zip(qs_df["qid"], qs_df["type"], qs_df["osm_id"]):

    if osm_type == "relation":
        # P402 (OpenStreetMap relation ID) takes the bare numeric id.
        lines.append(f'{qid}|P402|"{osm_id}"')

    elif osm_type == "way":
        # P10689 (OpenStreetMap object) takes "<type>/<id>", e.g. "way/123";
        # a bare number violates the property's format and breaks its link.
        lines.append(f'{qid}|P10689|"way/{osm_id}"')

# Drop exact duplicate statements (e.g. the same item joined to the same OSM
# feature more than once) while keeping the original order.
# NOTE(review): an item whose point falls inside several different OSM
# features still yields one statement per feature — review those manually
# before uploading.
lines = list(dict.fromkeys(lines))

with open("osm_ids.qs", "w", encoding="utf-8") as f:
    f.write("\n".join(lines))

print("QuickStatements file created:", len(lines), "statements")



QuickStatements file created: 1078 statements
