Gets all Swedish bathing waters in this project from Wikidata and checks whether each one is connected to an OSM object, using an [API](https://osm.wikidata.link/tagged)

* [github salgo60/Svenskabadplatser](https://github.com/salgo60/Svenskabadplatser)
* this [Notebook](https://github.com/salgo60/Svenskabadplatser/blob/main/Jupyter/OSM_Wikidata_Bathwater.ipynb)

* API [Wikidata to OpenStreetMap](https://osm.wikidata.link/tagged)
  * ex [https://osm.wikidata.link/tagged/api/item/Q106708773](https://osm.wikidata.link/tagged/api/item/Q106708773)
* Wikidata badplats <-> Naturreservat [SPARQL](https://w.wiki/3MwX)

* Another tool [osm.wikidata.link](https://osm.wikidata.link/search)
  
Status:
* 20210525  Badplatser 2789 ej OSM kopplade 2076
  * vatten kopplade 1104 ej OSM kopplade 234
  * naturreservat kopplade 209 ej OSM kopplade 84
* 20210524  Badplatser 2787 ej OSM kopplade 2080
  * vatten kopplade 1094 ej OSM kopplade 230
  * naturreservat kopplade 208 ej OSM kopplade 84
* 20210523  Badplatser 2766 ej OSM kopplade 2120
  * vatten kopplade 1030 ej OSM kopplade 219
  * naturreservat kopplade 205 ej OSM kopplade 86
* 20210522  Badplatser 2761 ej OSM kopplade 2142
  * vatten kopplade 1001 ej OSM kopplade 216
  * naturreservat kopplade 205 ej OSM kopplade 87
* 20210521 badplatser 2756 ej OSM kopplade 2199
  * vatten kopplade 979 ej OSM kopplade 215
* 20210519 badplatser 2757 ej OSM kopplade 2257
  * vatten kopplade 594 ej OSM kopplade 59
* 20210518 badplatser 2755 ej OSM kopplade 2327
  * vatten kopplade 484 ej OSM kopplade 60

In [1]:
from datetime import datetime
# Timestamp of this run; the final status cell reuses it to date-stamp the summary line.
start_time  = datetime.now()
print("Last run: ", start_time)

Last run:  2021-05-25 19:39:56.779293


In [2]:
import pandas as pd


In [3]:
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"
 
# Saved query: https://w.wiki/3LWk
# Selects every item with P6104 = Q106774536 (presumably the marker for this
# bathing-water project -- confirm on Wikidata). The NUTS code, the coordinate
# and the water-body link are all OPTIONAL, so items that lack them are still
# returned with those cells empty. The REPLACE(...) calls strip everything up
# to the "Q" so only the bare QID remains.
queryBath = """SELECT (REPLACE(STR(?nodeBath), ".*Q", "Q") AS ?qbathid) ?nodeBathLabel ?nutsCode ?SJOID
(REPLACE(STR(?nodeWater), ".*Q", "Q") AS ?qlakeid) ?nodeWaterLabel ?coord WHERE {
  ?nodeBath wdt:P6104 wd:Q106774536.
  OPTIONAL { ?nodeBath wdt:P605 ?nutsCode. }
  OPTIONAL { ?nodeBath wdt:P625 ?coord. }
  OPTIONAL {
    ?nodeBath wdt:P206 ?nodeWater.
    OPTIONAL { ?nodeWater wdt:P761 ?SJOID. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?nodeBathLabel)"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result bindings as a pandas DataFrame.

    Columns follow the order of the variables listed in the response head.
    A variable that is unbound in a given result row becomes None in that row.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent="salgo60/%s.%s" % (major, minor))
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Decode the raw response body directly as JSON.
    payload = json.load(client.query().response)
    columns = payload['head']['vars']

    # One list per binding, with values aligned to the column order.
    records = [
        [binding.get(name, {}).get('value') for name in columns]
        for binding in payload['results']['bindings']
    ]
    return pd.DataFrame(records, columns=columns)

# Fetch all bathing places. NOTE: there is one row per result binding, not per
# item -- an item with more than one coordinate value shows up several times
# (e.g. Q106707080 in the head() output below), so the row count can exceed
# the number of distinct bathing places.
WDBath = get_sparql_dataframe(endpoint_url, queryBath)
WDBath["Source"] = "WD"     
WDBath.shape

(2789, 8)

In [23]:
WDBath.head()

Unnamed: 0,qbathid,nodeBathLabel,nutsCode,SJOID,qlakeid,nodeWaterLabel,coord,Source
0,Q106708773,Abborrbergets badplats,SE0220486000001903,658887-156656,Q35694946,Strängnäsfjärden,Point(17.037397412 59.384608492),WD
1,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691359 56.326564),WD
2,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691358723 56.326564061),WD
3,Q106708954,Abborrtjärns badplats,SE0611763000002279,660121-136542,Q16275388,Abborrtjärn,Point(13.4333 59.5096),WD
4,Q106711299,Abborrtjärns badplats,SE0812404000003683,713089-168959,Q16487572,Abborrtjärnen,Point(19.724889698 64.231408958),WD


In [5]:
import urllib3, json
from tqdm import tqdm
http = urllib3.PoolManager()

# Ask the osm.wikidata.link "tagged" API about every bathing place and record
# the id of the first OSM object it reports for that QID ("" when there is none).
listBath = []
for _, row in tqdm(WDBath.iterrows(), total=WDBath.shape[0]):
    url = "https://osm.wikidata.link/tagged/api/item/" + row["qbathid"]
    r = http.request('GET', url)
    # NOTE(review): the HTTP status is not checked; a body that is not valid
    # JSON raises here instead of being counted as "not linked".
    data = json.loads(r.data.decode('utf-8'))
    try:
        osmid = data["osm"][0]["id"]
    except (KeyError, IndexError, TypeError):
        # Missing or empty "osm" entry: treat the item as not linked from OSM.
        # Only these lookup failures are expected; a bare `except:` would also
        # hide real bugs and swallow KeyboardInterrupt during this long loop.
        osmid = ""
    listBath.append({
        'wikidata': row["qbathid"],
        'coord': row["coord"],
        'osmid': osmid,
    })
print(len(listBath))

100%|██████████| 2789/2789 [04:14<00:00, 10.96it/s]

2789





In [6]:
OSMtot = pd.DataFrame(listBath,
                  columns=['wikidata','coord','osmid'])
OSMtot.shape


(2789, 3)

In [24]:
pd.set_option('max_colwidth', 400)
OSMtot.head(10)

Unnamed: 0,wikidata,coord,osmid
0,Q106708773,Point(17.037397412 59.384608492),8725492343
1,Q106707080,Point(14.691359 56.326564),8737463766
2,Q106707080,Point(14.691358723 56.326564061),8737463766
3,Q106708954,Point(13.4333 59.5096),356838031
4,Q106711299,Point(19.724889698 64.231408958),8763990476
5,Q106710481,Point(18.576809583 65.354219925),943675486
6,Q106708549,Point(14.397042778 57.006691546),8763997383
7,Q106708324,Point(14.155285315 56.744170005),4428909177
8,Q106708011,Point(13.779767114 57.251420376),8737439015
9,Q106689287,Point(18.4089 59.2016),5347857709


In [8]:
# Count the rows whose lookup found no OSM object (osmid was stored as "" in that case).
OSMtot[(OSMtot['osmid']=="")].shape

(2076, 3)

In [9]:
OSMEmpty =OSMtot[(OSMtot['osmid']=="")]

In [10]:
OSMEmpty.shape

(2076, 3)

In [25]:
OSMEmpty.head()

Unnamed: 0,wikidata,coord,osmid
12,Q106709184,Point(14.414252823 59.404268046),
19,Q106709695,Point(16.55509 60.63854),
21,Q106709070,Point(14.869880056 60.749363203),
22,Q106708712,Point(15.445449575 58.121240586),
24,Q106708053,Point(14.812960627 57.823274799),


### Check sjöar

In [12]:
# Saved query: https://w.wiki/3LqW
# Same selection as queryBath, except that the P206 water-body block is a plain
# group rather than OPTIONAL, so only bathing places that have a linked water
# body are returned.
querySea = """SELECT (REPLACE(STR(?nodeBath), ".*Q", "Q") AS ?qbathid) ?nodeBathLabel ?nutsCode ?SJOID
(REPLACE(STR(?nodeWater), ".*Q", "Q") AS ?qlakeid) ?nodeWaterLabel ?coord WHERE {
  ?nodeBath wdt:P6104 wd:Q106774536.
  OPTIONAL { ?nodeBath wdt:P605 ?nutsCode. }
  OPTIONAL { ?nodeBath wdt:P625 ?coord. }
   {
    ?nodeBath wdt:P206 ?nodeWater.
    OPTIONAL { ?nodeWater wdt:P761 ?SJOID. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?nodeBathLabel)"""
WDSea = get_sparql_dataframe(endpoint_url, querySea)
WDSea["Source"] = "WD"     
WDSea.shape

(1104, 8)

In [26]:
WDSea.head()

Unnamed: 0,qbathid,nodeBathLabel,nutsCode,SJOID,qlakeid,nodeWaterLabel,coord,Source
0,Q106708773,Abborrbergets badplats,SE0220486000001903,658887-156656,Q35694946,Strängnäsfjärden,Point(17.037397412 59.384608492),WD
1,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691358723 56.326564061),WD
2,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691359 56.326564),WD
3,Q106708954,Abborrtjärns badplats,SE0611763000002279,660121-136542,Q16275388,Abborrtjärn,Point(13.4333 59.5096),WD
4,Q106711299,Abborrtjärns badplats,SE0812404000003683,713089-168959,Q16487572,Abborrtjärnen,Point(19.724889698 64.231408958),WD


In [14]:
# Look up the water body attached to each bathing place in the
# osm.wikidata.link "tagged" API and record the id of the first OSM object
# reported for it ("" when there is none).
listSea = []
# The progress total must match the frame being iterated (WDSea, not WDBath);
# otherwise the bar never reaches 100%.
for _, row in tqdm(WDSea.iterrows(), total=WDSea.shape[0]):
    url = "https://osm.wikidata.link/tagged/api/item/" + row["qlakeid"]
    r = http.request('GET', url)
    # NOTE(review): the HTTP status is not checked; a body that is not valid
    # JSON raises here instead of being counted as "not linked".
    data = json.loads(r.data.decode('utf-8'))
    try:
        osmid = data["osm"][0]["id"]
    except (KeyError, IndexError, TypeError):
        # Missing or empty "osm" entry: the water body is not linked from OSM.
        # Only these lookup failures are expected; anything else should surface.
        osmid = ""
    listSea.append({'wikidata': row["qlakeid"], 'osmid': osmid})
print(len(listSea))
OSMSeatot = pd.DataFrame(listSea,
                  columns=['wikidata','osmid'])

 40%|███▉      | 1104/2789 [01:44<02:40, 10.51it/s]

1104





In [28]:
OSMSeatot.head()

Unnamed: 0,wikidata,osmid
0,Q35694946,
1,Q16275305,10677610.0
2,Q16275305,10677610.0
3,Q16275388,241597601.0
4,Q16487572,30167501.0


In [16]:
OSMSeaEmpty=OSMSeatot[(OSMSeatot['osmid']=="")]
OSMSeaEmpty.shape


(234, 2)

### Check Naturreservat

In [17]:
# Saved query: https://w.wiki/3N6L
# Bathing places whose P276 value is an item that carries a P3613 identifier
# (bound to ?Naturreg). Both triples are required, so places without such a
# link are not returned; only the NUTS code is OPTIONAL.
queryNature = """SELECT (REPLACE(STR(?nodeBath), ".*Q", "Q") AS ?qbathid) ?nodeBathLabel ?nutsCode ?Naturreg
(REPLACE(STR(?naturreservat), ".*Q", "Q") AS ?naturreservatID)  WHERE {
  ?nodeBath wdt:P6104 wd:Q106774536.
  OPTIONAL {?nodeBath wdt:P605 ?nutsCode }
  ?nodeBath wdt:P276 ?naturreservat.
  ?naturreservat wdt:P3613 ?Naturreg. # --> reservat
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
"""
WDNature = get_sparql_dataframe(endpoint_url, queryNature)
WDNature["Source"] = "WD"     
WDNature.shape

(209, 6)

In [30]:
WDNature.head()

Unnamed: 0,qbathid,nodeBathLabel,nutsCode,Naturreg,naturreservatID,Source
0,Q106709262,badplats Södra Ånnabosjön,SE0241880000002650,2010890,Q10726587,WD
1,Q106709266,"badplats Freden, Borgåsunds badplats",SE0251961000002656,2001346,Q30185792,WD
2,Q106709275,"Stora bäcken, badplats",SE0622061000002665,2002711,Q10575924,WD
3,Q106709398,badplats Fegensjön,SE0A11382000002853,2002579,Q30204163,WD
4,Q106709553,Roxnäs badplats,SE0622080000002939,2014144,Q30174732,WD


In [19]:
# Look up the nature reserve attached to each bathing place in the
# osm.wikidata.link "tagged" API and record the id of the first OSM object
# reported for it ("" when there is none).
listNature = []
for _, row in tqdm(WDNature.iterrows(), total=WDNature.shape[0]):
    url = "https://osm.wikidata.link/tagged/api/item/" + row["naturreservatID"]
    r = http.request('GET', url)
    # NOTE(review): the HTTP status is not checked; a body that is not valid
    # JSON raises here instead of being counted as "not linked".
    data = json.loads(r.data.decode('utf-8'))
    try:
        osmid = data["osm"][0]["id"]
    except (KeyError, IndexError, TypeError):
        # Missing or empty "osm" entry: the reserve is not linked from OSM.
        # Only these lookup failures are expected; anything else should surface.
        osmid = ""
    listNature.append({'wikidata': row["naturreservatID"], 'osmid': osmid})
print(len(listNature))
OSMNature = pd.DataFrame(listNature,
                  columns=['wikidata','osmid'])

100%|██████████| 209/209 [00:19<00:00, 10.90it/s]

209





In [31]:
OSMNature.head()

Unnamed: 0,wikidata,osmid
0,Q10726587,
1,Q30185792,8798814.0
2,Q10575924,305353.0
3,Q30204163,
4,Q30174732,1379763.0


In [32]:
OSMNatureEmpty=OSMNature[(OSMNature['osmid']=="")]
OSMNatureEmpty.head()

Unnamed: 0,wikidata,osmid
0,Q10726587,
3,Q30204163,
5,Q18202399,
11,Q30161673,
14,Q10708179,


In [22]:
# Print one status line per category, in the same layout as the Status list
# at the top of the notebook: total rows, then rows without an OSM object.
run_date = start_time.strftime("%Y%m%d")
print(run_date, " Badplatser", len(WDBath), "ej OSM kopplade", len(OSMEmpty))
print(" vatten kopplade", len(WDSea), "ej OSM kopplade", len(OSMSeaEmpty))
print(" naturreservat kopplade", len(OSMNature), "ej OSM kopplade", len(OSMNatureEmpty))


20210525  Badplatser 2789 ej OSM kopplade 2076
 vatten kopplade 1104 ej OSM kopplade 234
 naturreservat kopplade 209 ej OSM kopplade 84
