Gets all Swedish bathing waters in this project from Wikidata and checks whether each one is connected to an OSM object, using an [API](https://osm.wikidata.link/tagged)

* [github salgo60/Svenskabadplatser](https://github.com/salgo60/Svenskabadplatser)
* this [Notebook](https://github.com/salgo60/Svenskabadplatser/blob/main/Jupyter/OSM_Wikidata_Bathwater.ipynb)

* API [Wikidata to OpenStreetMap](https://osm.wikidata.link/tagged)
  * ex [https://osm.wikidata.link/tagged/api/item/Q106708773](https://osm.wikidata.link/tagged/api/item/Q106708773)
  
Status:
* 20210519 badplatser 2757 ej kopplade 2257
  * vatten 594 ej kopplade 59
* 20210518 badplatser 2755 ej kopplade 2327
  * vatten 484 ej kopplade 60

In [1]:
from datetime import datetime
# Record when this run started and print it, so the saved output shows
# how recent the results below are.
start_time  = datetime.now()
print("Last run: ", start_time)

Last run:  2021-05-19 17:49:40.257985


In [2]:
import pandas as pd


In [3]:
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint that every query in this notebook is sent to.
endpoint_url = "https://query.wikidata.org/sparql"
 
# https://w.wiki/3LWk    
# Query for the bathing places: every item whose P6104 value is Q106774536
# (presumably the item for this bathing-water project -- confirm on Wikidata).
# * The two REPLACE(...) expressions strip everything before the last "Q" of
#   the entity URI, so ?qbathid / ?qlakeid hold bare Q-ids.
# * P605 (?nutsCode), P625 (?coord) and the P206 link to a water body are all
#   OPTIONAL, so items lacking them are still returned with empty cells.
# * P761 (?SJOID) is only looked up on the linked water body.
# * Labels are requested with the language list "sv,en".
queryBath = """SELECT (REPLACE(STR(?nodeBath), ".*Q", "Q") AS ?qbathid) ?nodeBathLabel ?nutsCode ?SJOID
(REPLACE(STR(?nodeWater), ".*Q", "Q") AS ?qlakeid) ?nodeWaterLabel ?coord WHERE {
  ?nodeBath wdt:P6104 wd:Q106774536.
  OPTIONAL { ?nodeBath wdt:P605 ?nutsCode. }
  OPTIONAL { ?nodeBath wdt:P625 ?coord. }
  OPTIONAL {
    ?nodeBath wdt:P206 ?nodeWater.
    OPTIONAL { ?nodeWater wdt:P761 ?SJOID. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?nodeBathLabel)"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result bindings as a pandas DataFrame.

    Parameters
    ----------
    endpoint_url : str
        URL of the SPARQL endpoint to send the query to.
    query : str
        SPARQL query text.

    Returns
    -------
    pandas.DataFrame
        One column per variable listed in the response head and one row per
        result binding. A variable that is absent from a binding is stored
        as a missing value.
    """
    # Identify the client to the endpoint; the suffix is the running
    # Python major.minor version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent="salgo60/%s.%s" % (major, minor))
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Parse the raw JSON response body ourselves.
    payload = json.load(client.query().response)
    variables = payload['head']['vars']

    # Flatten each binding to a list of plain values, in column order.
    records = [
        [binding.get(name, {}).get('value') for name in variables]
        for binding in payload['results']['bindings']
    ]
    return pd.DataFrame(records, columns=variables)

# Fetch all bathing places and tag every row with the constant Source "WD".
WDBath = get_sparql_dataframe(endpoint_url, queryBath).assign(Source="WD")
WDBath.shape

(2757, 8)

In [4]:
# Preview the bathing places returned by the query.
WDBath

Unnamed: 0,qbathid,nodeBathLabel,nutsCode,SJOID,qlakeid,nodeWaterLabel,coord,Source
0,Q106708773,Abborrbergets badplats,SE0220486000001903,,Q35694946,Strängnäsfjärden,Point(17.037397412 59.384608492),WD
1,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691358723 56.326564061),WD
2,Q106708954,Abborrtjärns badplats,SE0611763000002279,660121-136542,Q16275388,Abborrtjärn,Point(13.4333 59.5096),WD
3,Q106711299,Abborrtjärns badplats,SE0812404000003683,713089-168959,Q16487572,Abborrtjärnen,Point(19.724889698 64.231408958),WD
4,Q106710481,Adaks badplats,SE0812418000003416,725253-162920,Q17538599,Stor-Adakträsket,Point(18.576809583 65.354219925),WD
...,...,...,...,...,...,...,...,...
2752,Q106695873,Östra holmens badplats,SE0251980000002546,,Q184492,Mälaren,Point(16.576895 59.586077),WD
2753,Q106710694,Östtjärn badplats,SE0712281000003466,691354-157517,Q18184573,Östtjärnen,Point(17.268201 62.327749),WD
2754,Q106708537,Övdens badplats,SE0920764000001583,627654-141929,Q18134117,Övden,Point(14.50528951 56.620870346),WD
2755,Q106685181,Övre Rudansjöns badstrand,SE0110136000002143,656380-163314,Q3360982,Övre Rudasjön,Point(18.1316 59.1659),WD


In [5]:
import urllib3, json
from tqdm import tqdm
# One connection pool, reused for every request to the lookup API.
http = urllib3.PoolManager()

# For each bathing place, ask the osm.wikidata.link "tagged" API about its
# Wikidata id and keep the id of the first OSM entry in the response.
# An empty string in 'osmid' means that no OSM entry was returned.
listBath = []
for row_index, row in tqdm(WDBath.iterrows(), total=len(WDBath)):
    url = "https://osm.wikidata.link/tagged/api/item/" + row["qbathid"]
    r = http.request('GET', url)
    data = json.loads(r.data.decode('utf-8'))
    try:
        osmid = data["osm"][0]["id"]
    except (KeyError, IndexError, TypeError):
        # Only a missing or empty "osm" entry counts as "not connected".
        # Anything else (Ctrl-C, network or decoding failures) must surface
        # instead of being recorded as a missing link.
        osmid = ""
    listBath.append({
        'wikidata': row["qbathid"],
        'coord': row["coord"],
        'osmid': osmid,
    })
print (len(listBath))

100%|██████████| 2757/2757 [04:10<00:00, 11.01it/s]

2757





In [18]:
# Collect the lookup results into a frame with a fixed column order.
OSMtot = pd.DataFrame(data=listBath, columns=['wikidata', 'coord', 'osmid'])
OSMtot.shape


(2757, 3)

In [7]:
# Widen the column display so long values are not truncated. The full
# option name is used because abbreviated names are resolved by pattern
# matching and can become ambiguous in later pandas versions.
pd.set_option('display.max_colwidth', 400)
OSMtot.head(100)

Unnamed: 0,wikidata,coord,osmid
0,Q106708773,Point(17.037397412 59.384608492),8725492343
1,Q106707080,Point(14.691358723 56.326564061),8737463766
2,Q106708954,Point(13.4333 59.5096),356838031
3,Q106711299,Point(19.724889698 64.231408958),
4,Q106710481,Point(18.576809583 65.354219925),943675486
...,...,...,...
95,Q106676963,Point(18.762768 60.056753),680605708
96,Q106711971,Point(15.267990274 60.563705742),
97,Q106709345,Point(21.209162429 65.492882413),
98,Q106709935,Point(18.572530897 63.230536008),


In [8]:
# Count the bathing places for which the lookup stored an empty osmid.
OSMtot.loc[OSMtot['osmid'] == ""].shape

(2257, 3)

In [9]:
# Keep only the bathing places whose osmid is empty (no OSM entry found).
OSMEmpty = OSMtot.loc[OSMtot['osmid'] == ""]

In [19]:
# (rows, columns) of the bathing places with an empty osmid.
OSMEmpty.shape

(2257, 3)

In [20]:
# List the bathing places with an empty osmid.
OSMEmpty

Unnamed: 0,wikidata,coord,osmid
3,Q106711299,Point(19.724889698 64.231408958),
5,Q106708549,Point(14.397042778 57.006691546),
11,Q106709184,Point(14.414252823 59.404268046),
18,Q106709695,Point(16.55509 60.63854),
20,Q106709070,Point(14.869880056 60.749363203),
...,...,...,...
2744,Q106671882,Point(18.257445 59.250819),
2745,Q106653304,Point(17.9824 59.3098),
2747,Q18202436,Point(13.546667 59.381944),
2748,Q106708778,Point(17.199441044 59.36672915),


### Check lakes (sjöar)

In [11]:
# https://w.wiki/3LqW
# Same selection as queryBath, except that the P206 link to a water body is
# a required pattern here instead of an OPTIONAL one, so only bathing places
# that have a linked water body are returned.
querySea = """SELECT (REPLACE(STR(?nodeBath), ".*Q", "Q") AS ?qbathid) ?nodeBathLabel ?nutsCode ?SJOID
(REPLACE(STR(?nodeWater), ".*Q", "Q") AS ?qlakeid) ?nodeWaterLabel ?coord WHERE {
  ?nodeBath wdt:P6104 wd:Q106774536.
  OPTIONAL { ?nodeBath wdt:P605 ?nutsCode. }
  OPTIONAL { ?nodeBath wdt:P625 ?coord. }
   {
    ?nodeBath wdt:P206 ?nodeWater.
    OPTIONAL { ?nodeWater wdt:P761 ?SJOID. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?nodeBathLabel)"""
# Fetch the bathing places that have a linked water body and tag every row
# with the constant Source "WD".
WDSea = get_sparql_dataframe(endpoint_url, querySea).assign(Source="WD")
WDSea.shape

(594, 8)

In [12]:
# Preview the bathing places that have a linked water body.
WDSea

Unnamed: 0,qbathid,nodeBathLabel,nutsCode,SJOID,qlakeid,nodeWaterLabel,coord,Source
0,Q106708773,Abborrbergets badplats,SE0220486000001903,,Q35694946,Strängnäsfjärden,Point(17.037397412 59.384608492),WD
1,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691358723 56.326564061),WD
2,Q106711299,Abborrtjärns badplats,SE0812404000003683,713089-168959,Q16487572,Abborrtjärnen,Point(19.724889698 64.231408958),WD
3,Q106708954,Abborrtjärns badplats,SE0611763000002279,660121-136542,Q16275388,Abborrtjärn,Point(13.4333 59.5096),WD
4,Q106710481,Adaks badplats,SE0812418000003416,725253-162920,Q17538599,Stor-Adakträsket,Point(18.576809583 65.354219925),WD
...,...,...,...,...,...,...,...,...
589,Q106708531,Öjaby badplats,SE0920780000001570,630764-143570,Q1474746,Helgasjön,Point(14.740117728 56.900880232),WD
590,Q106708690,Östra Valsjöns badplats,SE0A21490000001781,641433-131980,Q18194101,Östra Valsjön,Point(12.783612748 57.817176191),WD
591,Q106695873,Östra holmens badplats,SE0251980000002546,,Q184492,Mälaren,Point(16.576895 59.586077),WD
592,Q106710694,Östtjärn badplats,SE0712281000003466,691354-157517,Q18184573,Östtjärnen,Point(17.268201 62.327749),WD


In [13]:
# For each row of WDSea, ask the osm.wikidata.link "tagged" API about the
# linked water body (qlakeid) and keep the id of the first OSM entry.
# An empty string in 'osmid' means that no OSM entry was returned.
# NOTE(review): the lookup runs once per row, so a water body shared by
# several bathing places would be queried and counted more than once.
listSea = []
# The progress total must match the frame being iterated (WDSea, not WDBath);
# otherwise the bar stops far short of 100%.
for row_index, row in tqdm(WDSea.iterrows(), total=len(WDSea)):
    url = "https://osm.wikidata.link/tagged/api/item/" + row["qlakeid"]
    r = http.request('GET', url)
    data = json.loads(r.data.decode('utf-8'))
    try:
        osmid = data["osm"][0]["id"]
    except (KeyError, IndexError, TypeError):
        # Only a missing or empty "osm" entry counts as "not connected".
        # Anything else (Ctrl-C, network or decoding failures) must surface
        # instead of being recorded as a missing link.
        osmid = ""
    listSea.append({'wikidata': row["qlakeid"], 'osmid': osmid})
print (len(listSea))
OSMSeatot = pd.DataFrame(listSea,
                  columns=['wikidata','osmid'])

 22%|██▏       | 594/2757 [00:56<03:26, 10.49it/s]

594





In [14]:
# Preview the lookup result for the linked water bodies.
OSMSeatot

Unnamed: 0,wikidata,osmid
0,Q35694946,
1,Q16275305,10677610
2,Q16487572,30167501
3,Q16275388,241597601
4,Q17538599,181285898
...,...,...
589,Q1474746,6715
590,Q18194101,
591,Q184492,1433877
592,Q18184573,23047338


In [15]:
# Count the water-body rows for which the lookup stored an empty osmid.
OSMSeatot.loc[OSMSeatot['osmid'] == ""].shape


(59, 2)

In [16]:
# Keep only the water-body rows whose osmid is empty (no OSM entry found).
OSMSeaEmpty = OSMSeatot.loc[OSMSeatot['osmid'] == ""]

In [17]:
# List the water-body rows with an empty osmid.
OSMSeaEmpty

Unnamed: 0,wikidata,osmid
0,Q35694946,
26,Q31891561,
33,Q13368095,
38,Q10689011,
60,Q18131777,
61,Q16503735,
76,Q18194046,
77,Q32229557,
78,Q10438360,
79,Q10438438,
