Get all Swedish Bathing Waters in this project from Wikidata and checks if they are connected to an OSM object using an [API](https://osm.wikidata.link/tagged)

* [github salgo60/Svenskabadplatser](https://github.com/salgo60/Svenskabadplatser)
* this [Notebook](https://github.com/salgo60/Svenskabadplatser/blob/main/Jupyter/OSM_Wikidata_Bathwater.ipynb)

* API [Wikidata to OpenStreetMap](https://osm.wikidata.link/tagged)
  * ex [https://osm.wikidata.link/tagged/api/item/Q106708773](https://osm.wikidata.link/tagged/api/item/Q106708773)
  
Status:
* 20210522  Badplatser 2761 ej OSM kopplade 2142
  * vatten kopplade 1001 ej OSM kopplade 216
* 20210521 badplatser 2756 ej OSM kopplade 2199
  * vatten kopplade 979 ej OSM kopplade 215
* 20210519 badplatser 2757 ej OSM kopplade 2257
  * vatten kopplade 594 ej OSM kopplade 59
* 20210518 badplatser 2755 ej OSM kopplade 2327
  * vatten kopplade 484 ej OSM kopplade 60

In [1]:
from datetime import datetime
start_time  = datetime.now()
print("Last run: ", start_time)

Last run:  2021-05-22 10:04:12.132493


In [2]:
import pandas as pd


In [3]:
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"
 
# https://w.wiki/3LWk    
queryBath = """SELECT (REPLACE(STR(?nodeBath), ".*Q", "Q") AS ?qbathid) ?nodeBathLabel ?nutsCode ?SJOID
(REPLACE(STR(?nodeWater), ".*Q", "Q") AS ?qlakeid) ?nodeWaterLabel ?coord WHERE {
  ?nodeBath wdt:P6104 wd:Q106774536.
  OPTIONAL { ?nodeBath wdt:P605 ?nutsCode. }
  OPTIONAL { ?nodeBath wdt:P625 ?coord. }
  OPTIONAL {
    ?nodeBath wdt:P206 ?nodeWater.
    OPTIONAL { ?nodeWater wdt:P761 ?SJOID. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?nodeBathLabel)"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Helper function to convert SPARQL results into a Pandas data frame.
    """
    user_agent = "salgo60/%s.%s" % (sys.version_info[0], sys.version_info[1])
 
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    processed_results = json.load(result.response)
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))
        out.append(item)

    return pd.DataFrame(out, columns=cols)

WDBath = get_sparql_dataframe(endpoint_url, queryBath)
WDBath["Source"] = "WD"     
WDBath.shape

(2761, 8)

In [4]:
WDBath

Unnamed: 0,qbathid,nodeBathLabel,nutsCode,SJOID,qlakeid,nodeWaterLabel,coord,Source
0,Q106708773,Abborrbergets badplats,SE0220486000001903,658887-156656,Q35694946,Strängnäsfjärden,Point(17.037397412 59.384608492),WD
1,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691358723 56.326564061),WD
2,Q106708954,Abborrtjärns badplats,SE0611763000002279,660121-136542,Q16275388,Abborrtjärn,Point(13.4333 59.5096),WD
3,Q106711299,Abborrtjärns badplats,SE0812404000003683,713089-168959,Q16487572,Abborrtjärnen,Point(19.724889698 64.231408958),WD
4,Q106710481,Adaks badplats,SE0812418000003416,725253-162920,Q17538599,Stor-Adakträsket,Point(18.576809583 65.354219925),WD
...,...,...,...,...,...,...,...,...
2756,Q106695873,Östra holmens badplats,SE0251980000002546,,Q184492,Mälaren,Point(16.576895 59.586077),WD
2757,Q106710694,Östtjärn badplats,SE0712281000003466,691354-157517,Q18184573,Östtjärnen,Point(17.268201 62.327749),WD
2758,Q106708537,Övdens badplats,SE0920764000001583,627654-141929,Q18134117,Övden,Point(14.50528951 56.620870346),WD
2759,Q106685181,Övre Rudansjöns badstrand,SE0110136000002143,656380-163314,Q3360982,Övre Rudasjön,Point(18.1316 59.1659),WD


In [5]:
import urllib3, json
from tqdm import tqdm
http = urllib3.PoolManager()

listBath = []
#for WD, row in WDBath.iterrows():
for WD, row in tqdm(WDBath.iterrows(), total=WDBath.shape[0]):
    #print(row["qbathid"] ) 
    url = "https://osm.wikidata.link/tagged/api/item/" + row["qbathid"] 
    
    new_item = dict()
    new_item['wikidata'] = row["qbathid"] 
    new_item['coord'] = row["coord"] 
    r = http.request('GET', url) 
    data = json.loads(r.data.decode('utf-8'))
#    print (r.status)
    try:
        #print ("OSM ", data["osm"], "Type: ", type(data["osm"]))
        #print ("ID: ", data["osm"][0]["id"])
        osmid = data["osm"][0]["id"]        
    except:
        #print ("error")
        osmid =""
    new_item['osmid'] = osmid 
    listBath.append(new_item)
print (len(listBath))

100%|██████████| 2761/2761 [04:06<00:00, 11.21it/s]

2761





In [6]:
OSMtot = pd.DataFrame(listBath,
                  columns=['wikidata','coord','osmid'])
OSMtot.shape


(2761, 3)

In [7]:
pd.set_option('max_colwidth', 400)
OSMtot.head(100)

Unnamed: 0,wikidata,coord,osmid
0,Q106708773,Point(17.037397412 59.384608492),8725492343
1,Q106707080,Point(14.691358723 56.326564061),8737463766
2,Q106708954,Point(13.4333 59.5096),356838031
3,Q106711299,Point(19.724889698 64.231408958),
4,Q106710481,Point(18.576809583 65.354219925),943675486
...,...,...,...
95,Q104808953,Point(17.440773888 59.057008055),377452749
96,Q106710705,Point(17.459269445 62.271444954),307760979
97,Q106707873,Point(16.37065877 56.696529388),
98,Q106676963,Point(18.762768 60.056753),680605708


In [8]:
#OSMempty = OSMtot.osmid.notnull()
OSMtot[(OSMtot['osmid']=="")].shape

(2142, 3)

In [9]:
OSMEmpty =OSMtot[(OSMtot['osmid']=="")]

In [10]:
OSMEmpty.shape

(2142, 3)

In [11]:
OSMEmpty

Unnamed: 0,wikidata,coord,osmid
3,Q106711299,Point(19.724889698 64.231408958),
5,Q106708549,Point(14.397042778 57.006691546),
11,Q106709184,Point(14.414252823 59.404268046),
18,Q106709695,Point(16.55509 60.63854),
20,Q106709070,Point(14.869880056 60.749363203),
...,...,...,...
2745,Q106687370,Point(12.192259 58.273465),
2746,Q106671882,Point(18.257445 59.250819),
2747,Q106653304,Point(17.9824 59.3098),
2749,Q18202436,Point(13.546667 59.381944),


### Check sjöar

In [12]:
# https://w.wiki/3LqW"
querySea = """SELECT (REPLACE(STR(?nodeBath), ".*Q", "Q") AS ?qbathid) ?nodeBathLabel ?nutsCode ?SJOID
(REPLACE(STR(?nodeWater), ".*Q", "Q") AS ?qlakeid) ?nodeWaterLabel ?coord WHERE {
  ?nodeBath wdt:P6104 wd:Q106774536.
  OPTIONAL { ?nodeBath wdt:P605 ?nutsCode. }
  OPTIONAL { ?nodeBath wdt:P625 ?coord. }
   {
    ?nodeBath wdt:P206 ?nodeWater.
    OPTIONAL { ?nodeWater wdt:P761 ?SJOID. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?nodeBathLabel)"""
WDSea = get_sparql_dataframe(endpoint_url, querySea)
WDSea["Source"] = "WD"     
WDSea.shape

(1001, 8)

In [13]:
WDSea

Unnamed: 0,qbathid,nodeBathLabel,nutsCode,SJOID,qlakeid,nodeWaterLabel,coord,Source
0,Q106708773,Abborrbergets badplats,SE0220486000001903,658887-156656,Q35694946,Strängnäsfjärden,Point(17.037397412 59.384608492),WD
1,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691358723 56.326564061),WD
2,Q106708954,Abborrtjärns badplats,SE0611763000002279,660121-136542,Q16275388,Abborrtjärn,Point(13.4333 59.5096),WD
3,Q106711299,Abborrtjärns badplats,SE0812404000003683,713089-168959,Q16487572,Abborrtjärnen,Point(19.724889698 64.231408958),WD
4,Q106710481,Adaks badplats,SE0812418000003416,725253-162920,Q17538599,Stor-Adakträsket,Point(18.576809583 65.354219925),WD
...,...,...,...,...,...,...,...,...
996,Q106695873,Östra holmens badplats,SE0251980000002546,,Q184492,Mälaren,Point(16.576895 59.586077),WD
997,Q106710694,Östtjärn badplats,SE0712281000003466,691354-157517,Q18184573,Östtjärnen,Point(17.268201 62.327749),WD
998,Q106708537,Övdens badplats,SE0920764000001583,627654-141929,Q18134117,Övden,Point(14.50528951 56.620870346),WD
999,Q106685181,Övre Rudansjöns badstrand,SE0110136000002143,656380-163314,Q3360982,Övre Rudasjön,Point(18.1316 59.1659),WD


In [14]:
listSea = []
#for WD, row in WDBath.iterrows():
for WD, row in tqdm(WDSea.iterrows(), total=WDBath.shape[0]):
    #print(row["qlakeid"] ) 
    url = "https://osm.wikidata.link/tagged/api/item/" + row["qlakeid"] 
    
    new_item = dict()
    new_item['wikidata'] = row["qlakeid"] 
    r = http.request('GET', url) 
    data = json.loads(r.data.decode('utf-8'))
    try:
        #print ("ID: ", data["osm"][0]["id"])

        osmid = data["osm"][0]["id"] 
        
    except:
        #print ("error")
        osmid =""
    new_item['osmid'] = osmid 
    listSea.append(new_item)
print (len(listSea))
OSMSeatot = pd.DataFrame(listSea,
                  columns=['wikidata','osmid'])

 36%|███▋      | 1001/2761 [01:30<02:39, 11.06it/s]

1001





In [15]:
OSMSeatot

Unnamed: 0,wikidata,osmid
0,Q35694946,
1,Q16275305,10677610
2,Q16275388,241597601
3,Q16487572,30167501
4,Q17538599,181285898
...,...,...
996,Q184492,1433877
997,Q18184573,23047338
998,Q18134117,48996
999,Q3360982,23988339


In [16]:
OSMSeatot[(OSMSeatot['osmid']=="")].shape


(216, 2)

In [17]:
OSMSeaEmpty =OSMSeatot[(OSMSeatot['osmid']=="")]

Badplatser 2756 ej OSM kopplade 2199
vatten kopplade 979 ej OSM kopplade 215

In [18]:
print(start_time.strftime("%Y%m%d")," Badplatser", WDBath.shape[0], "ej OSM kopplade",OSMEmpty.shape[0]) 
print(" vatten kopplade", WDSea.shape[0], "ej OSM kopplade",OSMSeaEmpty.shape[0])

20210522  Badplatser 2761 ej OSM kopplade 2142
 vatten kopplade 1001 ej OSM kopplade 216
