* [github salgo60/Svenskabadplatser](https://github.com/salgo60/Svenskabadplatser)
* this [Notebook](https://github.com/salgo60/Svenskabadplatser/blob/main/Jupyter/OSM_Wikidata_Bathwater.ipynb)

* API [Wikidata to OpenStreetMap](https://osm.wikidata.link/tagged)
  * Example: [https://osm.wikidata.link/tagged/api/item/Q106708773](https://osm.wikidata.link/tagged/api/item/Q106708773)

In [1]:
from datetime import datetime
# Remember when this run started and echo it so the saved notebook shows its age.
start_time = datetime.now()
print("Last run: ", start_time)

Last run:  2021-05-18 04:35:18.212860


In [2]:
import pandas as pd


In [3]:
# Get all Swedish bathing waters with their NUTS code, the water each one is connected to (if any), and that water's SJOID (if it has one)
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

# SPARQL endpoint that the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"
 
# Short link (presumably to this same query in the query service): https://w.wiki/3LWk
#
# The query selects every item ?nodeBath whose P6104 is Q106774536 and returns:
#   ?qbathid / ?qlakeid  - the item URIs of the bath and its water with everything
#                          up to the last "Q" stripped by REPLACE, i.e. bare QIDs
#   ?nutsCode (P605), ?coord (P625) - optional values on the bath item
#   ?nodeWater (P206) and, nested inside it, ?SJOID (P761) - optional, so a bath
#                          without a linked water still produces a row
#   ?nodeBathLabel / ?nodeWaterLabel - filled in by the label service for "sv,en"
# Rows are ordered by the bath label.
queryBath = """SELECT (REPLACE(STR(?nodeBath), ".*Q", "Q") AS ?qbathid) ?nodeBathLabel ?nutsCode ?SJOID
(REPLACE(STR(?nodeWater), ".*Q", "Q") AS ?qlakeid) ?nodeWaterLabel ?coord WHERE {
  ?nodeBath wdt:P6104 wd:Q106774536.
  OPTIONAL { ?nodeBath wdt:P605 ?nutsCode. }
  OPTIONAL { ?nodeBath wdt:P625 ?coord. }
  OPTIONAL {
    ?nodeBath wdt:P206 ?nodeWater.
    OPTIONAL { ?nodeWater wdt:P761 ?SJOID. }
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}
ORDER BY (?nodeBathLabel)"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result bindings as a pandas DataFrame.

    The frame has one column per variable listed in the response head (in
    that order) and one row per binding. A variable that is unbound in a
    given row is stored as None.
    """
    # Identify the client with the running Python major.minor version.
    agent_string = "salgo60/%s.%s" % sys.version_info[:2]

    client = SPARQLWrapper(endpoint_url, agent=agent_string)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # The raw HTTP response is file-like, so the JSON body is parsed straight from it.
    payload = json.load(client.query().response)

    variables = payload['head']['vars']
    records = [
        [binding.get(name, {}).get('value') for name in variables]
        for binding in payload['results']['bindings']
    ]
    return pd.DataFrame(records, columns=variables)

# Fetch the bathing waters from Wikidata and add a constant Source column ("WD")
# in the same expression, then show the frame's dimensions.
WDBath = get_sparql_dataframe(endpoint_url, queryBath).assign(Source="WD")
WDBath.shape

(2755, 8)

In [4]:
# Show the Wikidata result frame built in the previous cell.
WDBath

Unnamed: 0,qbathid,nodeBathLabel,nutsCode,SJOID,qlakeid,nodeWaterLabel,coord,Source
0,Q106708773,Abborrbergets badplats,SE0220486000001903,,Q35694946,Strängnäsfjärden,Point(17.037397412 59.384608492),WD
1,Q106707080,Abborrsjöns badplats,SE0411060000000277,624471-143063,Q16275305,Abborrasjön,Point(14.691358723 56.326564061),WD
2,Q106711299,Abborrtjärns badplats,SE0812404000003683,713089-168959,Q16487572,Abborrtjärnen,Point(19.724889698 64.231408958),WD
3,Q106708954,Abborrtjärns badplats,SE0611763000002279,660121-136542,Q16275388,Abborrtjärn,Point(13.4333 59.5096),WD
4,Q106710481,Adaks badplats,SE0812418000003416,725253-162920,Q17538599,Stor-Adakträsket,Point(18.576809583 65.354219925),WD
...,...,...,...,...,...,...,...,...
2750,Q18335372,Österskärs havsbad,SE0110117000002087,,,,Point(18.3115 59.4555),WD
2751,Q106681296,Östnora havsbad,SE0110136000002146,,,,Point(18.060191 59.051436),WD
2752,Q106708690,Östra Valsjöns badplats,SE0A21490000001781,641433-131980,Q18194101,Östra Valsjön,Point(12.783612748 57.817176191),WD
2753,Q106710694,Östtjärn badplats,SE0712281000003466,691354-157517,Q18184573,Östtjärnen,Point(17.268201 62.327749),WD


In [5]:
import urllib3, json
from tqdm import tqdm
http = urllib3.PoolManager()

# Base URL of the "Wikidata to OpenStreetMap" item API; the QID is appended to it.
OSM_API_ITEM_URL = "https://osm.wikidata.link/tagged/api/item/"


def first_osm_id(payload):
    """Return the id of the first OSM object in an API payload, or "" if there is none.

    payload is the decoded JSON document for one Wikidata item. The id is read
    from payload["osm"][0]["id"]; a missing, empty or malformed "osm" entry
    yields the empty string, which the rest of the notebook treats as "no match".
    """
    try:
        return payload["osm"][0]["id"]
    except (KeyError, IndexError, TypeError):
        # Only the lookup failures are expected here; anything else (for example
        # KeyboardInterrupt) must propagate instead of being swallowed.
        return ""


listBath = []
failedBath = []  # QIDs whose response body could not be decoded as JSON
for row_number, row in tqdm(WDBath.iterrows(), total=WDBath.shape[0]):
    url = OSM_API_ITEM_URL + row["qbathid"]
    r = http.request('GET', url)
    try:
        data = json.loads(r.data.decode('utf-8'))
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        # A single bad response (e.g. an HTML error page) must not abort the whole
        # multi-minute run, so record the item and treat it as "no OSM match".
        data = {}
        failedBath.append(row["qbathid"])

    new_item = {
        'wikidata': row["qbathid"],
        'coord': row["coord"],
        'osmid': first_osm_id(data),
    }
    listBath.append(new_item)
print (len(listBath))
if failedBath:
    # Make decoding failures visible rather than letting them look like missing matches.
    print("No usable JSON for", len(failedBath), "items, e.g.", failedBath[:5])

100%|██████████| 2755/2755 [03:55<00:00, 11.69it/s]

2755





In [6]:
# Turn the per-item lookup records into a frame with a fixed column order.
osm_columns = ['wikidata', 'coord', 'osmid']
OSMtot = pd.DataFrame(listBath, columns=osm_columns)

In [7]:
# Widen text columns so long values are not cut off in the rendered table.
# The full option key is used: pandas resolves abbreviated names by pattern
# matching, which can break when a similarly named option is added later.
pd.set_option('display.max_colwidth', 400)
OSMtot.head(100)

Unnamed: 0,wikidata,coord,osmid
0,Q106708773,Point(17.037397412 59.384608492),8725492343
1,Q106707080,Point(14.691358723 56.326564061),8737463766
2,Q106711299,Point(19.724889698 64.231408958),
3,Q106708954,Point(13.4333 59.5096),356838031
4,Q106710481,Point(18.576809583 65.354219925),943675486
...,...,...,...
95,Q106708677,Point(15.540833314 58.486808339),
96,Q106707164,Point(12.064385903 57.749508373),10605543
97,Q106708627,Point(12.465829873 57.886302719),700957430
98,Q106708833,Point(18.273156529 59.649251959),


In [18]:
# Items without an OSM match carry the empty string as osmid (not a null),
# so they are found by comparing against "" and counted via the frame's shape.
OSMtot.loc[OSMtot['osmid'].eq("")].shape

(2327, 3)

In [19]:
# Keep the rows whose osmid is the empty string, i.e. items with no OSM match.
OSMEmpty = OSMtot[OSMtot.osmid.eq("")]

In [20]:
# Show the items that have no OSM match (rows of OSMtot with an empty osmid).
OSMEmpty

Unnamed: 0,wikidata,coord,osmid
2,Q106711299,Point(19.724889698 64.231408958),
5,Q106708549,Point(14.397042778 57.006691546),
8,Q106689287,Point(18.4089 59.2016),
11,Q106709184,Point(14.414252823 59.404268046),
15,Q106707023,Point(15.440837711 56.147648666),
...,...,...,...
2746,Q106671882,Point(18.257445 59.250819),
2747,Q106653304,Point(17.9824 59.3098),
2749,Q18202436,Point(13.546667 59.381944),
2752,Q106708690,Point(12.783612748 57.817176191),
