Get all Bathing Waters with P9618 in this project from Wikidata and checks if they exist on eionet as a WaterBody 

https://dd.eionet.europa.eu/vocabularyconcept/wise/WaterBody

* The project: [github salgo60/Svenskabadplatser](https://github.com/salgo60/Svenskabadplatser)
  * European bathwaters [GITHUB](https://github.com/salgo60/EuropeanBathingWater/blob/main/README.md) / [Wikidata](https://www.wikidata.org/wiki/Wikidata:WikiProject_European_Bath_Waters)
* this [Notebook](https://github.com/salgo60/Svenskabadplatser/blob/main/Jupyter/Eionet_Data_Dictionary.ipynb)

  
Status:  



| Date | Total | Ok | Error 
| ------------- |:-------------:|:-------------:|:-------------:|
| 20210610 | 3176 | 2240 | 936 |



In [1]:
from datetime import datetime
start_time  = datetime.now()
print("Last run: ", start_time)

Last run:  2021-06-10 09:52:45.966952


In [2]:
import pandas as pd


In [3]:
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

endpoint_url = "https://query.wikidata.org/sparql"
 
# https://w.wiki/3Tk$    
queryBath = """SELECT  (REPLACE(STR(?nodebath), ".*Q", "Q") AS ?wikidata) ?nodebath (SUBSTR(lcase(?bath),1,2) AS ?country)
(URI(CONCAT("https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.",
       str(?bath))) AS ?eionet)  (str(?bath) AS ?bathwateridentifier){
      ?nodebath wdt:P9616 ?bath} 
"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Helper function to convert SPARQL results into a Pandas data frame.
    """
    user_agent = "salgo60/%s.%s" % (sys.version_info[0], sys.version_info[1])
 
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    result = sparql.query()

    processed_results = json.load(result.response)
    cols = processed_results['head']['vars']

    out = []
    for row in processed_results['results']['bindings']:
        item = []
        for c in cols:
            item.append(row.get(c, {}).get('value'))
        out.append(item)

    return pd.DataFrame(out, columns=cols)

WDBath = get_sparql_dataframe(endpoint_url, queryBath)
WDBath.shape

(3176, 5)

In [4]:
WDBath.head()

Unnamed: 0,wikidata,nodebath,country,eionet,bathwateridentifier
0,Q106702268,http://www.wikidata.org/entity/Q106702268,se,https://dd.eionet.europa.eu/vocabularyconcept/...,SE0411080000000220
1,Q106702265,http://www.wikidata.org/entity/Q106702265,se,https://dd.eionet.europa.eu/vocabularyconcept/...,SE0411080000000205
2,Q106702261,http://www.wikidata.org/entity/Q106702261,se,https://dd.eionet.europa.eu/vocabularyconcept/...,SE0411080000000216
3,Q106703325,http://www.wikidata.org/entity/Q106703325,se,https://dd.eionet.europa.eu/vocabularyconcept/...,SE0210380000001180
4,Q106702259,http://www.wikidata.org/entity/Q106702259,se,https://dd.eionet.europa.eu/vocabularyconcept/...,SE0411080000000212


In [None]:
import urllib3, json
from tqdm import tqdm
http = urllib3.PoolManager()

listBath = []
for WD, row in tqdm(WDBath.iterrows(), total=WDBath.shape[0]):
    url = row["eionet"] 
    
    new_item = dict()
    new_item['wikidata'] = row["wikidata"] 
    try:
        r = http.request('GET', url) 
        new_item['status'] = r.status
    except:
        print (r.status, url)
        new_item['status'] = r.status
    new_item['eionet'] = url 
    new_item['bathwateridentifier'] = row["bathwateridentifier"] 
    new_item['country'] = row["country"] 
    
    listBath.append(new_item)
print (len(listBath))

  1%|          | 29/3176 [00:01<01:44, 29.97it/s]

In [None]:
Eionettot = pd.DataFrame(listBath,
                  columns=['wikidata','country','bathwateridentifier','status','eionet'])
Eionettot.shape


In [None]:
pd.set_option('max_colwidth', 400)
Eionettot.head(10)

In [None]:
#Eionettot.value_counts({"status","country"})
#Eionettot[['status', 'country']].apply(pd.Series.value_counts)


In [None]:
EionettotError = Eionettot[(Eionettot['status']==200)] 
EionettotOk = Eionettot[(Eionettot['status']==404)]

In [None]:
EionettotError.shape

In [None]:
EionettotError.value_counts("country")

In [None]:
EionettotOk.shape

In [None]:
EionettotOk.value_counts("country")

In [None]:
EionettotOk.to_csv("BathIdentifier_Ok.csv")
EionettotError.to_csv("BathIdentifier_Error.csv")
Eionettot.to_csv("BathIdentifier_All.csv")


Generate Markdown table eg.
| 20210610 | 3176 | 2240 | 936 |


In [None]:
print("|",start_time.strftime("%Y%m%d"),"|", \
      Eionettot.shape[0],"|", \
      EionettotOk.shape[0],"|", \
      EionettotError.shape[0],"|",)


In [None]:
end = datetime.now()
print("Ended: ", end) 
print('Time elapsed (hh:mm:ss.ms) {}'.format(datetime.now() - start_time))