Gets all Swedish bathing waters with P9616 in this project from Wikidata and checks whether they exist on Eionet as a WaterBody.

https://dd.eionet.europa.eu/vocabularyconcept/wise/WaterBody

* The project: [github salgo60/Svenskabadplatser](https://github.com/salgo60/Svenskabadplatser)
  * European bathwaters [GITHUB](https://github.com/salgo60/EuropeanBathingWater/blob/main/README.md) / [Wikidata](https://www.wikidata.org/wiki/Wikidata:WikiProject_European_Bath_Waters)
* this [Notebook](https://github.com/salgo60/Svenskabadplatser/blob/main/Jupyter/Eionet%20Data%20Dictionary.ipynb)

**See also** [github salgo60/EuropeanBathingWater](https://github.com/salgo60/EuropeanBathingWater/blob/main/Jupyter/Eionet%20Data%20Dictionary.ipynb)


Status:  



| Date | Total | Ok | Error |
| ------------- |:-------------:|:-------------:|:-------------:|
| 20210629 | 833 | 832 | 1 |
| 20210626 | 2654 | 833 | 1821 |


In [1]:
from datetime import datetime
# Remember when this run began; the last cell subtracts it from the current
# time to report how long the whole notebook took.
start_time  = datetime.now()
print("Last run: ", start_time)

Last run:  2021-06-29 18:17:09.193874


In [2]:
import pandas as pd


In [3]:
#
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys,json
from SPARQLWrapper import SPARQLWrapper, JSON

# Public Wikidata SPARQL endpoint that the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"
 
# get Swedish baths 
# https://w.wiki/3YdP
# Selects every item that has a P9616 value (the bathing water identifier) and
# P17 = Q34, and returns four columns per item:
#   wikidata            - the Q-id cut out of the entity URI
#   nodebath            - the full entity URI
#   bathwateridentifier - the P9616 value
#   eionet              - URL built by appending the identifier to the
#                         WFDProtectedArea/euProtectedAreaCode vocabulary path
# NOTE(review): the notebook intro links the "WaterBody" vocabulary, while the
# URL built here targets "WFDProtectedArea" - confirm which one is intended.
queryBath = """SELECT DISTINCT  (REPLACE(STR(?nodebath), ".*Q", "Q") AS ?wikidata) ?nodebath ?bathwateridentifier
(URI(CONCAT("https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.",
       str(?bathwateridentifier))) AS ?eionet)  {
      ?nodebath wdt:P9616 ?bathwateridentifier .
      ?nodebath wdt:P17 wd:Q34 .
}
"""


def get_sparql_dataframe(endpoint_url, query):
    """
    Run a SPARQL query and return its result bindings as a Pandas data frame.

    Parameters
    ----------
    endpoint_url : str
        URL of the SPARQL endpoint to send the query to.
    query : str
        SPARQL query text.

    Returns
    -------
    pandas.DataFrame
        One row per result binding and one column per variable listed in the
        response head. A variable that is unbound in a row becomes None.
    """
    # Identify the caller to the endpoint; the suffix is the running
    # Python major.minor version.
    py_major, py_minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent="salgo60/%s.%s" % (py_major, py_minor))
    client.setQuery(query)
    client.setReturnFormat(JSON)

    # Parse the raw JSON response body ourselves.
    payload = json.load(client.query().response)
    columns = payload['head']['vars']

    # Each binding maps variable name -> {"value": ...}; absent variables
    # fall back to an empty dict so the cell ends up as None.
    records = [
        [binding.get(name, {}).get('value') for name in columns]
        for binding in payload['results']['bindings']
    ]

    return pd.DataFrame(records, columns=columns)

# Fetch the Swedish bathing waters from Wikidata; the trailing .shape shows
# (rows, columns) as a quick sanity check of the result size.
WDBath = get_sparql_dataframe(endpoint_url, queryBath)
WDBath.shape

(833, 4)

In [4]:
# Preview the first rows of the Wikidata result.
WDBath.head()

Unnamed: 0,wikidata,nodebath,bathwateridentifier,eionet
0,Q106654923,http://www.wikidata.org/entity/Q106654923,SE0A21493000001928,https://dd.eionet.europa.eu/vocabularyconcept/...
1,Q106614954,http://www.wikidata.org/entity/Q106614954,SE0A21480000000516,https://dd.eionet.europa.eu/vocabularyconcept/...
2,Q106707173,http://www.wikidata.org/entity/Q106707173,SE0A21480000000390,https://dd.eionet.europa.eu/vocabularyconcept/...
3,Q106707174,http://www.wikidata.org/entity/Q106707174,SE0A21480000000392,https://dd.eionet.europa.eu/vocabularyconcept/...
4,Q106707175,http://www.wikidata.org/entity/Q106707175,SE0A21481000000394,https://dd.eionet.europa.eu/vocabularyconcept/...


In [5]:
import urllib3, json
from tqdm import tqdm
# One connection pool shared by every request made in this cell.
http = urllib3.PoolManager()
# Detail API on badplatsen.havochvatten.se; only queried for identifiers that
# eionet answers 404 for, to record why the bath might be missing there.
urlHav = "https://badplatsen.havochvatten.se/badplatsen/api/detail/"

# One dict per bathing water. Keys: wikidata, status, eionet,
# bathwateridentifier and, for 404 rows only, euType / euMotive / NotEuMotive.
listBath = []
for _idx, row in tqdm(WDBath.iterrows(), total=WDBath.shape[0]):
    url = row["eionet"]

    new_item = dict()
    new_item['wikidata'] = row["wikidata"]

    try:
        r = http.request('GET', url)
    except Exception as e:
        # The request itself failed, so there is no response object to read a
        # status from. Record the status as missing instead of touching `r`,
        # which would be unbound on the first row or left over from the
        # previous row.
        print("Eionet except ", e, url, " WD:", row["wikidata"])
        new_item['status'] = None
    else:
        new_item['status'] = r.status
        if r.status == 404:
            # check API for reason
            urlHavBath = urlHav + row["bathwateridentifier"]
            try:
                rHav = http.request('GET', urlHavBath,
                                    headers={'Content-Type': 'application/json'})
                rHavData = json.loads(rHav.data.decode('utf-8'))
                # .get() keeps the remaining fields even if one key is absent
                # from the response.
                new_item['euType'] = rHavData.get("euType")
                new_item['euMotive'] = rHavData.get("euMotive")
                new_item['NotEuMotive'] = rHavData.get("NotEuMotive")
            except Exception as e:
                # Best effort: a failing or malformed HaV answer must not stop
                # the run; the row is kept without the extra fields.
                print ("Hav except ", e, urlHavBath, " WD:",row["wikidata"] )

    new_item['eionet'] = url
    new_item['bathwateridentifier'] = row["bathwateridentifier"]

    listBath.append(new_item)
print (len(listBath))

100%|██████████| 833/833 [00:27<00:00, 30.58it/s]

833





In [6]:
#listBath

In [7]:
# Spell out the column order. Columns that a row's dict does not carry
# (the eu* fields for rows that were not 404) come out as missing values.
report_columns = [
    'wikidata',
    'bathwateridentifier',
    'status',
    'eionet',
    'euType',
    'euMotive',
    'NotEuMotive',
]
Eionettot = pd.DataFrame(listBath, columns=report_columns)
Eionettot.shape


(833, 7)

In [8]:
# Column dtypes and non-null counts for the combined result.
Eionettot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 833 entries, 0 to 832
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   wikidata             833 non-null    object 
 1   bathwateridentifier  833 non-null    object 
 2   status               833 non-null    int64  
 3   eionet               833 non-null    object 
 4   euType               1 non-null      object 
 5   euMotive             0 non-null      float64
 6   NotEuMotive          0 non-null      float64
dtypes: float64(2), int64(1), object(4)
memory usage: 45.7+ KB


In [9]:
# Widen text columns so the full eionet URLs are shown instead of being cut
# off with "...". The fully qualified option name is used because abbreviated
# names are resolved by pattern matching and can become ambiguous if pandas
# adds similarly named options.
pd.set_option('display.max_colwidth', 400)
Eionettot.head(10)

Unnamed: 0,wikidata,bathwateridentifier,status,eionet,euType,euMotive,NotEuMotive
0,Q106654923,SE0A21493000001928,404,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21493000001928,False,,
1,Q106614954,SE0A21480000000516,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21480000000516,,,
2,Q106707173,SE0A21480000000390,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21480000000390,,,
3,Q106707174,SE0A21480000000392,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21480000000392,,,
4,Q106707175,SE0A21481000000394,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21481000000394,,,
5,Q106707178,SE0A21481000000397,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21481000000397,,,
6,Q106707179,SE0A21481000000398,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21481000000398,,,
7,Q106707176,SE0A21481000000395,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21481000000395,,,
8,Q106707177,SE0A21481000000396,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21481000000396,,,
9,Q106707180,SE0A21481000000399,200,https://dd.eionet.europa.eu/vocabularyconcept/wise/WFDProtectedArea/euProtectedAreaCode.SE0A21481000000399,,,


In [10]:
# Add two HTML anchor columns so the rendered tables are clickable:
#   link -> the eionet vocabulary URL that was checked
#   WD   -> the Wikidata item page
eionet_urls = Eionettot["eionet"].astype(str)
wikidata_ids = Eionettot["wikidata"].astype(str)

Eionettot["link"] = "<a href='" + eionet_urls + "'>link eionet</a>"
Eionettot["WD"] = "<a href='https://www.wikidata.org/wiki/" + wikidata_ids + "'>link WD</a>"


In [11]:
from IPython.display import display, HTML  

#Eionettot.value_counts({"status","country"})
#Eionettot[['status', 'country']].apply(pd.Series.value_counts)
# Render the last 50 rows with clickable links (escape=False keeps the <a> tags).
# Columns are selected with a list, not a set: a set has no defined order and
# pandas 2.x rejects set indexers with a TypeError.
HTML(Eionettot[['WD','bathwateridentifier','status','link','euType','euMotive','NotEuMotive']].tail(50).to_html(escape=False))

Unnamed: 0,WD,NotEuMotive,bathwateridentifier,link,euType,euMotive,status
783,link WD,,SE0920780000003302,link eionet,,,200
784,link WD,,SE0611737000003306,link eionet,,,200
785,link WD,,SE0611737000003330,link eionet,,,200
786,link WD,,SE0A21486000003348,link eionet,,,200
787,link WD,,SE0812482000003353,link eionet,,,200
788,link WD,,SE0611785000003410,link eionet,,,200
789,link WD,,SE0611785000003413,link eionet,,,200
790,link WD,,SE0611785000003411,link eionet,,,200
791,link WD,,SE0712282000003439,link eionet,,,200
792,link WD,,SE0712281000003450,link eionet,,,200


In [12]:
# Split by the HTTP status eionet returned: 200 goes to Ok, 404 to Error.
# Rows with any other status end up in neither frame.
EionettotOk = Eionettot.loc[Eionettot['status'].eq(200)]
EionettotError = Eionettot.loc[Eionettot['status'].eq(404)]

In [13]:
# (rows, columns) of the identifiers eionet answered 200 for.
EionettotOk.shape

(832, 9)

In [14]:
# (rows, columns) of the identifiers eionet answered 404 for.
EionettotError.shape

(1, 9)

In [15]:
#EionettotError.value_counts("country")

In [16]:
# Column dtypes and non-null counts for the 404 rows.
EionettotError.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 0 to 0
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   wikidata             1 non-null      object 
 1   bathwateridentifier  1 non-null      object 
 2   status               1 non-null      int64  
 3   eionet               1 non-null      object 
 4   euType               1 non-null      object 
 5   euMotive             0 non-null      float64
 6   NotEuMotive          0 non-null      float64
 7   link                 1 non-null      object 
 8   WD                   1 non-null      object 
dtypes: float64(2), int64(1), object(6)
memory usage: 80.0+ bytes


In [17]:

# Show the last (up to) 10 failing rows with clickable links. Columns are
# selected with a list, not a set: a set has no defined order and pandas 2.x
# rejects set indexers with a TypeError.
HTML(EionettotError[['WD','bathwateridentifier','status','link','euType','euMotive','NotEuMotive']].tail(10).to_html(escape=False))

Unnamed: 0,WD,NotEuMotive,bathwateridentifier,link,euType,euMotive,status
0,link WD,,SE0A21493000001928,link eionet,False,,404


In [18]:
# Failing rows whose HaV record has euType set to True.
EionettotErrorEuType = EionettotError[EionettotError["euType"] == True] 
# Columns are selected with a list, not a set: a set has no defined order and
# pandas 2.x rejects set indexers with a TypeError.
HTML(EionettotErrorEuType[['WD','bathwateridentifier','status','link','euType','euMotive','NotEuMotive']].tail(10).to_html(escape=False))

Unnamed: 0,WD,NotEuMotive,bathwateridentifier,link,euType,euMotive,status


In [19]:
# Frequency of each euMotive value among the 404 rows. value_counts() leaves
# out missing values by default, so an all-null column gives an empty Series.
EionettotError["euMotive"].value_counts()

Series([], Name: euMotive, dtype: int64)

In [20]:
# (rows, columns) of the OK subset, shown again before the export below.
EionettotOk.shape

(832, 9)

In [21]:
#EionettotOk.value_counts("country")

In [22]:
# Write the three result sets as CSV files into the current working directory.
csv_exports = (
    ("BathIdentifier_Ok.csv", EionettotOk),
    ("BathIdentifier_Error.csv", EionettotError),
    ("BathIdentifier_All.csv", Eionettot),
)
for csv_name, frame in csv_exports:
    frame.to_csv(csv_name)


Generate a Markdown row for the status table at the top of this notebook, e.g.
| 20210610 | 3176 | 2240 | 936 |


In [23]:
# Print one Markdown table row: | run date | total | ok (200) | error (404) |
run_date = start_time.strftime("%Y%m%d")
print(f"| {run_date} | {len(Eionettot)} | {len(EionettotOk)} | {len(EionettotError)} |")


| 20210629 | 833 | 832 | 1 |


In [24]:
# Take one end timestamp and reuse it, so the "Ended" line and the elapsed
# time describe the same instant.
end = datetime.now()
print("Ended: ", end) 
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))

Ended:  2021-06-29 18:17:37.907875
Time elapsed (hh:mm:ss.ms) 0:00:28.714265
