In [2]:
import requests
import pandas as pd
import sqlite3
import dateparser as parser

In [3]:
server = 'https://pubgeo.zwemwater.nl/geoserver/zwr_public/wfs'
dbConnection = sqlite3.connect("data.db")

In [4]:
# Location data
body = f"""
    <GetFeature xmlns="http://www.opengis.net/wfs" service="WFS" version="1.1.0" outputFormat="application/json" xsi:schemaLocation="http://www.opengis.net/wfs http://schemas.opengis.net/wfs/1.1.0/wfs.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
        <Query typeName="zwr_public:zwemplekken_details" srsName="EPSG:28992" xmlns:zwr_public="https://pubgeo.zwemwater.nl/geoserver/zwr_public">
            <Filter xmlns="http://www.opengis.net/ogc">
                <PropertyIsEqualTo>
                    <PropertyName>zwemwaterlocatie_id</PropertyName>
                    <Literal>6102791</Literal>
                </PropertyIsEqualTo>
            </Filter>
        </Query>
    </GetFeature>
"""
response = requests.post(server, body)
details = [item['properties'] for item in response.json()['features']]


In [5]:
def convert_status(input):
    if input == "WAARSCHUWING":
        return "Waarschuwing"
    elif input == "NEGATIEF_ZWEMADVIES":
        return "Negatief zwemadvies"
    else:
        return input.title()


In [6]:
details = details[0]
naam = details['naam']
if details['korte_naam']:
    naam = details['korte_naam']

status = convert_status(details['status'])
id = details['zwemwaterlocatie_id']
plaats = details['adr_woonplaats'] 

print(id, naam, plaats, status)

6102791 Sloterstrand Amsterdam Goed


In [7]:
# Locations
body = """
    <GetFeature xmlns="http://www.opengis.net/wfs" service="WFS" version="1.1.0" outputFormat="application/json" xsi:schemaLocation="http://www.opengis.net/wfs http://schemas.opengis.net/wfs/1.1.0/wfs.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
        <Query typeName="zwr_public:zwemplekken_details" srsName="EPSG:28992" xmlns:zwr_public="https://pubgeo.zwemwater.nl/geoserver/zwr_public">
        </Query>
    </GetFeature>
"""
response = requests.post(server, body)
details = [item['properties'] for item in response.json()['features']]
locations = pd.DataFrame(details, columns=["zwemwaterlocatie_id", "naam", "korte_naam", "status"])
locations['korte_naam'] = locations['korte_naam'].fillna(locations['naam'])
locations = locations.drop("naam", axis=1)
locations

Unnamed: 0,zwemwaterlocatie_id,korte_naam,status
0,7350900,'s-Gravenzande,goed
1,955,Heide,goed
2,1007,Berkenven,goed
3,1005,De Tolplas,goed
4,7350750,Hartje Groen,goed
...,...,...,...
940,1522,Recreatiestrand Porta Isola,goed
941,1560,Mirrorstrand Hemmeland,goed
942,1598,Strand Uitdam,goed
943,7942900,Rakelbos,goed


In [8]:
# EU status
body = f"""
    <GetFeature xmlns="http://www.opengis.net/wfs" service="WFS" version="1.1.0" outputFormat="application/json" xsi:schemaLocation="http://www.opengis.net/wfs http://schemas.opengis.net/wfs/1.1.0/wfs.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <Query typeName="zwr_public:eustatussen" srsName="EPSG:28992" xmlns:zwr_public="https://pubgeo.zwemwater.nl/geoserver/zwr_public">

    </Query>
    </GetFeature>
"""
response = requests.post(server, body)
details = [item['properties'] for item in response.json()['features']]

df = pd.DataFrame(details, columns=["zwemwaterlocatie_id", "jaar", "omschrijving"])
latest_status = df.sort_values('jaar')\
                    .groupby('zwemwaterlocatie_id')\
                    .tail(1)\
                    .drop("jaar", axis=1)\
                    .rename({'omschrijving': 'historie'}, axis=1)


In [9]:
data = locations.merge(latest_status, on="zwemwaterlocatie_id")

In [10]:
data

Unnamed: 0,zwemwaterlocatie_id,korte_naam,status,historie
0,7350900,'s-Gravenzande,goed,uitstekend
1,955,Heide,goed,goed
2,1007,Berkenven,goed,goed
3,1005,De Tolplas,goed,aanvaardbaar
4,7350750,Hartje Groen,goed,uitstekend
...,...,...,...,...
930,6267820,"Opgang Vliegerpad, Recreatiestrand, zone 3",goed,uitstekend
931,6267820,"Kitesurfpad, Aktiviteitenstrand, zone 2",goed,uitstekend
932,6267820,"Golfsurfpad, Recreatiestrand, zone 1",goed,uitstekend
933,6267820,"Golfsurfpad, Recreatiestrand, zone 1",goed,uitstekend


In [11]:
data.query("zwemwaterlocatie_id == 6102791")


Unnamed: 0,zwemwaterlocatie_id,korte_naam,status,historie
853,6102791,Sloterstrand,goed,slecht


In [16]:
# Measurements
body = f"""
    <GetFeature xmlns="http://www.opengis.net/wfs" service="WFS" version="1.1.0" outputFormat="application/json" xsi:schemaLocation="http://www.opengis.net/wfs http://schemas.opengis.net/wfs/1.1.0/wfs.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <Query typeName="zwr_public:resultaatsen" srsName="EPSG:28992" xmlns:zwr_public="https://pubgeo.zwemwater.nl/geoserver/zwr_public">

    </Query>
    </GetFeature>
"""
response = requests.post(server, body)
details = [item['properties'] for item in response.json()['features']]

df = pd.DataFrame(details, columns=[
                  "zwemwaterlocatie_id", "type_object_code", "object_begin_tijd", "numerieke_waarde"])
df['object_begin_tijd'] = pd.to_datetime(df['object_begin_tijd'])



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13611 entries, 0 to 13610
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype              
---  ------               --------------  -----              
 0   zwemwaterlocatie_id  13611 non-null  int64              
 1   type_object_code     13611 non-null  object             
 2   object_begin_tijd    13611 non-null  datetime64[ns, UTC]
 3   numerieke_waarde     13611 non-null  int64              
dtypes: datetime64[ns, UTC](1), int64(2), object(1)
memory usage: 425.5+ KB


Unnamed: 0,zwemwaterlocatie_id,type_object_code,object_begin_tijd,numerieke_waarde
0,1619,E_COLI,2022-04-19 11:54:00+00:00,15
1,1263,E_COLI,2022-04-19 12:11:00+00:00,15
2,1624,E_COLI,2022-04-19 11:09:00+00:00,15
3,1625,E_COLI,2022-04-19 08:05:00+00:00,15
4,1626,E_COLI,2022-04-19 10:55:00+00:00,15


In [23]:
from itertools import groupby


latest_measurements = df.sort_values('object_begin_tijd', ascending=False)\
    .groupby(['zwemwaterlocatie_id', 'type_object_code'])
latest_measurements.head()

Unnamed: 0,zwemwaterlocatie_id,type_object_code,object_begin_tijd,numerieke_waarde
6714,1654,E_COLI,2022-09-30 07:10:00+00:00,143
13494,1654,INTTNLETRCCN,2022-09-30 07:10:00+00:00,15
6713,1653,E_COLI,2022-09-30 06:44:00+00:00,94
13493,1653,INTTNLETRCCN,2022-09-30 06:44:00+00:00,15
6558,1230,E_COLI,2022-09-29 12:45:32+00:00,1100
...,...,...,...,...
8101,1249,INTTNLETRCCN,2022-05-16 06:48:49+00:00,15
8102,299241,INTTNLETRCCN,2022-05-16 06:48:49+00:00,15
8028,1570,INTTNLETRCCN,2022-05-16 06:20:00+00:00,15
1204,1570,E_COLI,2022-05-16 06:20:00+00:00,15


In [24]:
latest_measurements.query("zwemwaterlocatie_id == 6102791")


AttributeError: 'DataFrameGroupBy' object has no attribute 'query'