This notebook computes the monthly and yearly mean water level at the tidal gauges, using the raw data from RWS (Rijkswaterstaat).

In [1]:
import pandas as pd
import numpy as np
import glob
import os
import matplotlib
from matplotlib import pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

In [2]:
# Input and output locations, relative to this notebook.
ipath = '../../data/ddl/raw/'  # the raw RWS files must be located in this folder
opath = '../../data/ddl/postprocessed/'  # postprocessed output folder (create it beforehand)

# Tidal gauges to process.
tidal_gauges = [
    'Delfzijl',
    'Den Helder',
    'Harlingen',
    'Hoek van Holland',
    'Vlissingen',
]

# First and last year of raw data to read (both inclusive).
start_year = 2016
end_year = 2019

# Station metadata; only the station name and its location code are loaded.
codes = pd.read_csv(
    '../../data/ddl/waterhoogteMetadata.csv',
    usecols=['Naam', 'Code'],
)

In [7]:
# For every tidal gauge: read the per-year raw files of all matching location
# codes, aggregate the water level to monthly and yearly means, and write the
# resulting tables plus a two-panel figure to the output folder.

# Make sure the plot folder exists up front; savefig does not create it.
plot_dir = os.path.join(opath, 'plots')
os.makedirs(plot_dir, exist_ok=True)

for gauge in tidal_gauges:
    print(gauge)

    # Plain substring match on the station name (no regex interpretation);
    # metadata rows with a missing name never match.
    name_match = codes['Naam'].str.contains(gauge, regex=False, na=False)
    locs = codes.loc[name_match, 'Code'].values

    # Collect one frame per (location code, year) file that is present on disk.
    frames = []
    for c in locs:
        for y in range(start_year, end_year + 1):
            filename = '{}_WATHTE_NAP_{}.csv'.format(c, y)
            if not os.path.isfile(ipath + filename):
                continue
            df = pd.read_csv(ipath + filename,
                             usecols=['Tijdstip',
                                      'Meetwaarde.Waarde_Numeriek',
                                      'locatie_code'])
            df = df.rename(columns={'Tijdstip': 'time',
                                    'Meetwaarde.Waarde_Numeriek': 'Water_level'})
            df['time'] = pd.to_datetime(df['time'], utc=True)
            frames.append(df)

    # pd.concat raises on an empty list, so skip gauges without any raw file
    # instead of aborting the whole run.
    if not frames:
        print('  no raw files found for {}, skipping'.format(gauge))
        continue

    # Drop observations with a missing timestamp and order chronologically.
    data = pd.concat(frames)
    data = data[data['time'].notnull()].sort_values(by='time')

    # Monthly and yearly aggregations. Only the numeric water level is
    # averaged: the string column 'locatie_code' cannot be averaged and makes
    # .mean() raise on recent pandas versions.
    # NOTE(review): recent pandas releases deprecate the 'M'/'Y' aliases in
    # favour of 'ME'/'YE'; switch once the pinned pandas version allows it.
    levels = data.set_index('time')[['Water_level']]
    monthly = levels.resample('M').mean()
    yearly = levels.resample('Y').mean()

    # Fill gaps (periods without observations) by linear interpolation.
    # This is done on the aggregates to reduce bias in the estimate. The
    # result is assigned back because an in-place call on a column selection
    # is not guaranteed to modify the parent frame.
    monthly['Water_level'] = monthly['Water_level'].interpolate(method='linear')
    yearly['Water_level'] = yearly['Water_level'].interpolate(method='linear')

    monthly.to_csv(opath + 'month_agg_water_level_{}.csv'.format(gauge))
    yearly.to_csv(opath + 'year_agg_water_level_{}.csv'.format(gauge))

    # Plots: monthly means on the left, yearly means on the right.
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    monthly.plot(legend=False, ax=axes[0])
    yearly.plot(legend=False, ax=axes[1])
    axes[0].set_ylabel('Monthly water level [cm NAP] at {}'.format(gauge))
    axes[1].set_ylabel('Yearly water level [cm NAP] at {}'.format(gauge))
    fig.savefig(os.path.join(plot_dir, '{}.png'.format(gauge)), bbox_inches='tight')
    plt.close(fig)  # release the figure so memory does not grow over the loop

Delfzijl
Den Helder
Harlingen
Hoek van Holland
Vlissingen
