In [1]:
# OPeNDAP address (Deltares THREDDS server) of the Waterbase water-level file
# for station id1-BROUWHVSGT08. The literal is split for readability only;
# adjacent string literals are concatenated into the same single URL.
url = (
    'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/waterbase/'
    '27_Waterhoogte_in_cm_t.o.v._normaal_amsterdams_peil_in_oppervlaktewater/nc/'
    'id1-BROUWHVSGT08.nc'
)

In [2]:
import datetime

import pandas as pd
import numpy as np

import xarray
import matplotlib.pyplot as plt

import statsmodels.regression.linear_model 
import statsmodels.api as sm


In [None]:
# Open the dataset that `url` points to with xarray
ds = xarray.open_dataset(filename_or_obj=url)

In [None]:
# The file contains only one location, so take entry 0 along the first
# dimension of the sea_surface_height variable.
ssh = ds['sea_surface_height'][0]

In [None]:
# Convert to a one-column DataFrame whose column is called 'ssh'.
# Series.to_frame(name=...) is used rather than pd.DataFrame(series, columns=[...]):
# if the Series coming out of to_pandas() carries a name of its own, the
# DataFrame constructor treats `columns` as a selection and the 'ssh' column
# ends up filled with NaN instead of the measurements.
# Assumes `ssh` is one-dimensional, so to_pandas() returns a Series — TODO confirm.
df = ssh.to_pandas().to_frame(name='ssh')

In [None]:
# Count the measurements per calendar year to spot incomplete years.
# This data is up to date until 2018-09; after that Waterbase is no longer
# available, and the updates based on DDL are not yet available.
counts = df.resample('A', label='left').count()
# Show the first and the last three years
print(counts.head(n=3), counts.tail(n=3), sep='\n')

In [None]:
# Drop incomplete years: keep 1980-01-01 00:00 up to (not including) 2018-01-01.
# The lower bound is inclusive so that a sample stamped exactly at midnight on
# 1980-01-01 is still counted as part of 1980.
df = df[
    (df.index >= datetime.datetime(1980, 1, 1))
    & (df.index < datetime.datetime(2018, 1, 1))
]


In [None]:
# Yearly aggregates of the series, each bin labelled with its right edge
annual_mean_df = df.resample('A', label='right').mean()
annual_max_df = df.resample('A', label='right').max()

# Calendar year of every bin as an ordinary column; `year` is left holding
# the years of the annual-max frame, as before.
annual_mean_df['year'] = [stamp.year for stamp in annual_mean_df.index]
year = [stamp.year for stamp in annual_max_df.index]
annual_max_df['year'] = year

# m to mm (factor 1000), because sealevelmonitor is in mm.
# NOTE(review): the dataset path in `url` mentions cm — confirm the unit of ssh.
annual_mean_df['height'] = annual_mean_df['ssh'].mul(1000)
annual_max_df['height'] = annual_max_df['ssh'].mul(1000)


In [None]:
# Annual means as black dots (labels are a bit off)
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(annual_mean_df['year'], annual_mean_df['height'], 'k.')
ax.set(
    ylabel="{} [{}]".format(ssh.standard_name, 'mm'),
    title='Annual mean',
);

In [None]:
# Annual maxima as black dots; each value is labelled at the end of its year
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(annual_max_df.year, annual_max_df.height, 'k.')
ax.set(
    ylabel="{} [{}]".format(ssh.standard_name, 'mm'),
    title='Annual max (values at end of period)',
);

In [None]:
# define the statistical model
def linear_model(df, with_ar=True, epoch=1970, nodal_period=18.613):
    """Fit a linear trend plus a nodal-cycle harmonic to annual water levels.

    The design matrix consists of a constant, the number of years elapsed
    since ``epoch`` and a cosine/sine pair with a period of ``nodal_period``
    years evaluated on those elapsed years.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'year'`` (regressor) and ``'height'``
        (response).
    with_ar : bool, default True
        When true a ``sm.GLSAR`` model with ``rho=1`` is built, otherwise a
        plain ``sm.OLS`` model. Both are created with ``missing='drop'``.
    epoch : float, default 1970
        Year subtracted from ``df['year']`` before building the regressors.
    nodal_period : float, default 18.613
        Period, in years, of the cosine/sine terms.

    Returns
    -------
    fit
        Result of ``model.fit(cov_type='HC0')``.
    names : list of str
        Labels for the four coefficients, in design-matrix column order.
    """
    y = df['height']
    # Shared by all three regressors, so compute it once.
    years_since_epoch = df['year'] - epoch
    nodal_phase = 2 * np.pi * years_since_epoch / nodal_period
    X = np.c_[
        years_since_epoch,
        np.cos(nodal_phase),
        np.sin(nodal_phase),
    ]
    names = ['Constant', 'Trend', 'Nodal U', 'Nodal V']
    X = sm.add_constant(X)
    if with_ar:
        # NOTE(review): rho=1 presumably requests an AR(1) error structure, but a
        # single fit() call may leave the AR coefficient at its initial value;
        # iterative_fit() would estimate it — confirm the intent.
        model = sm.GLSAR(y, X, missing='drop', rho=1)
    else:
        model = sm.OLS(y, X, missing='drop')
    fit = model.fit(cov_type='HC0')
    return fit, names

In [None]:
# Fit the model to the annual means (with_ar is left at its default)
fit, names = linear_model(df=annual_mean_df)
# Prediction results object of the fit, obtained without passing new data
prediction = fit.get_prediction()
# Regression table, with the coefficient rows labelled by `names`
fit.summary(xname=names)


In [None]:
# Interval bounds from the prediction results: column 0 is used as the lower
# edge of each band and column 1 as the upper edge.
conf_int = prediction.conf_int()
pred_int = prediction.conf_int(obs=True)

# Observations, both bands and the fitted line on one axis
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(annual_mean_df['year'], annual_mean_df['height'], 'k.')
ax.set(
    ylabel="{} [{}]".format(ssh.standard_name, 'mm'),
    title='Annual mean',
)
# Transposing the two-column arrays lets them unpack into (lower, upper)
ax.fill_between(
    annual_mean_df['year'], *pred_int.T,
    alpha=0.1, color='blue', label='prediction interval',
)
ax.fill_between(
    annual_mean_df['year'], *conf_int.T,
    alpha=0.3, color='blue', label='confidence interval',
)
ax.plot(annual_mean_df['year'], fit.predict(), color='blue', label='fitted trend')
ax.legend()
ax.grid(True)
