In [194]:
import requests
import numpy as np
import pandas
import bokeh.charts
import io
import bokeh.io
import bokeh.plotting
import bokeh.tile_providers
import zipfile
import pyproj 
import skimage.io
from ipywidgets import Image
import IPython.display
import bokeh.palettes

# Coordinate reference systems used further down: the station positions are
# longitude/latitude values (EPSG:4326) and are converted to EPSG:3857
# before they are drawn on the tiled map.
# NOTE(review): Proj(init=...) is deprecated in pyproj >= 2 -- confirm the
# installed version before modernizing, the axis order handling differs.
WEBMERCATOR = pyproj.Proj(init='epsg:3857')
WGS84 = pyproj.Proj(init='epsg:4326')

# render the bokeh figures inline in the notebook
bokeh.io.output_notebook()

In [20]:
# download locations of the PSMSL archives, keyed by dataset name
urls = {
    'metric_monthly': 'http://www.psmsl.org/data/obtaining/met.monthly.data/met_monthly.zip',
    # the monthly RLR archive is served from rlr.monthly.data
    # (rlr.annual.data only holds the annual archive)
    'rlr_monthly': 'http://www.psmsl.org/data/obtaining/rlr.monthly.data/rlr_monthly.zip',
    'rlr_annual': 'http://www.psmsl.org/data/obtaining/rlr.annual.data/rlr_annual.zip'
}
# dataset used in the rest of the notebook; it is also the name of the
# top-level folder inside the downloaded zip file
dataset_name = 'rlr_annual'
resp = requests.get(urls[dataset_name])
# stop here with a clear HTTP error instead of failing later on when the
# response body turns out not to be a zip file
resp.raise_for_status()

In [21]:
# keep the archive in memory: wrap the downloaded bytes in a file-like object
stream = io.BytesIO(resp.content)
# open it as a zip archive; the individual members are read with zf.read below
zf = zipfile.ZipFile(stream)

In [243]:
# filelist.txt holds one ';'-separated record per station:
# station ID, latitude, longitude, station name, coastline code, station code, and quality flag
csvtext = zf.read('{}/filelist.txt'.format(dataset_name))

station_columns = ('id', 'lat', 'lon', 'name', 'coastline_code', 'station_code', 'quality')
# strip the padding from the text fields; names are additionally capitalized
station_converters = {
    'name': lambda text: text.strip().capitalize(),
    'quality': str.strip,
}
stations = pandas.read_csv(
    io.BytesIO(csvtext),
    sep=';',
    names=station_columns,
    converters=station_converters,
).set_index('id')

# coastline code 150 selects the dutch stations in the PSMSL database;
# work on a copy so the extra columns do not end up in `stations`
is_dutch = stations['coastline_code'] == 150
dutch_stations = stations.loc[is_dutch].copy()
# flag the main stations (listed by station id)
main_stations = [24, 23, 32, 22, 20, 25]
dutch_stations['main'] = False
dutch_stations.loc[main_stations, 'main'] = True
dutch_stations

Unnamed: 0_level_0,lat,lon,name,coastline_code,station_code,quality,main
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
24,53.326389,6.933056,Delfzijl,150,1,N,True
236,53.363056,5.22,West-terschelling,150,11,N,False
25,53.175556,5.409444,Harlingen,150,21,N,True
23,52.964444,4.745,Den helder,150,31,N,True
32,52.462222,4.554722,Ijmuiden,150,41,N,True
22,51.9775,4.12,Hoek van holland,150,51,N,True
9,51.9175,4.249722,Maassluis,150,61,N,False
1551,51.619722,3.681944,Roompot buiten,150,90,N,False
20,51.442222,3.596111,Vlissingen,150,101,N,True


In [244]:
# Map with all the stations of the dataset.
# The corners of the initial view are given as (lat, lon); the transform is
# called with the longitude first, hence the swapped indices.
sw = (50, -5)
ne = (55, 10)
sw_wm = pyproj.transform(WGS84, WEBMERCATOR, sw[1], sw[0])
ne_wm = pyproj.transform(WGS84, WEBMERCATOR, ne[1], ne[0])
# the plot ranges run from the south-west to the north-east corner
x_range = (sw_wm[0], ne_wm[0])
y_range = (sw_wm[1], ne_wm[1])
fig = bokeh.plotting.figure(
    tools='pan, wheel_zoom',
    plot_width=600,
    plot_height=200,
    x_range=x_range,
    y_range=y_range
)
# the axes would show projected coordinates, so hide them
fig.axis.visible = False
# background tiles
fig.add_tile(bokeh.tile_providers.STAMEN_TERRAIN)
# project the station positions and draw them as circles
lon = np.array(stations.lon)
lat = np.array(stations.lat)
x, y = pyproj.transform(WGS84, WEBMERCATOR, lon, lat)
fig.circle(x, y)
bokeh.io.show(fig)

In [245]:
# Name templates for everything that belongs to a single station: the files
# inside the downloaded archive (read with zf.read) and the url of the station
# page. Fill them in with str.format(dataset=..., id=...).
names = dict(
    datum='{dataset}/RLR_info/{id}.txt',
    diagram='{dataset}/RLR_info/{id}.png',
    url='http://www.psmsl.org/data/obtaining/rlr.diagrams/{id}.php',
    data='{dataset}/data/{id}.rlrdata',
    doc='{dataset}/docu/{id}.txt',
    contact='{dataset}/docu/{id}_auth.txt',
)

In [246]:
def get_url(station):
    """Return the url of the page with the station information (diagram and datum).

    `station` is a single row of the stations table.
    """
    # on a row, .name is the row label (the station id taken from the index),
    # not the value stored in the 'name' column
    return names['url'].format(dataset=dataset_name, id=station.name)
# build the url row by row (axis=1 hands every row to get_url) and show the result
dutch_stations['url'] = dutch_stations.apply(get_url, axis=1)
dutch_stations

Unnamed: 0_level_0,lat,lon,name,coastline_code,station_code,quality,main,url
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
24,53.326389,6.933056,Delfzijl,150,1,N,True,http://www.psmsl.org/data/obtaining/rlr.diagra...
236,53.363056,5.22,West-terschelling,150,11,N,False,http://www.psmsl.org/data/obtaining/rlr.diagra...
25,53.175556,5.409444,Harlingen,150,21,N,True,http://www.psmsl.org/data/obtaining/rlr.diagra...
23,52.964444,4.745,Den helder,150,31,N,True,http://www.psmsl.org/data/obtaining/rlr.diagra...
32,52.462222,4.554722,Ijmuiden,150,41,N,True,http://www.psmsl.org/data/obtaining/rlr.diagra...
22,51.9775,4.12,Hoek van holland,150,51,N,True,http://www.psmsl.org/data/obtaining/rlr.diagra...
9,51.9175,4.249722,Maassluis,150,61,N,False,http://www.psmsl.org/data/obtaining/rlr.diagra...
1551,51.619722,3.681944,Roompot buiten,150,90,N,False,http://www.psmsl.org/data/obtaining/rlr.diagra...
20,51.442222,3.596111,Vlissingen,150,101,N,True,http://www.psmsl.org/data/obtaining/rlr.diagra...


In [247]:
def missing2nan(value, missing=-99999):
    """Convert `value` to a float, mapping the `missing` marker to NaN.

    `value` may be anything float() accepts (e.g. a text field from the csv
    file); the comparison with `missing` is done after the conversion.
    """
    number = float(value)
    return np.nan if number == missing else number

def get_data(station):
    """Read the measurements of one station from the downloaded archive.

    `station` is a single row of the stations table. Returns a DataFrame with
    the columns year, height, interpolated and flags, plus a `station` column
    holding the station id. Heights equal to the missing marker of
    missing2nan come back as NaN.
    """
    # on a row, .name is the row label, i.e. the station id
    station_id = station.name
    member = names['data'].format(dataset=dataset_name, id=station_id)
    raw = zf.read(member)
    df = pandas.read_csv(
        io.BytesIO(raw),
        sep=';',
        names=('year', 'height', 'interpolated', 'flags'),
        converters={
            "height": missing2nan,
            "interpolated": str.strip,
        },
    )
    df['station'] = station_id
    return df
# one DataFrame per station, stored as an object in the 'data' column
# (the list follows the row order of dutch_stations)
dutch_stations['data'] = [get_data(station) for _, station in dutch_stations.iterrows()]

In [248]:
# measured heights of the main stations, one line per station
fig = bokeh.plotting.figure(x_range=(1890, 2020))
colors = bokeh.palettes.Accent[8]

main_rows = dutch_stations[dutch_stations.main]
# zip pairs every station with a color and stops at the shorter of the two
for color, (id_, station) in zip(colors, main_rows.iterrows()):
    data = station['data']
    fig.line(data['year'], data['height'], line_color=color, legend=station['name'])
fig.legend.location = "top_left"
bokeh.io.show(fig)


In [249]:
# stack the series of the main stations and average the height per year
# NOTE(review): years in which a station has no (or a missing) value are
# presumably averaged over the remaining stations only -- confirm this is intended
main_series = dutch_stations.loc[dutch_stations.main, 'data'].tolist()
mean_df = (
    pandas.concat(main_series)[['year', 'height']]
    .groupby('year')
    .mean()
    .reset_index()
)


In [251]:
# add the mean to the figure created for the main stations;
# running this cell again draws the same line once more on that figure
fig.line(mean_df['year'], mean_df['height'], line_width=3, legend='Mean')
bokeh.io.show(fig)