In [1]:
import itertools
import functools

import pandas as pd
import slr.psmsl
import slr.wind

In [2]:
# PSMSL dataset used throughout the notebook
default_dataset_name = 'rlr_annual'

# Axes of the work grid: every station label is combined with every wind
# variant and every model variant.
# NOTE(review): `stations` is rebound to a DataFrame further down in this
# notebook, so this list is only valid until that cell has run.
stations = [
    'NL',
    'NL-Delfzijl',
    'Vlissingen',
    'Hoek van Holland',
    'Den Helder',
    'Delfzijl',
    'Harlingen',
    'IJmuiden',
]
wind_variants = [
    'NCEP1',
    '20CR',
    'Combined',
    'GTSM',
]
model_variants = [
    'linear',
    'polynomial',
    'broken_linear',
    'broken_polynomial',
    'broken_discontinuous',
    'loess',
]


In [3]:
# One row per (station, wind, model) combination, stations varying slowest
# and models fastest.
work_rows = [
    (station, wind, model)
    for station in stations
    for wind in wind_variants
    for model in model_variants
]
work_df = pd.DataFrame(work_rows, columns=['station', 'wind', 'model'])
work_df

Unnamed: 0,station,wind,model
0,NL,NCEP1,linear
1,NL,NCEP1,polynomial
2,NL,NCEP1,broken_linear
3,NL,NCEP1,broken_polynomial
4,NL,NCEP1,broken_discontinuous
...,...,...,...
187,IJmuiden,GTSM,polynomial
188,IJmuiden,GTSM,broken_linear
189,IJmuiden,GTSM,broken_polynomial
190,IJmuiden,GTSM,broken_discontinuous


# Stations
Let's first collect the metadata for all stations. We have the main stations and aggregations that combine multiple stations.

In [4]:
# zip archives, indexed by dataset name
zipfiles = slr.psmsl.get_zipfiles()
# the archive for the dataset used in this notebook
zipfile = zipfiles[default_dataset_name]

# the list of main stations
main_stations = slr.psmsl.get_main_stations()
# the station list read from our archive
# NOTE(review): this rebinds `stations`, which earlier held the list of
# station labels used to build work_df.
stations = slr.psmsl.get_station_list(zipfile, dataset_name=default_dataset_name)

# Keep only the stations present in both tables (joined on the index);
# overlapping column names get the _rws / _psmsl suffix.
# Alternative: use stations.coastline_code == 150 for all dutch stations.
selected_stations = main_stations.merge(
    stations,
    left_index=True,
    right_index=True,
    suffixes=('_rws', '_psmsl'),
)

# these are the individual stations
selected_stations

Unnamed: 0_level_0,name_rws,msl-rlr,msl-nap,nap-rlr,alpha,ddl_id,location,psmsl_id,foundation_low,station_low,...,links,lat,lon,name_psmsl,coastline_code,station_code,quality,met_monthly,rlr_monthly,rlr_annual
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20,Vlissingen,6976,46,6930,118,VLISSGN,Vlissingen,20,-17.6,2.5,...,[{'href': 'http://gnss1.tudelft.nl/dpga/statio...,51.442222,3.596111,VLISSINGEN,150,101,N,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...
22,Hoek van Holland,6987,114,6873,117,HOEKVHLD,Hoek van Holland,22,-3.3,5.27,...,[{'href': 'http://geodesy.unr.edu/NGLStationPa...,51.9775,4.12,HOEK VAN HOLLAND,150,51,N,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...
23,Den Helder,6962,16,6946,107,DENHDR,Den Helder,23,-5.0,5.0,...,[],52.964444,4.745,DEN HELDER,150,31,N,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...
24,Delfzijl,6953,130,6823,117,DELFZL,Delfzijl,24,-20.0,1.85,...,[{'href': 'http://geodesy.unr.edu/NGLStationPa...,53.326389,6.933056,DELFZIJL,150,1,N,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...
25,Harlingen,7024,110,6914,108,HARLGN,Harlingen,25,-5.4,5.55,...,[],53.175556,5.409444,HARLINGEN,150,21,N,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...
32,IJmuiden,7014,64,6950,110,IJMDBTHVN,IJmuiden,32,-13.0,4.2,...,[{'href': 'http://gnss1.tudelft.nl/dpga/statio...,52.462222,4.554722,IJMUIDEN,150,41,N,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...,http://www.psmsl.org/data/obtaining/rlr.diagra...


In [5]:
# Station ids that make up each aggregate ("mean") station:
# 'NL' uses every selected station, 'NL-Delfzijl' leaves out Delfzijl.
not_delfzijl = selected_stations['name_rws'] != "Delfzijl"
stations_idx = {
    'NL': list(selected_stations.index),
    'NL-Delfzijl': list(selected_stations.loc[not_delfzijl].index),
}
stations_idx

{'NL': [20, 22, 23, 24, 25, 32], 'NL-Delfzijl': [20, 22, 23, 25, 32]}

# Wind variants

In [6]:
# Wind products and their annual counterparts, both keyed by product name
wind_products, annual_wind_products = slr.wind.get_annual_wind_products()

# GTSM comes from a separate loader and is only added to the annual products
gtsm_df = slr.wind.get_gtsm_df()
annual_wind_products['GTSM'] = gtsm_df

# show which wind variants are available
annual_wind_products.keys()


found point 52.3799 3.75
found point 53.0 3.0


dict_keys(['NCEP1', '20CR', 'Combined', 'GTSM'])

# Wind x Stations
Now we can create a dataset with measurements per `(station, wind)` combination.

In [11]:
# Reuse the notebook-wide dataset name instead of repeating the literal.
dataset_name = default_dataset_name

# Station data for every processed wind product:
# wind product name -> list with one entry per row of selected_stations
# (same order as selected_stations.index).
station_data_by_wind = {}

# The lookup below never uses the station label from work_df; it always walks
# every row of selected_stations. Grouping by (station, wind) therefore
# repeated the identical load once per station label, and each pass overwrote
# the previous result. Visit each wind product exactly once instead; sorting
# keeps the order in which the former groupby visited the wind products.
for wind_product in sorted(work_df['wind'].unique()):
    # GTSM is skipped. Only annual_wind_products gets a 'GTSM' entry in this
    # notebook, so wind_products['GTSM'] presumably does not exist -- TODO confirm.
    if wind_product == 'GTSM':
        continue
    annual_wind_df = annual_wind_products[wind_product]
    wind_df = wind_products[wind_product]
    get_station_data = functools.partial(
        # this function fills in missing wind with nan
        slr.psmsl.get_data_with_wind,
        dataset_name=dataset_name,
        wind_df=wind_df,
        # don't include year otherwise we get year_x and year_y
        annual_wind_df=annual_wind_df.drop(columns=['year']),
        zipfiles=zipfiles,
    )
    # look up the data for each station and keep it under its wind product
    station_data_by_wind[wind_product] = [
        get_station_data(station) for _, station in selected_stations.iterrows()
    ]
    # Backward compatibility: as before, the column named after the dataset is
    # replaced on every pass, so it ends up holding the data of the last
    # processed wind product. Use station_data_by_wind to reach the others.
    selected_stations[dataset_name] = station_data_by_wind[wind_product]

In [None]:
# NOTE(review): leftover inspection cell -- shows the wind product that the
# loop in the previous cell handled last. Safe to delete once verified.
wind_product