Import the required packages, then select the dataset of interest from the dropdown list.

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 15 13:28:31 2018

@author: rthomas
"""
#import calendar
import datetime
#from erddapy import ERDDAP
import numpy as np
import os
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import interact
import matplotlib.pyplot as plt

# Dropdown offering the three buoy data types; the selected entry is read
# from dtype.value by the download cell.
dtype = widgets.Dropdown(
    description='Buoy data type:',
    options=['wave_spectral', 'wave_zero', 'weather'],
    disabled=False,
)

# Render the widget underneath this cell.
display(dtype)

Download the selected dataset from the Marine Institute ERDDAP instance:

In [None]:
source = 'url' # Populate with one of ['erddapy','url']

typed = dtype.value
# Set global variables without 'buoy_id', 'latitude' and 'longitude'
now = datetime.date.today()
metadata = ['station_id',
            'time']
print(typed)

# Base address of the Marine Institute ERDDAP instance
erddap_server = 'https://erddap.marine.ie/erddap'

# For each data type: (ERDDAP dataset id, first year requested, parameter list)
dataset_config = {
    'wave_spectral': ('IWaveBNetwork_spectral',
                      2008,
                      ['PeakPeriod',
                       'PeakDirection',
                       'PeakSpread',
                       'SignificantWaveHeight',
                       'EnergyPeriod',
                       'MeanWavePeriod_Tm01',
                       'MeanWavePeriod_Tm02',
                       'qcflag']),
    'wave_zero': ('IWaveBNetwork_zerocrossing',
                  2008,
                  ['Hmax',
                   'HmaxPeriod',
                   'Havg',
                   'Tavg',
                   'qcflag']),
    'weather': ('IWBNetwork',
                2001,
                ['AtmosphericPressure',
                 'WindDirection',
                 'WindSpeed',
                 'Gust',
                 'WaveHeight',
                 'WavePeriod',
                 'MeanWaveDirection',
                 'Hmax',
                 'AirTemperature',
                 'DewPoint',
                 'SeaTemperature',
                 'RelativeHumidity',
                 'QC_Flag']),
}

# Set variables based on data type (typed)
dset_id, syear, master_params = dataset_config[typed.lower()]


def _read_erddap_csv(dataset, columns, start, end):
    """Download rows with start <= time < end from a tabledap dataset.

    dataset: ERDDAP dataset id.
    columns: already URL-encoded, comma-separated list of variables.
    start, end: dates formatted as 'YYYY-MM-DD'.
    Returns a DataFrame indexed by the parsed second column ('time').
    """
    url = (erddap_server + "/tabledap/" + dataset + ".csv?" + columns
           + "&time%3E=" + start + "T00:00:00Z"
           + "&time%3C" + end + "T00:00:00Z")
    # index_col=1 is 'time' (second requested variable); skiprows=[1] drops
    # the second line of the file, which ERDDAP .csv output uses for units.
    # 'infer_datetime_format' is deprecated in pandas 2.x and is not needed.
    return pd.read_csv(url, index_col=1, header=[0], skiprows=[1],
                       parse_dates=True)


# Get data from selected source
if source.lower() not in ['erddapy','url']:
    print("Please check option has been entered correctly.")
else:
    if source.lower() == 'erddapy': # Use ERDDAP toolbox to call data from MI ERDDAP
        # Imported here because the file-level import is commented out; the
        # default 'url' route therefore still works without erddapy installed.
        from erddapy import ERDDAP

        e = ERDDAP(server=erddap_server,
                   protocol='tabledap',)
        e.response = 'csv'
        e.dataset_id = dset_id
        e.constraints = {
            'time>=': '%s-01-01T00:00:00Z' % (syear),
            'time<=': '%sT00:00:00Z' % (now.strftime('%Y-%m-%d')),
        }
        e.variables = metadata + master_params
        # NOTE(review): unlike the 'url' route, no index column or date
        # parsing is requested here, while later cells use df.index.date;
        # confirm the frame layout returned by to_pandas() before relying on
        # this route.
        df = e.to_pandas()

    else: # Manually generated ERDDAP URL call
        # Generate parameter component of URL ('%2C' is an encoded comma)
        plist = '%2C'.join(metadata + master_params)

        # Iterate by year to reduce risk of time out; the last period runs
        # from the start of this year up to and including yesterday.
        periods = [('%s-01-01' % (year), '%s-01-01' % (year + 1))
                   for year in range(syear, now.year)]
        periods.append(('%s-01-01' % (now.year), now.strftime('%Y-%m-%d')))

        frames = []
        for start, end in periods:
            if start >= end:
                # On 1 January the current-year period is empty; skip the
                # request instead of asking the server for zero rows.
                continue
            frames.append(_read_erddap_csv(dset_id, plist, start, end))
            print("Downloaded %s" % (start[:4]))
        # Concatenate once rather than growing a DataFrame inside the loop
        df = pd.concat(frames)

    data_full = df
    print("Full resolution data downloaded. Available as 'data_full'.")


Run the following cell to see a table of the full resolution data:

In [None]:
# Show the full resolution table; a bare expression on the last line of a
# notebook cell is rendered as the cell output.
data_full

Calculate data availability as a percentage of expected data per day for each variable:

In [None]:
# Utilise quality control flags to clean data set
# Code to be added...


# Add columns for date variables (the daily statistics cell groups on these
# columns as well, so they are written onto df itself)
df['Date'] = df.index.date
df['Year'] = df.index.year

group_keys = ['station_id', 'Date', 'Year']

# Number of non-null records per station and day for each requested parameter.
# Restricting to master_params keeps any helper columns added to df by other
# cells out of the availability table.
df_summ = df.groupby(group_keys)[master_params].count().reset_index()

avail_frames = []
for stn in df_summ.station_id.unique().tolist():
    # Work on an independent copy so the scaling below never writes into a
    # slice of df_summ.
    df_stn = df_summ[df_summ['station_id'] == stn].copy()

    # res is the number of records per day treated as 100 % availability:
    # 24 for weather data and for 'Westwave MK4', 48 otherwise.
    if typed.lower() == 'weather' or stn == 'Westwave MK4':
        res = 24
    else:
        res = 48
    # Replace the integer counts with percentages (whole-column assignment
    # avoids writing floats into integer columns in place).
    df_stn[master_params] = df_stn[master_params] / res * 100

    # Calendar of every day from the first day of the month of the first
    # record to the day of the last record.
    first_day = df_stn.Date.min()
    month_start = first_day - datetime.timedelta(days=first_day.day - 1)
    df_fulldates = pd.DataFrame(index=pd.date_range(month_start, df_stn.Date.max()))
    df_fulldates['Date'] = df_fulldates.index.date
    df_fulldates['Year'] = df_fulldates.index.year

    # Days without any record get 0 % availability. 'Year' is taken from the
    # calendar only, so the merge does not produce duplicate Year columns.
    df_fulldates = df_fulldates.merge(df_stn.drop(['Year'], axis=1),
                                      how='outer', on='Date').fillna(0)
    df_fulldates['station_id'] = stn

    avail_frames.append(df_fulldates)

df_avail = pd.concat(avail_frames)
df_avail = df_avail.set_index(group_keys)

# Drop the quality control flag column ('qcflag' or 'QC_Flag')
qc_columns = [item for item in master_params if 'qc' in item.lower()]
df_avail = df_avail.drop(qc_columns, axis=1)
df_avail.columns = pd.MultiIndex.from_product([df_avail.columns, ['avail']])

data_avail = df_avail
print("Daily availability generated. Available as 'data_avail'.")


Run the following cell to see a table of the full resolution data availability as a percentage of expected data per day:

In [None]:
# Show the daily availability table (percent of expected records per day).
data_avail

Calculate daily summary statistics:

In [None]:
# Split out parameter types for different summary statistics:
# quality control flags are skipped, parameters whose name contains 'Dir'
# are treated as directions (degrees), everything else as plain scalars.
params = []
param_dir = []
for item in master_params:
    if 'qc' in item.lower():
        continue
    if 'Dir' in item:
        param_dir.append(item)
    else:
        params.append(item)

#%% Take a copy of the data
# A real copy: the helper columns added below must not end up in df, which is
# the same object as data_full.
data = df.copy()

group_keys = ['station_id', 'Date', 'Year']

# Get north and east components for directional measurements
param_comp = []
for dirtn in param_dir:
    radians = np.radians(data[dirtn])
    data['%s_n' % (dirtn)] = np.cos(radians)
    param_comp.append('%s_n' % (dirtn))
    data['%s_e' % (dirtn)] = np.sin(radians)
    param_comp.append('%s_e' % (dirtn))

# Resample for summary statistics for non-directional measurements
daily = data.groupby(group_keys)[params].agg(['min','max','mean','std'])

if param_dir:
    # Daily mean of the unit-vector components (north and east)
    data2 = data.groupby(group_keys)[param_comp].mean()

    # Recalculate direction mean and std from averaged components
    # Add directly into daily dataframe
    for dirtn in param_dir:
        mean_e = data2['%s_e' % (dirtn)]
        mean_n = data2['%s_n' % (dirtn)]

        # Mean direction: angle of the mean vector, mapped onto [0, 360)
        daily[(dirtn, 'mean')] = (360 + np.degrees(np.arctan2(mean_e, mean_n))) % 360

        # Circular standard deviation sqrt(-2 ln R) in degrees, where R is the
        # length of the mean unit vector. R is clipped to 1 so rounding error
        # cannot make the logarithm positive; R == 0 (directions cancel out
        # completely) yields inf.
        resultant = np.hypot(mean_e, mean_n).clip(upper=1.0)
        with np.errstate(divide='ignore'):
            daily[(dirtn, 'std')] = np.degrees(np.sqrt(-2 * np.log(resultant)))

        # Minimum and maximum are not defined for a circular quantity
        daily[(dirtn, 'max')] = np.nan
        daily[(dirtn, 'min')] = np.nan

# Sort daily dataframe
daily = daily[sorted(daily.columns.tolist())]
data_daily = daily
print("Daily statistics generated. Available as 'data_daily'.")

Run the following cell to see a table of the daily summary statistics data:

In [None]:
# Show the daily summary statistics table.
data_daily

Plot data with interactive widgets:

In [None]:
def plotting(station, year, xaxis, yaxis, stat):
    """Scatter-plot one daily statistic against another for a station and year.

    station: label from the first index level of data_daily.
    year: value matched against the 'Year' index level.
    xaxis, yaxis: parameter names (first column level of data_daily).
    stat: statistic name (second column level of data_daily).

    Parameters whose name contains 'Dir' are always plotted with their
    'mean', because the daily table stores NaN for their 'min' and 'max'.
    A year without data for the station gives an empty plot.
    """
    plotdata = data_daily.loc[station]

    # Same rule the daily statistics cell uses to detect directional parameters
    statx = 'mean' if 'Dir' in xaxis else stat
    staty = 'mean' if 'Dir' in yaxis else stat

    # Boolean mask instead of a label slice so that a year missing for this
    # station selects zero rows rather than raising KeyError.
    in_year = plotdata.index.get_level_values('Year') == year
    x = plotdata.loc[in_year, (xaxis, statx)].values
    y = plotdata.loc[in_year, (yaxis, staty)].values

    markers = plt.plot(x, y, 'o')
    plt.setp(markers, markersize=5)
    plt.setp(markers, markerfacecolor='C0')
    plt.title("Location: %s Year: %s" % (station,year))
    plt.xlabel("%s (%s) [unit]" % (xaxis, statx))
    plt.ylabel("%s (%s) [unit]" % (yaxis, staty))

    plt.show()

# Choices offered by the widgets, read from the labels of data_daily:
# index level 0 -> stations, index level 2 -> years,
# column level 0 -> parameters, column level 1 -> statistics.
station_choices = data_daily.index.levels[0].tolist()
year_values = data_daily.index.levels[2]
parameter_choices = data_daily.columns.levels[0].tolist()
statistic_choices = data_daily.columns.levels[1].tolist()

# Re-draw the plot whenever one of the widgets changes.
interact(
    plotting,
    station=widgets.Dropdown(
        description='Location:',
        options=station_choices,
        disabled=False),
    year=widgets.IntSlider(
        description='Year:',
        min=int(year_values.min()),
        max=int(year_values.max())),
    xaxis=widgets.RadioButtons(
        description='x-axis:',
        options=list(parameter_choices),
        disabled=False),
    yaxis=widgets.RadioButtons(
        description='y-axis:',
        options=list(parameter_choices),
        disabled=False),
    stat=widgets.RadioButtons(
        description='Statistic:',
        options=statistic_choices,
        disabled=False),
)
        