# NOAA Tides and Currents data exploration

This notebook uses the NOAA Tides and Currents data to create a custom data explorer.  
It is intended as a proof of concept and demonstration of capabilities in Panel and  
Holoviews.   
  
In this notebook you can see that we can interactively build our individual dashboard  
pages here in the notebook. Then, once we are happy with the results, we can move on  
to building a multi-page app by stringing together the individual dashboard pages.  
We add the `.servable()` suffix to the final Panel object we want as our  
final dashboard. Finally, this notebook can then be deployed via the command line:  
`panel serve water_dashboard.ipynb`. Alternatively, it can be served via CDSDashboards  
on a JupyterHub or Nebari server.  
  
  
There are plenty of edge cases where data may not be available for a given  
gauge or data type. We knowingly bypass these with a try/except since this is intended  
for demonstration purposes only.   

Conda environment: `water_dashboard.yml`

In [None]:
import pandas as pd
import numpy as np
import noaa_coops as nc
import requests
import geopandas as gpd
import geoviews as gv
import holoviews as hv
import geoviews.tile_sources as gvts
import param
import panel as pn
import rich
from rich import print
import datetime as dt
from io import StringIO

# noaa_coops package has some deprecation warnings
# NOTE: this filter is global, so every FutureWarning (not only those raised
# by noaa_coops) is silenced from here on.
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Set Panel's global default sizing mode so components stretch to the
# available width.
pn.config.sizing_mode = 'stretch_width'

# Initialise the Panel extension and the GeoViews extension (bokeh backend).
pn.extension()
gv.extension('bokeh')


In [None]:
METADATA_URL = 'https://api.tidesandcurrents.noaa.gov/mdapi/prod/webapi/stations/.json'

def get_station_metadata():
    """Collect gauge metadata including name, id, lat/long, etc.

    Downloads the station list from ``METADATA_URL`` and flattens the fields
    that arrive as nested dictionaries so each one holds only the value stored
    under its ``'self'`` key.

    Returns:
        pandas.DataFrame: one row per entry in the response's ``'stations'``
        list.

    Raises:
        requests.RequestException: if the request times out, the connection
            fails, or the server answers with an HTTP error status.
        KeyError: if the response has no ``'stations'`` entry.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever;
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(METADATA_URL, timeout=30)
    response.raise_for_status()
    raw_data = response.json()

    # define columns which hold dictionaries
    dict_cols = ['details', 'sensors','floodlevels','datums','supersededdatums','harmonicConstituents','benchmarks','tidePredOffsets','nearby','products','disclaimers','notices']
    # remove unnecessary embedding (edits the original dict)
    for station in raw_data['stations']:
        for col in dict_cols:
            nested = station.get(col)
            # Tolerate stations where a field is missing or is not a dict:
            # missing fields become None, non-dict values are kept as they are.
            station[col] = nested.get('self') if isinstance(nested, dict) else nested

    return pd.DataFrame(raw_data['stations'])

In [None]:
class StationSelect(param.Parameterized):
    """Pipeline stage that lets the user pick gauges on an annotated map.

    The station table is fetched with ``get_station_metadata`` when the
    instance is created. The rows selected through the annotator are exposed
    as the ``selected_stations`` output.
    """

    # Filled in by ``output`` with the annotator's current selection.
    selected_stations = param.DataFrame()

    def __init__(self, **params):
        # Download the station table before param initialisation runs.
        self.df = get_station_metadata()
        super().__init__(**params)

        # Subset of metadata columns drawn on the map / shown in the table.
        self.show_cols = [
            'tidal',
            'state',
            'id',
            'name',
            'lat',
            'lng',
            'affiliations',
        ]

    @param.output('selected_stations')
    def output(self):
        """Store the annotator's selection as a DataFrame and return it.

        Relies on ``self.annotator``, which is only created by
        ``view_stations``.
        """
        self.selected_stations = self.annotator.selected.dframe()
        return self.selected_stations

    def view_stations(self):
        """Build the annotated map of stations over an OSM tile layer."""
        point_opts = dict(tools=['hover'], active_tools=['wheel_zoom'], size=2)
        station_points = gv.Points(
            self.df[self.show_cols],
            kdims=['lng', 'lat'],
        ).options(**point_opts)

        self.annotator = hv.annotate.instance()

        base_map = hv.element.tiles.OSM()
        return self.annotator(
            base_map * station_points,
            name="NOAA Gauge Metadata",
            num_objects=1,
            table_opts={'width': 700},
        )

    def panel(self):
        """Return the stage layout: a single row holding the station view."""
        return pn.Row(self.view_stations)
    
# ss = StationSelect()
# ss.panel()

In [None]:
# for testing:
# selected_stations = pd.DataFrame(
#     {'lng': {2: -157.79, 3: -156.476694},
#      'lat': {2: 21.433056, 3: 20.895},
#      'tidal': {2: True, 3: True},
#      'state': {2: 'HI', 3: 'HI'},
#      'id': {2: '1612480', 3: '1615680'},
#      'name': {2: 'Mokuoloe', 3: 'Kahului, Kahului Harbor'},
#      'affiliations': {2: 'NWLON', 3: 'NWLON'}}
# )
# OR:
# selected_stations = ss.annotator.selected.dframe()


In [None]:

# Display label -> product key. The labels are looked up with the value picked
# from a station's data inventory, and the mapped key is passed as the
# ``product`` argument when requesting data.
# NOTE(review): 'Preliminary 6-Minute Water Level' maps to 'predictions' —
# confirm this is the intended product.
label_key_map = dict([
    ('Wind', 'wind'),
    ('Air Temperature', 'air_temperature'),
    ('Water Temperature', 'water_temperature'),
    ('Barometric Pressure', 'air_pressure'),
    ('Preliminary 6-Minute Water Level', 'predictions'),
    ('Verified 6-Minute Water Level', 'water_level'),
    ('Verified Hourly Height Water Level', 'hourly_height'),
    ('Verified High/Low Water Level', 'high_low'),
    ('Verified Monthly Mean Water Level', 'monthly_mean'),
])

# Reverse lookup: product key -> display label.
key_label_map = dict(zip(label_key_map.values(), label_key_map.keys()))

In [None]:

class CollectData(param.Parameterized):
    """Pipeline stage that previews NOAA data for the selected stations.

    For every station id in ``selected_stations`` an ``nc.Station`` is created.
    The user picks a station, a data type and a date range; pressing the plot
    button downloads the data and draws it. The downloaded frame and a
    description of the request are exposed as the ``data`` and ``metadata``
    outputs.
    """

    # Stations chosen in the previous stage; must provide an 'id' column.
    selected_stations = param.DataFrame()
    # Currently active station id; the choices are filled in by __init__.
    station_ids = param.ObjectSelector(objects=[], label='Station IDs')

    start_date = param.Date(dt.datetime(2017, 1, 1), bounds=(dt.datetime(1900, 1, 1), dt.datetime(2100, 1, 1)))
    end_date = param.Date(dt.datetime(2017, 1, 2), bounds=(dt.datetime(1900, 1, 1), dt.datetime(2100, 1, 1)))

    # Last successfully downloaded data (empty frame when nothing is loaded).
    data = param.DataFrame(default=pd.DataFrame())
    # Description of the request that produced ``data`` (empty when unset).
    metadata = param.Dict(dict())

    def __init__(self, selected_stations, **params):
        """Create the widgets and one ``nc.Station`` per selected station.

        Args:
            selected_stations: DataFrame with an ``'id'`` column whose values
                can be converted with ``int``.
            **params: further parameter values forwarded to
                ``param.Parameterized``.

        Raises:
            ValueError: if ``selected_stations`` is ``None`` or has no rows.
        """
        if selected_stations is None or len(selected_stations) == 0:
            raise ValueError('CollectData requires at least one selected station')

        # Initialise the param machinery first: on an uninitialised instance
        # ``self.param.<name>`` refers to the class-level Parameter, so
        # changing ``objects`` there would leak into every other instance.
        super().__init__(selected_stations=selected_stations, **params)

        ids = [int(x) for x in selected_stations['id'].values.tolist()]
        self.param.station_ids.objects = ids
        # Keep an explicitly supplied, valid station id; otherwise use the first.
        if self.station_ids not in ids:
            self.station_ids = ids[0]

        self.radio_group = pn.widgets.RadioBoxGroup(
            name='Select data type to view', value=[], options=[],
            inline=False)
        self.plot_button = pn.widgets.Button(name='Generate Plot', button_type='primary')

        # Placeholder slider; view_properties replaces it once a data type is
        # known. The lower handle is clamped so it never precedes ``start``.
        self.date_range_slider = pn.widgets.DateRangeSlider(
            name='Date Range',
            start=self.start_date, end=self.end_date,
            value=(max(self.start_date, self.end_date - dt.timedelta(days=30)), self.end_date),
        )

        self.stations = {sid: nc.Station(sid) for sid in ids}

    @param.output('data', 'metadata')
    def output(self):
        """Return the current ``(data, metadata)`` pair."""
        return (self.data, self.metadata)

    def _reset_data(self):
        """Clear any previously downloaded data and its metadata."""
        self.data = pd.DataFrame()
        self.metadata = dict()

    def view_summary(self):
        """Summary of the station data available for the stations selected on the previous page"""
        cards = [
            pn.Card(
                pd.DataFrame(self.stations[sid].data_inventory).T,
                title=f'NOAA Station {sid}: {self.stations[sid].name}',
                background='WhiteSmoke',
            )
            for sid in self.param.station_ids.objects
        ]
        return pn.Column(*cards)

    @pn.depends('station_ids')
    def view_properties(self):
        """Any time a different station id is selected, we update the available properties"""
        inventory = self.stations[self.station_ids].data_inventory
        data_types = list(inventory.keys())
        self.radio_group.options = data_types
        # A station with an empty inventory leaves nothing to select.
        self.radio_group.value = data_types[0] if data_types else None

        if self.radio_group.value:
            record = inventory[self.radio_group.value]
            start_date = dt.datetime.strptime(record['start_date'], '%Y-%m-%d %H:%M')
            end_date = dt.datetime.strptime(record['end_date'], '%Y-%m-%d %H:%M')

            # Default to the last 60 days, but never start before the first
            # available date (records shorter than 60 days).
            window_start = max(start_date, end_date - dt.timedelta(days=60))
            self.date_range_slider = pn.widgets.DateRangeSlider(
                name='Date Range',
                start=start_date, end=end_date,
                value=(window_start, end_date),
            )

        return pn.Column(
            self.radio_group,
            self.date_range_slider,
        )

    @pn.depends('plot_button.clicks')
    def view_plot(self):
        """when the plot button is clicked, create a new plot based on the current widget selections"""
        if not self.radio_group.value:
            # clear data selection
            self._reset_data()
            return hv.Curve([])

        # wrap this in try/except because noaa_coops is not very resilient and there are many edge cases which cause errors
        # (a data type without an entry in label_key_map also ends up here)
        try:
            station = self.stations[self.station_ids]
            begin_date = self.date_range_slider.value[0].strftime('%Y%m%d')
            end_date = self.date_range_slider.value[1].strftime('%Y%m%d')
            product = label_key_map[self.radio_group.value]

            self.data = station.get_data(
                begin_date=begin_date,
                end_date=end_date,
                product=product,
                units="metric",
                time_zone="gmt",
                datum='STND',
            )
            self.metadata = {
                'begin_date': begin_date,
                'end_date': end_date,
                'product': product,
                'units': 'metric',
                'time_zone': "gmt",
                'datum': 'STND',
                'name': station.metadata['name'],
                'id': station.metadata['id'],
            }
            plot = hv.Curve(self.data.reset_index(drop=False)).opts(line_width=1)
        except Exception:
            # Deliberately broad (demo code), but no longer a bare ``except``
            # so KeyboardInterrupt / SystemExit still propagate.
            self._reset_data()
            plot = hv.Curve([]).opts(title='noaa_coops failed to return data properly')

        return plot

    def panel(self):
        """Return the stage layout: summary, station picker, options, button and plot."""
        return pn.Column(
            self.view_summary,
            self.param.station_ids,
            self.view_properties,
            self.plot_button,
            self.view_plot,
        )
    

# cc = CollectData(selected_stations=selected_stations)
# cc.panel()


In [None]:
class ExportData(param.Parameterized):
    """Pipeline stage that shows the collected data and offers it as a CSV download."""

    # Data to export (empty frame when nothing was collected).
    data = param.DataFrame(default=pd.DataFrame())
    # Description of the request that produced ``data``; used for the file name.
    metadata = param.Dict(dict())
    # Name of the downloaded file; derived from ``metadata`` when not supplied.
    filename = param.String()

    def __init__(self, **params):
        """Initialise the stage and derive a default file name if none was given."""
        super().__init__(**params)

        # Respect an explicitly supplied filename instead of overwriting it.
        if not self.filename:
            self.filename = self._default_filename()

    def _default_filename(self):
        """Build a CSV file name from whichever metadata fields are present.

        With complete metadata this yields
        ``name_id_product_units_begin_date_end_date.csv``. Missing fields are
        skipped, and ``noaa_data.csv`` is used when none is available (for
        example when the previous stage produced no data), so an empty
        ``metadata`` dict no longer raises ``KeyError``.
        """
        fields = ('name', 'id', 'product', 'units', 'begin_date', 'end_date')
        meta = self.metadata or {}
        parts = [str(meta[field]) for field in fields if field in meta]
        stem = '_'.join(parts) if parts else 'noaa_data'
        return f"{stem}.csv".replace(',', '_').replace(' ', '_')

    def translate_data(self):
        """Serialise ``data`` as CSV into an in-memory text buffer, rewound to the start."""
        sio = StringIO()
        self.data.to_csv(sio)
        sio.seek(0)
        return sio

    def panel(self):
        """Return the stage layout: data preview, file-name input and download button."""
        df_pane = pn.pane.DataFrame(self.data, max_rows=20)
        fd = pn.widgets.FileDownload(
            callback=self.translate_data,
            filename=self.filename,
            label='Download Data',
        )

        def _sync_filename(event):
            # Without this link, edits made in the filename input never reach
            # the download widget. Fall back to the default if it is cleared.
            fd.filename = event.new or self._default_filename()

        self.param.watch(_sync_filename, 'filename')

        return pn.Column(
            df_pane,
            self.param.filename,
            fd,
        )


In [None]:
# Assemble the three-stage app. The first stage is passed as an instance, the
# later stages as classes.
pipeline = pn.pipeline.Pipeline(debug=True)
for stage_name, stage in (
    ('Gauge Selection', StationSelect()),
    ('Data Preview', CollectData),
    ('Export Data', ExportData),
):
    pipeline.add_stage(stage_name, stage)
# Mark the pipeline as the object to serve (e.g. via `panel serve`).
pipeline.servable()