The CERA workflow for stations involved the following station filtering and adjustments:
- Get by parameters
- Get by region (coastal)
- Adjust by vertical datum
- Adjust time zone
- Station active vs inactive

In [None]:
from datetime import datetime, timedelta
from matplotlib import pyplot

import geopandas as gpd
import numpy as np

from searvey import usgs, stations

In [None]:
# Load the low-resolution Natural Earth polygons that ship with geopandas.
# NOTE(review): `gpd.datasets` is deprecated in recent geopandas releases
# (removed in 1.0) - confirm the installed version still provides it.
world_path = gpd.datasets.get_path('naturalearth_lowres')
world = gpd.read_file(world_path)

# Keep only the rows whose name matches the US or Puerto Rico.
us_names = ['United States of America', 'Puerto Rico']
us = world[world['name'].isin(us_names)]

# Boundary of all world polygons merged together. Intersecting the US
# boundaries with it is intended to keep only the coastline, since borders
# shared between neighbouring polygons disappear in the union.
world_outline = world.unary_union.boundary
us_coast = us.boundary.intersection(world_outline)

# Overview map: world in light grey, US in light blue, US coast in red.
ax = world.plot(color='k', alpha=0.1)
us.plot(ax=ax, color='b', alpha=0.2)
us_coast.plot(ax=ax, color='r')

In [None]:
# USGS parameter codes (kept as strings) used later to filter the stations.
params_of_interest = ['62620', '62615']

# Merge the coast lines into a single geometry and buffer it so that it
# overlaps with some stations.
# NOTE(review): the 0.5 buffer is in the units of the layer's CRS
# (presumably degrees) - confirm.
coast_geometry = us_coast.unary_union
region_of_interest = coast_geometry.buffer(0.5)

Note that the USGS implementation currently fetches stations for all parameters of interest to the CERA workflow; to filter further, one needs to fetch everything first and then filter the result. Also note that the `stations.get_stations` API currently does not include parameter information.

In [None]:
# Alternative through the generic `stations.get_stations` entry point, left
# disabled in favour of the USGS-specific call below:
#usgs_stations = stations.get_stations(providers='USGS', region=region_of_interest)

# Fetch the USGS stations that fall inside the buffered coastal region.
usgs_stations = usgs.get_usgs_stations(region=region_of_interest)
# Bare expression so the notebook displays the resulting table.
usgs_stations

In [None]:
# Plot every fetched USGS station (red) on top of the US polygons.
ax2 = us.plot()
usgs_stations.plot(ax=ax2, color='r')

In [None]:
# Keep only the station rows that report one of the parameters of interest.
has_param = usgs_stations.parm_cd.isin(params_of_interest)
usgs_stations_w_param = usgs_stations[has_param]

# A station counts as active when its end date is missing or less than three
# days in the past.
# NOTE(review): `datetime.now()` is naive local time - confirm that `end_date`
# is also timezone-naive and expressed in a compatible time zone.
end_date = usgs_stations_w_param.end_date
ended_recently = (datetime.now() - end_date) < timedelta(days=3)
is_active = ended_recently | end_date.isnull()

usgs_stations_of_interest = usgs_stations_w_param[is_active]

In [None]:
# Plot only the active stations with a parameter of interest (red) on top of
# the US polygons.
ax3 = us.plot()
usgs_stations_of_interest.plot(ax=ax3, color='r')

In [None]:
# List the columns available in the filtered station table.
usgs_stations_of_interest.columns

In [None]:
# Download the observations for the selected stations.
data = usgs.get_usgs_data(usgs_metadata=usgs_stations_of_interest)

# The station table can hold several rows per site, so reduce it to one row
# per `site_no` and index it by site to look up the datum code.
datum_by_site = (
    usgs_stations_of_interest
    .drop_duplicates(subset='site_no')
    .set_index('site_no')
    .alt_datum_cd
)

# Attach the datum code as a `vdatum` variable along the "site_no" dimension,
# ordered like the sites already present in `data`.
site_datums = datum_by_site.loc[data.site_no]
data = data.assign(vdatum=('site_no', site_datums))
data

In [None]:
def adjust_vdatum(ds):
    """Placeholder for the vertical-datum adjustment step.

    The adjustment is not implemented yet, so the input is handed back
    unchanged (the very same object, not a copy).

    Parameters
    ----------
    ds
        Dataset to adjust; the notebook passes the observations together with
        their per-site ``vdatum`` variable.

    Returns
    -------
    The ``ds`` argument, unmodified.
    """
    # TODO: Adjust "value"s based on the "vdatum" for the "site_no"
    return ds

In [None]:
# Run the vertical-datum adjustment step and display the result.
adjusted_data = adjust_vdatum(data)
adjusted_data

In [None]:
# Collect one DataFrame per (site, parameter code, option) combination,
# skipping the combinations whose values are all missing.
plot_data = []
for site_no in adjusted_data.site_no.values:
    for code in adjusted_data.code.values:
        for option in adjusted_data.option.values:
            ds = adjusted_data.sel(site_no=site_no, code=code, option=option).squeeze()
            if ds.value.isnull().all():
                continue
            plot_data.append(ds.to_dataframe())

ncols = 3
# Ceiling division: a count that is an exact multiple of `ncols` must not get
# an extra, completely empty row. At least one row is kept because `subplots`
# needs a positive row count even when there is nothing to plot.
nrows = max(1, -(-len(plot_data) // ncols))
# squeeze=False always returns a 2D array of axes, so `.ravel()` works for
# every grid shape, including a single row or a single cell.
fig, axes = pyplot.subplots(ncols=ncols, nrows=nrows, figsize=(15, 150), squeeze=False)
flat_axes = axes.ravel()

# One panel per series; missing values are dropped before plotting.
for frame, ax in zip(plot_data, flat_axes):
    frame.value.dropna().plot(ax=ax, style='x', xlabel='datetime', ylabel=f'{frame.code.iloc[0]} ({frame.unit.iloc[0]})')

# Hide the grid cells left over in the last row instead of showing empty frames.
for ax in flat_axes[len(plot_data):]:
    ax.set_visible(False)
fig.tight_layout()