# Exploring COAWST coupled circulation/wave forecast data

In [None]:
import fsspec
import xarray as xr
import hvplot.xarray
import numpy as np
import dask

In [None]:
# Create an S3 filesystem object with requester-pays enabled.
# NOTE(review): `fs` is not referenced again in the visible cells (the dataset
# is opened through fsspec.get_mapper instead) — confirm whether it is needed.
fs = fsspec.filesystem('s3', requester_pays=True)

Xarray uses Dask behind the scenes, so we first spin up a Dask cluster.

In [None]:
# Make the shared helper library importable, then use it to set AWS
# credentials and obtain a Dask client/cluster pair.
import os
import sys

sys.path.append(os.path.join(os.environ['HOME'], 'shared', 'users', 'lib'))
import ebdpy as ebd

# Connection settings used by every helper call below.
aws_profile = 'esip-qhub'
aws_region = 'us-west-2'
endpoint = f's3.{aws_region}.amazonaws.com'
worker_max = 30

# NOTE(review): credentials are set twice, first with the profile only and
# then with region and endpoint as well. The first call looks redundant, but
# it is kept because the helper's behavior is not visible here — confirm.
ebd.set_credentials(profile=aws_profile)
ebd.set_credentials(profile=aws_profile, region=aws_region, endpoint=endpoint)

client, cluster = ebd.start_dask_cluster(
    profile=aws_profile,
    worker_max=worker_max,
    region=aws_region,
    use_existing_cluster=True,
    adaptive_scaling=False,
    wait_for_cluster=False,
    environment='pangeo',
    worker_profile='Pangeo Worker',
    propagate_env=True,
)

In [None]:
# Open the consolidated Zarr store of COAWST surface variables from S3,
# with requester-pays access enabled on the mapper.
surface_vars_url = 's3://pangeo-data-uswest2/esip/COAWST/surface_vars'
ds = xr.open_zarr(
    fsspec.get_mapper(surface_vars_url, requester_pays=True),
    consolidated=True,
)

In [None]:
%%time
# 5th-percentile of Hwave over ocean_time for the first 10x10 grid cells.
# ocean_time is collapsed into a single chunk before the reduction, matching
# the larger quantile computations later in this notebook: xarray's
# dask-backed quantile has historically required the reduced dimension to sit
# in one chunk and raises a ValueError otherwise.
a = (
    ds['Hwave'][:, :10, :10]
    .chunk({'ocean_time': -1})
    .quantile(q=0.05, dim='ocean_time')
    .values
)

In [None]:
# Attach a distributed client to the cluster.
# NOTE(review): this rebinds `client`, which ebd.start_dask_cluster(...)
# already returned in the setup cell — confirm this cell is still needed.
from dask.distributed import Client

client = Client(cluster)

In [None]:
# Display the notebook representation of the Hwave variable.
ds.Hwave

#### Extract the entire time series at a single grid point

In [None]:
%%time
# Load the complete series at grid index (288, 610) into memory and plot it.
h = ds['Hwave'][:, 288, 610].load()  # New York Bight
h.hvplot(grid=True)

In [None]:
%%time
# Load the series at grid index (280, 600), then show its 95th percentile
# over ocean_time.
h = ds['Hwave'][:, 280, 600].load()
h.quantile(q=0.95, dim='ocean_time')

In [None]:
# Same 95th-percentile expression as in the previous cell, evaluated on the
# already-loaded `h` and without the %%time wrapper.
h.quantile(q=0.95, dim='ocean_time')

In [None]:
%a = (ds['Hwave'][:,:50,:50].chunk({'ocean_time':-1}).
   quantile(q=np.linspace(0, 1, num=21), dim='ocean_time').values)                       %time

In [None]:
# Display the notebook representation of the Hwave variable again.
ds['Hwave']

In [None]:
# Define the same 21 quantile levels for the whole Hwave field. The field is
# rechunked into 25x25 horizontal tiles with ocean_time in a single chunk.
# No `.values`/`.load()` is requested here, so nothing is materialized in
# this cell.
a = (
    ds['Hwave']
    .chunk({'eta_rho': 25, 'xi_rho': 25, 'ocean_time': -1})
    .quantile(q=np.linspace(0, 1, num=21), dim='ocean_time')
)

In [None]:
%%time
# Evaluate the quantile computation defined in the previous cell. The return
# value is not assigned, so `a` itself is left unchanged.
# NOTE(review): `retries=10` is presumably forwarded to the distributed
# scheduler so failed tasks are re-run — confirm for the dask version in use.
dask.compute(a, retries=10)

In [None]:
#cluster.shutdown()

In [None]:
# Show the shape of `a`.
a.shape

In [None]:
import numpy as np

In [None]:
# The quantile levels used above: 21 evenly spaced values from 0 to 1.
np.linspace(0, 1, num=21)

Plot the entire field at a fixed time (here, during Hurricane Sandy)

In [None]:
%%time
# Select the Hwave field at one timestamp and load it into memory.
sandy_time = '2012-10-29 22:00'
h = ds['Hwave'].sel(ocean_time=sandy_time).load()

In [None]:
# Draw the loaded snapshot as a rasterized quadmesh on its lon/lat
# coordinates, over 'OSM' map tiles.
h.hvplot.quadmesh(
    x='lon_rho',
    y='lat_rho',
    geo=True,
    frame_height=400,
    rasterize=True,
    cmap='turbo',
    tiles='OSM',
)

The computationally expensive step: taking the mean of the entire wave height field over time

In [None]:
%%time
# Average Hwave over ocean_time for the whole field and compute the result.
hwave_mean = (
    ds['Hwave']
    .mean(dim='ocean_time')
    .compute()
)

Taking the mean using our cluster reading from AWS took only about 1 minute! 

The previous workflow for this calculation was obtaining the data via THREDDS and calculating the mean on a local desktop computer, which took 2 weeks!

In [None]:
# Plot the time-mean field, keeping only cells whose mean is positive
# (all other cells are masked by `where`).
hwave_mean.where(hwave_mean > 0.0).hvplot.quadmesh(
    x='lon_rho',
    y='lat_rho',
    rasterize=True,
    geo=True,
    cmap='turbo',
    tiles='OSM',
)

In the figure above, we can see locally enhanced waves in the Gulf Stream region, caused by the coupling between currents and waves in COAWST. This enhancement does not appear in non-coupled models like the NOAA WaveWatch III model.

In [None]:
# Tear down in dependency order: disconnect the client first, then close the
# cluster it was connected to, so the client is not left talking to a
# scheduler that has already gone away.
client.close()
cluster.close()