# STWAVE test

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import fsspec
import hvplot.xarray

### Explore the Coastal Coupling data

In [None]:
# Create an S3 filesystem object through fsspec. requester_pays=True is passed on to
# the S3 backend (presumably because the bucket bills the requester -- TODO confirm).
fs = fsspec.filesystem('s3', requester_pays=True)

In [None]:
# List the entries at the top level of 'esip-qhub'
fs.ls('esip-qhub')

In [None]:
# List the entries under the 'usace' prefix, where the Zarr store opened below lives
fs.ls('esip-qhub/usace')

### Read the data using the cloud-friendly zarr data format

In [None]:
# Wrap the Zarr store's S3 location in a mapper object that xr.open_zarr can read from
store = fs.get_mapper('s3://esip-qhub/usace/USACE-2016-stwave-regional.zarr')

In [None]:
# Open the Zarr store as an xarray Dataset, using the store's consolidated metadata
ds = xr.open_zarr(store, consolidated=True)

In [None]:
# Display the dataset summary (the last expression in a cell is rendered)
ds

In [None]:
#ds = ds.where(ds!=0)

In [None]:
# Display the summary of the 'waveHs' variable
ds['waveHs']

How many GB of wave height data do we have?

In [None]:
# Size of the 'waveHs' array in bytes, divided by 1e9 to express it in (decimal) GB
ds['waveHs'].nbytes / 1e9

### Start a dask cluster to crunch the data

In [None]:
from dask_gateway import Gateway
from dask.distributed import Client

gateway = Gateway()

# Ask the gateway for its clusters once and reuse the answer, so the report printed
# below and the new-vs-connect decision are based on the same snapshot (and we avoid
# repeating the same request several times).
clusters = gateway.list_clusters()
if clusters:
    print('Existing Dask clusters:')
    for c in clusters:
        print('Cluster Name:',c.name,c.status)
else:
    print('No Cluster running.')

# New or connect:
# If no cluster is running, create a new one; otherwise connect to the cluster at
# position idx in the list (idx=0 is the first one found -- change idx to pick another).
idx=0
if not clusters:
    cluster = gateway.new_cluster(environment='pangeo', profile='Small Worker')
else:
    cluster = gateway.connect(clusters[idx].name)

In [None]:
# Show the cluster configuration options reported by the gateway
gateway.cluster_options()

In [None]:
#from dask_kubernetes import KubeCluster
#cluster = KubeCluster()

In [None]:
# Request 10 workers for the cluster
cluster.scale(10)

In [None]:
# Display the cluster object
cluster

In [None]:
# Connect a dask.distributed Client to the cluster created or joined above
client = Client(cluster)

### Visualize using HoloViz.org tools

In [None]:
# Plot 'waveHs' against time at one grid cell: every entry along the first axis,
# index 40 on each of the next two axes
ds['waveHs'][:,40,40].hvplot(x='time', grid=True)

In [None]:
# Same grid cell (indices 40, 40), restricted to the time window 2016-11-01 00:00 .. 2016-11-30 23:00
ds['waveHs'].sel(time=slice('2016-11-01 00:00','2016-11-30 23:00'))[:,40,40].hvplot(x='time', grid=True)

In [None]:
# Select the 'waveHs' field at a single time stamp
da = ds['waveHs'].sel(time='2016-11-08 16:00')

In [None]:
# Replace exact zeros with NaN before plotting (presumably zeros mark cells
# without wave data -- TODO confirm), then draw the field over OSM tiles.
da.where(da != 0).hvplot.quadmesh(
    x='longitude',
    y='latitude',
    colormap='rainbow',
    rasterize=True,
    geo=True,
    tiles='OSM',
    clim=(1, 3),
)

Take the maximum over the time dimension and compute the result, loading it into local memory to use later. This is the computationally intensive step.

In [None]:
%%time 
# Reduce 'waveHs' over the 'time' dimension; .compute() evaluates the result
# and returns it loaded in memory
max_var = ds['waveHs'].max(dim='time').compute()

In [None]:
# Replace exact zeros in the time-maximum field with NaN before plotting
# (presumably zeros mark cells without wave data -- TODO confirm).
max_var.where(max_var != 0).hvplot.quadmesh(
    x='longitude',
    y='latitude',
    colormap='rainbow',
    rasterize=True,
    geo=True,
    tiles='OSM',
    clim=(3, 5),
)

### Extract a time series at a lon,lat location

In [None]:
# find nearest_point for a requested lat/ lon
def findij(lon2d, lat2d, lon_sta, lat_sta):
    """Locate the grid cell nearest to a requested longitude/latitude.

    Closeness is measured as the squared difference in latitude plus the
    squared difference in longitude, taken directly on the coordinate
    values (no spherical or cos(latitude) correction is applied).

    Parameters
    ----------
    lon2d, lat2d : array-like
        Grid longitudes and latitudes with identical 2-D shapes. Anything
        ``np.asarray`` accepts works (NumPy arrays, xarray DataArrays, ...).
    lon_sta, lat_sta : float
        Longitude and latitude of the requested location, in the same
        units as the grid coordinates.

    Returns
    -------
    (ii, jj) : tuple of two 1-element integer arrays
        ``ii`` is the index along the first axis of the grid and ``jj`` the
        index along the second axis. Exactly one cell is returned; if
        several cells are equally close, the first one in row-major order
        is chosen.

    Raises
    ------
    ValueError
        If the grid is empty or every distance is NaN.
    """
    d_lat = np.asarray(lat2d) - lat_sta
    d_lon = np.asarray(lon2d) - lon_sta
    r2_requested = d_lat**2 + d_lon**2
    # nanargmin skips cells whose coordinates are NaN; a plain min() would
    # return NaN as soon as one such cell exists and then match nothing.
    nearest_flat = np.nanargmin(r2_requested)
    ii, jj = np.unravel_index(nearest_flat, r2_requested.shape)
    # Keep the historical return shape (1-D index arrays, as np.where gave)
    # so callers indexing with ds[...][:, ii, jj] behave as before.
    return (np.atleast_1d(ii), np.atleast_1d(jj))

In [None]:
# Requested station location
lat_sta = 36.2
lon_sta = -75.65
# findij returns (first-axis indices, second-axis indices); they are named
# jj and ii here and used in that order when indexing ds['waveHs'].
jj, ii = findij(ds.longitude, ds.latitude, lon_sta, lat_sta)

In [None]:
# Plot 'waveHs' against time at the grid indices (jj, ii) found for the station
ds['waveHs'][:,jj,ii].hvplot(x='time', grid=True)

In [None]:
# Second requested location: same latitude, different longitude
lat_sta = 36.2
lon_sta = -75.73
# findij returns (first-axis indices, second-axis indices); they are named
# jj and ii here and used in that order when indexing ds['waveHs'].
jj, ii = findij(ds.longitude, ds.latitude, lon_sta, lat_sta)

In [None]:
# Plot 'waveHs' against time at the grid indices (jj, ii) found for the second location
ds['waveHs'][:,jj,ii].hvplot(x='time', grid=True)

When done, you can use "File => Close and Shutdown Notebook" from the menu and shut down the client and cluster as follows:

In [None]:
# Close the client connection first, then shut the cluster down.
# The trailing semicolon keeps the cell from rendering an output.
client.close()
cluster.shutdown();

Or scale to 1 without killing the cluster:

In [None]:
#cluster.scale(1)