# National Water Model 
Compute and visualize the mean annual river discharge for 2.7 million rivers from the National Water Model v2.0, in minutes, using Pangeo.

In [None]:
import xarray as xr
import fsspec
import numpy as np

In [None]:
import hvplot.pandas
import hvplot.xarray
import geoviews as gv
from holoviews.operation.datashader import rasterize
import cartopy.crs as ccrs

In [None]:
from dask.distributed import Client, progress

from dask_gateway import Gateway
# Create the Dask Gateway handle used below to request a cluster.
# NOTE(review): no address/auth is passed, so this presumably relies on the
# deployment's default Dask Gateway configuration -- confirm.
gateway = Gateway()

In [None]:
# Ask the gateway for a new Dask cluster.
cluster = gateway.new_cluster()

In [None]:
# Let the cluster scale adaptively between 4 and 20 workers.
# The trailing semicolon suppresses the cell's output in Jupyter.
cluster.adapt(minimum=4, maximum=20);

In [None]:
# Connect a distributed client to the cluster; the bare `client` expression
# on the last line displays its summary as the cell output.
client = Client(cluster)
client

In [None]:
# Open the 2017 NWM Zarr store from S3. The bucket is requester-pays, so
# anonymous access is disabled and AWS credentials are needed.
nwm_store = fsspec.get_mapper(
    's3://pangeo-data-uswest2/esip/NWM2/2017',
    anon=False,
    requester_pays=True,
)
ds = xr.open_zarr(nwm_store)

In [None]:
# Display the dataset summary as the cell output.
ds

In [None]:
# Read the latitude/longitude arrays from the channel spatial-index file
# stored in the same requester-pays bucket.
spatial_index_url = 's3://pangeo-data-uswest2/esip/NWM2/nwm-v1.2-channel_spatial_index.nc'
with fsspec.open(spatial_index_url, anon=False, requester_pays=True) as f:
    ds_lonlat = xr.open_dataset(f)
    # Pull the values into NumPy arrays while the remote file is still open.
    lat, lon = (
        ds_lonlat[coord].values for coord in ('latitude', 'longitude')
    )

In [None]:
# Quick sanity check: print the largest latitude and longitude values.
print(lat.max(), lon.max())

Let's find the site with the largest streamflow at 00:00 on June 1, 2017.

In [None]:
# Select the streamflow snapshot for the chosen timestamp, then take the
# position of its maximum (argmax returns an index, not a flow value).
june1_flow = ds['streamflow'].sel(time='2017-06-01 00:00:00')
imax = june1_flow.argmax().values

Let's plot the whole year-long time series at that location

In [None]:
%%time
ds.streamflow[:,imax].hvplot()

In [None]:
# Name of the dataset variable analysed in the cells below.
var = 'streamflow'

In [None]:
# Size of the selected variable in gigabytes (bytes / 1e9).
ds[var].nbytes / 1e9

In [None]:
# Average the variable over the time dimension, start the computation on the
# cluster with persist(), and show a progress bar while it runs.
time_mean = ds[var].mean(dim='time')
var_mean = time_mean.persist()
progress(var_mean)

In [None]:
# Bring the time-mean result into pandas: first as a Series, then as a
# one-column DataFrame.
# NOTE(review): the column label depends on whether the Series carries a
# name (0 if unnamed) -- confirm for the installed xarray version.
mean_series = var_mean.to_pandas()
df = mean_series.to_frame()

In [None]:
# Label the mean-flow column "transport" and attach the coordinates.
#
# The column is renamed by position rather than by the literal key 0: the
# label produced by to_pandas().to_frame() is 0 only when the Series is
# unnamed, and rename() silently ignores keys that are not present. That
# would leave the frame without the "transport" column the plot needs.
# Re-running this cell is harmless (the first column is then already
# "transport").
# NOTE(review): assumes lat/lon are ordered like the rows of df -- confirm.
df = df.rename(columns={df.columns[0]: "transport"})
df = df.assign(latitude=lat, longitude=lon)

In [None]:
# Point plot of every row at (longitude, latitude), coloured by the
# "transport" column, with the coordinates declared as PlateCarree.
p = df.hvplot.points(
    x='longitude',
    y='latitude',
    c='transport',
    crs=ccrs.PlateCarree(),
    colorbar=True,
    size=14,
)

In [None]:
# Rasterize the points into an image, averaging the values that fall in each
# pixel, then style it: hover tool, equal aspect, logarithmic colour scale.
# clim sets the lower colour limit to 1e-2; the NaN upper limit presumably
# leaves that bound to be taken from the data -- confirm.
g = rasterize(
    p,
    aggregator='mean',
    x_sampling=0.02,
    y_sampling=0.02,
    width=500,
).opts(
    tools=['hover'],
    aspect='equal',
    logz=True,
    clim=(1e-2, np.nan),
)

In [None]:
# Overlay the rasterized result with the OpenStreetMap tile source and
# display the combined plot as the cell output.
g * gv.tile_sources.OSM