# National Water Model 
Compute and visualize the mean annual discharge of 2.7 million rivers from the National Water Model v2, in minutes, using Pangeo.

In [1]:
import xarray as xr
import fsspec
import numpy as np

In [2]:
import hvplot.pandas
import hvplot.xarray
import geoviews as gv
from holoviews.operation.datashader import rasterize
import cartopy.crs as ccrs

In [3]:
from dask.distributed import Client, progress

from dask_gateway import Gateway
# Connect to the Dask Gateway server. No address is passed here, so the
# connection details are expected to come from the environment's configuration.
gateway = Gateway()

In [4]:
# Ask the gateway to launch a new Dask cluster for this session.
cluster = gateway.new_cluster()

In [5]:
# Enable adaptive scaling, bounded to between 4 and 20 workers.
# The trailing semicolon suppresses the cell's output.
cluster.adapt(minimum=4, maximum=20);

In [6]:
# Attach a distributed client to the cluster so later Dask work runs there.
client = Client(cluster)
# Show the client summary (scheduler address, dashboard link, worker counts).
client

0,1
Client  Scheduler: gateway://traefik-prod-dask-gateway.prod:80/prod.633322cfbd2d4120903ff8c74f03d095  Dashboard: https://hub.aws-uswest2-binder.pangeo.io/services/dask-gateway/clusters/prod.633322cfbd2d4120903ff8c74f03d095/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [7]:
# Map the Zarr store on S3 to a key/value interface. The bucket is accessed
# with credentials (anon=False) and in requester-pays mode.
nwm_store = fsspec.get_mapper(
    's3://pangeo-data-uswest2/esip/NWM2/2017',
    anon=False,
    requester_pays=True,
)
# Open the store as a dataset.
ds = xr.open_zarr(nwm_store)

In [8]:
# Display the dataset summary (variables, shapes, dtypes and chunking).
ds

Unnamed: 0,Array,Chunk
Bytes,10.92 MB,839.72 kB
Shape,"(2729077,)","(209929,)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.92 MB 839.72 kB Shape (2729077,) (209929,) Count 14 Tasks 13 Chunks Type float32 numpy.ndarray",2729077  1,

Unnamed: 0,Array,Chunk
Bytes,10.92 MB,839.72 kB
Shape,"(2729077,)","(209929,)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,10.92 MB,839.72 kB
Shape,"(2729077,)","(209929,)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 10.92 MB 839.72 kB Shape (2729077,) (209929,) Count 14 Tasks 13 Chunks Type float32 numpy.ndarray",2729077  1,

Unnamed: 0,Array,Chunk
Bytes,10.92 MB,839.72 kB
Shape,"(2729077,)","(209929,)"
Count,14 Tasks,13 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.76 kB,72 B
Shape,"(8760,)","(72,)"
Count,123 Tasks,122 Chunks
Type,|S1,numpy.ndarray
"Array Chunk Bytes 8.76 kB 72 B Shape (8760,) (72,) Count 123 Tasks 122 Chunks Type |S1 numpy.ndarray",8760  1,

Unnamed: 0,Array,Chunk
Bytes,8.76 kB,72 B
Shape,"(8760,)","(72,)"
Count,123 Tasks,122 Chunks
Type,|S1,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,95.63 GB,60.46 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 95.63 GB 60.46 MB Shape (8760, 2729077) (72, 209929) Count 1587 Tasks 1586 Chunks Type float32 numpy.ndarray",2729077  8760,

Unnamed: 0,Array,Chunk
Bytes,95.63 GB,60.46 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,95.63 GB,60.46 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 95.63 GB 60.46 MB Shape (8760, 2729077) (72, 209929) Count 1587 Tasks 1586 Chunks Type int32 numpy.ndarray",2729077  8760,

Unnamed: 0,Array,Chunk
Bytes,95.63 GB,60.46 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 191.25 GB 120.92 MB Shape (8760, 2729077) (72, 209929) Count 1587 Tasks 1586 Chunks Type float64 numpy.ndarray",2729077  8760,

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 191.25 GB 120.92 MB Shape (8760, 2729077) (72, 209929) Count 1587 Tasks 1586 Chunks Type float64 numpy.ndarray",2729077  8760,

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 191.25 GB 120.92 MB Shape (8760, 2729077) (72, 209929) Count 1587 Tasks 1586 Chunks Type float64 numpy.ndarray",2729077  8760,

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 191.25 GB 120.92 MB Shape (8760, 2729077) (72, 209929) Count 1587 Tasks 1586 Chunks Type float64 numpy.ndarray",2729077  8760,

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 191.25 GB 120.92 MB Shape (8760, 2729077) (72, 209929) Count 1587 Tasks 1586 Chunks Type float64 numpy.ndarray",2729077  8760,

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 191.25 GB 120.92 MB Shape (8760, 2729077) (72, 209929) Count 1587 Tasks 1586 Chunks Type float64 numpy.ndarray",2729077  8760,

Unnamed: 0,Array,Chunk
Bytes,191.25 GB,120.92 MB
Shape,"(8760, 2729077)","(72, 209929)"
Count,1587 Tasks,1586 Chunks
Type,float64,numpy.ndarray


In [9]:
# Read the latitude/longitude arrays from the channel spatial-index file.
# Both the remote file and the dataset are opened as context managers so that
# their handles are released once the coordinate arrays have been copied out;
# previously the dataset was never closed and stayed bound to a closed file.
with fsspec.open('s3://pangeo-data-uswest2/esip/NWM2/nwm-v1.2-channel_spatial_index.nc', anon=False, requester_pays=True) as f:
    with xr.open_dataset(f) as ds_lonlat:
        # .values materialises NumPy arrays while the file is still open.
        lat = ds_lonlat['latitude'].values
        lon = ds_lonlat['longitude'].values

In [10]:
# Quick look at the largest latitude and longitude values that were loaded.
lat_max, lon_max = lat.max(), lon.max()
print(lat_max, lon_max)

52.86352 -66.99203


Let's find the site with the largest streamflow at 00:00 on June 1, 2017.

In [11]:
# Select the streamflow values at one timestamp, then take the positional
# index of the largest one. `.values` evaluates the lazy result into a NumPy
# scalar array that is used below for positional indexing.
flow_at_time = ds['streamflow'].sel(time='2017-06-01 00:00:00')
imax = flow_at_time.argmax().values

Let's plot the whole year-long time series at that location

In [12]:
%%time
ds.streamflow[:,imax].hvplot()

CPU times: user 87 ms, sys: 4.98 ms, total: 92 ms
Wall time: 14.3 s


In [13]:
# Name of the dataset variable that the cells below average and map.
var = 'streamflow'

In [14]:
# Size of the selected variable in gigabytes (bytes divided by 1e9).
ds[var].nbytes / 1e9

191.25371616

In [15]:
# Define the mean over the time dimension, then persist it so the cluster
# computes it and keeps the result available for the following cells.
var_mean = ds[var].mean(dim='time')
var_mean = var_mean.persist()
# Show a progress display for the persisted computation.
progress(var_mean)

VBox()

In [16]:
# Convert the time-averaged result to pandas and wrap it in a one-column frame.
mean_series = var_mean.to_pandas()
df = mean_series.to_frame()

In [17]:
# Build the plotting table in one chained expression instead of mutating `df`
# in place. The value column is looked up by position rather than by the
# literal label 0, so the rename still applies if the column produced by
# to_frame() carries a different label. Re-running this cell is a no-op.
# NOTE(review): assumes `lat`/`lon` are ordered like the rows of `df` — confirm
# against the spatial-index file.
df = (
    df.rename(columns={df.columns[0]: "transport"})
      .assign(latitude=lat, longitude=lon)
)

In [18]:
# Point plot of every row at its longitude/latitude, coloured by the
# 'transport' column, with coordinates declared as Plate Carree.
p = df.hvplot.points(
    x='longitude',
    y='latitude',
    crs=ccrs.PlateCarree(),
    c='transport',
    colorbar=True,
    size=14,
)

In [19]:
# Aggregate the points onto a raster using the mean of the values per cell,
# then style it with a hover tool, equal aspect and a log colour scale.
# The colour range has a fixed lower limit of 1e-2 and NaN as the upper limit
# (presumably so the upper bound is chosen automatically — TODO confirm).
g = rasterize(
    p,
    aggregator='mean',
    x_sampling=0.02,
    y_sampling=0.02,
    width=500,
).opts(
    tools=['hover'],
    aspect='equal',
    logz=True,
    clim=(1e-2, np.nan),
)

In [20]:
# Overlay the rasterized map and the OpenStreetMap tile source in one plot.
g * gv.tile_sources.OSM