# Read NWM Data from AWS Public Dataset using Zarr/ReferenceFileSystem

In [None]:
import fsspec
import xarray as xr
import json
import intake
import hvplot.xarray

#### Load multiple NetCDF files from local JSON

In [None]:
%%time
# Open the data described by a local reference JSON through xarray's zarr engine.
r_opts = {'anon': True} # NetCDF files on AWS Open Data public bucket

fo = "nwm.json"  # relative path to the local reference file
# remote_protocol/remote_options say how to reach the NetCDF files that the
# references point at (anonymous S3 access here).
fs = fsspec.filesystem("reference", fo=fo, 
                       remote_protocol='s3', remote_options=r_opts)

# Expose the root of the reference filesystem as a mapper and hand it to
# xarray as a Zarr store.
m = fs.get_mapper("")
ds = xr.open_dataset(m, engine="zarr")

In [None]:
# Display the dataset opened from the reference file (last expression of the cell).
ds

In [None]:
#### Inspect the streamflow variable of the dataset loaded from the local JSON

In [None]:
# Display the streamflow variable of the dataset.
ds.streamflow

In [None]:
# Select index 1000 along the second dimension for every entry of the first
# dimension; .values forces the selected data to be read into memory.
ds.streamflow[:,1000].values

#### Load the reference JSON from S3

In [None]:
%%time
# Same as loading from a local reference file, except the reference JSON
# itself is read from S3.
r_opts = {'anon': True} # NetCDF files on AWS Open Data public bucket
t_opts = {'requester_pays':True}  # JSON or zip file on requester pays bucket

fo = "s3://coawst-public/testing/nwm.json"
# remote_* options are used for the referenced NetCDF files; target_* options
# are used to fetch the reference JSON named by `fo`.
fs = fsspec.filesystem("reference", fo=fo, 
                       remote_protocol='s3', remote_options=r_opts,
                       target_protocol='s3', target_options=t_opts)

# Expose the root of the reference filesystem as a mapper and hand it to
# xarray as a Zarr store.
m = fs.get_mapper("")
ds = xr.open_dataset(m, engine="zarr")

In [None]:
# Display the dataset opened from the S3-hosted reference file.
ds

#### Load a single NetCDF file from a JSON file inside a zip archive

In [None]:
# Chained URL: "zip://*" matches every member of the zip archive stored at the
# S3 path; the s3= keyword supplies options for the S3 layer (requester pays).
jsons = fsspec.open_files("zip://*::s3://coawst-public/testing/out.zip", s3={"requester_pays": True})
# Use only the first member of the archive.
with jsons[0] as afile:
    # The parsed JSON (a dict of references) is passed directly as `fo`;
    # the referenced NetCDF data is read anonymously from S3.
    m = fsspec.get_mapper("reference://", fo=json.load(afile), remote_protocol="s3", remote_options={"anon": True})
    ds = xr.open_dataset(m, engine="zarr")

#### Load all JSON files in the zip archive into a single dataset

In [None]:
# Open one dataset per reference JSON in `jsons` (the files opened from the zip
# archive), then concatenate them along the 'time' dimension.
ds_list=[]
for j in jsons:
    with j as afile:
        # Each parsed JSON becomes its own in-memory reference filesystem.
        m = fsspec.get_mapper("reference://", fo=json.load(afile), remote_protocol="s3", remote_options={"anon": True})
        # 'reference_time' is dropped before concatenation
        # NOTE(review): presumably because it differs between files - confirm.
        ds_list.append(xr.open_dataset(m, engine="zarr", drop_variables='reference_time'))

# 'minimal' restricts concatenation to variables/coords that already have the
# 'time' dimension; compat='override' takes the remaining ones from the first
# dataset without comparing them (see the xarray.concat documentation).
ds = xr.concat(ds_list, dim='time', 
               coords='minimal', data_vars='minimal', compat='override')

In [None]:
# Display the concatenated dataset.
ds

#### Load the JSON for the whole dataset using an Intake catalog

In [None]:
%%time
# Open the Intake catalog YAML stored on S3; storage_options are the filesystem
# options used to read the catalog file (requester-pays access).
cat = intake.open_catalog('s3://esip-qhub/usgs/nwm_intake.yml', 
                          storage_options={"requester_pays": True})

In [None]:
# List the names of the entries in the catalog.
list(cat)

In [None]:
# Load the 'nwm-forecast' catalog entry with to_dask().
# NOTE(review): presumably returns a lazily-loaded, dask-backed xarray Dataset -
# confirm against the driver declared in the catalog.
ds = cat['nwm-forecast'].to_dask()

# Display the streamflow variable of the loaded dataset.
ds.streamflow

In [None]:
%%time
# Plot streamflow at index 1000 of the second dimension against 'time', with
# grid lines; the .hvplot accessor comes from `import hvplot.xarray`.
ds.streamflow[:,1000].hvplot(x='time', grid=True)

#### What does this magical intake catalog look like?

In [None]:
# Print the catalog's text (presumably the raw YAML source - confirm).
print(cat.text)

In [None]:
# Display the catalog entry named 'nwm-rfs'.
cat['nwm-rfs']