Saves the RouteLink reaches located in Alabama (or any other state) to a NetCDF file on disk.

In [None]:
# Import packages
import fsspec
import xarray as xr
from kerchunk.hdf import SingleHdf5ToZarr
import rioxarray as rx 
import geopandas as gpd

In [None]:
# Open RouteLink_CONUS.nc straight from the NWM parameter server as an xarray
# dataset, without downloading it to disk first. Point the URL at the Alaska,
# Hawaii or Puerto Rico RouteLink file to work with one of those domains.
rl_nwm_url = (
    "https://www.nco.ncep.noaa.gov/pmb/codes/nwprod/nwm.v3.0.13/parm/"
    "domain/RouteLink_CONUS.nc"
)

fs = fsspec.filesystem("http")

# Scan the remote HDF5/NetCDF file once and translate it into a kerchunk
# reference set that the zarr engine can read. Worked example:
# https://fsspec.github.io/kerchunk/test_example.html
# NOTE(review): inline_threshold=0 presumably disables inlining of small
# chunks into the reference set -- confirm against the kerchunk docs.
with fs.open(rl_nwm_url) as f:
    rl_t = SingleHdf5ToZarr(f, rl_nwm_url, inline_threshold=0).translate()

# Only the reference set ("fo") is passed as a storage option. Adding
# "remote_protocol": "https" and "remote_options": {'anon': True} was tried
# and returned a properly dimensioned but otherwise null dataset, so they are
# deliberately left out.
backend_args = dict(
    consolidated=False,
    storage_options={"fo": rl_t},
)
ds = xr.open_dataset("reference://", engine="zarr", backend_kwargs=backend_args)


In [None]:
# Load the shapefile of state boundaries with geopandas.
# The path is relative to the notebook's working directory. The file name
# suggests the 2024 Census TIGER/Line state layer (assumption -- confirm).
path = "../tl_2024_us_state/tl_2024_us_state.shp"
states = gpd.read_file(path)

In [None]:
# Build one point geometry per entry of the RouteLink lon/lat arrays, in the
# same order as they appear in ds, so that row i of the GeoDataFrame
# corresponds to entry i of ds.
x_flat = ds["lon"].values.ravel()
y_flat = ds["lat"].values.ravel()

# NOTE(review): the points are declared as EPSG:4269; this presumably matches
# the CRS of the state-boundary shapefile -- confirm before changing either.
reach_points = gpd.points_from_xy(x=x_flat, y=y_flat, crs="EPSG:4269")
xy_point_array = gpd.GeoDataFrame(geometry=reach_points)

In [None]:
# Spatial join: every point that matches a state polygon gains that polygon's
# attribute columns (including NAME, the state it lies in). The join runs with
# geopandas' default settings, and the result keeps the row labels of
# xy_point_array.
states_by_point = gpd.sjoin(xy_point_array, states)

In [None]:
# Save the spatial join result to disk as 'routelink_by_state.shp', written
# relative to the current working directory.
states_by_point.to_file('routelink_by_state.shp')

  states_by_point.to_file('routelink_by_state.shp')
  ogr_write(
  ogr_write(


In [None]:
# Create list of states in the order that reaches are listed in ds.
#
# states_by_point is looked up by row label, where label i stands for the
# i-th reach in ds. Reaches whose label is absent from the join get 'NA'.
# NOTE: this rebinds `states` (previously the state-boundary GeoDataFrame) to
# a plain list, so reload the shapefile before re-running the spatial join.
state_names = states_by_point['NAME']

# A point can match more than one polygon (e.g. on a shared border), which
# repeats its label in the join. Keep the first match so that every reach
# resolves to a single scalar name rather than a Series of names.
state_names = state_names[~state_names.index.duplicated(keep='first')]

# Label-based alignment replaces the per-reach lookup loop and its bare
# `except`, which turned every error (not only a missing label) into 'NA'.
states = state_names.reindex(range(ds.lat.shape[0]), fill_value='NA').tolist()

In [None]:
# Attach each reach's state name to ds as a coordinate named 'State' along the
# feature_id dimension.
# TODO: this would arguably be better stored as a data variable than as a
# coordinate; change it if that suits downstream use.
state_coord = xr.DataArray(
    data=states,
    dims=('feature_id',),
    coords={'feature_id': ds['feature_id']},
)
ds.coords['State'] = state_coord

In [None]:
# Keep only the reaches located in the target state (Alabama).
#
# Boolean positional selection is used instead of `ds.where(..., drop=True)`:
# `where` masks non-matching entries with NaN, which promotes any integer
# variables to float even though the masked rows are dropped afterwards.
# `isel` with a boolean mask simply picks the matching rows and leaves every
# variable's dtype untouched.
target_state = 'Alabama'
in_target_state = (ds['State'] == target_state).values
al_cats = ds.isel(feature_id=in_target_state)

In [None]:
# Save the Alabama subset (al_cats), with every variable and coordinate it
# carries, to disk as the NetCDF file 'al_routelink.nc', written relative to
# the current working directory.
al_cats.to_netcdf('al_routelink.nc')