Saves the RouteLink features for Alabama (or any other state) to a NetCDF file on disk.

In [1]:
import fsspec
import xarray as xr
from kerchunk.hdf import SingleHdf5ToZarr
import rioxarray as rx 
import geopandas as gpd

# HTTP filesystem used to read the remote RouteLink file.
fs = fsspec.filesystem("http")

rl_nwm_url = (
    "https://www.nco.ncep.noaa.gov/pmb/codes/nwprod/nwm.v3.0.13/parm/"
    "domain/RouteLink_CONUS.nc"
)

# Translate the remote HDF5/NetCDF file into a kerchunk reference set
# (`rl_t`), which is later opened through the "reference://" filesystem.
# Key example here:
# https://fsspec.github.io/kerchunk/test_example.html
with fs.open(rl_nwm_url) as remote_file:
    hdf5_to_zarr = SingleHdf5ToZarr(remote_file, rl_nwm_url, inline_threshold=0)
    rl_t = hdf5_to_zarr.translate()


In [2]:
# Storage options for fsspec's reference filesystem: "fo" carries the
# kerchunk reference set held in `rl_t`.
reference_storage = dict(
    fo=rl_t,
    # Adding these options returns a properly dimensioned but otherwise null
    # dataframe
    # "remote_protocol": "https",
    # "remote_options": {'anon':True}
)
backend_args = dict(consolidated=False, storage_options=reference_storage)

# Open the referenced file as an xarray Dataset through the zarr engine.
ds = xr.open_dataset("reference://", engine="zarr", backend_kwargs=backend_args)


In [None]:
# State boundary polygons used for the spatial join below.
# NOTE(review): presumably the Census TIGER/Line 2024 state shapefile,
# judging by the file name -- confirm the source.
path = "../tl_2024_us_state/tl_2024_us_state.shp"
states = gpd.read_file(filename=path)


In [4]:
# Disabled experiment: the code below is wrapped in a bare string literal, so
# it is never executed; the notebook only echoes the string as the cell output.
'''subslice = [
    "link",
    "to",
    "gages",
]
df = ds[subslice].to_dataframe().astype({"link": int, "to": int,})'''

'subslice = [\n    "link",\n    "to",\n    "gages",\n]\ndf = ds[subslice].to_dataframe().astype({"link": int, "to": int,})'

In [5]:
# Display the dataset's rich summary (dimensions, coordinates, variables).
ds

In [6]:
# Flatten the longitude/latitude arrays into 1-D x/y vectors.
x_flat = ds.lon.values.ravel()
y_flat = ds.lat.values.ravel()

# One point geometry per (lon, lat) pair, tagged with EPSG:4269.
point_geometries = gpd.points_from_xy(x=x_flat, y=y_flat, crs="EPSG:4269")
xy_point_array = gpd.GeoDataFrame(geometry=point_geometries)

In [7]:
# Spatially join every point to the state polygon(s) it intersects.
# The defaults are spelled out: an inner join keeps only points that match
# at least one polygon, so unmatched points are absent from the result.
states_by_point = gpd.sjoin(
    xy_point_array,
    states,
    how="inner",
    predicate="intersects",
)

In [8]:
# Persist the joined points (with their state attributes) as a shapefile.
states_by_point.to_file("routelink_by_state.shp", driver="ESRI Shapefile")

  states_by_point.to_file('routelink_by_state.shp')
  ogr_write(
  ogr_write(


In [15]:
# Inspect the joined index: it keeps the original point positions, so any
# point the join did not keep shows up as a gap in the labels.
states_by_point.index

Index([      0,       1,       2,       3,       4,       5,       6,       7,
             8,       9,
       ...
       2776722, 2776724, 2776726, 2776727, 2776728, 2776729, 2776730, 2776731,
       2776732, 2776733],
      dtype='int64', length=2668128)

In [23]:
# Build one state name per routelink feature, aligned to positions 0..N-1.
#
# The joined frame is indexed by original point position, but that index has
# gaps (points the join did not keep) and may repeat a position when a point
# matched more than one polygon. The previous per-row loop hid both cases
# behind a bare `except:` and could append a whole Series for a repeated label.
#
# NOTE: this rebinds `states` from the state-boundary GeoDataFrame to a plain
# list of names; re-run the shapefile-loading cell before repeating the join.
state_name_by_point = states_by_point["NAME"]

# Keep the first match per point so every position maps to a single string.
is_repeat = state_name_by_point.index.duplicated(keep="first")
state_name_by_point = state_name_by_point[~is_repeat]

# Vectorized lookup: positions missing from the join are filled with 'NA'.
states = (
    state_name_by_point
    .reindex(range(ds.lat.shape[0]), fill_value="NA")
    .tolist()
)

In [None]:
# Wrap the per-feature state names in a DataArray along `feature_id` and
# attach it to the dataset as the non-index coordinate "State".
state_coord = xr.DataArray(
    data=states,
    dims=("feature_id",),
    coords={"feature_id": ds["feature_id"]},
)
ds.coords["State"] = state_coord

In [43]:
# Sanity check: show the newly attached State coordinate as plain text.
state_view = ds["State"]
print(state_view)

<xarray.DataArray 'State' (feature_id: 2776734)> Size: 222MB
array(['Minnesota', 'Minnesota', 'Minnesota', ..., 'New York', 'New York',
       'New York'], dtype='<U20')
Coordinates:
    lat         (feature_id) float32 11MB 46.23 46.21 46.2 ... 44.99 45.0 45.0
    lon         (feature_id) float32 11MB -96.54 -96.53 -96.51 ... -74.67 -74.55
  * feature_id  (feature_id) int64 22MB 0 1 2 3 ... 2776731 2776732 2776733
    State       (feature_id) <U20 222MB 'Minnesota' 'Minnesota' ... 'New York'


In [44]:
# Keep only the features whose State coordinate equals the target state.
#
# Boolean indexing along `feature_id` is used instead of
# `ds.where(cond, drop=True)`: `where` first masks non-matching entries with
# NaN, which promotes integer variables to float and broadcasts variables that
# lack the `feature_id` dimension. `isel` simply selects rows, so dtypes and
# shapes of the remaining variables are preserved.
target_state = "Alabama"  # change this to extract a different state
in_target_state = (ds["State"] == target_state).values
al_cats = ds.isel(feature_id=in_target_state)

In [47]:
# Write the state-filtered subset to a NetCDF file in the working directory.
al_cats.to_netcdf(path="al_routelink.nc")