# SWORD-defined bounding box around each aufeis feature of interest

In [1]:
import os
import math
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon
from pyproj import Transformer
from netCDF4 import Dataset
from scipy.spatial import cKDTree
from collections import defaultdict

In [2]:
# ---------- Paths & parameters to be adjusted ----------
sword_nc_path = "../Workspace/na_sword_v17b.nc" # need SWORD v17b for SWOT data from 4/28/25 onwards
aufeis_shp_path = "../Workspace/aufeis_locations.shp" # manually id'ed aufeis with length & width attributes
out_shp_path = "../Workspace/polygon_aufeis.shp" # where to save output files

target_crs = "EPSG:32606"   # UTM zone 6N & UTM zone 7N
scale_maxwidth = 1.0        # still applied if fallback to node max_width

# currently this is set to eastern north slope ish ish (where colville turns north)
bbox = (-151.524048, 67.205182, -140.139127, 70.222115)  # (lon_min, lat_min, lon_max, lat_max)
endpoint_tol = 25.0  # meters tolerance to match SWORD reach endpoints when connecting reaches

In [3]:
# ---------- Helper functions ----------

# shortcut for pulling a variable out of a grouped NetCDF
def var_from_group(nc_obj, groupname, varname):
    return nc_obj.groups[groupname].variables[varname][:]

# builds a rectangle centered and oriented along a line segment of a given width 
def make_rotated_rectangle_from_segment(pt1, pt2, width):
    x1, y1 = pt1 # end of line segment
    x2, y2 = pt2 # other end of line segment
    dx = x2 - x1
    dy = y2 - y1
    seg_len = math.hypot(dx, dy) # length of segment
    if seg_len == 0: # catch for if segment has no length
        w2 = max(1.0, width / 2.0)
        return Point(x1, y1).buffer(w2, cap_style=2) # would just make a small circle
    angle = math.atan2(dy, dx) # direction of the line (in radians)
    perp_x = -math.sin(angle) # unit vector perpendicular to line segment
    perp_y = math.cos(angle)
    half_w = width / 2.0 # put half the width on either side of the line segment
    p1 = (x1 + perp_x * half_w, y1 + perp_y * half_w) # construct the corners of the rectangle
    p2 = (x2 + perp_x * half_w, y2 + perp_y * half_w)
    p3 = (x2 - perp_x * half_w, y2 - perp_y * half_w)
    p4 = (x1 - perp_x * half_w, y1 - perp_y * half_w)
    return Polygon([p1, p2, p3, p4, p1]) # returns a closed polygon with the four corners

# segments the portion SWORD we want
def extract_segment_centered_on_line(line: LineString, center_point: Point, total_length: float):
    if total_length <= 0: # catch for if length is 0 or negative
        return LineString([center_point, center_point])
    if line.length <= total_length: # catch for if we are asking for a smaller section than already given
        return line
    proj = line.project(center_point) # find the distance along the line from the start to dist of interest
    # compute where the segment should start and end
    half = total_length / 2.0
    start = max(0.0, proj - half)
    end = min(line.length, proj + half)
    n_samples = max(2, int(math.ceil((end - start) / 1.0)))
    distances = np.linspace(start, end, n_samples)
    pts = [line.interpolate(d) for d in distances]
    # return the extracted line segment
    return LineString([(p.x, p.y) for p in pts])

# ensures correct topology aka keeps reaches connected in the right direction
def orient_linestring_to_connect(prev_end, neighbor_line):
    start = Point(neighbor_line.coords[0])
    end = Point(neighbor_line.coords[-1])
    dstart = prev_end.distance(start)
    dend = prev_end.distance(end)
    return neighbor_line if dstart <= dend else LineString(list(neighbor_line.coords)[::-1])

# list of coordinates along a line
def coords_from_linestring_portion(line, from_dist, to_dist, forward=True):
    n_samples = max(2, int(math.ceil(abs(to_dist - from_dist) / 1.0)))
    distances = np.linspace(from_dist, to_dist, n_samples)
    pts = [line.interpolate(d) for d in distances]
    coords = [(p.x, p.y) for p in pts]
    return coords if forward else coords[::-1]

In [6]:
# ---------- Load aufeis locations ----------
aufeis = gpd.read_file(aufeis_shp_path)
aufeis = aufeis.to_crs(target_crs)

# feature count and CRS
print(f"Loaded {len(aufeis)} aufeis features. CRS: {aufeis.crs}")

# checks before processing
if aufeis.crs is None:
    raise RuntimeError("aufeis shapefile has no CRS.")
if "length_m" not in aufeis.columns:
    raise RuntimeError("aufeis shapefile should contain 'length_m' column.")
if "width_m" not in aufeis.columns:
    raise RuntimeError("aufeis shapefile should contain 'width_m' column.")

Loaded 5 aufeis features. CRS: EPSG:32606


In [8]:
# ---------- Open netCDF and subset by bounding box ----------

# set bounding box
lon_min, lat_min, lon_max, lat_max = bbox
# bring SWORD in with base netCDF4 reader
nc = Dataset(sword_nc_path, mode="r")

# Read node arrays
node_lon_all = var_from_group(nc, "nodes", "x")[:]
node_lat_all = var_from_group(nc, "nodes", "y")[:]
node_id_all = var_from_group(nc, "nodes", "node_id")[:]
node_reach_all = var_from_group(nc, "nodes", "reach_id")[:]
node_max_width_all = var_from_group(nc, "nodes", "max_width")[:]

# subset nodes by bbox (lon/lat)
mask_nodes = (node_lon_all >= lon_min) & (node_lon_all <= lon_max) & (node_lat_all >= lat_min) & (node_lat_all <= lat_max)
node_idx_sub = np.nonzero(mask_nodes)[0]
if node_idx_sub.size == 0:
    raise RuntimeError("No nodes in bbox.") # catch to make sure there's data

node_id = node_id_all[node_idx_sub].astype(int)
node_lon = node_lon_all[node_idx_sub].astype(float)
node_lat = node_lat_all[node_idx_sub].astype(float)
node_reach = node_reach_all[node_idx_sub].astype(int)
node_max_width = node_max_width_all[node_idx_sub].astype(float)

# Read centerlines (global)
cl_x_all = var_from_group(nc, "centerlines", "x")[:]
cl_y_all = var_from_group(nc, "centerlines", "y")[:]
cl_id_all = var_from_group(nc, "centerlines", "cl_id")[:]
cl_reach_id_2d = var_from_group(nc, "centerlines", "reach_id")[:]
cl_node_id_2d = var_from_group(nc, "centerlines", "node_id")[:]
CL_FILL_INT = -9999

# bbox subset
mask_cl = (cl_x_all >= lon_min) & (cl_x_all <= lon_max) & (cl_y_all >= lat_min) & (cl_y_all <= lat_max)
cl_idx_sub_global = np.nonzero(mask_cl)[0]

cl_x_sub = cl_x_all[cl_idx_sub_global].astype(float)
cl_y_sub = cl_y_all[cl_idx_sub_global].astype(float)
cl_id_sub = cl_id_all[cl_idx_sub_global].astype(int)
cl_reach_id_sub = cl_reach_id_2d[:, cl_idx_sub_global]
cl_node_id_sub = cl_node_id_2d[:, cl_idx_sub_global]

In [9]:
# ---------- Transform coords to UTM ----------
transformer = Transformer.from_crs("EPSG:4326", target_crs, always_xy=True)
nodes_x_utm, nodes_y_utm = transformer.transform(node_lon, node_lat)
nodes_xy = np.vstack([nodes_x_utm, nodes_y_utm]).T
cl_x_utm, cl_y_utm = transformer.transform(cl_x_sub, cl_y_sub)

In [10]:
# ---------- Build reach_lines ----------

# here we are going from centerline points to actual LineStrings stored by SWORD reach
num_domains, npoints_sub = cl_reach_id_sub.shape
cl_by_reach = defaultdict(list)
for d in range(num_domains):
    arr = cl_reach_id_sub[d, :]
    valid = (arr != CL_FILL_INT)
    for i_local in np.nonzero(valid)[0]:
        rid = int(arr[i_local])
        cid = int(cl_id_sub[i_local]) if cl_id_sub[i_local] != CL_FILL_INT else None
        if cid is None:
            continue
        cl_by_reach[rid].append((cid, float(cl_x_utm[i_local]), float(cl_y_utm[i_local])))
reach_lines = {}
for rid, points in cl_by_reach.items():
    points.sort(key=lambda t: t[0])
    coords = [(t[1], t[2]) for t in points]
    if len(coords) >= 2:
        reach_lines[rid] = LineString(coords)
print(f"Built {len(reach_lines)} reach lines in bbox.")

Built 403 reach lines in bbox.


In [12]:
# ---------- River network topology ----------

# which river reaches connect to which other reaches (e.g. which are upstream and downstream)
reaches_group = nc.groups.get("reaches", None)
adj_down = defaultdict(list)
adj_up = defaultdict(list)
if reaches_group is not None:
    try:
        rch_up_all = reaches_group.variables["rch_id_up"][:]
        rch_dn_all = reaches_group.variables["rch_id_dn"][:]
        rch_up = rch_up_all[0, :]
        rch_dn = rch_dn_all[0, :]
        reach_id_all = reaches_group.variables["reach_id"][:]
        reachid_to_index = {int(r): i for i, r in enumerate(reach_id_all)}
        REACH_FILL = -9999
        for global_rid in list(reach_lines.keys()):
            idx = reachid_to_index.get(int(global_rid), None)
            if idx is None:
                continue
            val_dn = int(rch_dn[idx]) if rch_dn[idx] != REACH_FILL else None
            val_up = int(rch_up[idx]) if rch_up[idx] != REACH_FILL else None
            if val_dn is not None:
                adj_down[int(global_rid)].append(val_dn)
            if val_up is not None:
                adj_up[int(global_rid)].append(val_up)
    except Exception:
        pass

# # geometry-based adjacency
# for rid, line in reach_lines.items():
#     end_pt = Point(line.coords[-1])
#     start_pt = Point(line.coords[0])
#     for cand_rid, cand_line in reach_lines.items():
#         if cand_rid == rid:
#             continue
#         cand_start = Point(cand_line.coords[0])
#         cand_end = Point(cand_line.coords[-1])
#         if end_pt.distance(cand_start) <= endpoint_tol:
#             adj_down[rid].append(cand_rid); adj_up[cand_rid].append(rid)
#         elif end_pt.distance(cand_end) <= endpoint_tol:
#             adj_down[rid].append(cand_rid); adj_up[cand_rid].append(rid)
#         if start_pt.distance(cand_end) <= endpoint_tol:
#             adj_up[rid].append(cand_rid); adj_down[cand_rid].append(rid)
#         elif start_pt.distance(cand_start) <= endpoint_tol:
#             adj_up[rid].append(cand_rid); adj_down[cand_rid].append(rid)

# Remove duplicates
for k in list(adj_down.keys()):
    adj_down[k] = list(dict.fromkeys(adj_down[k]))
for k in list(adj_up.keys()):
    adj_up[k] = list(dict.fromkeys(adj_up[k]))

In [13]:
# ---------- KDTree for node snapping ----------

# KD-Tree is a fast spatial search structure
kdt = cKDTree(nodes_xy) # spatial index of all node coordinates
nodeid_to_index = {int(nid): i for i, nid in enumerate(node_id)}

In [16]:
# ---------- Traversal function ----------

# build a continuous centerline given a reach id (center_reach_id), point near the reach (center_point), 
# and the length of the final segment above and below the center_point (half_length_needed)

def collect_multireach_segment(center_reach_id, center_point, half_length_needed):
    # get the LineString for the given center_reach_id
    if center_reach_id not in reach_lines: # catch!
        return None
    center_line = reach_lines[center_reach_id]
    # Get the distance along the reach to the point nearest the center_point (aufeis location)
    proj = center_line.project(center_point)
    # exact coord along the reach
    proj_point = center_line.interpolate(proj)

    # Initialize upstream / downstream direction
    upstream_coords = [(proj_point.x, proj_point.y)]
    downstream_coords = [(proj_point.x, proj_point.y)]

    # go upstream within center reach
    needed_up = half_length_needed
    upstream_len_accum = 0.0
    if proj > 0:
        coords = coords_from_linestring_portion(center_line, proj, 0.0, forward=False)
        prev = (proj_point.x, proj_point.y)
        for (x,y) in coords[1:]:
            seglen = math.hypot(x - prev[0], y - prev[1])
            if upstream_len_accum + seglen > needed_up:
                remain = needed_up - upstream_len_accum
                frac = remain / seglen if seglen>0 else 0
                ix = prev[0] + (x - prev[0]) * frac
                iy = prev[1] + (y - prev[1]) * frac
                upstream_coords.append((ix, iy))
                upstream_len_accum = needed_up
                break
            else:
                upstream_coords.append((x,y))
                upstream_len_accum += seglen
                prev = (x,y)
    visited = set([center_reach_id])
    current_reach = center_reach_id
    while upstream_len_accum < needed_up:
        neighbors = adj_up.get(current_reach, [])
        if not neighbors:
            break
        next_reach = None
        for nb in neighbors:
            if nb not in visited and nb in reach_lines:
                next_reach = nb
                break
        if next_reach is None:
            break
        next_line = reach_lines[next_reach]
        oriented = orient_linestring_to_connect(Point(reach_lines[current_reach].coords[0]), next_line)
        coords_iter = list(oriented.coords)[::-1]
        prev = upstream_coords[-1]
        for (x,y) in coords_iter[1:]:
            seglen = math.hypot(x - prev[0], y - prev[1])
            if upstream_len_accum + seglen > needed_up:
                remain = needed_up - upstream_len_accum
                frac = remain / seglen if seglen>0 else 0
                ix = prev[0] + (x - prev[0]) * frac
                iy = prev[1] + (y - prev[1]) * frac
                upstream_coords.append((ix, iy))
                upstream_len_accum = needed_up
                break
            else:
                upstream_coords.append((x,y))
                upstream_len_accum += seglen
                prev = (x,y)
        visited.add(next_reach)
        current_reach = next_reach

    # downstream within center reach
    needed_dn = half_length_needed
    downstream_len_accum = 0.0
    if proj < center_line.length:
        coords = coords_from_linestring_portion(center_line, proj, center_line.length, forward=True)
        prev = (proj_point.x, proj_point.y)
        for (x,y) in coords[1:]:
            seglen = math.hypot(x - prev[0], y - prev[1])
            if downstream_len_accum + seglen > needed_dn:
                remain = needed_dn - downstream_len_accum
                frac = remain / seglen if seglen>0 else 0
                ix = prev[0] + (x - prev[0]) * frac
                iy = prev[1] + (y - prev[1]) * frac
                downstream_coords.append((ix, iy))
                downstream_len_accum = needed_dn
                break
            else:
                downstream_coords.append((x,y))
                downstream_len_accum += seglen
                prev = (x,y)

    current_reach = center_reach_id
    while downstream_len_accum < needed_dn:
        neighbors = adj_down.get(current_reach, [])
        if not neighbors:
            break
        next_reach = None
        for nb in neighbors:
            if nb not in visited and nb in reach_lines:
                next_reach = nb
                break
        if next_reach is None:
            break
        next_line = reach_lines[next_reach]
        oriented = orient_linestring_to_connect(Point(reach_lines[current_reach].coords[-1]), next_line)
        coords_iter = list(oriented.coords)
        prev = downstream_coords[-1]
        for (x,y) in coords_iter[1:]:
            seglen = math.hypot(x - prev[0], y - prev[1])
            if downstream_len_accum + seglen > needed_dn:
                remain = needed_dn - downstream_len_accum
                frac = remain / seglen if seglen>0 else 0
                ix = prev[0] + (x - prev[0]) * frac
                iy = prev[1] + (y - prev[1]) * frac
                downstream_coords.append((ix, iy))
                downstream_len_accum = needed_dn
                break
            else:
                downstream_coords.append((x,y))
                downstream_len_accum += seglen
                prev = (x,y)
        visited.add(next_reach)
        current_reach = next_reach

    up_rev = list(reversed(upstream_coords))
    full_coords = up_rev + downstream_coords[1:]
    return LineString(full_coords)

In [17]:
# ---------- Iterate aufeis, snap, build polygons (use width_m from aufeis) ----------

polys = []
attrs = []
for orig_idx, row in aufeis.reset_index().iterrows():
    orig_index = int(row['index'])
    length_m = float(row['length_m'])
    geom = row.geometry
    if not isinstance(geom, Point):
        geom = geom.centroid
    px, py = geom.x, geom.y

    # snap to nearest node
    dist, node_idx_local = kdt.query([px, py])
    node_idx_local = int(node_idx_local)
    snapped_node_id = int(node_id[node_idx_local])
    snapped_reach_id = int(node_reach[node_idx_local])
    # priority: use aufeis width_m field if present and >0
    width_m_val = None
    if 'width_m' in row.index:
        try:
            width_m_val = float(row['width_m'])
        except Exception:
            width_m_val = None
    if width_m_val is None or math.isnan(width_m_val) or width_m_val <= 0:
        # fallback to node max_width (scaled)
        node_w = float(node_max_width[node_idx_local]) * scale_maxwidth
        if math.isnan(node_w) or node_w <= 0:
            node_w = 1.0
        width_used_source = "node_max_width"
    else:
        node_w = float(width_m_val)
        width_used_source = "aufeis_width_m"

    # Use reach_lines for snapped reach
    if snapped_reach_id in reach_lines:
        reach_line = reach_lines[snapped_reach_id]
        proj_pt = reach_line.interpolate(reach_line.project(Point(px, py)))
        half = length_m / 2.0
        multi_seg = collect_multireach_segment(snapped_reach_id, proj_pt, half)
        if multi_seg is None or multi_seg.length == 0:
            seg = extract_segment_centered_on_line(reach_line, proj_pt, length_m)
            start_pt = seg.interpolate(0.0)
            end_pt = seg.interpolate(seg.length)
            rect = make_rotated_rectangle_from_segment((start_pt.x, start_pt.y), (end_pt.x, end_pt.y), node_w)
            used = "reach_single"
        else:
            proj_on_multi = multi_seg.project(Point(px, py))
            seg_exact = extract_segment_centered_on_line(multi_seg, multi_seg.interpolate(proj_on_multi), length_m)
            start_pt = seg_exact.interpolate(0.0)
            end_pt = seg_exact.interpolate(seg_exact.length)
            rect = make_rotated_rectangle_from_segment((start_pt.x, start_pt.y), (end_pt.x, end_pt.y), node_w)
            used = "multi_reach"
    else:
        # fallback: attempt node-level centerline
        node_cl = []
        for d in range(cl_node_id_sub.shape[0]):
            matches = np.where(cl_node_id_sub[d, :] == snapped_node_id)[0]
            for mi in matches:
                node_cl.append((int(cl_id_sub[mi]), float(cl_x_utm[mi]), float(cl_y_utm[mi])))
        if len(node_cl) >= 2:
            node_cl.sort(key=lambda t: t[0])
            ln = LineString([(t[1], t[2]) for t in node_cl])
            seg = extract_segment_centered_on_line(ln, ln.interpolate(ln.project(Point(px, py))), length_m)
            start_pt = seg.interpolate(0.0)
            end_pt = seg.interpolate(seg.length)
            rect = make_rotated_rectangle_from_segment((start_pt.x, start_pt.y), (end_pt.x, end_pt.y), node_w)
            used = "node_cl"
        else:
            node_xy = nodes_xy[node_idx_local]
            rect = Point(node_xy[0], node_xy[1]).buffer(max(1.0, node_w/2.0), cap_style=2)
            used = "none"

    polys.append(rect)
    attrs.append({
        "aufeis_index": orig_index,
        "snapped_node_id": snapped_node_id,
        "snapped_reach_id": snapped_reach_id,
        "centerline_used": used,
        "width_used_source": width_used_source,
        "width_m_used": node_w,
        "length_m": length_m
    })

In [19]:
# ---------- Build GeoDataFrame & save ----------
print("Saving polygons to", out_shp_path)
out_gdf = gpd.GeoDataFrame(attrs, geometry=polys, crs=target_crs)
aufeis_reset = aufeis.reset_index().rename(columns={"index": "aufeis_index"})
out_gdf = out_gdf.merge(aufeis_reset.drop(columns="geometry"), on="aufeis_index", how="left")

os.makedirs(os.path.dirname(out_shp_path), exist_ok=True)
out_gdf.to_file(out_shp_path, driver="ESRI Shapefile")
print("wrote", len(out_gdf), "polygons to", out_shp_path)

# close netCDF
nc.close()

Saving polygons to ../Workspace/polygon_aufeis.shp
wrote 5 polygons to ../Workspace/polygon_aufeis.shp


RuntimeError: NetCDF: Not a valid ID

In [25]:
# %%bash

# sword checks
# ls -l ../Workspace/na_sword_v17b.nc
# ncdump -h ../Workspace/na_sword_v17b.nc

In [20]:
# %%bash
# # test to see what GRIT looks like (thought about this instead of SWORD)

# # subset the GRIT river network to a the eastern north slope of AK
# cd ../
# ogr2ogr -f GPKG GRITv06_reaches_NorthSlopeAK_EPSG4326.gpkg GRITv06_reaches_simple_NA_EPSG4326.gpkg -spat -151.524048 67.205182 -140.139127 70.222115 -spat_srs EPSG:4326

# # for now, grit seems overly complex... don't want to track along messy centerline that have breaks around aufeis ...
# # might also be good to look at merit hydro

In [32]:
# # to move the files to the right place:

# jovyan@c35f098b3a59:~/work$ pwd
# /home/jovyan/work
# jovyan@c35f098b3a59:~/work$ mv Workspace/polygon_aufeis.shx ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/polygon_aufeis.shp ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/polygon_aufeis.prj ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/polygon_aufeis.dbf ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/polygon_aufeis.cpg ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/aufeis_locations.cpg ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/aufeis_locations.dbf ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/aufeis_locations.prj ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/aufeis_locations.shp ../../../shared_space/SWOT_Aufeis/area_of_interest/
# jovyan@c35f098b3a59:~/work$ mv Workspace/aufeis_locations.shx ../../../shared_space/SWOT_Aufeis/area_of_interest/