# Watershed Delineation - Step-by-Step Example

This notebook demonstrates the functional approach to watershed delineation.
Each step can be run independently for easy debugging and verification.

In [1]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from pathlib import Path
import networkx as nx

import graph_delineator.delineate as gd

In [2]:
# Define MERIT data directories
merit_dirs = {
    'catchment': '/nas/cee-ice/data/MERIT-BASINS/pfaf_level_02/',
    'river': '/nas/cee-ice/data/MERIT-BASINS/pfaf_level_02/',
    'flow_dir': '/nas/cee-ice/data/MERIT-Hydro/processed/dir/',
    'accum': '/nas/cee-ice/data/MERIT-Hydro/processed/upg/',
    'megabasins': '/nas/cee-ice/data/MERIT-Hydro/basins_level2/merit_hydro_vect_level2.shp'
}

# Load gauges
gauges_csv = '/nas/cee-water/cjgleason/ted/graph_delineator/inputs/test.csv'



In [15]:
all_gauges = gd.load_gauges(gauges_csv)
all_gauges

Unnamed: 0,id,lng,lat,name,outlet_id,geometry
0,USGS-01052500,-71.0575,44.8775,"Diamond River near Wentworth Location, NH",USGS-01059000,POINT (-71.0575 44.8775)
1,USGS-01053500,-71.128611,44.7825,"Androscoggin River at Errol, NH",USGS-01059000,POINT (-71.12861 44.7825)
2,USGS-01053600,-71.181389,44.666111,Androscoggin River below Bog Brook at Cambridg...,USGS-01059000,POINT (-71.18139 44.66611)
3,USGS-01054000,-71.190278,44.435833,"Androscoggin River near Gorham, NH",USGS-01059000,POINT (-71.19028 44.43583)
4,USGS-01054114,-71.170656,44.381319,"Peabody River at Gorham, New Hampshire",USGS-01059000,POINT (-71.17066 44.38132)
5,USGS-01054200,-70.979722,44.390556,"Wild River at Gilead, Maine",USGS-01059000,POINT (-70.97972 44.39056)
6,USGS-01054300,-70.733056,44.593333,"Ellis River at South Andover, Maine",USGS-01059000,POINT (-70.73306 44.59333)
7,USGS-01054500,-70.544167,44.551944,"Androscoggin River at Rumford, Maine",USGS-01059000,POINT (-70.54417 44.55194)
8,USGS-01055000,-70.588889,44.642778,"Swift River near Roxbury, Maine",USGS-01059000,POINT (-70.58889 44.64278)
9,USGS-01055500,-70.229722,44.269444,"Nezinscot River at Turner Center, Maine",USGS-01059000,POINT (-70.22972 44.26944)


In [16]:
megabasins_outlets = gd.get_megabasin_outlet_dict(all_gauges, merit_dirs['megabasins'])
megabasins_outlets

Identifying basins for gauges...


{73: {'USGS-01059000': ['USGS-01052500',
   'USGS-01053500',
   'USGS-01053600',
   'USGS-01054000',
   'USGS-01054114',
   'USGS-01054200',
   'USGS-01054300',
   'USGS-01054500',
   'USGS-01055000',
   'USGS-01055500',
   'USGS-01057000',
   'USGS-01059000']}}

In [14]:
def get_watershed_comids(
    gauges: gpd.GeoDataFrame,
    catchments: gpd.GeoDataFrame,
    rivers: gpd.GeoDataFrame
) -> set:
    """
    Find all COMIDs in the watershed network containing these gauges.
    
    Uses network traversal to find all upstream catchments from the outlet.
    """
    joined = gpd.sjoin(gauges, catchments.reset_index(), how='left', predicate='within')
    gauge_comids = set(joined['COMID'].dropna().unique())
    
    if not gauge_comids:
        return set()
    
    # Build network graph
    G = nx.DiGraph()
    for comid in rivers.index:
        nextdown = rivers.loc[comid, 'NextDownID']
        if nextdown != '0' and nextdown in rivers.index:
            G.add_edge(comid, nextdown)
    
    # Find all upstream catchments
    network_comids = set()
    for start_comid in gauge_comids:
        if start_comid in G:
            # Add all ancestors (upstream)
            ancestors = {int(a) for a in nx.ancestors(G, start_comid)}
            network_comids.update(ancestors)
            
            # Add all descendants (downstream) 
            descendants =  {int(d) for d in nx.descendants(G, start_comid)}
            network_comids.update(descendants)
            
            # Add the node itself
            network_comids.add(int(start_comid))
    
    return network_comids



In [24]:
for megabasin_id, outlet_dict in megabasins_outlets.items():
    print(f"\n{'='*60}")
    print(f"Processing Basin {megabasin_id}")
    print(f"{'='*60}")
    
    # Load basin data
    mega_catchments, mega_rivers = gd.load_basin_data(megabasin_id, merit_dirs)

    # Process each outlet in this basin
    for outlet_id, outlet_gauge_ids in outlet_dict.items():
        print(f"\nProcessing outlet {outlet_id} ({len(outlet_gauge_ids)} gauges)")

        gauges = all_gauges[all_gauges['id'].isin(outlet_gauge_ids)]
        
        comids = get_watershed_comids(outlet_gauges, mega_catchments, mega_rivers)
        
        catchments = mega_catchments.loc[mega_catchments.index.isin(comids)]
        rivers = mega_rivers.loc[mega_rivers.index.isin(comids)]
        
        # Build graph with all geometries
        G = gd.build_graph_with_geometries(catchments, rivers)

        break
    break


Processing Basin 73
  Loading data from pfaf_73_MERIT_Hydro_v07_Basins_v01
    Loaded 23290 catchments, 23290 rivers

Processing outlet USGS-01059000 (12 gauges)


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4326
Right CRS: None

  joined = gpd.sjoin(gauges, catchments.reset_index(), how='left', predicate='within')


Built graph with 201 nodes, 200 edges


In [25]:
G.nodes['73001618']

{'polygon': <POLYGON ((-71.263 44.515, -71.261 44.515, -71.261 44.515, -71.26 44.515, -7...>,
 'river_geom': <LINESTRING (-71.18 44.468, -71.181 44.468, -71.182 44.468, -71.182 44.469, ...>,
 'area_km2': 41.79544015000498,
 'length_km': np.float64(5.98404862513981),
 'node_type': 'original',
 'is_gauge': False,
 'original_comid': 73001618,
 'nextdown': '73001335'}

In [26]:
gauges

Unnamed: 0,id,lng,lat,name,outlet_id,geometry
0,USGS-01052500,-71.0575,44.8775,"Diamond River near Wentworth Location, NH",USGS-01059000,POINT (-71.0575 44.8775)
1,USGS-01053500,-71.128611,44.7825,"Androscoggin River at Errol, NH",USGS-01059000,POINT (-71.12861 44.7825)
2,USGS-01053600,-71.181389,44.666111,Androscoggin River below Bog Brook at Cambridg...,USGS-01059000,POINT (-71.18139 44.66611)
3,USGS-01054000,-71.190278,44.435833,"Androscoggin River near Gorham, NH",USGS-01059000,POINT (-71.19028 44.43583)
4,USGS-01054114,-71.170656,44.381319,"Peabody River at Gorham, New Hampshire",USGS-01059000,POINT (-71.17066 44.38132)
5,USGS-01054200,-70.979722,44.390556,"Wild River at Gilead, Maine",USGS-01059000,POINT (-70.97972 44.39056)
6,USGS-01054300,-70.733056,44.593333,"Ellis River at South Andover, Maine",USGS-01059000,POINT (-70.73306 44.59333)
7,USGS-01054500,-70.544167,44.551944,"Androscoggin River at Rumford, Maine",USGS-01059000,POINT (-70.54417 44.55194)
8,USGS-01055000,-70.588889,44.642778,"Swift River near Roxbury, Maine",USGS-01059000,POINT (-70.58889 44.64278)
9,USGS-01055500,-70.229722,44.269444,"Nezinscot River at Turner Center, Maine",USGS-01059000,POINT (-70.22972 44.26944)


In [27]:
gauge_info = gd.insert_all_gauges(
    G,
    gauges,
    merit_dirs['flow_dir'],
    megabasin_id
)

ModuleNotFoundError: No module named 'split_catchment'