In [1]:
import geopandas as gpd
import pandas as pd
import networkx as nx

# Load all regional catchments (Level 8, customized with lakes)
catchments_sa = gpd.read_file("sa/hybas_lake_sa_lev08_v1c.shp")
catchments_na = gpd.read_file("na/hybas_lake_na_lev08_v1c.shp")
catchments_ar = gpd.read_file("ar/hybas_lake_ar_lev08_v1c.shp")

In [2]:
# Merge all catchments
catchments = pd.concat([catchments_sa, catchments_na, catchments_ar], ignore_index=True)
catchments = gpd.GeoDataFrame(catchments, crs=catchments_sa.crs)

In [3]:
# Load your lake polygons
lakes = gpd.read_file("lakes/CCILakesV202_Laura.shp")

In [4]:
# Ensure CRS match
if lakes.crs != catchments.crs:
    lakes = lakes.to_crs(catchments.crs)

# Spatial join: find lake-containing catchments
matched = gpd.sjoin(catchments[['HYBAS_ID', 'NEXT_DOWN', 'geometry']], lakes, how="inner", predicate="intersects")
lake_hybas_ids = matched['HYBAS_ID'].unique()

In [5]:
# 4. Build a directed graph: edges point *downstream*
G = nx.DiGraph()
for i, row in catchments.iterrows():
    src = row['HYBAS_ID']
    dst = row['NEXT_DOWN']
    if dst != 0:  # 0 means no downstream connection (e.g., ocean)
        G.add_edge(dst, src)  # Reversed: we're interested in tracing *upstream*

In [7]:
# 5. Collect upstream nodes for each lake catchment
all_upstream_ids = set()
for lake_id in lake_hybas_ids:
    if lake_id in G:
        upstream = nx.descendants(G, lake_id)
        upstream.add(lake_id)  # Include the lake's own catchment
        all_upstream_ids.update(upstream)

# 6. Filter catchments by upstream IDs
upstream_catchments = catchments[catchments['HYBAS_ID'].isin(all_upstream_ids)]


In [8]:
# 7. Export
upstream_catchments.to_file("LakeCatchments_Level8_WithUpstream_NX.shp")
print(f"✅ Exported {len(upstream_catchments)} catchments to 'LakeCatchments_Level8_WithUpstream_NX.shp'")

✅ Exported 38285 catchments to 'LakeCatchments_Level8_WithUpstream_NX.shp'
