In [1]:
import geopandas as gpd
import pandas as pd
import networkx as nx

# Read the level-8, lake-customised catchment layer for each regional folder
# (sa, na, ar). The three frames are unpacked in the same order as the paths.
catchments_sa, catchments_na, catchments_ar = (
    gpd.read_file(shapefile)
    for shapefile in (
        "sa/hybas_lake_sa_lev08_v1c.shp",
        "na/hybas_lake_na_lev08_v1c.shp",
        "ar/hybas_lake_ar_lev08_v1c.shp",
    )
)

In [2]:
# Stack the regional layers into a single GeoDataFrame with a fresh 0..n-1 index.
# The CRS of the first layer (catchments_sa) is applied to the combined frame.
regional_layers = [catchments_sa, catchments_na, catchments_ar]
catchments = gpd.GeoDataFrame(
    pd.concat(regional_layers, ignore_index=True),
    crs=catchments_sa.crs,
)

In [3]:
# Read the lake polygons and bring them into the catchments' CRS so the
# spatial join compares geometries in the same coordinate system.
lakes = gpd.read_file("lakes/CCILakesV202_Laura.shp")
target_crs = catchments.crs
if lakes.crs != target_crs:
    # Reproject only when the two layers actually differ.
    lakes = lakes.to_crs(target_crs)

In [4]:

# Spatial join: keep every (catchment, lake) pair whose geometries intersect.
# Only the catchment columns needed downstream are passed to the join.
catchment_cols = ['HYBAS_ID', 'NEXT_DOWN', 'geometry']
matched = gpd.sjoin(catchments[catchment_cols], lakes, how="inner", predicate="intersects")

# Catchment/lake pairs used to seed the upstream search.
# 'Lake_ID' is assumed to be the lake layer's unique ID field -- change it to
# the actual column name if your file uses a different one.
lake_matches = matched[['HYBAS_ID', 'Lake_ID']]

# Distinct IDs of the catchments that touch at least one lake.
lake_hybas_ids = matched['HYBAS_ID'].unique()


In [5]:
# 4. Build directed graph of catchment flow
# Edges run NEXT_DOWN -> HYBAS_ID, i.e. from a catchment to the catchments that
# drain into it, so nx.descendants(G, x) returns everything upstream of x.
G = nx.DiGraph()

# Register every catchment as a node up front. A catchment whose NEXT_DOWN is 0
# and that no other catchment points to would otherwise never enter the graph,
# and any lake located in it would be dropped by a later `in G` membership check.
G.add_nodes_from(catchments['HYBAS_ID'].tolist())

# NEXT_DOWN == 0 is treated as "no downstream catchment": such rows add no edge.
has_downstream = catchments['NEXT_DOWN'] != 0
G.add_edges_from(
    zip(
        catchments.loc[has_downstream, 'NEXT_DOWN'].tolist(),  # downstream node
        catchments.loc[has_downstream, 'HYBAS_ID'].tolist(),   # upstream node
    )
)

In [6]:
# 5. Group upstream catchments by lake
# For every lake, collect the catchments it intersects plus everything upstream
# of them (descendants in G, whose edges point from downstream to upstream).
upstream_by_lake = {}  # Lake_ID -> set of HYBAS_IDs draining to that lake
upstream_cache = {}    # HYBAS_ID -> that catchment plus all catchments upstream of it

# Iterate the two columns directly: iterrows() builds one Series per row and
# upcasts mixed numeric columns to a common dtype, which can turn integer IDs
# into floats.
for lake_id, hybas_id in zip(lake_matches['Lake_ID'].tolist(), lake_matches['HYBAS_ID'].tolist()):
    if hybas_id not in upstream_cache:
        basin = {hybas_id}  # always include the lake's own catchment
        if hybas_id in G:
            # nx.descendants raises for unknown nodes, so only query known ones;
            # a catchment missing from G still contributes itself.
            basin |= nx.descendants(G, hybas_id)
        upstream_cache[hybas_id] = basin
    # A lake can intersect several catchments on the same flow path; merging
    # into a set keeps each (HYBAS_ID, Lake_ID) pair exactly once.
    upstream_by_lake.setdefault(lake_id, set()).update(upstream_cache[hybas_id])

# One record per unique (catchment, lake) pair; lakes keep first-seen order and
# catchments are sorted so repeated runs produce the same row order.
records = [
    {'HYBAS_ID': uid, 'Lake_ID': lake_id}
    for lake_id, basin in upstream_by_lake.items()
    for uid in sorted(basin)
]


In [7]:
# Create a DataFrame of the upstream-lake mappings.
# The columns are named explicitly so that an empty `records` list still yields
# a frame with a HYBAS_ID column; without them the merge below fails with a
# KeyError instead of producing an empty result.
upstream_df = pd.DataFrame(records, columns=['HYBAS_ID', 'Lake_ID'])

# Attach each catchment's attributes and geometry. A left merge keeps every
# (HYBAS_ID, Lake_ID) row, so a catchment upstream of several lakes appears
# once per lake.
upstream_catchments = upstream_df.merge(catchments, on='HYBAS_ID', how='left')

# The merge result is built from a plain DataFrame, so wrap it as a
# GeoDataFrame again, reusing the catchments' geometry column and CRS.
upstream_catchments_gdf = gpd.GeoDataFrame(upstream_catchments, geometry='geometry', crs=catchments.crs)


In [None]:
# 6. Export: write all upstream catchments to one file; the 'Lake_ID' column
# identifies which lake each row belongs to.
output_path = "LakeCatchments_Grouped_By_LakeID.shp"
upstream_catchments_gdf.to_file(output_path)

print("✅ Done! Exported upstream catchments grouped by Lake_ID.")