In [1]:
import geopandas as gpd
import pandas as pd
import networkx as nx

In [2]:
def load_and_merge_catchments(paths):
    """Load HydroBASINS catchment layers and merge them into one GeoDataFrame.

    Parameters
    ----------
    paths : iterable of str or path-like
        Files readable by ``geopandas.read_file``; they are read in order.

    Returns
    -------
    geopandas.GeoDataFrame
        The rows of every file stacked under a fresh 0..n-1 index, labelled
        with the CRS of the first file.

    Raises
    ------
    ValueError
        If ``paths`` is empty.
    """
    # Materialise first so one-shot iterables work and emptiness can be checked.
    paths = list(paths)
    if not paths:
        raise ValueError("No catchment file paths were provided.")

    frames = [gpd.read_file(path) for path in paths]
    target_crs = frames[0].crs

    # The merged frame is labelled with the first file's CRS, so any layer
    # stored in a different CRS must be reprojected before stacking; otherwise
    # its coordinates would not match the label.
    if target_crs is not None:
        frames = [
            frame.to_crs(target_crs)
            if frame.crs is not None and frame.crs != target_crs
            else frame
            for frame in frames
        ]

    combined = pd.concat(frames, ignore_index=True)
    return gpd.GeoDataFrame(combined, crs=target_crs)


In [3]:
def load_lakes(lake_path, target_crs):
    """Read the lake layer and return it in ``target_crs``.

    The file at ``lake_path`` is read with ``geopandas.read_file``. When its
    CRS compares unequal to ``target_crs`` the layer is reprojected with
    ``to_crs``; otherwise it is returned exactly as read.
    """
    lake_gdf = gpd.read_file(lake_path)
    already_aligned = lake_gdf.crs == target_crs
    return lake_gdf if already_aligned else lake_gdf.to_crs(target_crs)

In [4]:
def match_lakes_to_catchments(catchments, lakes, lake_id_col):
    """Spatially join lakes to the catchments they intersect.

    Parameters
    ----------
    catchments : geopandas.GeoDataFrame
        Must contain ``HYBAS_ID`` and ``geometry`` columns.
    lakes : geopandas.GeoDataFrame
        Lake polygons; must contain ``lake_id_col``.
    lake_id_col : str
        Name of the column in ``lakes`` that identifies each lake.

    Returns
    -------
    pandas.DataFrame
        Columns ``HYBAS_ID`` and ``lake_id_col``, one row per intersecting
        (catchment, lake) pair. A lake that touches several catchments
        therefore appears on several rows.

    Raises
    ------
    KeyError
        If ``lake_id_col`` is not a column of ``lakes``.
    """
    # Fail before the (potentially expensive) spatial join rather than after.
    if lake_id_col not in lakes.columns:
        raise KeyError(
            f"Lake id column {lake_id_col!r} not found in lakes; "
            f"available columns: {list(lakes.columns)}"
        )

    # Join only the columns that are needed. Passing every lake attribute
    # through sjoin risks a name clash (e.g. a lake column also called
    # HYBAS_ID), which sjoin resolves by suffixing and which would break the
    # column selection below.
    catchment_part = catchments[['HYBAS_ID', 'geometry']]
    lake_part = lakes[[lake_id_col, lakes.geometry.name]]

    joined = gpd.sjoin(catchment_part, lake_part, how="inner", predicate="intersects")
    return joined[['HYBAS_ID', lake_id_col]]


In [5]:
def build_catchment_graph(catchments):
    """Build a directed graph of the catchment network with edges pointing upstream.

    Every ``HYBAS_ID`` becomes a node. For each row whose ``NEXT_DOWN`` is not
    0, an edge ``NEXT_DOWN -> HYBAS_ID`` is added, so the descendants of a node
    are the catchments that drain into it. Rows with ``NEXT_DOWN == 0``
    contribute no edge (presumably 0 marks a catchment with no downstream
    neighbour -- confirm against the HydroBASINS documentation).

    Parameters
    ----------
    catchments : pandas.DataFrame or geopandas.GeoDataFrame
        Must contain ``HYBAS_ID`` and ``NEXT_DOWN`` columns.

    Returns
    -------
    networkx.DiGraph
    """
    G = nx.DiGraph()

    hybas_ids = catchments['HYBAS_ID'].tolist()
    next_down_ids = catchments['NEXT_DOWN'].tolist()

    # Register every catchment up front. Without this, a catchment that has no
    # downstream link and no tributaries would never appear in the graph, and
    # membership checks against the graph would wrongly reject it.
    G.add_nodes_from(hybas_ids)

    # Edge points upstream: downstream catchment -> the catchment draining into it.
    G.add_edges_from(
        (dst, src)
        for src, dst in zip(hybas_ids, next_down_ids)
        if dst != 0
    )
    return G

In [6]:
def trace_upstream_by_lake(lake_matches, graph, lake_id_col='Lake_ID'):
    """For each lake, list its matched catchment(s) and everything upstream.

    Parameters
    ----------
    lake_matches : pandas.DataFrame
        One row per (catchment, lake) match, with columns ``HYBAS_ID`` and
        ``lake_id_col``.
    graph : networkx.DiGraph
        Catchment graph whose edges point upstream, so the descendants of a
        node are its upstream catchments.
    lake_id_col : str, default 'Lake_ID'
        Name of the lake identifier column in ``lake_matches``; the same name
        is used for the output column.

    Returns
    -------
    pandas.DataFrame
        Columns ``HYBAS_ID`` and ``lake_id_col`` with one row per unique
        (catchment, lake) pair. The columns are present even when there are
        no rows, so the result can always be merged on ``HYBAS_ID``.
    """
    upstream_by_catchment = {}  # HYBAS_ID -> set of itself plus all upstream ids
    seen_pairs = set()
    records = []

    for hybas_id, lake_id in zip(lake_matches['HYBAS_ID'], lake_matches[lake_id_col]):
        if hybas_id not in upstream_by_catchment:
            # A catchment absent from the graph has no known upstream
            # neighbours, but it still belongs to the lake's own catchment set.
            if hybas_id in graph:
                upstream = set(nx.descendants(graph, hybas_id))
            else:
                upstream = set()
            upstream.add(hybas_id)
            upstream_by_catchment[hybas_id] = upstream

        # A lake that intersects several nested catchments reaches the same
        # upstream catchment more than once; emit each pair only once.
        for uid in upstream_by_catchment[hybas_id]:
            pair = (uid, lake_id)
            if pair not in seen_pairs:
                seen_pairs.add(pair)
                records.append({'HYBAS_ID': uid, lake_id_col: lake_id})

    return pd.DataFrame(records, columns=['HYBAS_ID', lake_id_col])

In [7]:
def merge_with_geometries(upstream_df, catchments):
    """Attach catchment attributes and geometry to each upstream record.

    ``upstream_df`` is left-joined to ``catchments`` on ``HYBAS_ID``, so every
    row of ``upstream_df`` is kept. The joined table is returned as a
    GeoDataFrame that uses the ``geometry`` column and the CRS of
    ``catchments``.
    """
    catchment_crs = catchments.crs
    with_geometry = upstream_df.merge(
        catchments,
        how='left',
        on='HYBAS_ID',
    )
    return gpd.GeoDataFrame(with_geometry, crs=catchment_crs, geometry='geometry')


In [None]:
def main(
    catchment_files=None,
    lake_file="lakes/CCILakesV202_Laura.shp",
    lake_id_column="Lake_ID",
    output_path="LakeCatchments_Grouped_By_LakeID.shp",
):
    """Run the full pipeline: load data, trace upstream catchments per lake, export.

    Parameters
    ----------
    catchment_files : list of str, optional
        Catchment layers to merge. Defaults to the three HydroBASINS level-08
        files under ``sa/``, ``na/`` and ``ar/``.
    lake_file : str
        Path to the lake polygon layer.
    lake_id_column : str
        Column in the lake layer that identifies each lake.
    output_path : str
        Where the resulting layer is written.

    Returns
    -------
    None
        The result is written to ``output_path``. Nothing is written when no
        lake intersects any catchment.
    """
    # === File paths ===
    if catchment_files is None:
        catchment_files = [
            "sa/hybas_lake_sa_lev08_v1c.shp",
            "na/hybas_lake_na_lev08_v1c.shp",
            "ar/hybas_lake_ar_lev08_v1c.shp",
        ]

    # === Process ===
    catchments = load_and_merge_catchments(catchment_files)
    lakes = load_lakes(lake_file, catchments.crs)
    lake_matches = match_lakes_to_catchments(catchments, lakes, lake_id_column)
    if lake_matches.empty:
        print("No lakes intersect the loaded catchments; nothing to export.")
        return

    # trace_upstream_by_lake is called below in its two-argument form, which
    # reads the lake identifier from a column named 'Lake_ID'. Normalise the
    # configured column name for that call and restore it afterwards so that
    # any lake_id_column value works.
    traced_column = "Lake_ID"
    lake_matches = lake_matches.rename(columns={lake_id_column: traced_column})

    graph = build_catchment_graph(catchments)
    upstream_df = trace_upstream_by_lake(lake_matches, graph)
    if upstream_df.empty:
        print("No upstream catchments were found; nothing to export.")
        return
    upstream_df = upstream_df.rename(columns={traced_column: lake_id_column})

    result_gdf = merge_with_geometries(upstream_df, catchments)

    # === Export ===
    result_gdf.to_file(output_path)
    print("✅ Done! Exported grouped upstream catchments.")

# Run the pipeline only when this code executes as the main module; importing
# it from elsewhere defines the functions without triggering main().
if __name__ == "__main__":
    main()
