In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import networkx as nx

import os

from typing import (
    List,
    Dict,
    Any,
)

from collections.abc import (
    Collection,
    Callable,
)

In [2]:
# root directory of the MERIT-Basins dataset used throughout the notebook
mb_path = '/project/rrg-mclark/data/geospatial-data/MERIT-Basins/MERIT_Hydro_v07_Basins_v01_bugfix1/'

# `cat` layer, read from the `pfaf_level_02` sub-directory
cat = gpd.read_file(
    os.path.join(
        mb_path,
        'pfaf_level_02',
        'cat_pfaf_72_MERIT_Hydro_v07_Basins_v01_bugfix1.shp',
    )
)

# `riv` layer, read from the same `pfaf_level_02` sub-directory
riv = gpd.read_file(
    os.path.join(
        mb_path,
        'pfaf_level_02',
        'riv_pfaf_72_MERIT_Hydro_v07_Basins_v01_bugfix1.shp',
    )
)

# `cst` layer, read from the `coastal_hillslopes` sub-directory
cst = gpd.read_file(
    os.path.join(
        mb_path,
        'coastal_hillslopes',
        'hillslope_72_clean.shp',
    )
)

In [3]:
# how user will use `hydrant` to do aggregations
#
# >>> aggregation_methods = {
#     'lengthkm': 'sum',
#     'width': ht.agg_funcs.weighted_mean,
#     'slope': ht.agg_funcs.weighted_mean,
#     'order': 'upstream',
#     'uparea': 'sum',
#     'lengthdir': ht.agg_funcs.spatial_distance,
#     'slope_taud': 'mean'
# }

In [4]:
# The successor/predecessor candidates will eventually need to be found
# using networkx methods, maybe using
# >>> arc_successor = rg.out_edges(successor)
# >>> arc_predecessors = [rg.out_edges(e) for e in predecessors]
# predecessors = [72053437, 72052241, 72053625]
# but for now only one upstream node/segment is needed
successor = 72051873
predecessor = 72053625

# user inputs
attr = 'lengthkm'
main_id = 'COMID'
ds_main_id = 'NextDownID'

# build a networkx DiGraph out of GeoPandas GeoDataFrame of a river network;
# every row becomes an edge from `main_id` to `ds_main_id`, carrying all the
# remaining columns as edge attributes
rg = nx.from_pandas_edgelist(df=riv,
                             source=main_id,
                             target=ds_main_id,
                             edge_attr=True,
                             create_using=nx.DiGraph)

# each river confluence (networkx node) corresponds to its outgoing edge
# (river segment), and all attributes are similar for both objects.
# `orient='index'` yields {COMID: {column: value}} directly, which avoids
# transposing the whole frame (one object-dtype column per river segment)
# only to obtain the same row-keyed mapping
nx.set_node_attributes(
    G=rg,
    values=riv.set_index(main_id, drop=False).to_dict(orient='index')
)

____

Try to write a function that aggregates the attributes of two river segments (by doing so, you are assuming that the candidates for aggregation are already known):

In [5]:
def _aggregate_attr(
    rg: nx.DiGraph,
    successor: str | float | int,
    predecessor: float | int | Collection[float | int],
    attr: str,
    method: str | Callable,
    *args: Any,
    **kwargs: Dict[Any, Any],
) -> float | int:
    """Aggregate `attr` values given a `method` of choice that is either
    one of the pre-defined aggregation methods, or a user-defined
    function. If the user-defined function accepts more arguments, they
    can be fed using `args` or `kwargs`.
    
    Parameters
    ----------
    rg : ``networkx.DiGraph``
        A directed acyclic graph of a river network of interest, where
        each node (river confluence) has the same attribute of its outgoing
        edge (river).
    successor : str, float, or int depending on `rg`'s nodes datatypes
        `rg`'s node (river confluence) or its corresponding outgoing edge
        (river segment) which is going to be aggregated and further
        contracted with one or all of its predecessors. Node contraction
        is not done in this method.
    predecessor : str, float, or int depending on `rg`'s nodes datatypes
        `rg`'s node (river confluence) or its corresponding outgoing edge
        (river segment) which is going to be aggregated and further
        contracted with its successor. Node contraction is not done in
        this method.
    attr : str
        Target attribute for aggregation
    method : str or a user-defined function
        Method for aggregation of `attr` values of `successor` and
        `predecessor` nodes. It can be one of the pre-defined methods
        or a user-defined function; in case of a user-defined function,
        if necessary, extra arguments to the `method` function can be
        provided using `*args` or `**kwargs`. A single dictionary of
        all elements of `successor` and `predecessor` is passed to the
        used-defined function.  
        Current pre-defined methods are: 'sum', 'mean', 'min', 'max',
        'upstream', and 'downstream'.
    
    Other Parameters
    ----------------
    agrs: ``any``
        The arguments to be fed to `method`, if a user-defined function
        is provided. Raises ``ValueError`` exception if method is not
        callable.
    kwargs: dict
        The keys of this `dict` are the input arguments to `method`
        if a user-defined function is given; the values must follow the
        datatype of `method`'s arguments'.
    
    Returns
    -------
    dict
        The keys are each of the `predecessor` and values are the
        aggregated `attr` given `sucessor` and `predecessor` values
        based on the user's `method`
    
    Raises
    ------
    ValueError
        If `args` and/or `kwargs` are provided while `method` is not a
        user-defined function.
    TypeError
        If the content of `attr` is not ``float`` or ``int``, then an
        ``TypeError`` is raised
    """
    # if args or kwargs are provided and `method` is not a user-defined
    # function, raise a ValueError exception
    if isinstance(method, str):
        if bool(args) or bool(kwargs):
            raise ValueError("arguments provided through `args` or `kwargs`"
                             " while `method` is not a user-defined"
                             " function")
    # check the data type of `attr`
    if not isinstance(attr, (str, int, float)):
        raise TypeError("`attr` must be of type str, int, or float")
        
    # build attribute dictionary
    attr_dict = nx.get_node_attributes(
        G=rg,
        name=attr,
    )
    
    # build a dictionaries of elements to be aggregated
    successor_dict = {successor: rg.nodes[successor][attr]}
    predecessor_dict = {predecessor: rg.nodes[predecessor][attr]}
    
    # create a list of both dictionaries for easier access
    node_dict = {**successor_dict, **predecessor_dict}
    
    # dict value iterator in case all values are needed in one go
    all_attr_values = node_dict.values()
    
    # checking `method`
    # simple summation
    if callable(method):
        return method(node_dict, *args, **kwargs)
    elif method in ('sum'):
        return sum(all_attr_values)
    # simple average
    elif method in ('mean'):
        return sum(all_attr_values) / len(all_attr_values)
    # minimum value
    elif method in ('min'):
        return min(all_attr_values)
    # maximum value
    elif method in ('max'):
        return max(all_attr_values)
    # get the upstream value given a target upstream
    elif method in ('upstream'):
        return ht.agg_funcs.upstream(
            successor,
            predecessor,
            *args,
            **kwargs,
        )
    # get the downstream value
    elif method in ('downstream'):
        return ht.agg_funcs.downstream(
            successor,
            predecessor,
            *args,
            **kwargs,
        )
    return

In [6]:
def _sum(d):
    return sum(d.values())

def _upstream(d, node):
    return d[node]

In [7]:
# sum `lengthkm` over the selected successor/predecessor pair
_aggregate_attr(rg, successor, predecessor, attr='lengthkm', method='sum')

1.32493010874339