In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import networkx as nx

from shapely.geometry import LineString, Point

import os
import warnings

from typing import (
    List,
    Dict,
    Union,
    Iterable
)

In [2]:
# MERIT-Basins path
mb_path = '/project/rrg-mclark/data/geospatial-data/MERIT-Basins/MERIT_Hydro_v07_Basins_v01_bugfix1/'

# cat layer
cat = gpd.read_file(os.path.join(mb_path, 'pfaf_level_02', 'cat_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.shp'))

# riv layer
riv = gpd.read_file(os.path.join(mb_path, 'pfaf_level_02', 'riv_pfaf_71_MERIT_Hydro_v07_Basins_v01_bugfix1.shp'))

# cst layer
cst = gpd.read_file(os.path.join(mb_path, 'coastal_hillslopes', 'hillslope_71_clean.shp'))

In [3]:
def aggregate_seg(
    rg: nx.DiGraph,
    node: Union[str, int],
    columns: Dict[str, str],
    criteria: str,
    *args,
    **kwargs,
) -> None:
    ...
    
columns = {
    'length': 'lengthkm',
    'area': 'unitarea'
}

criteria = 'length'

In [4]:
rg = nx.from_pandas_edgelist(df=riv,
                             source='COMID',
                             target='NextDownID',
                             edge_attr=riv.columns.to_list(),
                             create_using=nx.DiGraph)

In [5]:
rg

<networkx.classes.digraph.DiGraph at 0x2abc54dc0f10>

Each node has similar name to its outgoing edge.

In [6]:
nodes = list(rg.nodes)

In [7]:
edges = list(rg.edges)

____

Notes:  
1. Headwater river segments:  
 1.1. They cannot be dissolved; nothing to dissolve with.  
2. Confluences river segments:  
 2.1. River segment is removed entirely,  
 2.2. Corresponding sub-basin is dissolved with an upstream sub-basin (i.e., target sub-basin free choice?),  
  &nbsp;&nbsp;2.2.1. If `area` is given, the areas are `sum`ed,  
 2.3. target upstream river segments is adjusted,  
  &nbsp;&nbsp;2.3.1. the last point of its `linestring` is changed to the last point of the disappearing river segment,  
  &nbsp;&nbsp;2.3.2. The `length` of the upstream river segments are adjusted to consider the added segment,  
  &nbsp;&nbsp;2.3.3. The `width` of the upstream river segments are adjusted to consider the added segment,  
  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;2.3.3.1. The method could be `average`,  
  &nbsp;&nbsp;2.3.4. The `slope` of the upstream river segments are adjusted to consider the added segment,  
  &nbsp;&nbsp;&nbsp;&nbsp;2.3.4.1. The method could be `average` or ``,  
3. 

Checking river segment `71027927` for a test...

In [8]:
n = 71027927
e = n

In [9]:
def _is_headwater(
    rg: nx.DiGraph,
    node: Union[str, int],
) -> bool:
    """Return `True` if the `node` in the river graph `rg`
    is a headwater basin, otherwise `False`
    """
    if not isinstance(rg, nx.DiGraph):
        raise TypeError("`rg` must be of type networkx.DiGraph")
    
    if rg.in_degree(node) == 0:
        return True
    else:
        return False

In [10]:
list(rg.successors(n))

[71027880]

In [11]:
list(rg.predecessors(n))

[71027928, 71030573]

In [12]:
n

71027927

In [13]:
def _choose_seg_target(
    rg: nx.DiGraph,
    nodes: Iterable[Union[str, int]],
    cols: Dict[str, str],
    source_node: Union[str, int] = None,
    criteria: str = None,
    *args,
    **kwargs,
) -> Union[str, int]:
    """Returning the chosen target segment based on `criteria`
    
    Parameters
    ----------
    rg: networkx.DiGraph
        a DiGraph of the river network of interest, `rg` must have
        attributes (as Python dictionary) with keys that correspond
        to the keys in `cols`
    node: str or int
        The node name of interest
    source_node: str or int, optional
        the source node where the target aggregation segment is being
        selected for, if nothing provided, warning messages will be
        vague
    criteria: str, optional
        The criterion for the aggregation method, currently available
        methods are: `area` and `length`. If ``None`` is provided, an
        upstream segment is selected randomly
    cols: dict
        Dictionary with keys that describes the ones in `rg`'s edge
        attribute table. Any of following keys need to be provided in
        the `cols`, depending on `criteria`:  
          1. 'length'  
          2. 'area'  
    
    Returns
    -------
    candidate_node: Union[str, int]
        The candidate node of interest
    """
    # check if the source_node is adjacent to any of `nodes` values
    
    
    if len(nodes) > 1:
        if criteria in ('length', 'area'):
            # build a dictionary with node keys and criteria values
            criteria_dict = {n: rg[n][source_node][cols[criteria]] \
                            for n in nodes}
            # reporting the node with the minimum criteria value
            candidate_node = min(criteria_dict, key=criteria_dict.get)

        elif crtieria == None:
            warn_message = (f"Selecting upstream target node for aggregation"
                            " is random, as `criteria` is not provided")
            warnings.warn(warn_message, UserWarning)
            # the selection is not really random, it is just the first
            # node in the Iterable object
            candidate_node = nodes[0]
            
        else:
            raise ValueError("`criteria` can only be `length`, `area` or None")
            
    elif len(nodes) == 1:
        warn_message = (f"Only one upstream node for `{source_node}` is available "
                        f"and selected as the target for aggregation: {nodes[0]}")
        warnings.warn(warn_message, UserWarning)
        # the only node is the candidate
        candidate_node = nodes[0]
        
    else: # `nodes` is an empty iterable
        raise ValueError("The length of `nodes` cannot be empty")
        
    return candidate_node

In [14]:
# if length is provided, and all the upstream segments
# are headwater ones, choose the one with the shorter
# river length
headwater_candidates = [node for node in rg.predecessors(n) if _is_headwater(rg, node)]
if any(headwater_candidates):
    candidate = _choose_seg_target(rg=rg,
                                   nodes=headwater_candidates,
                                   cols={'length': 'lengthkm'},
                                   source_node=n,
                                   criteria=criteria)
else:
    normal_candidates = [node for node in rg.predecessors(n)]
    candidate = _choose_seg_target(rg,
                                   normal_candidates,
                                   n,
                                   criteria = criteria)



In [15]:
n

71027927

In [16]:
candidate

71030573

Notes:  
1. First aggregate attributes:  
&ensp;1.1. `length` needs to be `sum`med,
&ensp;1.2. `

In [17]:
rg2 = nx.contracted_nodes(G=rg,
                          u=candidate,
                          v=n,
                          self_loops=False
                         )

In [18]:
rg[candidate][n]

{'COMID': 71030573,
 'lengthkm': 1.325056645795759,
 'lengthdir': 1.222643033274586,
 'sinuosity': 1.083764115718126,
 'slope': 0.000754684717195,
 'uparea': 27.817919621783,
 'order': 1,
 'strmDrop_t': 0.0,
 'slope_taud': 0.0,
 'NextDownID': 71027927,
 'maxup': 0,
 'up1': 0,
 'up2': 0,
 'up3': 0,
 'up4': 0,
 'geometry': <LINESTRING (-91.427 51.074, -91.426 51.074, -91.425 51.075, -91.424 51.075,...>}

In [19]:
# Combine the linestrings into a longer one
# This approach directly connects the end of the first linestring to the start of the second
def _merge_linstrings(*args):
    combined_coords = list(linestring1.coords) + list(linestring2.coords)[1:] # Skip the first point of the second linestring to avoid duplication
longer_linestring = LineString(combined_coords)

# Optionally, create a GeoDataFrame to hold the longer linestring
gdf = gpd.GeoDataFrame(geometry=[longer_linestring])

NameError: name 'combined_coords' is not defined

In [None]:
def _agg_length(*args):
    

In [None]:
gdf.iloc[0, 0]

In [None]:
print(gdf.iloc[0, 0])

____

# splitting 

In [None]:
poly = cat.loc[cat.COMID == 71047064].copy()
poly.set_crs(epsg=4326, inplace=True)
poly

In [None]:
line = riv.loc[riv.COMID == 71047064].copy()
line.set_crs(epsg=4326, inplace=True)
line

In [None]:
ax = poly.plot()
line.plot(ax=ax, zorder=100, color='red')

In [None]:
from shapely.ops import split

d = split(poly.geometry.iloc[0], line.geometry.iloc[0])

In [None]:
d

In [None]:
poly.overlay(line)

In [None]:
line.geometry.iloc[0].intersects(poly.geometry.iloc[0])

In [None]:
g = line.geometry.iloc[0].intersection(poly.geometry.iloc[0])

_____

In [None]:
lines = riv.loc[riv.COMID.isin([72051873, 72053625])]

In [None]:
lines