In [5]:
import nhd_network
import xarray as xr
from functools import partial

In [2]:
# Route-link NetCDF file; the path is relative, so it resolves against the
# kernel's working directory.
path = "al_routelink.nc"
# Open it as an xarray Dataset; the next cell selects variables from `ds`.
ds = xr.open_dataset(path)

In [3]:
# Variables pulled out of the dataset for the rest of the notebook.
subslice = ["link", "to", "gages"]
# Flatten the selection into a DataFrame and cast both id columns to int
# (presumably so the ids compare cleanly as integers later — confirm the
# dtype they are stored with).
df = ds[subslice].to_dataframe().astype({"link": int, "to": int})

In [4]:
# Key the frame by link id; `replace_downstreams` compares the `to` column
# against this index.
# NOTE: not idempotent — `set_index` drops the `link` column by default, so
# running this cell a second time raises KeyError.
df = df.set_index("link")
df

Unnamed: 0_level_0,to,gages,lat,lon,State
link,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
19566408,19566406,b' ',34.745842,-87.973145,Alabama
19566410,19566412,b' ',34.719414,-87.975365,Alabama
19566416,936030090,b' ',34.715527,-87.999626,Alabama
19566418,19566412,b' ',34.715916,-87.974808,Alabama
19566422,19566404,b' ',34.725525,-87.959076,Alabama
...,...,...,...,...,...
18516130,0,b' ',30.409370,-87.863617,Alabama
18516132,0,b' ',30.389450,-87.855713,Alabama
18516148,0,b' ',30.247742,-87.937485,Alabama
18516150,0,b' ',30.250677,-87.934319,Alabama


In [6]:
def replace_downstreams(data, downstream_col, terminal_code):
    """Return a copy of ``data`` with terminal and dangling downstream ids re-coded.

    Both selections are decided from the *original* values in
    ``downstream_col``:

    1. Rows whose downstream value equals ``terminal_code`` get their own
       index label written into ``downstream_col``.
    2. Rows whose original downstream value is not an index label of
       ``data`` have ``downstream_col`` multiplied by -1.

    A terminal row is selected by both steps whenever ``terminal_code`` is
    not itself an index label, so it ends up as the negative of its own
    label.

    Args:
        data: DataFrame indexed by node id.
        downstream_col: Name of the column holding each row's downstream id.
        terminal_code: Value in ``downstream_col`` that marks a terminal row.

    Returns:
        A new DataFrame; ``data`` itself is left unmodified.
    """
    downstream = data[downstream_col]
    is_terminal = downstream == terminal_code
    # Evaluated before any rewrite, so rows holding the terminal code count as
    # "not in the index" here as well (unless the code is a real label).
    is_dangling = ~downstream.isin(data.index)

    result = data.copy()
    result.loc[is_terminal, downstream_col] = is_terminal.index[is_terminal]
    result.loc[is_dangling, downstream_col] *= -1
    return result


def organize_independent_networks(connections):
    """Reverse the connection map and decompose each reachable network.

    Args:
        connections: Connection mapping handed to
            ``nhd_network.reverse_network`` (presumably node -> downstream
            nodes; confirm against ``nhd_network``).

    Returns:
        A 3-tuple ``(independent_networks, reaches_bytw, rconn)``:

        * ``independent_networks`` — result of
          ``nhd_network.reachable_network(rconn)``.
        * ``reaches_bytw`` — dict with the same keys, each value being
          ``nhd_network.dfs_decomposition`` of that network, using
          ``nhd_network.split_at_junction`` bound to the network as the
          path function. (The key is presumably the tailwater id — confirm.)
        * ``rconn`` — the reversed connection map.
    """
    rconn = nhd_network.reverse_network(connections)
    independent_networks = nhd_network.reachable_network(rconn)
    reaches_bytw = {
        tailwater: nhd_network.dfs_decomposition(
            subnet, partial(nhd_network.split_at_junction, subnet)
        )
        for tailwater, subnet in independent_networks.items()
    }
    return independent_networks, reaches_bytw, rconn

In [7]:
# Sort by link id, then re-code the `to` column using 0 as the terminal code.
# NOTE: run once per fresh `df` — replace_downstreams negates every `to` value
# that is not an index label, so a second pass would flip the already-negated
# values back to positive.
df = replace_downstreams(df.sort_index(), "to", 0)

In [8]:
# Build the connection map from the re-coded `to` column.
# NOTE(review): presumably keyed by link id with downstream ids as values —
# confirm against nhd_network.extract_connections.
connections = nhd_network.extract_connections(df, "to")

In [9]:
# Returns, in order: the reachable networks, the per-network reach
# decomposition, and the reversed connection map.
independent_networks, reaches_bytw, rconn = organize_independent_networks(
    connections)

In [11]:
# NOTE(review): organize_independent_networks already makes this same
# reachable_network(rconn) call and returns the result as
# `independent_networks`; this looks like a recomputation — confirm whether
# the two can share one result.
reachable_subnetworks = nhd_network.reachable_network(rconn)

In [22]:
# One entry per reachable subnetwork; populated in the following cell.
network_sizes = []

In [None]:
# Length of every reachable subnetwork, in the dict's iteration order.
# Rebinding the list (rather than appending to the existing one) keeps this
# cell idempotent when re-run, and iterating the values avoids rebuilding the
# key list on every step.
network_sizes = [len(net) for net in reachable_subnetworks.values()]

In [49]:
# Number of entries in the connection map built from the `to` column.
len(connections)

77684

In [50]:
# Number of entries in the reversed map.
# NOTE(review): the saved outputs show 357 more entries here than in
# `connections` (78041 vs 77684), which equals the number of independent
# networks — presumably the terminal nodes exist only as keys of the reversed
# map; confirm. The execution counts are out of order, so re-run to be sure.
len(rconn)

78041

In [51]:
# Number of independent networks found from the reversed map.
len(independent_networks)

357

In [46]:
# Spot check of a single entry in the reversed map; the id is hard-coded for
# this dataset (the values are presumably the directly upstream links — confirm).
rconn[2130784]

[2130744, 2130746]

In [52]:
# Expected to equal len(independent_networks): reaches_bytw is built with one
# entry per independent network.
len(reaches_bytw)

357

In [None]:
# Small hand-written reversed-connection map for experimenting without the
# full dataset. Keys 1-4 never appear in any value list, so they are the roots
# of four separate trees; keys mapped to [] are leaves.
# NOTE(review): values are presumably the nodes directly upstream of the key,
# matching `rconn` — confirm.
test_rconn = {
    1: [5, 6], 2: [7], 3: [8], 4: [9, 10, 11],
    5: [12], 6: [13, 14], 7: [], 8: [15],
    9: [16, 17, 18], 10: [19], 11: [], 12: [],
    13: [20, 21], 14: [22], 15: [], 16: [23, 24, 25],
    17: [], 18: [26], 19: [], 20: [27],
    21: [], 22: [], 23: [], 24: [], 25: [], 26: [], 27: [],
}