# Waterways

The best US government supported waterways network seems to be:

In [None]:
from pathlib import Path
import pandas as pd
import geopandas as gpd
import numpy as np
from sklearn.neighbors import BallTree

In [None]:
# Read the four GeoJSON layers used below from the local download folder.
input_directory = Path(r'C:/Users/Marc.Meketon/Downloads')

# Waterway links. 6 links have bad data (missing ANODE), so they are dropped.
ww_gdf = gpd.read_file(input_directory.joinpath('ndc_-93513140148177336.geojson'))
ww_gdf = ww_gdf[ww_gdf.ANODE.notna()]

# Waterway nodes, indexed by NODENUM; drop=False keeps NODENUM as a regular column too.
ww_nodes_gdf = gpd.read_file(input_directory.joinpath('ndc_-5268635341862566859.geojson'))
ww_nodes_gdf.set_index('NODENUM', drop=False, inplace=True)

# Principal ports and river mile markers.
principal_ports_gdf = gpd.read_file(input_directory.joinpath('ndc_-137970917446961778.geojson'))
river_mile_markers_gdf = gpd.read_file(
    input_directory.joinpath('usace_river_mile_markers_-7040874731012618368.geojson')
)

## Ports

**We will be restricting locations where intermodal traffic occurs on the waterways to where there are ports.**

The "Navigable Waterway Network Nodes" file has a field called Port Name, as well as a Port ID (typically of the form xDDDD, where x is C, I or L and DDDD is a 4-digit number, possibly with leading zeros).  There are 580 nodes with a Port Name, out of 6255 nodes altogether.

There is another file called Principal Ports, and there are 150 of those ports.  It also has a Port ID, although it is formatted differently.

About 92 of the ports in Principal Ports have the same Port ID as found in the Network Nodes.  Of the remaining 58 principal ports, about 50 have a Network Node close by.  For these 50, the nearby Network Node sometimes (but not always) has a Port ID that is numerically close to the principal port ID.

We want to label all the ports as correctly as possible (using the principal port name when possible) and, where a Network Node does not have a Port ID, use the principal port ID.

In [None]:
# Combine TYPE with the PORT number zero-padded to 4 digits, giving a key that is
# joined against the network nodes' PORT_ID.
principal_ports_gdf['PORT_TYPE_NUMBER'] = (
    principal_ports_gdf['TYPE']
    + principal_ports_gdf['PORT'].map('{:04d}'.format).astype(str)
)

In [None]:
# Attach the principal-port name to every node whose PORT_ID equals a principal port's
# PORT_TYPE_NUMBER. The left join keeps nodes without a match; the principal-port name
# is read later from the suffixed column PORT_NAME_y.
ww2_nodes_gdf = ww_nodes_gdf.merge(
    principal_ports_gdf.loc[:, ['PORT_NAME', 'PORT_TYPE_NUMBER']],
    how='left',
    left_on='PORT_ID',
    right_on='PORT_TYPE_NUMBER',
)

In [None]:
# Index the network nodes by (lat, lon) in radians, as the haversine metric expects.
ww_nodes_lat_lon_rad = np.deg2rad(
    np.column_stack((ww2_nodes_gdf.geometry.y.to_numpy(), ww2_nodes_gdf.geometry.x.to_numpy()))
)
ball_tree = BallTree(ww_nodes_lat_lon_rad, metric='haversine')

# For each principal port, find the single nearest network node.
principal_ports_lat_lon_rad = np.deg2rad(principal_ports_gdf[['LATITUDE1', 'LONGITUDE1']].to_numpy())
distance, indices = (
    result.ravel()
    for result in ball_tree.query(principal_ports_lat_lon_rad, return_distance=True, k=1)
)
# Haversine distances come back in radians; scale by the Earth's radius in miles.
distance = distance * 3958.8

In [None]:
# One row per principal port: the network node nearest to it, picked by position.
# reset_index() renumbers the rows 0..n-1 and keeps the old index as a column.
ww_node_closest_to_principal_port_gdf = (
    ww2_nodes_gdf
    .iloc[indices]
    .copy()
    .reset_index()
)

In [None]:
# x_df is the same object as ww_node_closest_to_principal_port_gdf, so every change
# below is visible through either name.
x_df = ww_node_closest_to_principal_port_gdf  # alias

# Row i of x_df is the node nearest to row i of principal_ports_gdf; these column
# assignments align on the index.
x_df['PRINCIPAL_PORT_NAME'] = principal_ports_gdf['PORT_NAME']
x_df['PRINCIPAL_PORT_TYPE_NUMBER'] = principal_ports_gdf['PORT_TYPE_NUMBER']
x_df['DISTANCE'] = distance.round(2)
x_df['IS_PRINCIPAL_PORT'] = 'Y'

# Nodes with no Port ID match that lie within 5 miles of their principal port take
# that port's name.
index_to_change = x_df.index[x_df['PORT_NAME_y'].isna() & x_df['DISTANCE'].le(5.0)]
x_df.loc[index_to_change, 'PORT_NAME_y'] = x_df.loc[index_to_change, 'PRINCIPAL_PORT_NAME']

# Nodes whose PORT_ID is blank take the principal port's ID.
# NOTE(review): unlike the name fill above, this is not limited to DISTANCE <= 5.0 —
# confirm that is intended.
x_df['PORT_ID'] = x_df['PORT_ID'].mask(
    x_df['PORT_ID'].str.strip().str.len().eq(0),
    x_df['PRINCIPAL_PORT_TYPE_NUMBER'],
)

# Must stay in place so the aliased frame is re-indexed as well.
x_df.set_index('NODENUM', drop=False, inplace=True)

Place the PORT_NAME_y and PORT_ID values back into the original Network Nodes

(recall, `x_df` is an alias for `ww_node_closest_to_principal_port_gdf`)

In [None]:
# Copy the resolved port name and Port ID onto the matching rows of the node table.
# Both frames are indexed by NODENUM, so the values align by node number.
ww_nodes_gdf.loc[x_df['NODENUM'], 'PRINCIPAL_PORT_NAME'] = x_df['PORT_NAME_y']
ww_nodes_gdf.loc[x_df['NODENUM'], 'PORT_ID'] = x_df['PORT_ID']

# A node is flagged as a principal port exactly when it received a principal port name.
ww_nodes_gdf['IS_PRINCIPAL_PORT'] = (
    ww_nodes_gdf['PRINCIPAL_PORT_NAME'].notna().map({True: 'Y', False: 'N'})
)

In [None]:
# Show the waterway links on an interactive map (displayed as the cell's result).
ww_gdf.explore()

# Waterway types (WTWY_TYPE)

Below table is based on https://services7.arcgis.com/n1YM8pTrFmm7L4hs/ArcGIS/rest/services/ndc/FeatureServer/7

Another listing of the fields with codes explained (somewhat) is in https://doi.org/10.21949/1529053

|WTWY_TYPE|Description|
| -: | :- |
|1|Bay, Harbor|
|2|Intracoastal Waterway|
|3|Sealane|
|4|Sealane with separation zone|
|5|Open water|
|6|River, creek, thoroughfare, Lake|
|7|Estuary|
|8|Channel|
|9|Canal|
|10|Great Lakes direct link|
|11|Great Lakes indirect link|
|12|USACE Lock|

We only care about the continental US, not Alaska, Hawaii, Puerto Rico, Panama Canal, or territories like Guam, Saipan, etc.  We use the FIPS code to eliminate them.

We also get rid of 'non navigable' waterways (FUNC_CLASS = 'N'), and anything that Vanderbilt added.  For the Great Lakes, we kept the 'spine'.

We also want to ensure that any link that touches a principal port is kept.

## For each node, add a 'river name'

This is particularly important for the Mississippi, which has an "UM" (upper Mississippi) and a "LM" (lower Mississippi).  These designations are used by all shipping companies that use the Mississippi (and attached rivers like the Ohio, Missouri, etc.)

In [None]:
# Store the link end-node numbers as 64-bit integers.
ww_gdf = ww_gdf.astype({node_column: 'int64' for node_column in ('ANODE', 'BNODE')})

# Node numbers of every node flagged as a principal port.
port_nodenums = set(
    ww_nodes_gdf.NODENUM[ww_nodes_gdf.IS_PRINCIPAL_PORT.eq('Y')].to_list()
)

In [None]:
# Drop links whose FIPS code marks a non-continental area. In the standard state FIPS
# scheme 02 = Alaska, 15 = Hawaii and 72 = Puerto Rico; 74 and 99 presumably cover the
# other excluded areas — TODO confirm against the dataset (the notes also mention Guam
# and Saipan, which this list does not obviously cover).
ww2_gdf = ww_gdf.loc[
    ~ww_gdf.FIPS.isin(['02', '15', '72', '74', '99'])
]

In [None]:
# Keep a link when it passes every attribute test below, OR when either of its end
# nodes is a principal port (port links are kept regardless of their attributes).
# NOTE(review): the accompanying notes say the Great Lakes 'spine' is kept, but the last
# attribute test removes GEO_CLASS 'G' links whose LINKNAME contains " SPINE " — confirm
# which of the two is intended.
ww2_gdf = ww2_gdf[
    (
        ww2_gdf.ID.notna()
        & ww2_gdf.WTWY_TYPE.isin([1, 2, 5, 6, 7, 8, 9, 10, 12])
        & ww2_gdf.FUNC_CLASS.ne('N')
        & ww2_gdf.GEO_CLASS.ne('O')
        & ww2_gdf.LINKTYPE.ne("VANDERBILT")
        & ~(ww2_gdf.GEO_CLASS.eq('G') & ww2_gdf.LINKNAME.str.contains(" SPINE "))
    )
    | ww2_gdf.ANODE.isin(port_nodenums)
    | ww2_gdf.BNODE.isin(port_nodenums)
]
ww2_gdf.explore()

## Reduce the nodes to be the nodes that are left

In [None]:
# Keep only the nodes that are still an end point (ANODE or BNODE) of a surviving link.
ww_nodes_remaining_gdf = ww_nodes_gdf[ww_nodes_gdf.NODENUM.isin(ww2_gdf.ANODE) | ww_nodes_gdf.NODENUM.isin(ww2_gdf.BNODE)].copy()
print(ww_nodes_gdf.shape, ww_nodes_remaining_gdf.shape)

# Index the river mile markers by (lat, lon) in radians for haversine lookups.
river_mile_lat_lon_rad = np.deg2rad(river_mile_markers_gdf[['LATITUDE1', 'LONGITUDE1']].to_numpy())
ball_tree = BallTree(river_mile_lat_lon_rad, metric='haversine')

# For every remaining node, find the single nearest river mile marker.
ww_nodes_lat_lon_rad = np.deg2rad(list(zip(ww_nodes_remaining_gdf.geometry.y.to_numpy(), ww_nodes_remaining_gdf.geometry.x.to_numpy())))
distance, indices = ball_tree.query(ww_nodes_lat_lon_rad, return_distance=True, k=1)
distance = distance.ravel()
indices = indices.ravel()
# Haversine distances come back in radians; scale by the Earth's radius in miles.
distance *= 3958.8
print(distance.shape, ww_nodes_remaining_gdf.shape)
ww_nodes_remaining_gdf['DISTANCE_TO_RMM'] = distance
# `indices` holds row POSITIONS in the array the tree was built from, so the names are
# picked positionally. Label-based `.loc[indices, ...]` only gives the same rows while
# river_mile_markers_gdf keeps a default 0..n-1 index.
# Note: RIVER_NAME is reset and recomputed from the link data further down in this file.
ww_nodes_remaining_gdf['RIVER_NAME'] = river_mile_markers_gdf['RIVER_NAME'].to_numpy()[indices]

In [None]:
# Total LENGTH1 of the surviving links per river name, as a {RIVERNAME: total} dict.
mileage_by_river_name = (
    ww2_gdf
    .groupby('RIVERNAME')['LENGTH1']
    .sum()
    .to_dict()
)

In [None]:
# Pair every link's A end with every link's B end at the same node. The outer join
# keeps nodes that only ever appear on one side, with NaN on the other side.
# Row layout: ANODE, AMILE, RIVERNAME (A side), BNODE, BMILE, RIVERNAME (B side).
x = (
    ww2_gdf[['ANODE', 'AMILE', 'RIVERNAME']]
    .merge(
        ww2_gdf[['BNODE', 'BMILE', 'RIVERNAME']],
        how='outer',
        left_on='ANODE',
        right_on='BNODE',
    )
    .to_numpy()
)
# Count the pairings where the two links disagree on the mile marker at the shared node.
sum(1 for anode, amile, _, bnode, bmile, _ in x if anode == bnode and amile != bmile)

In [None]:
# Node latitude keyed by the index of ww_nodes_gdf (NODENUM); used for the
# Upper/Lower Mississippi split below.
lats_by_node = ww_nodes_gdf.geometry.y.to_dict()

# Upper/Lower Mississippi boundary. The "UM" designation starts where the Ohio River
# joins the Mississippi (about 36.99 N, near Cairo, IL). The previous thresholds
# (39.98 / 39.97) sat three degrees too far north, which labelled the river between
# Cairo and roughly Quincy, IL as "LM". Just south of the cut-off, a low mile marker
# identifies Upper Mississippi nodes, because UM mile markers start near 0 at the
# confluence while Lower Mississippi markers there are in the 900s.
UM_MIN_LATITUDE = 36.98
UM_CONFLUENCE_MIN_LATITUDE = 36.97
UM_CONFLUENCE_MAX_MILE_MARKER = 100.0

# Start from blank values; any RIVER_NAME computed earlier for these nodes is discarded.
ww_nodes_remaining_gdf['RIVER_NAME'] = ''
ww_nodes_remaining_gdf['MILE_MARKER'] = 0.0

# Each row of x pairs an A-side link end with a B-side link end at the same node.
# A node can appear in several rows; the last row processed for it wins.
for anode, amile, river1, bnode, bmile, river2 in x:
    if np.isnan(anode):
        # The node never appears as an ANODE: use the B-side values.
        nodenum, mile_marker, river_name = int(bnode), bmile, river2
    elif np.isnan(bnode):
        # The node never appears as a BNODE: use the A-side values.
        nodenum, mile_marker, river_name = int(anode), amile, river1
    else:
        nodenum = int(anode)
        mile_marker = max(amile, bmile)
        # Where two rivers meet at the node, name it after the one with more total mileage
        # (ties go to the B-side river).
        if mileage_by_river_name[river1] > mileage_by_river_name[river2]:
            river_name = river1
        else:
            river_name = river2

    if river_name.startswith('MISSISSIPPI'):
        latitude = lats_by_node[nodenum]
        is_upper = latitude >= UM_MIN_LATITUDE or (
            latitude >= UM_CONFLUENCE_MIN_LATITUDE
            and mile_marker <= UM_CONFLUENCE_MAX_MILE_MARKER
        )
        river_name = "UM" if is_upper else "LM"

    ww_nodes_remaining_gdf.at[nodenum, 'RIVER_NAME'] = river_name
    ww_nodes_remaining_gdf.at[nodenum, 'MILE_MARKER'] = mile_marker

In [None]:
# Rename the index so it no longer shares the name NODENUM with the NODENUM column
# (presumably to avoid a name clash when the index is turned into a column — TODO confirm).
ww_nodes_remaining_gdf.rename_axis('NODENUM_IDX', inplace=True)

# Show the remaining nodes on an interactive map.
ww_nodes_remaining_gdf.explore()

In [None]:
# Write the filtered node and link layers as Parquet files in the project folder.
output_directory = Path(r'C:\Users\Marc.Meketon\OneDrive - MMC\Documents\OliverWyman\DOE_IntermodalRouting\Truck_Areas')
ww_nodes_remaining_gdf.to_parquet(output_directory.joinpath('waterway_nodes_gdf.parquet'))
ww2_gdf.to_parquet(output_directory.joinpath('waterway_lines_gdf.parquet'))