In [None]:
# ------------------------------------- #
#                MODULES                #

# Standard Modules
import os
import warnings

warnings.simplefilter("ignore")

# Third-Party Libraries
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, Point

#                                       #
# ------------------------------------- #

***

## 0. Build Filtering Bounding Box

In [2]:
# Rectangular filter window for HUC-17 (Pacific Northwest), expressed as
# longitude/latitude edges in EPSG:4326.
bbox_west, bbox_east = -125.5, -115
bbox_south, bbox_north = 44, 51

# Closed ring: SW -> NW -> NE -> SE -> back to SW.
bbox_coords = [
    (bbox_west, bbox_south),
    (bbox_west, bbox_north),
    (bbox_east, bbox_north),
    (bbox_east, bbox_south),
    (bbox_west, bbox_south),
]
bbox_polygon = Polygon(bbox_coords)

# Single-row GeoDataFrame holding the bounding box polygon.
bbox_gdf = gpd.GeoDataFrame({"geometry": [bbox_polygon]}, crs="EPSG:4326")

## 1. Full US Hydrology Data - Water Bodies

In [3]:
# US Hydrography water-body polygons.
# Source: https://www.arcgis.com/home/item.html?id=f1f45a3ba37a4f03a5f48d7454e4b654
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
water_bodies_shp = "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/processed/GIS/inland_rivers/US_Detailed_Water_Bodies/USA_Detailed_Water_Bodies.shp"

# Read, reproject to EPSG:4326 and clip to the bounding box.
us_hydrogaphy = gpd.read_file(water_bodies_shp).to_crs("EPSG:4326").clip(bbox_gdf)

# Keep features typed as canals/ditches or streams/rivers, plus any feature
# whose FCODE description contains "River".
is_channel_type = us_hydrogaphy["FTYPE"].isin(["Canal/Ditch", "Stream/River"])
described_as_river = us_hydrogaphy["FCODE_DESC"].astype(str).str.contains("River")

# Geometry only, dissolved into one feature and tagged as the "BODY" layer.
us_hydrogaphy = us_hydrogaphy[is_channel_type | described_as_river][
    ["geometry"]
].dissolve()
us_hydrogaphy["AREA"] = "BODY"

## 2. Full US Hydrology Data - River/Stream Lines

In [4]:
# US Hydrography river/stream line features.
# Source: https://hub.arcgis.com/datasets/esri::usa-rivers-and-streams/explore?location=46.118025%2C-122.384827%2C9.56
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
rivers_shp = "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/processed/GIS/inland_rivers/US_Detailed_Rivers_Lines/9ae73184-d43c-4ab8-940a-c8687f61952f2020328-1-r9gw71.0odx9.shp"

# Read, reproject to EPSG:4326, clip to the bounding box, keep geometry only
# and dissolve everything into one feature.
us_rivers = (
    gpd.read_file(rivers_shp)
    .to_crs("EPSG:4326")
    .clip(bbox_gdf)[["geometry"]]
    .dissolve()
)

# Buffer the dissolved geometry by 0.00025; the frame is in EPSG:4326, so this
# distance is in degrees. Tag the result as the "LINE" layer.
us_rivers["geometry"] = us_rivers.geometry.buffer(0.00025)
us_rivers["AREA"] = "LINE"

In [5]:
# Stack the buffered river lines ("LINE") on top of the dissolved US Hydrologic
# water bodies ("BODY").
# NOTE(review): this rebinds `us_hydrogaphy` from itself, so re-running the cell
# without re-running the earlier ones appends the river layer again.
national_layers = [us_rivers, us_hydrogaphy]
us_hydrogaphy = pd.concat(national_layers)

***

## 3. Washington State Hydrology

In [6]:
# Open Washington Hydrography Waterbodies Data
# Source: https://geo.wa.gov/datasets/ -> Search "NHDWaterbody"
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
washington_hydrography = gpd.read_file(
    "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/processed/GIS/inland_rivers/US_WUHD_Washington/NHDWaterbody.shp"
)

# Ensure Consistent CRS
washington_hydrography = washington_hydrography.to_crs("EPSG:4326")

# Normalise reach codes by stripping surrounding whitespace so exact-match
# look-ups on ReachCode behave as expected. The previous `.str.strip("")`
# passed an empty character set and therefore stripped nothing.
washington_hydrography["ReachCode"] = washington_hydrography.ReachCode.str.strip()

In [7]:
# GNIS names of the water bodies to keep for the Columbia River selection
# (alphabetical order).
location_selection = [
    "Lake Bonneville", "Lake Celilo", "Lake Chelan", "Lake Entiat",
    "Lake Merwin", "Lake Sacajawea", "Lake Umatilla", "Lake Wallula",
    "Mayfield Lake", "Priest Rapids Lake", "Riffe Lake", "Sturgeon Lake",
    "Swift Reservoir", "Wanapum Lake", "Yakima River", "Yale Lake",
]

In [8]:
# Reach codes of features that share a selected name but are excluded as
# duplicate, non-river-system entries.
excluded_reach_codes = ["17080003037462", "17080003037449"]

# Build both masks from the same frame. Previously the name mask came from the
# unfiltered `washington_hydrography` and was applied to the already-filtered
# frame, which only worked through implicit index realignment.
has_selected_name = washington_hydrography.GNIS_Name.isin(location_selection)
is_excluded_reach = washington_hydrography.ReachCode.isin(excluded_reach_codes)
river_system_wa = washington_hydrography[
    has_selected_name & ~is_excluded_reach
].copy()

# One dissolved geometry per GNIS name, exposed as NAME / geometry columns.
river_system_wa = (
    river_system_wa.dissolve(by="GNIS_Name")[["geometry"]]
    .reset_index()
    .rename(columns={"GNIS_Name": "NAME"})
)

## 4. Combine Datasets to Create a Unified Dataset

In [9]:
# Unified dataset: the national layers (geometry + AREA) stacked with the
# Washington river-system water bodies (NAME + geometry).
unified_layers = [us_hydrogaphy, river_system_wa]
hydrography_area = pd.concat(unified_layers)

In [10]:
# Output directory for the processed inland-water layers (relative path).
output_directory = "../data/processed/GIS/inland_waters"

# Create the directory tree if needed; `exist_ok=True` replaces the separate
# existence check and avoids the check-then-create race.
os.makedirs(output_directory, exist_ok=True)

# Save Inland Waters
hydrography_area.to_parquet(f"{output_directory}/US_INLAND_WATERS.parquet")

In [16]:
# Build candidate connected areas (nothing is written to disk here):
# reduce the unified dataset to geometry only and dissolve it into one feature...
hydrography_area = hydrography_area[["geometry"]].dissolve()

# ...then explode that feature so each geometry part becomes its own row.
hydrography_areas = hydrography_area.explode()

In [17]:
# Tie point used to pick out the Columbia River area (degrees, EPSG:4326).
# NOTE(review): the original comment placed this "near Portland, OR", but a
# longitude of -123.8 looks closer to the river mouth -- confirm the intended
# location.
lon = -123.8
lat = 46.225

# Single-row GeoDataFrame holding the tie point (x = lon, y = lat).
tie_point = Point(lon, lat)
columbia_tie_point_gdf = gpd.GeoDataFrame(
    {"geometry": [tie_point]}, crs="EPSG:4326"
)

In [18]:
# Keep only the exploded area(s) that spatially join to the tie point.
columbia_hydrography_areas = hydrography_areas.sjoin(columbia_tie_point_gdf)

# Fail loudly if the tie point misses every area; otherwise the later clip and
# save steps would silently produce an empty dataset.
assert (
    not columbia_hydrography_areas.empty
), "Tie point does not intersect any hydrography area"

In [19]:
# Rebuild the attribute-carrying stack (AREA / NAME columns) of national layers
# plus Washington river-system water bodies.
attribute_layers = [us_hydrogaphy, river_system_wa]
hydrography_area_reduced = pd.concat(attribute_layers)

In [20]:
# Exploratory check: display the tie-point area clipped by the stacked layers.
# The result is only shown, not stored.
columbia_hydrography_areas.clip(mask=hydrography_area_reduced)

Unnamed: 0,geometry,index_right
0,"POLYGON Z ((-123.10679 44.0946 0, -123.10746 4...",0


In [29]:
# Clip the attribute-carrying layers to the area joined to the tie point.
columbia_hydrography_area_reduced = hydrography_area_reduced.clip(
    columbia_hydrography_areas
)

# Label rows by layer: "BODY" -> "MAJOR", "LINE" -> "MINOR"; every other row
# keeps its existing NAME.
layer_tag = columbia_hydrography_area_reduced["AREA"]
columbia_hydrography_area_reduced["NAME"] = np.where(
    layer_tag == "BODY",
    "MAJOR",
    np.where(
        layer_tag == "LINE",
        "MINOR",
        columbia_hydrography_area_reduced["NAME"],
    ),
)

# Only the label and the geometry are carried forward.
columbia_hydrography_area_reduced = columbia_hydrography_area_reduced[
    ["NAME", "geometry"]
]

In [33]:
def classify_river(name):
    """Classify a feature name as "MAJOR", "MINOR" or "OTHER".

    The match is a case-insensitive substring test; "MAJOR" is checked before
    "MINOR", so a name containing both is classified as "MAJOR".

    Parameters
    ----------
    name : str or any
        Feature name. Non-string values (e.g. ``None`` or a NaN float from a
        missing NAME) are classified as "OTHER" instead of raising.

    Returns
    -------
    str
        One of "MAJOR", "MINOR", "OTHER".
    """
    # Missing names arrive as None/NaN, which have no `.upper()`.
    if not isinstance(name, str):
        return "OTHER"

    label = name.upper()
    for river_type in ("MAJOR", "MINOR"):
        if river_type in label:
            return river_type
    return "OTHER"

In [34]:
# Derive RIVER_TYPE by running every NAME through classify_river.
feature_names = columbia_hydrography_area_reduced["NAME"]
columbia_hydrography_area_reduced["RIVER_TYPE"] = feature_names.apply(classify_river)

In [35]:
# Write the Columbia River subset next to the other inland-water outputs.
columbia_output_path = f"{output_directory}/US_INLAND_WATERS_COLUMBIA_R.parquet"
columbia_hydrography_area_reduced.to_parquet(columbia_output_path)