In [None]:
# ----------------------------------------- #
#                  MODULES                  #

# Standard Modules
import os
import warnings
from joblib import Parallel, delayed

# Third-Party Modules
import geopandas as gpd
import h3
import numpy as np
import pandas as pd
import rioxarray
from shapely.geometry import box, Point, Polygon
import xarray as xr

# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# System Configuration
# Shared joblib executor with 8 workers; the H3 query cell further down
# dispatches its delayed tasks through it.
parallel = Parallel(n_jobs=8)

#                                           #
# ----------------------------------------- #

# ----------------------------------------- #
#                 FUNCTIONS                 #


# Open Data
def open_geotiff(
    path,
    data_crs="EPSG:4326",
    desired_crs="EPSG:4326",
    mask_upper_data=False,
    mask_lower_data=False,
    mask_upper_val=None,
    mask_lower_val=None,
):
    """Open a raster with rioxarray, optionally clamp its values, and ensure a CRS.

    Parameters
    ----------
    path : str
        Location of the raster file.
    data_crs : str
        CRS written onto the raster when the file carries none.
    desired_crs : str
        CRS the caller wants. Reprojection is not implemented yet; a notice
        is printed when the raster's CRS differs from it.
    mask_upper_data : bool
        When True, every value above ``mask_upper_val`` is replaced by it.
    mask_lower_data : bool
        When True, every value below ``mask_lower_val`` is replaced by it.
    mask_upper_val, mask_lower_val : number or None
        Clamp thresholds; required when the matching flag is True.

    Returns
    -------
    The opened (and possibly clamped) raster, or None when ``path`` does not
    exist (a warning is printed in that case).

    Raises
    ------
    ValueError
        If a mask flag is True but its threshold is None.
    """
    if not os.path.exists(path):
        print(f"WARNING: Path does not exist - {path}")
        return None

    # Fail early with a clear message instead of a comparison error later on.
    if mask_upper_data and mask_upper_val is None:
        raise ValueError("mask_upper_val is required when mask_upper_data=True")
    if mask_lower_data and mask_lower_val is None:
        raise ValueError("mask_lower_val is required when mask_lower_data=True")

    # Open Data
    da = rioxarray.open_rasterio(path)

    # Clamp values to the requested range (e.g. cap at sea level with upper=0)
    if mask_upper_data:
        da = xr.where(da > mask_upper_val, mask_upper_val, da)
    if mask_lower_data:
        # Compare against the LOWER threshold; the previous code tested
        # against mask_upper_val here, which clamped the wrong cells.
        da = xr.where(da < mask_lower_val, mask_lower_val, da)

    # Set CRS (example: WGS84 EPSG:4326)
    if da.rio.crs is None:
        da = da.rio.write_crs(data_crs)
    elif da.rio.crs != desired_crs:
        # Only report when a reprojection would actually be needed.
        print("need to add a crs reprojection", da.rio.crs, "->", desired_crs)
        # TODO: reproject to provided crs

    return da


# Open Water Region Polygon
def open_water_polygon_aoi(path, data_crs="EPSG:4326"):
    """Load a water-region polygon from parquet, dissolve it, and reproject it.

    Parameters
    ----------
    path : str
        Location of the GeoParquet file.
    data_crs : str
        CRS the dissolved polygon is converted to.

    Returns
    -------
    The dissolved, reprojected polygon frame, or None when ``path`` does not
    exist (a warning is printed in that case).
    """
    path_is_missing = not os.path.exists(path)
    if path_is_missing:
        print(f"WARNING: Path does not exist - {path}")
        return None

    # Read -> merge all features into one geometry -> enforce the projection
    return gpd.read_parquet(path).dissolve().to_crs(data_crs)


# Clip Raster Data to AOI
def clip_raster_to_aoi(da, poly_data):
    """Clip a raster to the geometries of a polygon frame.

    Pixels that are only partially covered are kept (``all_touched=True``)
    and everything outside the geometries is dropped (``drop=True``).
    """
    aoi_shapes = poly_data.geometry
    aoi_crs = poly_data.crs
    return da.rio.clip(aoi_shapes, aoi_crs, all_touched=True, drop=True)


# Clip Polygon to Bounds
def clip_poly_to_data_bounds(da, poly_data):
    """Trim a polygon frame to the bounding box of a raster.

    Parameters
    ----------
    da
        Raster exposing the ``rio`` accessor (``rio.bounds()`` and ``rio.crs``).
    poly_data
        Polygon frame to trim.

    Returns
    -------
    ``poly_data`` clipped to the raster's bounding box, reduced to its
    ``geometry`` column.
    """
    # Bounds come back as (min_x, min_y, max_x, max_y) in the raster's own CRS
    raster_bounds = da.rio.bounds()

    # Label the box with the raster's CRS rather than assuming WGS84; fall
    # back to EPSG:4326 (the previous hard-coded value) when none is set.
    raster_crs = da.rio.crs if da.rio.crs is not None else "EPSG:4326"
    bounds_gdf = gpd.GeoDataFrame(
        {"geometry": [box(*raster_bounds)]}, crs=raster_crs
    )

    # Clip in the polygon's CRS so both layers share a coordinate system
    if poly_data.crs is not None and bounds_gdf.crs != poly_data.crs:
        bounds_gdf = bounds_gdf.to_crs(poly_data.crs)

    # Clip Polygon Data, then keep only the geometry column
    clipped = poly_data.clip(bounds_gdf)
    return clipped[["geometry"]]


# Geometry Point to H3 Grid
def point_to_h3(point, res):
    """Return the H3 cell index at resolution ``res`` for a shapely Point."""
    # h3 expects latitude first; assumes point.y is latitude and point.x is
    # longitude - TODO confirm against callers.
    latitude, longitude = point.y, point.x
    return h3.latlng_to_cell(latitude, longitude, res)


# H3 Grid to Polygon
def h3_to_polygon(h3_index):
    """Build a shapely Polygon from the boundary of an H3 cell."""
    ring = []
    # Each boundary vertex is unpacked as (lat, lon); flip to (lon, lat)
    # before handing the ring to Polygon.
    for lat, lon in h3.cell_to_boundary(h3_index):
        ring.append((lon, lat))
    return Polygon(ring)


# Get H3 Grids Over Polygon Extent
def get_h3_grids_over_polygon(
    poly_area,
    target_resolution=6,
    parent_resolution=3,
    n_sample_points=10000,
    rng=None,
):
    """Build the H3 cells at ``target_resolution`` that overlap a polygon.

    Random points are sampled inside ``poly_area`` and mapped to coarse
    "parent" H3 cells; each parent is expanded to its children at the target
    resolution, and only children overlapping ``poly_area`` are kept. A parent
    cell that receives no sample point is not expanded, so coverage depends on
    ``n_sample_points``.

    Parameters
    ----------
    poly_area
        Polygon frame describing the area of interest.
    target_resolution : int
        H3 resolution of the returned cells.
    parent_resolution : int
        H3 resolution used for the coarse sampling pass (previously fixed at 3).
    n_sample_points : int
        Number of random points sampled per geometry (previously fixed at 10000).
    rng : optional
        Forwarded to ``sample_points`` as ``rng`` when given, for reproducible
        sampling. Left out of the call when None, matching the old behaviour.
        NOTE(review): assumes a geopandas version whose ``sample_points``
        accepts ``rng`` - confirm.

    Returns
    -------
    (h3_gdf, h3_gdf_clipped)
        Full hexagons that overlap the polygon, and the same cells with their
        geometry clipped to the polygon.

    Raises
    ------
    ValueError
        If ``target_resolution`` is coarser than ``parent_resolution``.
    """
    if target_resolution < parent_resolution:
        raise ValueError(
            f"target_resolution ({target_resolution}) must be >= "
            f"parent_resolution ({parent_resolution})"
        )

    # Scatter random points across the polygon, one row per point
    sample_kwargs = {} if rng is None else {"rng": rng}
    sampled = poly_area.copy()
    sampled["geometry"] = sampled.sample_points(n_sample_points, **sample_kwargs)
    sampled = sampled[["geometry"]]
    sampled = sampled.explode()

    # Coarse parent cell for every sampled point
    sampled["h3_index"] = sampled.geometry.apply(
        lambda p: point_to_h3(p, parent_resolution)
    )
    parent_h3_cells = sampled["h3_index"].unique().tolist()

    # Expand each parent to the target resolution. dict.fromkeys removes any
    # duplicates while keeping a stable order, unlike list(set(...)).
    unique_children = list(
        dict.fromkeys(
            child
            for parent in parent_h3_cells
            for child in h3.cell_to_children(parent, target_resolution)
        )
    )

    # Build H3 Grid
    h3_gdf = gpd.GeoDataFrame(
        {
            "h3_index": unique_children,
            "geometry": [h3_to_polygon(h) for h in unique_children],
        },
        crs="EPSG:4326",
    )

    # Ensure they Overlap with Polygon Area
    h3_gdf_clipped = h3_gdf.clip(poly_area)
    h3_gdf = h3_gdf[h3_gdf.h3_index.isin(h3_gdf_clipped.h3_index)]
    print("Total Cells:", len(h3_gdf))

    return h3_gdf, h3_gdf_clipped


# Query AOI Information
def get_aoi_info(polygon_gdf, da_clipped):
    """Summarise the raster values that fall inside one H3 cell.

    Parameters
    ----------
    polygon_gdf
        Frame holding the cell geometry; the ``h3_index`` of its first row
        labels the result.
    da_clipped
        Raster exposing the ``rio`` accessor, clipped here to the cell.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``mean``, ``min``, ``max``, ``std``, ``median``
        and ``h3_index``. The statistics are NaN when no non-NaN value falls
        inside the cell.
    """
    cell_index = polygon_gdf["h3_index"].iloc[0]

    # Suppress warnings only while clipping, and restore the caller's filters
    # afterwards instead of changing them for the whole process.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        da_analysis = da_clipped.rio.clip(
            polygon_gdf.geometry,  # geometry column
            polygon_gdf.crs,  # CRS of the geometry
            all_touched=True,  # include partial pixels
            drop=True,  # drop pixels outside
        )

    # Drop length-1 dimensions, then discard NaN cells
    values = da_analysis.values.squeeze()
    values = values[~np.isnan(values)]

    if values.size > 0:
        record = {
            "mean": values.mean(),
            "min": values.min(),
            "max": values.max(),
            "std": values.std(),
            "median": np.median(values),
        }
    else:
        record = dict.fromkeys(("mean", "min", "max", "std", "median"), np.nan)
    record["h3_index"] = cell_index

    return pd.DataFrame([record])


#                                           #
# ----------------------------------------- #

In [2]:
# Water Polygon Path
# Relative path; resolved against the directory the notebook is run from.
water_poly_path = "../data/processed/GIS/ocean/TERRITORIAL_WATERS.parquet"

# Data Sourced from GEBCO_2025 - https://download.gebco.net
# NOTE(review): the file name suggests a tile spanning roughly 33.96-54.52 N
# and 130-120 W - confirm against the download request.
raster_path = "../data/raw/bathymetry/GEBCO_12_Aug_2025_e9cdc1fe517e/gebco_2025_n54.524_s33.96_w-130.0_e-120.0.tif"

In [None]:
# Open Water Region Polygon
# Yields the dissolved polygon converted to EPSG:4326, or None (after a
# printed warning) when the parquet file is missing.
water_poly = open_water_polygon_aoi(water_poly_path, data_crs="EPSG:4326")

In [4]:
# Open GeoTiff
# mask_upper_data=True with mask_upper_val=0 replaces every value above 0 with 0.
da = open_geotiff(
    raster_path, data_crs="EPSG:4326", mask_upper_data=True, mask_upper_val=0
)

# Clip AOI to Polygon AOI
# Both inputs must have loaded: each opener returns None when its path is
# missing, and this clip would then fail.
da_clipped = clip_raster_to_aoi(da=da, poly_data=water_poly)

In [5]:
# Clip Polygon to Bounds of Data Array
# Rebinds water_poly to a geometry-only frame trimmed to the clipped raster's
# bounding box; the polygon loaded earlier is no longer reachable by this name.
water_poly = clip_poly_to_data_bounds(da=da_clipped, poly_data=water_poly)

In [None]:
# da_clipped.plot(cmap="turbo", vmax=-1)

In [None]:
# water_poly.explore()

In [None]:
# Get H3 Polygons at Target Resolution
# First result: full hexagons that overlap the water polygon.
# Second result: the same cells with their geometry clipped to the polygon.
h3_water_poly, h3_water_poly_clipped = get_h3_grids_over_polygon(
    poly_area=water_poly, target_resolution=6
)

In [None]:
# Query DataArray Using H3 Grids - TODO; can we optimize this so that we can use smaller grid size?
# groupby hands over each cell's rows in a single pass instead of re-scanning
# the whole frame with a boolean mask for every unique index; sort=False keeps
# the cells in first-appearance order.
h3_cell_stats = parallel(
    delayed(get_aoi_info)(cell_gdf, da_clipped)
    for _, cell_gdf in h3_water_poly_clipped.groupby("h3_index", sort=False)
)
h3_cell_stats = pd.concat(h3_cell_stats, ignore_index=True)

# Combine with Original Geometry (explicit key rather than relying on
# whichever column names the two frames happen to share)
h3_da_query = pd.merge(h3_water_poly, h3_cell_stats, how="left", on="h3_index")

In [None]:
# Replace missing per-cell means with the sentinel -9999.
# NOTE(review): only "mean" is filled; the other statistic columns keep NaN,
# and the map cell colours by "median" - confirm this is intended.
h3_da_query["mean"] = h3_da_query["mean"].fillna(-9999)

In [None]:
# Map the H3 cells coloured by their median value over Esri World Imagery
# tiles, with the colour scale starting at -300. The map is kept in `m` so the
# sightings cell can draw onto it.
m = h3_da_query.explore(
    "median",
    cmap="Blues_r",
    vmin=-300,
    tiles="https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
    attr="Tiles © Esri — Source: Esri, Maxar, Earthstar Geographics, USDA FSA, USGS, AeroGRID",
)

In [None]:
# Orca Sightings
# Relative path, matching the other data paths in this notebook, so the cell
# runs on any checkout rather than on one user's machine.
# NOTE(review): assumes the notebook sits one level below the project root,
# as the "../data/..." paths used earlier imply - confirm.
sightings_path = "../data/processed/ORCA_SIGHTINGS/ORCA_SIGHTINGS.parquet"
sightings_raw = pd.read_parquet(sightings_path)
sightings_raw["DATE"] = pd.to_datetime(sightings_raw["DATE"])

# Build GeoDataFrame (vectorised point construction from lon/lat columns)
sightings_gdf = gpd.GeoDataFrame(
    sightings_raw,
    geometry=gpd.points_from_xy(
        sightings_raw["LONGITUDE"], sightings_raw["LATITUDE"]
    ),
    crs="EPSG:4326",
)

# Keep only rows whose POD_TYPE is "SRKW"
sightings = sightings_gdf[sightings_gdf["POD_TYPE"] == "SRKW"]

In [None]:
# Overlay sightings dated 2025-01-01 or later on the existing map `m`,
# coloured by the DOY column.
sightings[sightings.DATE >= pd.to_datetime("2025-01-01")].explore(
    "DOY", cmap="Reds", m=m
)

In [None]:
- Compute distance to the shelf break: derive a contour at a depth threshold (e.g., 200 m), then compute each H3 cell's distance to that contour. Shelf-break proximity is likely to be a very strong ("killer") covariate.
- Compute BPI (bathymetric position index) to detect depressions and ridges (e.g., with whitebox or scipy filters).
- Add seafloor substrate layers (gravel/mud/rock) if available — very useful for some prey species.
- Compute bathymetric complexity at multiple scales (e.g., a 3×3 window and a 25×25 window) and include both as covariates.

1. NOAA National Centers for Environmental Information (NCEI)
- The Bathymetry Data Viewer is backed by REST-like endpoints, but the public documentation focuses on downloads.
- A direct API for the netCDF/GeoTIFF products is not well documented publicly, but prebuilt rasters can be obtained via:
  - ERDDAP servers (many NOAA datasets, bathymetry included).
  - Example: https://coastwatch.pfeg.noaa.gov/erddap/griddap/
  - ERDDAP lets you query depth by bounding box; the depth variable is named `altitude` or `z`.
- NCEI also serves Coastal Relief Models (CRM) via OPeNDAP / WCS.