In [35]:
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import re
from bs4 import BeautifulSoup
from io import StringIO

import calendar
import requests
import geopandas as gpd
from shapely.geometry import Point
import time


def extract_single_date(date_str, year):
    if pd.isna(date_str):
        return None

    # Strip periods (e.g., "Aug." → "Aug") and whitespace
    clean = date_str.strip().replace(".", "")

    # Match a single date or date range like "Aug 25–26" or "Aug 25"
    match = re.match(r"([A-Za-z]+)\s*(\d{1,2})", clean)
    if match:
        month = match.group(1)
        day = match.group(2)
        return f"{month} {day} {year}"

    # Fallback
    return f"{clean} {year}"


def fetch_buoy10_year(year: int) -> pd.DataFrame:
    """Fetches the Buoy 10 fishing report table for a given year and returns it as a DataFrame."""
    url = f"https://wdfw.wa.gov/fishing/reports/creel/buoy10#{year}"
    resp = requests.get(url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Find the header containing the year
    year_header = soup.find(
        lambda tag: tag.name in ["h2", "h3"] and str(year) in tag.text
    )
    if not year_header:
        raise ValueError(f"No data found for year {year}")

    # Get the table after the year header
    table = year_header.find_next("table")
    if table is None:
        raise ValueError(f"No table found for year {year}")

    # Use StringIO to pass the HTML table to pandas
    table_html = StringIO(str(table))
    df = pd.read_html(table_html)[0]
    df["Year"] = year

    # Clean & convert the Date column
    df["Date_clean"] = df["Date"].apply(lambda x: extract_single_date(x, year))
    df["Date_parsed"] = pd.to_datetime(df["Date_clean"], errors="coerce")

    return df


def build_buoy10_df(years: list[int]) -> pd.DataFrame:
    """Fetches data for each year in the list and concatenates into one mega DataFrame."""
    dfs = []
    for y in years:
        try:
            df = fetch_buoy10_year(y)
            dfs.append(df)
        except Exception as e:
            print(f"⚠️ Warning: Skipped year {y} due to: {e}")
    if not dfs:
        raise ValueError(
            "No valid data frames fetched – check your years or site structure."
        )

    result = pd.concat(dfs, ignore_index=True)

    result.columns = result.columns.str.strip()
    result[["Number of Boats", "Chinook Kept"]] = result[
        ["Number of Boats", "Chinook Kept"]
    ].apply(pd.to_numeric, errors="coerce")

    return result

In [100]:
# # Open Washington Hydrography Data
# washington_hydrography = gpd.read_file(
#     "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/processed/GIS/inland_rivers/US_WUHD_Washington/NHDWaterbody.shp"
# )
# washington_hydrography


In [2]:
## Bonneville Data
bon_data = pd.read_csv(
    "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/AD16894096d7ce32.csv"
)
bon_data = bon_data[bon_data["Dam"] == "BON"].copy()
bon_data["DATETIME"] = pd.to_datetime(bon_data["Date"])
bon_data["DoY"] = bon_data["DATETIME"].dt.day_of_year
bon_data["Year"] = bon_data["DATETIME"].dt.year

# Dalles Data
dalles = pd.read_csv(
    "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/AD1689415a04da39.csv"
)
dalles = dalles[dalles["Dam"] == "TDA"].copy()

dalles["DATETIME"] = pd.to_datetime(dalles["Date"])
dalles["DoY"] = dalles["DATETIME"].dt.day_of_year
dalles["Year"] = dalles["DATETIME"].dt.year

# John Day Data
johnday = pd.read_csv(
    "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/AD16894181728912.csv"
)

johnday = johnday[johnday["Dam"] == "JDA"].copy()

johnday["DATETIME"] = pd.to_datetime(johnday["Date"])
johnday["DoY"] = johnday["DATETIME"].dt.day_of_year
johnday["Year"] = johnday["DATETIME"].dt.year

willamette = pd.read_csv(
    "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/AD168948e560fe6a.csv"
)
willamette = willamette[willamette["Dam"] == "WFA"].copy()

willamette["DATETIME"] = pd.to_datetime(willamette["Date"])
willamette["DoY"] = willamette["DATETIME"].dt.day_of_year
willamette["Year"] = willamette["DATETIME"].dt.year

# Calculate Mean Count by Day of Year
bon_avg = bon_data.groupby("DoY", as_index=False)["ChinookAdult"].mean()
dalles_avg = dalles.groupby("DoY", as_index=False)["ChinookAdult"].mean()
johnday_avg = johnday.groupby("DoY", as_index=False)["ChinookAdult"].mean()
willamette_avg = willamette.groupby("DoY", as_index=False)["ChinookAdult"].mean()

In [3]:
# Plot Averages for the Dams
fig = px.line()
fig.add_scatter(
    x=bon_avg["DoY"],
    y=bon_avg["ChinookAdult"],
    name="Bonneville",
    mode="lines",
    line=dict(color="#08B2E3", width=2),
)

fig.add_scatter(
    x=dalles_avg["DoY"],
    y=dalles_avg["ChinookAdult"],
    name="Dalles",
    mode="lines",
    line=dict(color="#EE6352", width=2),
)

fig.add_scatter(
    x=johnday_avg["DoY"],
    y=johnday_avg["ChinookAdult"],
    name="John Day",
    mode="lines",
    line=dict(color="#B2FFA9", width=2),
)

fig.add_scatter(
    x=willamette_avg["DoY"],
    y=willamette_avg["ChinookAdult"],
    name="Willamette",
    mode="lines",
    line=dict(color="#E5D4CE", width=2),
)

# Update layout with correct syntax
fig.update_layout(
    title=dict(
        text="Mean Daily Chinook Adult Count by Day of Year",
        font=dict(size=20, family="Helvetica Neue", color="#333"),
    ),
    plot_bgcolor="white",
    paper_bgcolor="white",
    xaxis=dict(
        title=dict(text="Day of Year", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis=dict(
        title=dict(text="Mean Count", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    legend=dict(title="Dam Location", font=dict(size=14)),
    font=dict(family="Helvetica Neue", size=14, color="#333"),
    margin=dict(l=60, r=40, t=60, b=60),
)
import calendar

# Get the first DoY of each month (non-leap year assumed)
month_start_days = [1]
for month in range(2, 13):
    month_start_days.append(
        sum(calendar.monthrange(2024, m)[1] for m in range(1, month)) + 1
    )

# Add monthly divider lines
for doy in month_start_days:
    fig.add_vline(x=doy, line=dict(color="#999", width=1, dash="dash"), layer="below")

for i, doy in enumerate(month_start_days):
    fig.add_annotation(
        x=doy + 15,  # push label into the month a bit
        y=0,
        yanchor="bottom",
        text=calendar.month_abbr[i + 1],
        showarrow=False,
        font=dict(size=12, color="#666"),
        xanchor="center",
        xref="x",
        yref="paper",
    )

fig.show()

In [4]:
# Hypothesis - Orca Sightings Peak in October + April

# April is 100 day of the year
# October is 283 day of the year

In [5]:
buoy10_raw = build_buoy10_df(
    [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]
)

buoy10_raw["DATE"] = pd.to_datetime(buoy10_raw["Date_clean"], format="mixed")
buoy10_raw["DoY"] = buoy10_raw["DATE"].dt.day_of_year

In [6]:
buoy10 = buoy10_raw.copy()

buoy10["Chinook Kept"] = buoy10["Chinook Kept"] / buoy10["Number of Boats"]

buoy10_avg = buoy10.groupby("DoY", as_index=False)["Chinook Kept"].mean()

In [7]:
buoy10

Unnamed: 0,Date,Number of Boats,Number of Anglers,Chinook Kept,Coho Kept,Comments,Year,Date_clean,Date_parsed,Comment,DATE,DoY
0,01-Aug,43.0,107.0,0.302326,15.0,Opening Day,2014,01-Aug 2014,2014-08-01,,2014-08-01,213
1,02-Aug,46.0,118.0,0.673913,25.0,225 boats counted during effort flight count,2014,02-Aug 2014,2014-08-02,,2014-08-02,214
2,03-Aug,46.0,126.0,0.717391,8.0,,2014,03-Aug 2014,2014-08-03,,2014-08-03,215
3,04-Aug,53.0,138.0,0.584906,5.0,Private boats,2014,04-Aug 2014,2014-08-04,,2014-08-04,216
4,04-Aug,1.0,5.0,0.000000,0.0,Charter,2014,04-Aug 2014,2014-08-04,,2014-08-04,216
...,...,...,...,...,...,...,...,...,...,...,...,...
676,Sept. 2,,,,,,2025,Sept 2 2025,NaT,,2025-09-02,245
677,Sept. 3,,,,,,2025,Sept 3 2025,NaT,,2025-09-03,246
678,Sept. 4,,,,,,2025,Sept 4 2025,NaT,,2025-09-04,247
679,Sept. 5,,,,,,2025,Sept 5 2025,NaT,,2025-09-05,248


In [8]:
# fig = px.line(buoy10_avg, x="DoY", y="Chinook Kept")
# fig.show()

In [9]:
# Base figure
fig = go.Figure()

# Primary axis: Dam counts
fig.add_trace(
    go.Scatter(
        x=bon_avg["DoY"],
        y=bon_avg["ChinookAdult"],
        name="Bonneville",
        mode="lines",
        line=dict(color="#08B2E3", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=dalles_avg["DoY"],
        y=dalles_avg["ChinookAdult"],
        name="Dalles",
        mode="lines",
        line=dict(color="#EE6352", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=johnday_avg["DoY"],
        y=johnday_avg["ChinookAdult"],
        name="John Day",
        mode="lines",
        line=dict(color="#B2FFA9", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=willamette_avg["DoY"],
        y=willamette_avg["ChinookAdult"],
        name="Willamette",
        mode="lines",
        line=dict(color="#4B244A", width=2),
        yaxis="y",
    )
)


# Secondary axis: Buoy 10 catch data
fig.add_trace(
    go.Scatter(
        x=buoy10_avg["DoY"],
        y=buoy10_avg["Chinook Kept"],
        name="Buoy 10 Chinook Kept",
        mode="lines",
        line=dict(color="#C155D3", width=2, dash="dot"),
        yaxis="y2",
    )
)

# Layout
fig.update_layout(
    title=dict(
        text="Mean Daily Chinook Adult Count (Dams) vs. Chinook Kept (Buoy 10)",
        font=dict(size=20, family="Helvetica Neue", color="#333"),
    ),
    plot_bgcolor="white",
    paper_bgcolor="white",
    xaxis=dict(
        title=dict(text="Day of Year", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis=dict(
        title=dict(text="Mean Count (Dam)", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis2=dict(
        title=dict(text="Chinook Kept (Buoy 10)", font=dict(size=16)),
        tickfont=dict(size=14),
        overlaying="y",
        side="right",
        showgrid=False,
    ),
    legend=dict(title="Location", font=dict(size=14)),
    font=dict(family="Helvetica Neue", size=14, color="#333"),
    margin=dict(l=60, r=60, t=60, b=60),
)

# Month lines and labels
month_start_days = [1]
for month in range(2, 13):
    month_start_days.append(
        sum(calendar.monthrange(2024, m)[1] for m in range(1, month)) + 1
    )

for doy in month_start_days:
    fig.add_vline(x=doy, line=dict(color="#999", width=1, dash="dash"), layer="below")

for i, doy in enumerate(month_start_days):
    fig.add_annotation(
        x=doy + 15,
        y=0,
        yanchor="bottom",
        text=calendar.month_abbr[i + 1],
        showarrow=False,
        font=dict(size=12, color="#666"),
        xanchor="center",
        xref="x",
        yref="paper",
    )

fig.show()

In [10]:
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd

# Approximate lat/lon for each site
locations_data = {
    "name": ["Bonneville Dam", "Dalles Dam", "John Day Dam", "Buoy 10"],
    "lat": [45.6433, 45.6082, 45.7175, 46.2167],
    "lon": [-121.9483, -121.1794, -120.6892, -123.9333],
}

# Create DataFrame
df_locations = pd.DataFrame(locations_data)

# Convert to GeoDataFrame
gdf_locations = gpd.GeoDataFrame(
    df_locations,
    geometry=[Point(xy) for xy in zip(df_locations["lon"], df_locations["lat"])],
    crs="EPSG:4326",  # WGS84
)

# Optional: Set as lookup if you want
locations_lookup = gdf_locations.set_index("name")

# Done
print(locations_lookup)

                    lat       lon                   geometry
name                                                        
Bonneville Dam  45.6433 -121.9483  POINT (-121.9483 45.6433)
Dalles Dam      45.6082 -121.1794  POINT (-121.1794 45.6082)
John Day Dam    45.7175 -120.6892  POINT (-120.6892 45.7175)
Buoy 10         46.2167 -123.9333  POINT (-123.9333 46.2167)


In [11]:
# gdf_locations.explore()

In [12]:
import geopandas as gpd

# Assuming you already have your locations_lookup GeoDataFrame (in EPSG:4326)
# Reproject to UTM zone appropriate for Washington state — usually UTM zone 10N
gdf_utm = locations_lookup.to_crs(epsg=32610)

# Create a distance matrix (in meters)
distance_matrix = gdf_utm.geometry.apply(lambda geom1: gdf_utm.geometry.distance(geom1))

print(distance_matrix)

name            Bonneville Dam     Dalles Dam   John Day Dam        Buoy 10
name                                                                       
Bonneville Dam        0.000000   60073.800110   98421.415060  166560.037086
Dalles Dam        60073.800110       0.000000   40084.186093  224021.781306
John Day Dam      98421.415060   40084.186093       0.000000  257399.567898
Buoy 10          166560.037086  224021.781306  257399.567898       0.000000


In [13]:
distance_miles = (distance_matrix * 0.000621371).round(2)
print(distance_miles)

name            Bonneville Dam  Dalles Dam  John Day Dam  Buoy 10
name                                                             
Bonneville Dam            0.00       37.33         61.16   103.50
Dalles Dam               37.33        0.00         24.91   139.20
John Day Dam             61.16       24.91          0.00   159.94
Buoy 10                 103.50      139.20        159.94     0.00


In [14]:
# How fast do chinook swim up river?
# 2-4 miles per hour

# from bonneville to dalles
distance_miles_lu = distance_miles["Bonneville Dam"].reset_index()
distance_miles_lu[distance_miles_lu["name"] == "Dalles Dam"]

37.33 * 2 / 24, 37.33 * 4 / 24

# 3 - 6 days - we would expect 3-6 day lag then
# day 120 : 122 (2 days)
# day 251 : 254 (3 days)
# makes sense...though actual distance traveled will be different... not straight line

# buoy10 - bonneville
103.50 * 2 / 24, 103.50 * 4 / 24

# 8 - 17 days
# day 231 : 251 (20 days)

(8.625, 17.25)

In [15]:
# What About Sightings Data
import pandas as pd
import geopandas as gpd
import glob
import h3
from typing import Literal
from datetime import datetime, timedelta
import pandas as pd
import plotly.express as px
import geopandas as gpd
from shapely.geometry import Point


## Compute Sunday from Statistical Week
def compute_sunday(row):
    jan1 = datetime(int(row["catch_year"]), 1, 1)
    first_monday = jan1 + timedelta(days=(7 - jan1.weekday()) % 7)
    return first_monday + timedelta(weeks=int(row["stat_week"]) - 1, days=6)


# Loan and Process Sightings
def load_and_process_sighting_data(
    directory: str,
    date_col: str,
    lat_col: str,
    lon_col: str,
    id_col: str,
    h3_resolution: int,
    start_date: str = None,
    source: Literal["TMW", "ACARTIA"] = "TMW",
) -> pd.DataFrame:
    """
    Load and process sighting data for TMW or Acartia.

    Args:
        directory (str): Path to CSV files.
        date_col (str): Column name containing datetime string.
        lat_col (str): Latitude column name.
        lon_col (str): Longitude column name.
        id_col (str): Unique identifier or countable column.
        h3_resolution (int): H3 resolution to use.
        start_date (str, optional): Filter records to start at this date.
        source (str): "TMW" or "ACARTIA", for minor formatting differences.

    Returns:
        pd.DataFrame: Aggregated sightings data with full date-grid coverage.
    """
    print(directory)
    # Read & concat all CSVs
    if ".csv" in directory:
        data = pd.read_csv(directory)
    else:
        data = pd.concat(
            [pd.read_csv(path) for path in glob.glob(f"{directory}/*.csv")]
        )

    data.columns = data.columns.str.upper()

    if source == "TMW":
        # Filter to SRKW
        data = data[
            ~(
                (data.POD.str.contains("t"))
                | (data.POD.str.contains("bigg"))
                | (data.POD.str.contains("orca"))
            )
        ]

    elif source == "ACARTIA":
        data = data[
            (data.TYPE.str.lower().str.contains("killer"))
            | (data.TYPE.str.lower().str.contains("orca"))
            | (data.TYPE.str.lower().str.contains("srk"))
        ]

        data = data[
            ~(
                (data.DATA_SOURCE_COMMENTS.str.lower().str.contains("biggs"))
                | (data.DATA_SOURCE_COMMENTS.str.lower().str.contains("trans"))
            )
        ]

    # Parse date and geo
    data["DATE"] = data[date_col].str[:10]
    data["LATITUDE"] = pd.to_numeric(data[lat_col], errors="coerce")
    data["LONGITUDE"] = pd.to_numeric(data[lon_col], errors="coerce")
    data = data.dropna(subset=["LATITUDE", "LONGITUDE"])

    # Calculate H3 grid
    h3_col = f"H3_GRID_{h3_resolution}"
    data[h3_col] = data.apply(
        lambda x: h3.latlng_to_cell(x["LATITUDE"], x["LONGITUDE"], h3_resolution),
        axis=1,
    )

    data["DATE"] = pd.to_datetime(data["DATE"])
    if start_date:
        data = data[data["DATE"] >= pd.to_datetime(start_date)]

    # Aggregate sightings
    data_agg = data.groupby(
        ["DATE", "LATITUDE", "LONGITUDE", h3_col], as_index=False
    ).agg(SIGHTING_COUNT=(id_col, "count"))

    return data_agg


## Set H3 Resolution
h3_resolution = 3

## TMW Data Path
tmw_directory = "/Users/tylerstevenson/Documents/CODE/orcasalmon/data/twm"

## Acartia Data Path
acartia_directory = "/Users/tylerstevenson/Documents/CODE/FindMyWhale/data/raw/sightings/acartia-export.csv"

## Open TMW
tmw_data_cleaned = load_and_process_sighting_data(
    directory=tmw_directory,
    date_col="SIGHTDATE",
    lat_col="LATITUDE",
    lon_col="LONGITUDE",
    id_col="DATE",  # or other proxy for sightings count
    h3_resolution=h3_resolution,
    source="TMW",
)

## Open Acartia
acartia_data_cleaned = load_and_process_sighting_data(
    directory=acartia_directory,
    date_col="CREATED",
    lat_col="LATITUDE",
    lon_col="LONGITUDE",
    id_col="ENTRY_ID",
    h3_resolution=h3_resolution,
    start_date="2022-01-01",
    source="ACARTIA",
)

# Conbine Sightings Data
sightings_data_raw = pd.concat([acartia_data_cleaned, tmw_data_cleaned])

/Users/tylerstevenson/Documents/CODE/orcasalmon/data/twm
/Users/tylerstevenson/Documents/CODE/FindMyWhale/data/raw/sightings/acartia-export.csv


In [16]:
# Assuming your DataFrame is called df
sightings_data_raw["DATE"] = pd.to_datetime(sightings_data_raw["DATE"])

# Create geometry column from lat/lon
sightings_data_raw["geometry"] = sightings_data_raw.apply(
    lambda row: Point(row["LONGITUDE"], row["LATITUDE"]), axis=1
)

# Convert to GeoDataFrame with WGS84 CRS
sightings_gdf = gpd.GeoDataFrame(
    sightings_data_raw, geometry="geometry", crs="EPSG:4326"
)

In [17]:
degree_buffer_for_buoy10 = 0.2  # 1 degree ~ 70 miles, 0.5 degree ~ 35 miles

locations_lookup_buffered = locations_lookup.copy()
locations_lookup_buffered["geometry"] = locations_lookup_buffered.buffer(
    degree_buffer_for_buoy10
)
locations_lookup_buffered = locations_lookup_buffered.reset_index()

sightings_gdf_near_buoy = sightings_gdf.sjoin(
    locations_lookup_buffered[locations_lookup_buffered["name"] == "Buoy 10"]
)


Geometry is in a geographic CRS. Results from 'buffer' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [18]:
sightings_gdf_near_buoy["DoY"] = sightings_gdf_near_buoy["DATE"].dt.day_of_year
sightings_gdf_near_buoy_sum = sightings_gdf_near_buoy.groupby("DoY", as_index=False)[
    "SIGHTING_COUNT"
].sum()

# Make sure your DATE column is datetime
sightings_gdf_near_buoy["DATE"] = pd.to_datetime(sightings_gdf_near_buoy["DATE"])

# Create a 'week_start' column (Monday as start of week)
sightings_gdf_near_buoy["week_start"] = (
    sightings_gdf_near_buoy["DATE"].dt.to_period("W").apply(lambda r: r.start_time)
)

# Group by week_start and sum sightings
weekly_sightings = (
    sightings_gdf_near_buoy.groupby("week_start")["SIGHTING_COUNT"].sum().reset_index()
)

In [19]:
weekly_sightings["DoY"] = weekly_sightings["week_start"].dt.day_of_year
weekly_sightings = weekly_sightings.sort_values("DoY").reset_index(drop=True)

In [20]:
# Step 1: Full DoY range (1–366)
full_doy = pd.DataFrame({"DoY": range(1, 367)})

# Step 2: Merge with existing data
weekly_sightings = full_doy.merge(weekly_sightings, on="DoY", how="left")

# Step 3: Fill missing SIGHTING_COUNT with 0
weekly_sightings["SIGHTING_COUNT"] = (
    weekly_sightings["SIGHTING_COUNT"].fillna(0).astype(int)
)

In [21]:
import plotly.graph_objects as go
import calendar

# Base figure
fig = go.Figure()

# Primary axis: Dam counts
fig.add_trace(
    go.Scatter(
        x=bon_avg["DoY"],
        y=bon_avg["ChinookAdult"],
        name="Bonneville",
        mode="lines",
        line=dict(color="#08B2E3", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=dalles_avg["DoY"],
        y=dalles_avg["ChinookAdult"],
        name="Dalles",
        mode="lines",
        line=dict(color="#EE6352", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=johnday_avg["DoY"],
        y=johnday_avg["ChinookAdult"],
        name="John Day",
        mode="lines",
        line=dict(color="#B2FFA9", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=willamette_avg["DoY"],
        y=willamette_avg["ChinookAdult"],
        name="Willamette",
        mode="lines",
        line=dict(color="#4B244A", width=2),
        yaxis="y",
    )
)

# Secondary axis: Buoy 10 catch data
fig.add_trace(
    go.Scatter(
        x=buoy10_avg["DoY"],
        y=buoy10_avg["Chinook Kept"] * weekly_sightings["SIGHTING_COUNT"].max(),
        name="Buoy 10 Chinook Kept",
        mode="lines",
        line=dict(color="#C155D3", width=2, dash="dot"),
        yaxis="y2",
    )
)

# Secondary axis: Buoy 10 catch data
fig.add_trace(
    go.Scatter(
        x=weekly_sightings["DoY"],
        y=weekly_sightings["SIGHTING_COUNT"],
        name="Orca Sightings Near Buoy-10",
        mode="lines",
        line=dict(color="#C81D25", width=1, dash="longdash"),
        yaxis="y2",
    )
)

# Layout
fig.update_layout(
    title=dict(
        text="Mean Daily Chinook Adult Count (Dams) vs. Chinook Kept (Buoy 10)",
        font=dict(size=20, family="Helvetica Neue", color="#333"),
    ),
    plot_bgcolor="white",
    paper_bgcolor="white",
    xaxis=dict(
        title=dict(text="Day of Year", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis=dict(
        title=dict(text="Mean Count (Dam)", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis2=dict(
        title=dict(text="Chinook Kept (Buoy 10)", font=dict(size=16)),
        tickfont=dict(size=14),
        overlaying="y",
        side="right",
        showgrid=False,
    ),
    legend=dict(title="Location", font=dict(size=14)),
    font=dict(family="Helvetica Neue", size=14, color="#333"),
    margin=dict(l=60, r=60, t=60, b=60),
)

# Month lines and labels
month_start_days = [1]
for month in range(2, 13):
    month_start_days.append(
        sum(calendar.monthrange(2024, m)[1] for m in range(1, month)) + 1
    )

for doy in month_start_days:
    fig.add_vline(x=doy, line=dict(color="#999", width=1, dash="dash"), layer="below")

for i, doy in enumerate(month_start_days):
    fig.add_annotation(
        x=doy + 15,
        y=0,
        yanchor="bottom",
        text=calendar.month_abbr[i + 1],
        showarrow=False,
        font=dict(size=12, color="#666"),
        xanchor="center",
        xref="x",
        yref="paper",
    )

fig.show()

In [22]:
rmpc_Data = "/Users/tylerstevenson/Documents/CODE/SalmonSignal/data/raw/RMPC/WDFW/processed/MA12_RMPC_CATCH.parquet"
rmpc_Data = pd.read_parquet(rmpc_Data)
rmpc_Data["DATE"] = pd.to_datetime(rmpc_Data["stat_week_sunday"])
rmpc_Data["DoY"] = rmpc_Data["DATE"].dt.day_of_year

rmpc_Data = rmpc_Data[rmpc_Data["species_name"] == "Chinook"]
rmpc_Data = rmpc_Data[rmpc_Data["MARINE_AREA_LARGE"] == "1"]
rmpc_Data = rmpc_Data.groupby(["MARINE_AREA_LARGE", "DoY"], as_index=False)[
    "number_caught"
].mean()

In [23]:
# fig = px.line(rmpc_Data, x = 'DoY', y = 'number_caught', color = 'MARINE_AREA_LARGE')
# fig.show()

In [24]:
rmpc_Data

Unnamed: 0,MARINE_AREA_LARGE,DoY,number_caught
0,1,1,2.875000
1,1,2,0.000000
2,1,3,1.428571
3,1,4,0.500000
4,1,5,0.000000
...,...,...,...
361,1,362,3.666667
362,1,363,0.000000
363,1,364,5.666667
364,1,365,1.375000


In [25]:
import plotly.graph_objects as go
import calendar

# Base figure
fig = go.Figure()

# Primary axis: Dam counts
fig.add_trace(
    go.Scatter(
        x=bon_avg["DoY"],
        y=bon_avg["ChinookAdult"],
        name="Bonneville",
        mode="lines",
        line=dict(color="#08B2E3", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=dalles_avg["DoY"],
        y=dalles_avg["ChinookAdult"],
        name="Dalles",
        mode="lines",
        line=dict(color="#EE6352", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=johnday_avg["DoY"],
        y=johnday_avg["ChinookAdult"],
        name="John Day",
        mode="lines",
        line=dict(color="#B2FFA9", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=rmpc_Data["DoY"],
        y=rmpc_Data["number_caught"],
        name="RMPC Catch - WDFW Marine Area 1",
        mode="lines",
        line=dict(color="#FFEC51", width=2),
        yaxis="y",
    )
)

# Secondary axis: Buoy 10 catch data
fig.add_trace(
    go.Scatter(
        x=buoy10_avg["DoY"],
        y=buoy10_avg["Chinook Kept"] * weekly_sightings["SIGHTING_COUNT"].max(),
        name="Buoy 10 Chinook Kept",
        mode="lines",
        line=dict(color="#C155D3", width=2, dash="dot"),
        yaxis="y2",
    )
)

# Secondary axis: Buoy 10 catch data
fig.add_trace(
    go.Scatter(
        x=weekly_sightings["DoY"],
        y=weekly_sightings["SIGHTING_COUNT"],
        name="Orca Sightings Near Buoy-10",
        mode="lines",
        line=dict(color="#C81D25", width=1, dash="longdash"),
        yaxis="y2",
    )
)

# Layout
fig.update_layout(
    title=dict(
        text="Mean Daily Chinook Adult Count (Dams) vs. Chinook Kept (Buoy 10)",
        font=dict(size=20, family="Helvetica Neue", color="#333"),
    ),
    plot_bgcolor="white",
    paper_bgcolor="white",
    xaxis=dict(
        title=dict(text="Day of Year", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis=dict(
        title=dict(text="Mean Count (Dam)", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis2=dict(
        title=dict(text="Chinook Kept (Buoy 10)", font=dict(size=16)),
        tickfont=dict(size=14),
        overlaying="y",
        side="right",
        showgrid=False,
    ),
    legend=dict(title="Location", font=dict(size=14)),
    font=dict(family="Helvetica Neue", size=14, color="#333"),
    margin=dict(l=60, r=60, t=60, b=60),
)

# Month lines and labels
month_start_days = [1]
for month in range(2, 13):
    month_start_days.append(
        sum(calendar.monthrange(2024, m)[1] for m in range(1, month)) + 1
    )

for doy in month_start_days:
    fig.add_vline(x=doy, line=dict(color="#999", width=1, dash="dash"), layer="below")

for i, doy in enumerate(month_start_days):
    fig.add_annotation(
        x=doy + 15,
        y=0,
        yanchor="bottom",
        text=calendar.month_abbr[i + 1],
        showarrow=False,
        font=dict(size=12, color="#666"),
        xanchor="center",
        xref="x",
        yref="paper",
    )

fig.show()

In [26]:
# https://www.fisheries.noaa.gov/inport/item/18090

In [27]:
def fetch_srkw_coastal_page(offset=0, rows=100):
    """Fetch a single page of SRKW coastal occurrence data."""
    url = "https://www.webapps.nwfsc.noaa.gov/apex/parr/srkw_occurrence_coastal/data/page/"
    params = {"offset": offset, "rows": rows}
    headers = {"User-Agent": "orca-fetcher-9000"}

    response = requests.get(url, params=params, headers=headers)
    response.raise_for_status()
    return response.json().get("items", [])


def fetch_all_srkw_coastal(max_pages=500, rows_per_page=100, sleep_sec=0.5):
    """Fetch all available pages of SRKW data, auto-stop when empty."""
    all_records = []
    for i in range(max_pages):
        offset = i * rows_per_page
        # print(f"Fetching page {i} (offset={offset})...")
        try:
            page = fetch_srkw_coastal_page(offset=offset, rows=rows_per_page)
            if not page:
                print("No more records — halting.")
                break
            all_records.extend(page)
            time.sleep(sleep_sec)  # be kind to their server
        except Exception as e:
            print(f"Page {i} errored: {e}")
            continue

    return pd.DataFrame(all_records)


def to_geodataframe(df, lon_field="lon_p", lat_field="lat_p"):
    """Convert a DataFrame with lon/lat to a GeoDataFrame."""
    df = df.copy()
    df["geometry"] = df.apply(lambda row: Point(row[lon_field], row[lat_field]), axis=1)
    return gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")


# 🐋 Run the full download
tagging_df = fetch_all_srkw_coastal()
tagging_gdf = to_geodataframe(tagging_df)

No more records — halting.


In [30]:
degree_buffer_for_buoy10 = 0.2  # 1 degree ~ 70 miles, 0.5 degree ~ 35 miles

locations_lookup_buffered = locations_lookup.copy()
locations_lookup_buffered["geometry"] = locations_lookup_buffered.buffer(
    degree_buffer_for_buoy10
)
locations_lookup_buffered = locations_lookup_buffered.reset_index()

tagging_gdf["Date"] = pd.to_datetime(tagging_gdf["gmt_date"])
tagging_gdf["Year"] = tagging_gdf["Date"].dt.year
tagging_gdf["DoY"] = tagging_gdf["Date"].dt.day_of_year


tagged_near_buoy10 = tagging_gdf.sjoin(
    locations_lookup_buffered[locations_lookup_buffered["name"] == "Buoy 10"]
)

tagged_near_buoy10_sums = tagged_near_buoy10[["DoY", "Year"]].drop_duplicates()
tagged_near_buoy10_sums = tagged_near_buoy10_sums.groupby("DoY", as_index=False).agg(
    COUNT=("Year", "count")
)
tagged_near_buoy10_sums["TYPE"] = "2012-2015 Coastal Tagging Near Buoy 10"

tagged_not_near_buoy10_sums = tagging_gdf[["DoY", "Year"]].drop_duplicates()
tagged_not_near_buoy10_sums = tagged_not_near_buoy10_sums.groupby(
    "DoY", as_index=False
).agg(COUNT=("Year", "count"))
tagged_not_near_buoy10_sums["TYPE"] = "2012-2015 Coastal Tagging Not Near Buoy 10"
tagged_not_near_buoy10_sums = tagged_not_near_buoy10_sums[
    ~tagged_not_near_buoy10_sums.DoY.isin(tagged_near_buoy10_sums.DoY)
]


Geometry is in a geographic CRS. Results from 'buffer' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.




In [31]:
tags = pd.concat([tagged_not_near_buoy10_sums, tagged_near_buoy10_sums])

In [32]:
# fig = px.scatter(tags, x="DoY", y="COUNT", color="TYPE")
# fig.show()

In [33]:
import plotly.graph_objects as go
import calendar

# Base figure
fig = go.Figure()

# Primary axis: Dam counts
fig.add_trace(
    go.Scatter(
        x=bon_avg["DoY"],
        y=bon_avg["ChinookAdult"],
        name="Adult Chinook Count (Bonneville)",
        mode="lines",
        line=dict(color="#08B2E3", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=dalles_avg["DoY"],
        y=dalles_avg["ChinookAdult"],
        name="Adult Chinook Count (Dalles)",
        mode="lines",
        line=dict(color="#EE6352", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=johnday_avg["DoY"],
        y=johnday_avg["ChinookAdult"],
        name="Adult Chinook Count (John Day)",
        mode="lines",
        line=dict(color="#B2FFA9", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=rmpc_Data["DoY"],
        y=rmpc_Data["number_caught"],
        name="WDFW Catch (Marine Area 1)",
        mode="lines",
        line=dict(color="#FFEC51", width=2),
        yaxis="y",
    )
)

fig.add_trace(
    go.Scatter(
        x=willamette_avg["DoY"],
        y=willamette_avg["ChinookAdult"],
        name="Adult Chinook Count (Willamette)",
        mode="lines",
        line=dict(color="#4B244A", width=2),
        yaxis="y",
    )
)

# Secondary axis: Buoy 10 catch data
fig.add_trace(
    go.Scatter(
        x=buoy10_avg["DoY"],
        y=buoy10_avg["Chinook Kept"] * weekly_sightings["SIGHTING_COUNT"].max(),
        name="WDFW Catch - (Buoy 10)",
        mode="lines",
        line=dict(color="#C155D3", width=2, dash="dot"),
        yaxis="y2",
    )
)

# Secondary axis: Buoy 10 catch data
fig.add_trace(
    go.Scatter(
        x=weekly_sightings["DoY"],
        y=weekly_sightings["SIGHTING_COUNT"],
        name="Orca Sightings Near Buoy-10",
        mode="lines",
        line=dict(color="#C81D25", width=1, dash="longdash"),
        yaxis="y2",
    )
)

# Define colors by TYPE
type_colors = {
    "2012-2015 Coastal Tagging Near Buoy 10": "#DE6E4B",
    "2012-2015 Coastal Tagging Not Near Buoy 10": "#7FD1B9",
}

# Add color-coded circles across top of plot
for _, row in tags.iterrows():
    fig.add_annotation(
        x=row["DoY"],
        y=1.02,  # Float above the plot
        yref="paper",
        xref="x",
        text="●",  # Circle glyph
        font=dict(
            size=10, color=type_colors.get(row["TYPE"], "#000000")  # Fallback to black
        ),
        showarrow=False,
        xanchor="center",
    )
for tag_type, color in type_colors.items():
    fig.add_trace(
        go.Scatter(
            x=[None],  # No visible points
            y=[None],
            mode="markers",
            marker=dict(size=10, color=color),
            name=tag_type,
            showlegend=True,
        )
    )

# Layout
fig.update_layout(
    title=dict(
        text="Mean Daily Chinook Adult Count (Dams) vs. Chinook Catch (Buoy 10 + Marine Area 1) vs. Orca Presence",
        font=dict(size=20, family="Helvetica Neue", color="#333"),
    ),
    plot_bgcolor="white",
    paper_bgcolor="white",
    xaxis=dict(
        title=dict(text="Day of Year", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis=dict(
        title=dict(text="Mean Count (Dam)", font=dict(size=16)),
        tickfont=dict(size=14),
        gridcolor="#E5ECF6",
        zeroline=False,
    ),
    yaxis2=dict(
        title=dict(text="Chinook Kept (Buoy 10)", font=dict(size=16)),
        tickfont=dict(size=14),
        overlaying="y",
        side="right",
        showgrid=False,
    ),
    # legend=dict(title="Location", font=dict(size=14)),
    font=dict(family="Helvetica Neue", size=14, color="#333"),
    margin=dict(l=60, r=60, t=60, b=60),
)

fig.update_layout(
    legend=dict(
        title="Location",
        font=dict(size=14),
        x=1.1,  # 👈 Horizontal position (0 = far left, 1 = far right)
        y=0.99,  # 👈 Vertical position (1 = top, 0 = bottom)
        xanchor="left",  # Align legend box from left side
        yanchor="top",  # Align legend box from top
        bgcolor="rgba(255,255,255,0.8)",  # Optional: semi-transparent background
        bordercolor="#ccc",
        borderwidth=1,
    )
)

# Month lines and labels
month_start_days = [1]
for month in range(2, 13):
    month_start_days.append(
        sum(calendar.monthrange(2024, m)[1] for m in range(1, month)) + 1
    )

for doy in month_start_days:
    fig.add_vline(x=doy, line=dict(color="#999", width=1, dash="dash"), layer="below")

for i, doy in enumerate(month_start_days):
    fig.add_annotation(
        x=doy + 15,
        y=0,
        yanchor="bottom",
        text=calendar.month_abbr[i + 1],
        showarrow=False,
        font=dict(size=12, color="#666"),
        xanchor="center",
        xref="x",
        yref="paper",
    )

fig.show()

In [34]:
# tag_map = locations_lookup.explore()
# tagged_near_buoy10.explore(m=tag_map)
# sightings_gdf_near_buoy.explore(m=tag_map)