<a href="https://colab.research.google.com/github/ofSi02/ConcMap_test/blob/main/StreamStats.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Get stream stats for USGS gages:
Pull daily mean discharge (00060) and compute:
*   Avg winter (Nov-Mar)
*   Avg spring (Apr-Jun)
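*   Baseflow (10th percentile of daily mean flow) — note: the Step 4 stats table currently leaves this column empty because no baseflow input is passed to the stats function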


*Originally made for 1030-80-24-05 T201*

Step 1: Install and import the required packages

In [None]:
!pip -q install dataretrieval ipywidgets

import pandas as pd
from dataretrieval import nwis

import ipywidgets as widgets
from IPython.display import display, clear_output
from google.colab import files
import numpy as np
import pandas as pd


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m544.7 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[?25h

Step 2: **You do not need to change anything**; just run it :). This function requests all available daily mean discharge data for a gage, using a start date of 1900-01-01 so the full period of record is returned. Fun fact: the first stream gage in America was installed in 1889.

In [None]:
def fetch_daily_flow_period_of_record(site_no: str, start: str = "1900-01-01", end: str | None = None) -> pd.DataFrame:
    """Fetch all available daily mean discharge (USGS parameter 00060) for one site.

    An early ``start`` date is used so the request spans the whole period of
    record instead of a recent window.

    Args:
        site_no: USGS site number, passed to ``nwis.get_record`` as ``sites``.
        start: First date to request, e.g. ``"1900-01-01"``.
        end: Last date to request; ``None`` is passed through unchanged.

    Returns:
        A DataFrame with the columns ``datetime`` and ``q_cfs``, sorted by
        date, or an empty DataFrame when the service returns no rows.

    Raises:
        ValueError: If no daily-mean discharge value column can be found.
    """
    df = nwis.get_record(
        sites=site_no,
        service="dv",          # daily-values service
        parameterCd="00060",   # discharge; the column lookup below is tied to this code
        start=start,
        end=end,
    )

    if df is None or df.empty:
        return pd.DataFrame()

    df = df.reset_index()
    df["datetime"] = pd.to_datetime(df["datetime"])

    # NWIS daily-value tables typically carry a "<name>_cd" qualifier column
    # next to each value column (e.g. "00060_Mean_cd"), and its name also
    # contains "00060" and "Mean". Skip those so the rename below can only
    # ever land on the numeric discharge values.
    flow_col = next(
        (
            c for c in df.columns
            if "00060" in c and "Mean" in c and not str(c).endswith("_cd")
        ),
        None,
    )
    if flow_col is None:
        raise ValueError(f"Couldn't find discharge column. Columns: {df.columns.tolist()}")

    df = df.rename(columns={flow_col: "q_cfs"}).sort_values("datetime")
    return df[["datetime", "q_cfs"]]


Step 3: **You do not need to change anything**; this is the stats calculation section. Just run the code.

In [None]:
def make_season_stats_table(
    df: pd.DataFrame,
    gage_name: str,
    flow_col: str = "q_cfs",
    dt_col: str = "datetime",
    baseflow_col: str | None = None,   # set to a column name if you have USGS baseflow estimates
) -> pd.DataFrame:
    """
    Returns a 1-row stats table with:
      - Streamflow gage name
      - Average winter flow (Nov–Mar)
      - Average spring flow (Apr–Jun)
      - Baseflow (if provided via baseflow_col)

    Assumes df has datetime and flow columns.
    """

    d = df.copy()
    d[dt_col] = pd.to_datetime(d[dt_col])
    d[flow_col] = pd.to_numeric(d[flow_col], errors="coerce")

    # Basic cleanup
    d = d.dropna(subset=[dt_col, flow_col])
    d["month"] = d[dt_col].dt.month

    # Define seasons
    winter_mask = d["month"].isin([11, 12, 1, 2, 3])   # Nov–Mar
    spring_mask = d["month"].isin([4, 5, 6])          # Apr–Jun

    winter_avg = d.loc[winter_mask, flow_col].mean()
    spring_avg = d.loc[spring_mask, flow_col].mean()

    # Baseflow: only if you actually have a column of estimates
    baseflow_val = None
    if baseflow_col is not None and baseflow_col in d.columns:
        d[baseflow_col] = pd.to_numeric(d[baseflow_col], errors="coerce")
        baseflow_val = d[baseflow_col].dropna().mean()  # long-term mean baseflow estimate

    # Build table (match your screenshot vibe)
    out = pd.DataFrame([{
        "Streamflow gage name": gage_name,
        "Average winter flow (Nov to March)": winter_avg,
        "Average spring flow (Apr to Jun)": spring_avg,
        "Baseflow (if USGS has estimates)": baseflow_val
    }])

    # Optional: make it pretty (rounding)
    out["Average winter flow (Nov to March)"] = out["Average winter flow (Nov to March)"].round(2)
    out["Average spring flow (Apr to Jun)"] = out["Average spring flow (Apr to Jun)"].round(2)
    if baseflow_val is not None:
        out["Baseflow (if USGS has estimates)"] = out["Baseflow (if USGS has estimates)"].round(2)

    return out

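Step 4: **Change the gage ID on the 5th line** of the cell below, run the cell, and then scroll to the bottom of the code to use the buttons and see the data. "Load / Preview Data" fetches the record with the function from Step 2, and "Make Stats Table" shows the stats table. **Wait about 3 minutes** before downloading the data :D. Check that the total values match.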

In [None]:
# Shared state for the gage that is currently loaded: its id, raw data and stats table
CURRENT = dict(site_no=None, df=pd.DataFrame(), stats=pd.DataFrame())

site_box = widgets.Text(
    value="11351948", # change the gage, this one is PIT R NR CANBY CA
    layout=widgets.Layout(width="300px"),
    description="USGS site:",
)

# Only "Load" is usable at first; the other buttons start disabled and are
# switched on by the click handlers once there is something to act on.
load_btn = widgets.Button(description="Load / Preview Data", button_style="primary")
download_raw_btn = widgets.Button(description="Download Raw CSV", button_style="", disabled=True)
stats_btn = widgets.Button(description="Make Stats Table", button_style="success", disabled=True)
download_stats_btn = widgets.Button(description="Download Stats CSV", button_style="", disabled=True)

# Every handler prints and displays into this one output area.
out = widgets.Output()


def on_load_clicked(_):
    """Load the period of record for the site in ``site_box`` and preview it.

    On success the data is stored in ``CURRENT`` and the raw-download and
    stats buttons are enabled. If the site box is blank, the fetch raises, or
    no rows come back, ``CURRENT`` is reset to empty data and all dependent
    buttons are disabled, so data from a previously loaded gage can never be
    downloaded under the impression that it belongs to the new one.

    Args:
        _: The clicked button (unused).
    """
    with out:
        clear_output()
        site_no = site_box.value.strip()

        df = pd.DataFrame()
        problem = None

        if not site_no:
            # Nothing sensible to ask the service for.
            problem = "Enter a USGS site number before loading."
        else:
            print(f"Loading daily mean discharge (00060) for site {site_no} (period of record)...")
            try:
                df = fetch_daily_flow_period_of_record(site_no)
            except Exception as err:
                # UI boundary: report the failure in the output area and fall
                # through to the reset below instead of keeping stale state.
                df = pd.DataFrame()
                problem = f"Could not load data for site {site_no}: {err}"
            else:
                if df.empty:
                    problem = "No data returned. Double-check the gage ID or that it has daily discharge data."

        if problem is not None:
            print(problem)
            CURRENT.update({"site_no": site_no, "df": df, "stats": pd.DataFrame()})
            download_raw_btn.disabled = True
            stats_btn.disabled = True
            download_stats_btn.disabled = True
            return

        # A fresh load invalidates any stats table built from the previous data.
        CURRENT["site_no"] = site_no
        CURRENT["df"] = df
        CURRENT["stats"] = pd.DataFrame()

        tmin = df["datetime"].min().date()
        tmax = df["datetime"].max().date()
        n = df["q_cfs"].notna().sum()

        print(f"Available data: {tmin} to {tmax}")
        print(f"Number of daily values: {n:,}")
        display(df.head(5))
        print("…")
        display(df.tail(5))

        download_raw_btn.disabled = False
        stats_btn.disabled = False
        download_stats_btn.disabled = True


def on_download_raw(_):
    """Write the loaded daily record to a CSV and pass it to ``files.download``.

    Prints a notice and does nothing else when no data has been loaded.

    Args:
        _: The clicked button (unused).
    """
    with out:
        loaded = CURRENT["df"]
        if loaded.empty:
            print("No data loaded yet.")
        else:
            gage_id = CURRENT["site_no"]
            # The site number goes into the file name so downloads from
            # different gages do not overwrite each other.
            csv_name = f"{gage_id}_daily_discharge_00060_period_of_record.csv"
            loaded.to_csv(csv_name, index=False)
            print(f"Saved: {csv_name}")
            files.download(csv_name)


def on_stats_clicked(_):
    """Build the seasonal stats table for the loaded gage and show it.

    The table is stored in ``CURRENT["stats"]`` and the stats-download button
    is enabled. Prints a notice and stops when no data has been loaded.

    Args:
        _: The clicked button (unused).
    """
    with out:
        clear_output()

        flows = CURRENT["df"]
        if flows.empty:
            print("No data loaded yet.")
            return

        gage_id = CURRENT["site_no"]

        # One-row summary (Nov–Mar and Apr–Jun averages), labelled with the site number.
        table = make_season_stats_table(
            df=flows,
            gage_name=f"USGS {gage_id}",
            flow_col="q_cfs",
            dt_col="datetime",
            baseflow_col=None,  # set to a column name if you actually have USGS baseflow estimates
        )

        # Put identifying metadata in front of the statistics.
        dates = flows["datetime"]
        leading_columns = (
            ("site_no", gage_id),
            ("period_start", dates.min().date()),
            ("period_end", dates.max().date()),
        )
        for position, (label, value) in enumerate(leading_columns):
            table.insert(position, label, value)

        CURRENT["stats"] = table

        print("Stats table:")
        display(table)

        download_stats_btn.disabled = False


def on_download_stats(_):
    """Write the current stats table to a CSV and pass it to ``files.download``.

    Prints a notice and does nothing else when no stats table has been built.

    Args:
        _: The clicked button (unused).
    """
    with out:
        table = CURRENT["stats"]
        if table.empty:
            print("Stats not created yet.")
        else:
            gage_id = CURRENT["site_no"]
            csv_name = f"{gage_id}_stream_stats_period_of_record.csv"
            table.to_csv(csv_name, index=False)
            print(f"Saved: {csv_name}")
            files.download(csv_name)


# Controls laid out in a single row.
ui = widgets.HBox(children=[site_box, load_btn, download_raw_btn, stats_btn, download_stats_btn])

# Connect each button to its click handler.
download_stats_btn.on_click(on_download_stats)
stats_btn.on_click(on_stats_clicked)
download_raw_btn.on_click(on_download_raw)
load_btn.on_click(on_load_clicked)

# Show the control row followed by the shared output area.
display(ui, out)


HBox(children=(Text(value='11351948', description='USGS site:', layout=Layout(width='300px')), Button(button_s…

Output()