# Oregon Air Quality Dashboard (PM2.5 and Ozone)

This notebook shows county-level PM2.5 and Ozone in Oregon over time, using:
- AQS historical data (2020–2024)
- Oregon county boundaries (GeoJSON)
- An interactive date slider

Use the pollutant dropdown and the date slider below to explore how PM2.5 and Ozone vary by county on different days.


In [1]:
from pathlib import Path
import json

import pandas as pd
import plotly.express as px

import plotly.io as pio

import ipywidgets as widgets
from IPython.display import display, clear_output

# Plotly renderer used by every fig.show() call in this notebook.
pio.renderers.default = "notebook"  # try this first
# If that still shows nothing, later try: pio.renderers.default = "iframe_connected"

# Base data directory (relative to this notebook in /notebooks)
DATA_DIR = Path("../data").resolve()

# Load Oregon-only counties GeoJSON.
# The encoding is given explicitly: without it open() falls back to the platform
# default (not UTF-8 on every OS), which can mis-decode non-ASCII text in the file.
with open(DATA_DIR / "oregon_counties.geojson", "r", encoding="utf-8") as f:
    oregon_geojson = json.load(f)

In [2]:
# Daily AQS extracts, one Parquet file per pollutant.
# Adjust these filenames to whatever you used for your own PM2.5 / ozone daily data.
pm25_path = DATA_DIR.joinpath("aqs_daily_pm25_20200101_20241231.parquet")
ozone_path = DATA_DIR.joinpath("aqs_daily_ozone_20200101_20241231.parquet")

df_pm25 = pd.read_parquet(pm25_path)
df_ozone = pd.read_parquet(ozone_path)

In [3]:
# Unique county names found in the GeoJSON features, sorted; this list defines
# which counties every per-date table and map contains.
geo_counties = sorted(
    set(feature["properties"]["NAME"] for feature in oregon_geojson["features"])
)

In [4]:
# Exploratory cell: left-join every PM2.5 row onto the full list of GeoJSON county
# names so that counties without any rows still appear in the result.
# NOTE(review): the helper functions in later cells build their own local
# `df_geo` / `df_map`; nothing visible in this notebook reads these module-level
# frames afterwards — confirm whether this cell is still needed.
df_geo = pd.DataFrame({"county": geo_counties})

# NOTE(review): this renames `county_clean` to `county`, whereas later cells group
# directly on a column named `county`. TODO confirm which of the two columns
# df_pm25 actually has; if it has both, the rename would yield two `county` columns.
df_map = df_geo.merge(
    df_pm25.rename(columns={"county_clean": "county"}),
    how="left",
    on="county"
)


In [5]:
# Column names read (as globals) by the PM2.5 helper functions in the following cells.
date_col = "date_local"  # column compared against the selected date
county_col = "county"  # column the daily values are grouped by
value_col = "arithmetic_mean"  # column that gets averaged; change if your PM2.5 value column differs


In [6]:
def compute_pm25_by_county(df, target_date):
    """Average PM2.5 per county for a single date.

    Rows of ``df`` whose ``date_col`` equals ``target_date`` are grouped by
    ``county_col`` and ``value_col`` is averaged. The result is left-joined
    onto the full ``geo_counties`` list, so a county with no rows on that
    date is still present, with NaN as its mean.

    Returns a DataFrame with columns ['county', 'pm25_mean'], one row per
    entry of ``geo_counties``.
    """
    is_target_day = df[date_col] == target_date

    # One mean per county that reported on the target date.
    per_county = df[is_target_day].groupby(county_col, as_index=False).agg(
        pm25_mean=(value_col, "mean")
    )
    per_county = per_county.rename(columns={county_col: "county"})

    # The left join keeps every GeoJSON county, reporting or not.
    all_counties = pd.DataFrame({"county": geo_counties})
    return all_counties.merge(per_county, how="left", on="county")


In [7]:
def summarize_pm25_for_date(df, target_date):
    """
    Summarise county-level PM2.5 for one date.

    Uses compute_pm25_by_county and ignores counties whose mean is NaN.

    Returns a dict with keys statewide_mean (mean of the county means),
    worst_county / worst_value (highest county mean) and
    best_county / best_value (lowest county mean). Every value is None when
    no county has data on that date.
    """
    county_means = compute_pm25_by_county(df, target_date)
    has_value = county_means["pm25_mean"].notna()
    reporting = county_means[has_value]

    if reporting.empty:
        return dict.fromkeys(
            ("statewide_mean", "worst_county", "worst_value", "best_county", "best_value")
        )

    means = reporting["pm25_mean"]
    highest = reporting.loc[means.idxmax()]
    lowest = reporting.loc[means.idxmin()]

    return {
        "statewide_mean": means.mean(),
        "worst_county": highest["county"],
        "worst_value": highest["pm25_mean"],
        "best_county": lowest["county"],
        "best_value": lowest["pm25_mean"],
    }


In [8]:
def plot_pm25_map_for_date(df, target_date):
    """Build a county choropleth of mean PM2.5 for one date.

    The per-county values come from compute_pm25_by_county; counties are matched
    to GeoJSON features through ``properties.NAME``. The colour scale spans the
    minimum to the maximum county mean of that date.

    Returns the Plotly figure (it is not shown here).
    """
    # Compute the county-level data for the chosen date
    df_map = compute_pm25_by_county(df, target_date)

    # Guard against all-NaN days: min()/max() of a column with no values are NaN,
    # which would hand Plotly an undefined colour range. This mirrors the guard
    # used by the ozone map.
    if df_map["pm25_mean"].notna().any():
        vmin = df_map["pm25_mean"].min()
        vmax = df_map["pm25_mean"].max()
    else:
        vmin, vmax = 0, 1  # dummy range; map will just look blank

    # Build the map
    fig = px.choropleth_mapbox(
        df_map,
        geojson=oregon_geojson,
        locations="county",
        featureidkey="properties.NAME",
        color="pm25_mean",
        color_continuous_scale="Viridis",
        range_color=(vmin, vmax),
        mapbox_style="carto-positron",
        zoom=4.4,
        center={"lat": 44.0, "lon": -120.5},
        opacity=0.75,
    )

    fig.update_layout(
        title=f"Oregon PM2.5 Mean by County on {target_date}",
        margin={"r": 0, "t": 40, "l": 0, "b": 0},
    )

    return fig


In [9]:
# Column names read (as globals) by the ozone helper functions in the following cells.
oz_date_col = "date_local"  # column compared against the selected date
oz_county_col = "county"  # column the daily values are grouped by
oz_value_col = "arithmetic_mean"  # column that gets averaged; change if needed after you see columns

In [10]:
def compute_ozone_by_county(df, target_date):
    """Average ozone per county for a single date.

    Keeps the rows of ``df`` whose ``oz_date_col`` equals ``target_date``,
    averages ``oz_value_col`` per ``oz_county_col``, and left-joins the result
    onto the full ``geo_counties`` list so that counties without rows on that
    date appear with NaN.

    Returns a DataFrame with columns ['county', 'ozone_mean'].
    """
    on_target_day = df[oz_date_col] == target_date

    county_means = (
        df[on_target_day]
        .groupby(oz_county_col, as_index=False)
        .agg(ozone_mean=(oz_value_col, "mean"))
        .rename(columns={oz_county_col: "county"})
    )

    every_county = pd.DataFrame({"county": geo_counties})
    return every_county.merge(county_means, how="left", on="county")


In [11]:
def plot_ozone_map_for_date(df, target_date):
    """Build a county choropleth of mean ozone for one date.

    The per-county values come from compute_ozone_by_county; counties are
    matched to GeoJSON features through ``properties.NAME``.

    NOTE(review): a later cell defines a function with this same name; whichever
    cell ran last is the one the dashboard calls.

    Returns the Plotly figure (it is not shown here).
    """
    df_map = compute_ozone_by_county(df, target_date)

    # Guard against all-NaN days: min()/max() of a column with no values are NaN,
    # which would hand Plotly an undefined colour range.
    if df_map["ozone_mean"].notna().any():
        vmin = df_map["ozone_mean"].min()
        vmax = df_map["ozone_mean"].max()
    else:
        vmin, vmax = 0, 1  # dummy range; map will just look blank

    fig = px.choropleth_mapbox(
        df_map,
        geojson=oregon_geojson,
        locations="county",
        featureidkey="properties.NAME",
        color="ozone_mean",
        color_continuous_scale="Viridis",
        range_color=(vmin, vmax),
        mapbox_style="carto-positron",
        zoom=4.4,
        center={"lat": 44.0, "lon": -120.5},
        opacity=0.75,
    )

    fig.update_layout(
        title=f"Oregon Ozone Mean by County on {target_date}",
        margin={"r": 0, "t": 40, "l": 0, "b": 0},
    )

    return fig


In [12]:
# NOTE(review): an earlier cell already defines compute_ozone_by_county with the
# same logic; running this cell simply rebinds the name. Consider keeping one copy.
def compute_ozone_by_county(df, target_date):
    """Return a df with columns ['county', 'ozone_mean'] for one date.

    Rows whose ``oz_date_col`` equals ``target_date`` are averaged per
    ``oz_county_col`` and left-joined onto the full ``geo_counties`` list, so
    counties with no rows on that date are kept with NaN.
    """
    # Keep only the rows for the requested date.
    subset = df[df[oz_date_col] == target_date]

    # One mean per county that has rows on that date.
    ozone_latest = (
        subset
        .groupby(oz_county_col, as_index=False)
        .agg(ozone_mean=(oz_value_col, "mean"))
    )

    # Full county list, so the left join below keeps every county.
    df_geo = pd.DataFrame({"county": geo_counties})

    df_map = df_geo.merge(
        ozone_latest.rename(columns={oz_county_col: "county"}),
        on="county",
        how="left"
    )

    return df_map


def summarize_ozone_for_date(df, target_date):
    """
    Summarise county-level ozone for one date.

    Uses compute_ozone_by_county and ignores counties whose mean is NaN.

    Returns a dict with statewide_mean (mean of the county means),
    worst_county / worst_value (highest county mean) and
    best_county / best_value (lowest county mean). Every value is None when
    no county has data on that date.
    """
    county_means = compute_ozone_by_county(df, target_date)
    has_value = county_means["ozone_mean"].notna()
    reporting = county_means[has_value]

    if reporting.empty:
        return dict.fromkeys(
            ("statewide_mean", "worst_county", "worst_value", "best_county", "best_value")
        )

    means = reporting["ozone_mean"]
    highest = reporting.loc[means.idxmax()]
    lowest = reporting.loc[means.idxmin()]

    return {
        "statewide_mean": means.mean(),
        "worst_county": highest["county"],
        "worst_value": highest["ozone_mean"],
        "best_value": lowest["ozone_mean"],
        "best_county": lowest["county"],
    }


In [13]:
def plot_ozone_map_for_date(df, target_date):
    """Build a county choropleth of mean ozone for one date.

    County values come from compute_ozone_by_county and are matched to the
    GeoJSON features through ``properties.NAME``. The colour scale spans the
    minimum to the maximum county mean; when no county has a value the scale
    falls back to (0, 1).

    Returns the Plotly figure (it is not shown here).
    """
    county_means = compute_ozone_by_county(df, target_date)
    ozone = county_means["ozone_mean"]

    # All-NaN days have no meaningful min/max; use a dummy range so the map
    # will just look blank.
    color_range = (ozone.min(), ozone.max()) if ozone.notna().any() else (0, 1)

    fig = px.choropleth_mapbox(
        county_means,
        geojson=oregon_geojson,
        locations="county",
        featureidkey="properties.NAME",
        color="ozone_mean",
        color_continuous_scale="Viridis",
        range_color=color_range,
        mapbox_style="carto-positron",
        zoom=4.4,
        center={"lat": 44.0, "lon": -120.5},
        opacity=0.75,
    )
    fig.update_layout(
        title=f"Oregon Ozone Mean by County on {target_date}",
        margin={"r": 0, "t": 40, "l": 0, "b": 0},
    )
    return fig


In [14]:
# Name of the date column. It is assigned before its first use in this cell and
# repeats the value from the earlier configuration cell.
date_col = "date_local"

# Slider choices are the dates present in the PM2.5 data only; for a date on which
# the ozone data has no rows the dashboard prints a "no data" message instead.
available_dates = sorted(df_pm25[date_col].unique())

pollutant_dropdown = widgets.Dropdown(
    options=["PM2.5", "Ozone"],
    value="PM2.5",
    description="Pollutant:",
)

# continuous_update=False: redraw once the slider is released rather than on
# every intermediate position.
date_selector = widgets.SelectionSlider(
    options=available_dates,
    value=available_dates[-1],  # start on the most recent date
    description="Date:",
    continuous_update=False,
    layout=widgets.Layout(width="90%")
)

# One output area per dashboard panel, so each can be cleared and redrawn separately.
output_map = widgets.Output()
output_stats = widgets.Output()
output_line = widgets.Output()

def make_statewide_mean_figure(df: pd.DataFrame, title_prefix: str, anchor_date, n: int = 90):
    """
    Line chart of the statewide daily mean ending at ``anchor_date``.

    Rows dated after ``anchor_date`` are dropped, ``arithmetic_mean`` is
    averaged per date, and at most the last ``n`` dates are plotted. The title
    is ``title_prefix`` followed by "(through <anchor_date>)". A dashed
    vertical line marks ``anchor_date`` when that date is among the plotted ones.

    Returns None when ``df`` is empty, lacks the date or ``arithmetic_mean``
    column, or has no rows on or before ``anchor_date``.

    NOTE(review): a later cell defines a function with this same name; whichever
    cell ran last is the one update_dashboard calls.
    """
    mean_col = "arithmetic_mean"
    if df.empty or date_col not in df.columns or mean_col not in df.columns:
        return None

    # Daily statewide mean for every date up to and including the anchor.
    up_to_anchor = df[df[date_col] <= anchor_date]
    series = up_to_anchor.groupby(date_col, as_index=False)[mean_col].mean()
    series = series.rename(columns={mean_col: "statewide_daily_mean"}).sort_values(date_col)

    if series.empty:
        return None

    window = series.tail(n) if len(series) > n else series

    fig = px.line(
        window,
        x=date_col,
        y="statewide_daily_mean",
        title=f"{title_prefix} (through {anchor_date})",
        markers=True,
    )
    fig.update_layout(height=360, margin=dict(l=10, r=10, t=40, b=10))

    # Emphasise the anchor date when it is one of the plotted points.
    if anchor_date in window[date_col].values:
        fig.add_vline(x=anchor_date, line_dash="dash", line_width=1)

    return fig


def update_dashboard(change=None):
    """Redraw the map, the summary text and the line chart for the current selection.

    Reads the selected date and pollutant from the widgets and renders each
    panel inside its own output area. ``change`` is accepted so the function can
    be registered as a widget observer; it is not used.
    """
    chosen_date = date_selector.value
    is_pm25 = pollutant_dropdown.value == "PM2.5"

    # Panel 1: county map
    with output_map:
        clear_output(wait=True)
        map_fig = (
            plot_pm25_map_for_date(df_pm25, chosen_date)
            if is_pm25
            else plot_ozone_map_for_date(df_ozone, chosen_date)
        )
        map_fig.show()

    # Panel 2: summary statistics
    with output_stats:
        clear_output(wait=True)
        if is_pm25:
            metric_name = "PM2.5"
            summary = summarize_pm25_for_date(df_pm25, chosen_date)
        else:
            metric_name = "Ozone"
            summary = summarize_ozone_for_date(df_ozone, chosen_date)

        if summary["statewide_mean"] is not None:
            print(f"Date: {chosen_date}")
            print(f"Pollutant: {metric_name}")
            print(f"Statewide mean (counties with data): {summary['statewide_mean']:.2f}")
            print(f"Worst county: {summary['worst_county']} ({summary['worst_value']:.2f})")
            print(f"Best county: {summary['best_county']} ({summary['best_value']:.2f})")
        else:
            print(f"No {metric_name} data available for this date.")

    # Panel 3: statewide mean over the last 90 days up to the selected date
    with output_line:
        clear_output(wait=True)
        if is_pm25:
            line_df, line_title = df_pm25, "Oregon PM2.5 statewide mean (last 90 days)"
        else:
            line_df, line_title = df_ozone, "Oregon Ozone statewide mean (last 90 days)"

        fig_line = make_statewide_mean_figure(
            line_df,
            line_title,
            anchor_date=chosen_date,
            n=90,
        )

        if fig_line is not None:
            fig_line.show()
        else:
            print("No data available for line chart.")

# Redraw the dashboard whenever the value of either control changes.
pollutant_dropdown.observe(update_dashboard, names="value")
date_selector.observe(update_dashboard, names="value")

# Lay out the two controls followed by the three output areas (map, stats, line chart).
display(pollutant_dropdown, date_selector, output_map, output_stats, output_line)

# Draw once for the default selections so the panels are not empty on first display.
update_dashboard()


Dropdown(description='Pollutant:', options=('PM2.5', 'Ozone'), value='PM2.5')

SelectionSlider(continuous_update=False, description='Date:', index=1826, layout=Layout(width='90%'), options=…

Output()

Output()

Output()

---

## Live AirNow Snapshot (Station Level)

This section pulls the latest AirNow observations for Oregon (PM2.5 and Ozone)
and plots them as station markers. It uses the AIRNOW_KEY environment variable
and the same Oregon bounding box as the historical dashboard.


In [15]:
import os
from pathlib import Path
from datetime import datetime, timedelta, timezone

import pandas as pd
import requests

# Data directory (same relative location as at the top of the notebook); created
# if missing so fetch_airnow_current can write its snapshot file there.
DATA_DIR = Path("../data").resolve()
DATA_DIR.mkdir(parents=True, exist_ok=True)

# The API key is read from the environment; fetch_airnow_current skips the
# request entirely when it is unset.
AIRNOW_KEY = os.getenv("AIRNOW_KEY")
# NOTE(review): presumably min lon, min lat, max lon, max lat — confirm against the AirNow API docs.
BBOX = "-124.8,41.9,-116.4,46.4"          # Oregon-ish bbox
PARAMETERS = "PM25,OZONE"  # value sent as the request's "parameters" field
AIRNOW_BASE_URL = "https://www.airnowapi.org/aq/data/"

# Display names for the numeric `Category` column; used to derive `CategoryName`.
cat_map = {
    1: "Good",
    2: "Moderate",
    3: "Unhealthy for Sensitive Groups",
    4: "Unhealthy",
    5: "Very Unhealthy",
    6: "Hazardous",
}


def fetch_airnow_current(hours_back: int = 1) -> pd.DataFrame:
    """
    Fetch recent AirNow observations (PM2.5 + Ozone) for Oregon bbox.

    This function is designed to be safe in environments like Binder:
    - If AIRNOW_KEY is not set, it prints a message and returns an empty DataFrame.
    - If the HTTP request fails, the status is not 200, the response is not
      valid JSON, or the JSON cannot be turned into a table, it prints a
      message and returns an empty DataFrame.
    - The API key is masked in every diagnostic that is printed, so it does not
      end up in saved notebook output.

    On success the frame is cleaned (UTC parsed, numeric columns coerced,
    ``CategoryName`` derived from ``Category``), reduced to the latest record
    per (Latitude, Longitude, Parameter), and written best-effort to a
    timestamped Parquet file in DATA_DIR.

    Parameters
    ----------
    hours_back : int
        Size of the request window in hours, counted back from the start of
        the current UTC hour.

    Returns
    -------
    pd.DataFrame
        Cleaned AirNow data, or an empty DataFrame if anything goes wrong.
    """
    from urllib.parse import quote_plus

    if not AIRNOW_KEY:
        print("AIRNOW_KEY is not set; skipping AirNow fetch (this is expected on Binder).")
        return pd.DataFrame()

    def _redact(text: str) -> str:
        """Mask the API key (raw and URL-encoded) in a diagnostic string."""
        for secret in {AIRNOW_KEY, quote_plus(AIRNOW_KEY)}:
            text = text.replace(secret, "***")
        return text

    # Request window: [start of current UTC hour - hours_back, start of current UTC hour]
    end_dt = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0)
    start_dt = end_dt - timedelta(hours=hours_back)

    params = {
        "format": "application/json",
        "API_KEY": AIRNOW_KEY,
        "bbox": BBOX,
        "parameters": PARAMETERS,
        "startDate": start_dt.strftime("%Y-%m-%dT%H"),
        "endDate":   end_dt.strftime("%Y-%m-%dT%H"),
        "dataType": "A",   # AQI
    }

    try:
        r = requests.get(AIRNOW_BASE_URL, params=params, timeout=60)
    except requests.RequestException as e:
        # The exception text can contain the request URL, query string included.
        print(f"Could not reach AirNow API: {_redact(str(e))}")
        return pd.DataFrame()

    if r.status_code != 200:
        print(f"AirNow API returned status {r.status_code}. Response text (truncated):")
        print(_redact(r.text[:300]))
        return pd.DataFrame()

    try:
        data = r.json()
    except ValueError as e:
        print(f"AirNow response was not valid JSON: {e}")
        print("First 300 chars of response:")
        print(_redact(r.text[:300]))
        return pd.DataFrame()

    # A payload that is valid JSON but not table-shaped (e.g. a bare error
    # object) must not crash the notebook.
    try:
        df = pd.DataFrame(data)
    except (ValueError, TypeError) as e:
        print(f"AirNow response had an unexpected structure: {e}")
        return pd.DataFrame()

    if df.empty:
        print("No AirNow rows returned; try increasing hours_back if running locally.")
        return df

    # Parse time and coerce numerics
    if "UTC" in df.columns:
        df["UTC"] = pd.to_datetime(df["UTC"], errors="coerce", utc=True)

    for c in ["AQI", "Category", "Latitude", "Longitude"]:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors="coerce")

    if "Category" in df.columns:
        df["CategoryName"] = df["Category"].map(cat_map)

    # Keep latest record per (lat, lon, pollutant)
    if {"Latitude", "Longitude", "Parameter", "UTC"}.issubset(df.columns):
        df = (
            df.sort_values("UTC")
              .drop_duplicates(["Latitude", "Longitude", "Parameter"], keep="last")
        )

    # Optional: save a cache file (this is safe even on Binder; it's just ephemeral)
    try:
        stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        out_path = DATA_DIR / f"airnow_observations_or_{stamp}.parquet"
        df.to_parquet(out_path, index=False)
        print(f"Saved live AirNow snapshot → {out_path}")
    except Exception as e:
        # If parquet save fails (e.g., missing pyarrow), do not crash the app
        print(f"Could not save AirNow snapshot to parquet: {e}")

    return df


In [16]:
import plotly.express as px

# Fetch the latest AirNow snapshot and plot the PM2.5 stations.
df_live = fetch_airnow_current(hours_back=1)

# Columns the station map needs; checked up front so an unexpected response
# shape yields a message instead of a KeyError.
required_live_cols = {"Latitude", "Longitude", "AQI", "Parameter"}

if df_live.empty:
    print("No live AirNow data available (this is expected on Binder without an API key).")
elif not required_live_cols.issubset(df_live.columns):
    print("No live AirNow data to plot.")
else:
    df_live_pm25 = df_live[df_live["Parameter"] == "PM2.5"].copy()
    # AQI drives both colour and marker size. Rows whose AQI is missing or
    # negative are dropped because a marker size has to be a non-negative number.
    df_live_pm25 = df_live_pm25[df_live_pm25["AQI"] >= 0]

    if df_live_pm25.empty:
        print("No live PM2.5 data in this AirNow snapshot.")
    else:
        fig_live_pm25 = px.scatter_mapbox(
            df_live_pm25,
            lat="Latitude",
            lon="Longitude",
            color="AQI",
            color_continuous_scale="Turbo",
            size="AQI",
            hover_data={"Parameter": True, "CategoryName": True, "UTC": True},
            mapbox_style="carto-positron",
            zoom=4.4,
            center={"lat": 44.0, "lon": -120.5},
            opacity=0.9,
        )

        fig_live_pm25.update_layout(
            title="Current PM2.5 AirNow Stations in Oregon (last hour)",
            margin={"r": 0, "t": 40, "l": 0, "b": 0},
        )

        fig_live_pm25.show()


Saved live AirNow snapshot → C:\Users\steve\Documents\oregon-aqi-dashboard\data\airnow_observations_or_20251205T223745Z.parquet



*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [17]:
import plotly.express as px

# Fetch the latest AirNow snapshot and plot the ozone stations.
df_live = fetch_airnow_current(hours_back=1)

# Columns the station map needs; checked up front so an unexpected response
# shape yields a message instead of a KeyError.
required_live_cols = {"Latitude", "Longitude", "AQI", "Parameter"}

if df_live.empty:
    print("No live AirNow data available (this is expected on Binder without an API key).")
elif not required_live_cols.issubset(df_live.columns):
    print("No live AirNow data to plot.")
else:
    # Ozone gets its own names so the PM2.5 frame and figure from the previous
    # cell are not overwritten.
    df_live_ozone = df_live[df_live["Parameter"] == "OZONE"].copy()
    # AQI drives both colour and marker size. Rows whose AQI is missing or
    # negative are dropped because a marker size has to be a non-negative number.
    df_live_ozone = df_live_ozone[df_live_ozone["AQI"] >= 0]

    if df_live_ozone.empty:
        print("No ozone live data in this fetch.")
    else:
        fig_live_ozone = px.scatter_mapbox(
            df_live_ozone,
            lat="Latitude",
            lon="Longitude",
            color="AQI",
            color_continuous_scale="Turbo",
            size="AQI",
            hover_data={"Parameter": True, "CategoryName": True, "UTC": True},
            mapbox_style="carto-positron",
            zoom=4.4,
            center={"lat": 44.0, "lon": -120.5},
            opacity=0.9,
        )

        fig_live_ozone.update_layout(
            title="Current Ozone AirNow Stations in Oregon (last hour)",
            margin={"r": 0, "t": 40, "l": 0, "b": 0},
        )

        fig_live_ozone.show()


Saved live AirNow snapshot → C:\Users\steve\Documents\oregon-aqi-dashboard\data\airnow_observations_or_20251205T223802Z.parquet



*scatter_mapbox* is deprecated! Use *scatter_map* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



In [18]:
import plotly.express as px

def make_statewide_mean_figure(df: pd.DataFrame, title: str, n: int = 90, anchor_date=None):
    """
    Return a Plotly figure of statewide daily mean for the last n days.

    ``arithmetic_mean`` is averaged per ``date_local`` and at most the last
    ``n`` dates are plotted.

    ``anchor_date`` is optional. It is accepted because this definition rebinds
    the name that update_dashboard calls with ``anchor_date=...``; without the
    parameter every widget change after this cell has run would raise a
    TypeError. When given, rows dated after ``anchor_date`` are dropped,
    "(through <anchor_date>)" is appended to the title, and a dashed vertical
    line marks the anchor date if it is among the plotted dates. When omitted,
    the whole series is used and the title is left as passed.

    Returns None when ``df`` is empty, lacks ``date_local`` or
    ``arithmetic_mean``, or has no rows on or before ``anchor_date``.
    """
    if df.empty or "date_local" not in df.columns or "arithmetic_mean" not in df.columns:
        return None

    if anchor_date is not None:
        df = df[df["date_local"] <= anchor_date]
        title = f"{title} (through {anchor_date})"

    daily = (
        df.groupby("date_local", as_index=False)["arithmetic_mean"]
          .mean()
          .rename(columns={"arithmetic_mean": "statewide_daily_mean"})
          .sort_values("date_local")
    )

    # Only reachable when the anchor filter removed every row.
    if daily.empty:
        return None

    if len(daily) > n:
        daily = daily.tail(n)

    fig = px.line(
        daily,
        x="date_local",
        y="statewide_daily_mean",
        title=title,
        markers=True,
    )
    fig.update_layout(height=360, margin=dict(l=10, r=10, t=40, b=10))

    if anchor_date is not None and anchor_date in daily["date_local"].values:
        fig.add_vline(x=anchor_date, line_dash="dash", line_width=1)

    return fig
