In [1]:
import pandas as pd
import numpy as np

# 1. Load Reno PM2.5 data (adjust the filename if you changed the dates)
df = pd.read_csv("../data/pm25_Reno_2024-07-01.csv",
                 parse_dates=["datetime_utc"])

# 2. Daily mean concentration.
#    `resample("1D")` yields one row per calendar day between the first and
#    last timestamp; days without any usable reading come out as NaN.  Keep
#    that gap-free grid around so the forecast step below can tell truly
#    consecutive days apart from days separated by missing data.
daily_grid = (df.set_index("datetime_utc")
                .resample("1D")["pm25"]
                .mean())
daily = daily_grid.dropna()                # only the days that have a mean

print("✅ Got daily means for", len(daily), "day(s)")

# 3. Persistence baseline: "tomorrow = today".
#    Shift on the regular calendar grid (NOT on the NaN-dropped series), so
#    each day is paired with the *next calendar day* rather than with the
#    next day that happens to have data.  Pairs where either side is missing
#    are discarded.
next_day = daily_grid.shift(-1)
has_pair = daily_grid.notna() & next_day.notna()

y_true = next_day[has_pair]                # tomorrow's actual value (indexed by today)
y_pred = daily_grid[has_pair]              # today's value, used as the forecast

if len(y_true) == 0:
    print("Not enough consecutive days. "
          "Re‑download a longer date range in 01_download.ipynb.")
else:
    # 4. Mean Absolute Error (manual, no scikit-learn needed)
    mae = np.abs(y_true.to_numpy() - y_pred.to_numpy()).mean()
    print(f"Baseline MAE = {mae:.2f} µg/m³ over {len(y_true)} day‑pairs")


✅ Got daily means for 2063 day(s)
Baseline MAE = 3.91 µg/m³ over 2062 day‑pairs
