In [2]:
# --- Imports
import os, glob, re
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# --- Paths
# Folder that is globbed for the daily GeoPackage files.
DATA_DIR = r"C:\Users\krish\Desktop\SpatialCARE\DailyGPKG"
# Folder that receives the tidy CSV and the per-station PNG timelines.
OUT_DIR  = r"C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI"
# Create the output folder (and any missing parents) up front;
# exist_ok=True keeps re-runs from failing when it already exists.
os.makedirs(OUT_DIR, exist_ok=True)

# --- Column helpers
# Candidate column names, listed in priority order: pick() returns the first
# candidate that is present in a layer's columns.
PM_CANDS        = ["pm25","PM25","PM_25","PM2_5","PM2.5"]
STATION_CANDS   = ["stations","station","Station","STATION"]
# Longitude/latitude candidates are only consulted when a layer has no
# station-name column, to build a "lat,lon" station identifier instead.
LON_CANDS       = ["longitude","lon","LONG","x"]
LAT_CANDS       = ["latitude","lat","LAT","y"]

def pick(cols, cands):
    """Return the first name in *cands* that is present in *cols*.

    Candidates are tried in the order given, so earlier entries take
    priority. Returns ``None`` when none of them is found.
    """
    return next((name for name in cands if name in cols), None)

def safe_filename(s):
    """Turn *s* into a string usable as part of a file name.

    The value is converted with ``str()``, every character that is not a
    word character, whitespace or ``-`` is removed, surrounding whitespace
    is stripped, and each remaining space becomes an underscore.
    """
    cleaned = re.sub(r"[^\w\s\-]+", "", str(s))
    # Splitting on a single space and re-joining maps every space to "_",
    # including each one in a run of consecutive spaces.
    return "_".join(cleaned.strip().split(" "))

# --- AQI categories for PM2.5 (µg/m³, 24-hr) — from your table
# AQI_BINS holds the bin edges; AQI_LABELS and AQI_COLORS are parallel lists
# with one entry per bin (so each has len(AQI_BINS) - 1 entries).
# Ranges (inclusive on the upper bound of each bin):
# Good: 0–25.0 | Fair: 25.1–35.0 | Unhealthy (sensitive): 35.1–45.0
# Very unhealthy: 45.1–55.0 | Acutely unhealthy: 55.1–90.0 | Emergency: > 90.0
# NOTE: the last finite edge is 90.0, so every reading above 90.0 is binned
# as Emergency. The label states ">90.0" (not ">91") so that the legend
# matches how the points are actually coloured.
AQI_BINS   = [0, 25.0, 35.0, 45.0, 55.0, 90.0, float('inf')]
AQI_LABELS = [
    "Good (0–25.0)",
    "Fair (25.1–35.0)",
    "Unhealthy (sensitive) (35.1–45.0)",
    "Very unhealthy (45.1–55.0)",
    "Acutely unhealthy (55.1–90.0)",
    "Emergency (>90.0)",
]
AQI_COLORS = [
    "#00E400",  # Good - green
    "#FFFF00",  # Fair - yellow
    "#FF7E00",  # Unhealthy sensitive - orange
    "#FF0000",  # Very unhealthy - red
    "#8F3F97",  # Acutely unhealthy - purple
    "#7E0023",  # Emergency - maroon
]

def categorize_pm25(val):
    """Return the ``(label, color)`` AQI category for a PM2.5 reading.

    Args:
        val: The reading; anything ``float()`` accepts.

    Returns:
        The ``(AQI_LABELS[i], AQI_COLORS[i])`` pair of the first bin whose
        upper edge (``AQI_BINS[i + 1]``) is greater than or equal to the
        reading. Only upper edges are compared, so readings below the first
        edge also land in the first category.

    Raises:
        ValueError: If the reading is NaN. A NaN fails every ``<=``
            comparison and would otherwise fall through to the last
            ("Emergency") category.
    """
    import math  # local import: the file's import header does not provide math

    v = float(val)
    if math.isnan(v):
        raise ValueError("PM2.5 value is NaN; cannot assign an AQI category")

    for label, color, upper in zip(AQI_LABELS, AQI_COLORS, AQI_BINS[1:]):
        if v <= upper:
            return label, color
    # Not reachable while AQI_BINS ends with +inf; kept as a safety net in
    # case the bin table is edited to end with a finite edge.
    return AQI_LABELS[-1], AQI_COLORS[-1]

# --- Gather daily files
# Every file in DATA_DIR named date_2025-*.gpkg is one daily layer; the
# paths are put in sorted order before processing.
daily_pattern = os.path.join(DATA_DIR, "date_2025-*.gpkg")
files = glob.glob(daily_pattern)
files.sort()
if not files:
    # Nothing to process: stop the script with a readable message.
    raise SystemExit("No daily GPKG files found.")

# --- Build tidy table: date, station, pm25
# One record per station reading that has a usable (non-NaN) PM2.5 value.
rows = []
for f in files:
    # The day is taken from the file name: "date_<day>.gpkg" -> "<day>".
    day = os.path.splitext(os.path.basename(f))[0].replace("date_", "")
    # Parse the date once per file instead of once per row. A name that does
    # not parse yields NaT; its rows would be dropped later anyway, so skip
    # the file up front and say so.
    day_ts = pd.to_datetime(day, errors="coerce")
    if pd.isna(day_ts):
        print("Skip (unparseable date in name):", f)
        continue

    g = gpd.read_file(f)

    pm_col = pick(g.columns, PM_CANDS)
    if pm_col is None:
        print("Skip (no PM2.5):", f)
        continue

    # Station name from 'stations' column if available; else fallbacks
    st_col = pick(g.columns, STATION_CANDS)
    if st_col is not None:
        st_series = g[st_col].astype(str)
    else:
        lon_col = pick(g.columns, LON_CANDS)
        lat_col = pick(g.columns, LAT_CANDS)
        if lon_col and lat_col:
            # "lat,lon" rounded to 5 decimals acts as the station identifier.
            st_series = (g[lat_col].round(5).astype(str) + "," + g[lon_col].round(5).astype(str))
        elif "geometry" in g and not g.geometry.is_empty.all():
            # Same identifier, read from the geometry itself.
            # NOTE(review): .x/.y assume point geometries — confirm for all layers.
            st_series = (g.geometry.y.round(5).astype(str) + "," + g.geometry.x.round(5).astype(str))
        else:
            # Last resort: the row index as the station identifier.
            st_series = g.index.astype(str)

    # Numeric PM (clean), clip negatives to 0
    pm = pd.to_numeric(g[pm_col], errors="coerce").clip(lower=0)

    # Keep only readings that survived the numeric conversion.
    rows.extend(
        {"date": day_ts, "station": s, "pm25": float(v)}
        for s, v in zip(st_series, pm)
        if pd.notna(v)
    )

# Naming the columns explicitly keeps the frame well-formed even when no rows
# were collected; a column-less frame makes dropna(subset=...) raise KeyError,
# which would hide the readable exit message below.
df = pd.DataFrame(rows, columns=["date", "station", "pm25"]).dropna(subset=["date", "pm25"])
if df.empty:
    raise SystemExit("No usable station PM data after cleaning.")

# --- Average duplicates (same station & day)
# Collapse repeated (station, date) readings to their mean, then order the
# table by station and date.
station_day_mean = df.groupby(["station", "date"], as_index=False)["pm25"].mean()
df = station_day_mean.sort_values(["station", "date"])

# --- Global max across all stations/days for equal y-axes
highest_reading = df["pm25"].max()
global_max = float(highest_reading)

# --- Save tidy CSV for audit
csv_out = os.path.join(OUT_DIR, "timelines_per_station_AQI.csv")
df.to_csv(csv_out, index=False)

# --- Plot one timeline per station (equal y-axis: 0 .. global_max)
# The legend is the same on every figure, so its handles are built once.
patches = [mpatches.Patch(color=col, label=lab)
           for lab, col in zip(AQI_LABELS, AQI_COLORS)]

# If every reading is 0 the limits (0, 0) would be degenerate; fall back to
# an arbitrary positive top so the axis still has a visible range.
y_top = global_max if global_max > 0 else 1.0

for station, d in df.groupby("station", sort=False):
    d = d.sort_values("date")
    # Only the colour of each reading's AQI category is needed for plotting.
    colors = [categorize_pm25(v)[1] for v in d["pm25"]]

    fig, ax = plt.subplots(figsize=(8, 3.2), dpi=150)
    # light grey line for trend
    ax.plot(d["date"], d["pm25"], color="#bbbbbb", linewidth=1.2, zorder=1)
    # colored markers by AQI category
    ax.scatter(d["date"], d["pm25"], c=colors, s=30,
               edgecolors="black", linewidths=0.4, zorder=2)

    ax.set_title(f"PM₂.₅ Timeline — {station}")
    ax.set_ylabel("µg/m³")
    ax.set_xlabel("Date")
    ax.set_ylim(0, y_top)   # <- equal y-axis across all stations
    ax.grid(alpha=0.3)

    # Legend (AQI categories), anchored just outside the right edge of the axes
    ax.legend(handles=patches, bbox_to_anchor=(1.05, 1),
              loc='upper left', borderaxespad=0., fontsize=8)

    out_png = os.path.join(OUT_DIR, f"timeline_AQI_{safe_filename(station)}.png")
    # Call the Figure's own methods instead of pyplot's "current figure"
    # helpers, so the right figure is laid out and saved regardless of
    # pyplot's global state.
    fig.tight_layout()
    fig.savefig(out_png)
    plt.close(fig)  # release the figure so open figures do not accumulate
    print("Saved:", out_png)

print("Tidy CSV:", csv_out)

Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Brgy_San_Antonio_Fire_and_Rescue_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Country_Lodge_Pasig_EMBNCR.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Dela_Paz_Barangay_Hall_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_ICE_Pasig.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Manggahan_Barangay_Hall_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Maybunga_Barangay_Hall_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Maybunga_Rainforest_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_San_Antonio_Barangay_Hall_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_San_Nicolas_Barangay_Ha