In [2]:
# --- Imports
import os, glob, re
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# --- Paths
# Machine-specific absolute Windows paths; edit both before running elsewhere.
# DATA_DIR: folder searched below for the daily "date_2025-*.gpkg" files.
DATA_DIR = r"C:\Users\krish\Desktop\SpatialCARE\DailyGPKG"
# OUT_DIR: destination for the per-station PNGs and the tidy audit CSV.
OUT_DIR  = r"C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI"
# Create the output folder up front; exist_ok=True makes re-running the cell safe.
os.makedirs(OUT_DIR, exist_ok=True)

# --- Column helpers
# Accepted spellings for each logical column, in priority order
# (the first spelling found in a file wins).
PM_CANDS = [
    "pm25", "PM25", "PM_25", "PM2_5", "PM2.5",
]
STATION_CANDS = [
    "stations", "station", "Station", "STATION",
]
LON_CANDS = [
    "longitude", "lon", "LONG", "x",
]
LAT_CANDS = [
    "latitude", "lat", "LAT", "y",
]


def pick(cols, cands):
    """Return the first entry of `cands` that is contained in `cols`.

    Parameters
    ----------
    cols : container supporting ``in`` (e.g. a list or a DataFrame's columns)
    cands : iterable of candidate names, highest priority first

    Returns
    -------
    The matching candidate, or None when no candidate is present.
    """
    return next((name for name in cands if name in cols), None)

def safe_filename(s):
    """Turn an arbitrary value (e.g. a station name) into a filename-safe token.

    Everything except word characters, whitespace and "-" is removed, the
    result is stripped, and each remaining whitespace character becomes "_".

    Parameters
    ----------
    s : any object; it is converted with ``str()`` first.

    Returns
    -------
    str
        The sanitised token, or "unnamed" when nothing usable is left
        (so callers never build a file name with an empty stem).
    """
    cleaned = re.sub(r"[^\w\s\-]+", "", str(s)).strip()
    # Map every whitespace character (not only " ") to "_" so that tabs or
    # newlines embedded in a name can never end up inside a file name.
    cleaned = re.sub(r"\s", "_", cleaned)
    return cleaned or "unnamed"

# --- AQI categories for PM2.5 (µg/m³, 24-hr) — from your table
# Ranges (inclusive on the upper bound of each bin):
# Good: 0–25.0 | Fair: 25.1–35.0 | Unhealthy (sensitive): 35.1–45.0
# Very unhealthy: 45.1–55.0 | Acutely unhealthy: 55.1–90.0 | Emergency: > 91
# AQI_BINS holds the bin edges; AQI_BINS[i + 1] is the inclusive upper bound of
# category i. Because the last edge is +inf, every reading above 90.0 —
# including the 90.0–91 gap left by the table — is classified as Emergency.
AQI_BINS   = [0, 25.0, 35.0, 45.0, 55.0, 90.0, float('inf')]
AQI_LABELS = [
    "Good (0–25.0)",
    "Fair (25.1–35.0)",
    "Unhealthy (sensitive) (35.1–45.0)",
    "Very unhealthy (45.1–55.0)",
    "Acutely unhealthy (55.1–90.0)",
    "Emergency (>91)"
]
AQI_COLORS = [
    "#00E400",  # Good - green
    "#FFFF00",  # Fair - yellow
    "#FF7E00",  # Unhealthy sensitive - orange
    "#FF0000",  # Very unhealthy - red
    "#8F3F97",  # Acutely unhealthy - purple
    "#7E0023"   # Emergency - maroon
]
# Returned for readings that cannot be classified (None / NaN).
NO_DATA_LABEL = "No data"
NO_DATA_COLOR = "#bdbdbd"

def categorize_pm25(val):
    """Return the (label, color) pair of the AQI category containing `val`.

    Parameters
    ----------
    val : number or numeric string, PM2.5 concentration in µg/m³.

    Returns
    -------
    tuple[str, str]
        Entries of AQI_LABELS / AQI_COLORS for the first bin whose upper bound
        is >= the value (values below 0 land in the first bin), or
        (NO_DATA_LABEL, NO_DATA_COLOR) when `val` is None or NaN.

    Raises
    ------
    ValueError / TypeError
        Propagated from ``float(val)`` for non-numeric input.
    """
    if val is None:
        return NO_DATA_LABEL, NO_DATA_COLOR
    v = float(val)
    # NaN compares False against every bound; without this guard it would fall
    # through the loop and be reported as the worst category.
    if v != v:
        return NO_DATA_LABEL, NO_DATA_COLOR
    for label, color, upper in zip(AQI_LABELS, AQI_COLORS, AQI_BINS[1:]):
        if v <= upper:
            return label, color
    # Not reachable while AQI_BINS ends with +inf; kept as a safeguard in case
    # the bin edges are edited.
    return AQI_LABELS[-1], AQI_COLORS[-1]

# --- Gather daily files
# One GeoPackage per day, named "date_<YYYY-MM-DD>.gpkg"; sorted so days are
# processed in chronological (lexicographic) order.
files = sorted(glob.glob(os.path.join(DATA_DIR, "date_2025-*.gpkg")))
if not files:
    raise SystemExit("No daily GPKG files found.")

# --- Build tidy table: date, station, pm25
# `rows` collects one dict per usable (station, reading) pair across all files.
rows = []
for f in files:
    # The day is encoded in the file name: strip the extension and "date_" prefix.
    day = os.path.splitext(os.path.basename(f))[0].replace("date_", "")
    g = gpd.read_file(f)

    pm_col = pick(g.columns, PM_CANDS)
    if pm_col is None:
        print("Skip (no PM2.5):", f); continue

    # Station name from 'stations' column if available; else fallbacks:
    # "lat,lon" from attribute columns, then from the geometry, then the row index.
    st_col = pick(g.columns, STATION_CANDS)
    if st_col is None:
        lon_col = pick(g.columns, LON_CANDS)
        lat_col = pick(g.columns, LAT_CANDS)
        if lon_col and lat_col:
            st_series = (g[lat_col].round(5).astype(str) + "," + g[lon_col].round(5).astype(str))
        elif "geometry" in g and not g.geometry.is_empty.all():
            # NOTE(review): .x/.y assume point geometries — confirm for all inputs.
            st_series = (g.geometry.y.round(5).astype(str) + "," + g.geometry.x.round(5).astype(str))
        else:
            st_series = g.index.astype(str)
    else:
        st_series = g[st_col].astype(str)

    # Numeric PM (clean), clip negatives to 0
    pm = pd.to_numeric(g[pm_col], errors="coerce").clip(lower=0)

    # The date is the same for every row of this file, so parse it once rather
    # than once per row. An unparsable name yields NaT; such rows are dropped
    # when the tidy frame is built.
    file_date = pd.to_datetime(day, errors="coerce")

    rows.extend(
        {"date": file_date, "station": s, "pm25": float(v)}
        for s, v in zip(st_series, pm)
        if pd.notna(v)
    )

# --- Tidy frame: one row per (date, station) reading
# The columns are named explicitly so the frame is well-formed even when `rows`
# is empty (e.g. every file was skipped). Without them, dropna(subset=...)
# raises KeyError on the column-less frame and the clean exit below is never
# reached.
df = pd.DataFrame(rows, columns=["date", "station", "pm25"]).dropna(subset=["date", "pm25"])
if df.empty:
    raise SystemExit("No usable station PM data after cleaning.")

# --- Average duplicates (same station & day)
df = (df.groupby(["station", "date"], as_index=False)["pm25"]
        .mean()
        .sort_values(["station", "date"]))

# --- Global max across all stations/days for equal y-axes
global_max = float(df["pm25"].max())

# --- Save tidy CSV for audit
csv_out = os.path.join(OUT_DIR, "timelines_per_station_AQI.csv")
df.to_csv(csv_out, index=False)

# --- Plot one timeline per station (same y-axis on every figure)
# The legend is identical for every station, so its handles are built once.
patches = [mpatches.Patch(color=col, label=lab)
           for lab, col in zip(AQI_LABELS, AQI_COLORS)]

# Shared upper y-limit. 5 % headroom keeps the highest marker from being
# clipped by the frame; the 1.0 fallback avoids a degenerate (0, 0) axis when
# every reading is zero.
y_top = global_max * 1.05 if global_max > 0 else 1.0

for station, d in df.groupby("station", sort=False):
    d = d.sort_values("date")
    # categorize_pm25 returns (label, color); only the color is needed here.
    colors = [categorize_pm25(v)[1] for v in d["pm25"]]

    fig, ax = plt.subplots(figsize=(8, 3.2), dpi=150)
    # light grey line for trend
    ax.plot(d["date"], d["pm25"], color="#bbbbbb", linewidth=1.2, zorder=1)
    # colored markers by AQI category
    ax.scatter(d["date"], d["pm25"], c=colors, s=30,
               edgecolors="black", linewidths=0.4, zorder=2)

    ax.set_title(f"PM₂.₅ Timeline — {station}")
    ax.set_ylabel("µg/m³"); ax.set_xlabel("Date")
    ax.set_ylim(0, y_top)   # <- equal y-axis across all stations
    ax.grid(alpha=0.3)

    # Legend (AQI categories), anchored outside the axes on the right
    ax.legend(handles=patches, bbox_to_anchor=(1.05, 1),
              loc='upper left', borderaxespad=0., fontsize=8)

    out_png = os.path.join(OUT_DIR, f"timeline_AQI_{safe_filename(station)}.png")
    fig.tight_layout()
    # bbox_inches="tight" grows the saved canvas to include the legend, which
    # sits outside the axes and could otherwise be cropped from the PNG.
    fig.savefig(out_png, bbox_inches="tight")
    plt.close(fig)  # free the figure; many are created in this loop
    print("Saved:", out_png)

print("Tidy CSV:", csv_out)

Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Brgy_San_Antonio_Fire_and_Rescue_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Country_Lodge_Pasig_EMBNCR.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Dela_Paz_Barangay_Hall_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_ICE_Pasig.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Manggahan_Barangay_Hall_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Maybunga_Barangay_Hall_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_Maybunga_Rainforest_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_San_Antonio_Barangay_Hall_Pasig_City.png
Saved: C:\Users\krish\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_AQI_San_Nicolas_Barangay_Ha

In [2]:
# timelines_pm25_and_ph_aqi_with_diurnal.py

# --- Imports
import os, glob, re
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# --- Paths (adjust as needed)
# Machine-specific absolute Windows paths. Note that the input lives under
# Documents while the output goes under Desktop.
# DATA_DIR: folder searched below for the daily "date_2025-*.gpkg" files.
DATA_DIR = r"C:\Users\HP\Documents\SpatialCARE\Daily\DailyGPKG"
# OUT_DIR: destination for the daily audit CSV and all PNG figures.
OUT_DIR  = r"C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI"
# Create the output folder up front; exist_ok=True makes re-running the cell safe.
os.makedirs(OUT_DIR, exist_ok=True)

# --- Column helpers
# Accepted spellings for each logical column, in priority order
# (the first spelling found in a file wins).
PM_CANDS = [
    "pm25", "PM25", "PM_25", "PM2_5", "PM2.5",
]
STATION_CANDS = [
    "stations", "station", "Station", "STATION",
]
LON_CANDS = [
    "longitude", "lon", "LONG", "x",
]
LAT_CANDS = [
    "latitude", "lat", "LAT", "y",
]
DATE_CANDS = [
    "date", "Date", "DATE",
]
TIME_CANDS = [
    "time", "Time", "TIME", "hour", "Hour", "HOUR", "HH",
]


def pick(cols, cands):
    """Return the first entry of `cands` that is contained in `cols`.

    Parameters
    ----------
    cols : container supporting ``in`` (e.g. a list or a DataFrame's columns)
    cands : iterable of candidate names, highest priority first

    Returns
    -------
    The matching candidate, or None when no candidate is present.
    """
    return next((name for name in cands if name in cols), None)

def safe_filename(s):
    """Turn an arbitrary value (e.g. a station name) into a filename-safe token.

    Everything except word characters, whitespace and "-" is removed, the
    result is stripped, and each remaining whitespace character becomes "_".

    Parameters
    ----------
    s : any object; it is converted with ``str()`` first.

    Returns
    -------
    str
        The sanitised token, or "unnamed" when nothing usable is left
        (so callers never build a file name with an empty stem).
    """
    cleaned = re.sub(r"[^\w\s\-]+", "", str(s)).strip()
    # Map every whitespace character (not only " ") to "_" so that tabs or
    # newlines embedded in a name can never end up inside a file name.
    cleaned = re.sub(r"\s", "_", cleaned)
    return cleaned or "unnamed"

# ==============================
# Philippine AQI for PM2.5 (24-hr) — DAO 2020-14
# ==============================
# Category labels and their plot colors are parallel lists, ordered from the
# cleanest to the most severe category.
PH_AQI_LABELS = [
    "Good (0–50)",
    "Fair (51–100)",
    "Unhealthy for sensitive groups (101–150)",
    "Very unhealthy (151–200)",
    "Acutely unhealthy (201–300)",
    "Emergency (301–500)",
]
PH_AQI_COLORS = [
    "#00E400",  # Good
    "#FFFF00",  # Fair
    "#FF7E00",  # Unhealthy for sensitive groups
    "#FF0000",  # Very unhealthy
    "#8F3F97",  # Acutely unhealthy
    "#7E0023",  # Emergency
]

# Label / color used when no finite AQI value is available.
NO_DATA_LABEL = "No data"
NO_DATA_COLOR = "#bdbdbd"

# Parallel tables: the PM2.5 range _PH_PM25_BREAKS[i] (µg/m³) maps linearly onto
# the index range _PH_AQI_BANDS[i]. As written, the concentration table leaves
# 90.1–90.9 unassigned (the last finite band ends at 90.0, the open-ended band
# starts at 91.0); pm25_to_ph_aqi treats that gap as the start of the last band.
_PH_AQI_BANDS = [(0,50),(51,100),(101,150),(151,200),(201,300),(301,500)]
_PH_PM25_BREAKS = [(0.0,25.0),(25.1,35.0),(35.1,45.0),(45.1,55.0),(55.1,90.0),(91.0, float("inf"))]

def _trunc01(x):  # truncate to 0.1 µg/m³ before converting
    """Drop everything past the first decimal of `x` (floors, so negatives move down)."""
    return (float(x)*10)//1 / 10.0

def pm25_to_ph_aqi(x):
    """Convert a PM2.5 concentration (µg/m³) to a PH AQI value.

    The value is coerced to a number, truncated to 0.1 µg/m³, clipped at 0 and
    linearly interpolated inside the band that contains it.

    Parameters
    ----------
    x : scalar concentration; non-numeric input is coerced to NaN.

    Returns
    -------
    float
        The (unrounded) index value, or NaN when `x` is missing or not finite.
        In the open-ended last band the index grows linearly up to a practical
        ceiling of 150 µg/m³ and is capped at the band's upper index (500).
    """
    v = pd.to_numeric(x, errors="coerce")
    if v is None or not np.isfinite(v): return np.nan
    v = max(0.0, _trunc01(v))
    for (c_lo, c_hi), (i_lo, i_hi) in zip(_PH_PM25_BREAKS, _PH_AQI_BANDS):
        if np.isfinite(c_hi):
            if v <= c_hi:
                # Bands are visited in ascending order, so the first band whose
                # upper bound covers v is the right one. max(v, c_lo) pins a
                # value that sits in a gap below c_lo to the band's lowest index.
                return (i_hi - i_lo)/(c_hi - c_lo) * (max(v, c_lo) - c_lo) + i_lo
            continue
        # Open-ended emergency band: everything above the last finite bound
        # lands here. Previously 90.1–90.9 matched no band and returned NaN
        # ("No data"); such values now get the band's lowest index.
        # Interpolate up to a practical ceiling, then cap.
        ceiling = 150.0
        vv = min(max(v, c_lo), ceiling)
        aqi = (i_hi - i_lo)/(ceiling - c_lo) * (vv - c_lo) + i_lo
        return min(float(i_hi), aqi)
    return np.nan

def aqi_to_ph_cat_color(aqi):
    """Map a PH AQI value to its (label, color) pair.

    Non-finite input yields (NO_DATA_LABEL, NO_DATA_COLOR). Otherwise the first
    category whose inclusive upper bound is >= the value is returned; anything
    above 300 falls into the last category.
    """
    if not np.isfinite(aqi):
        return NO_DATA_LABEL, NO_DATA_COLOR
    value = float(aqi)
    # Inclusive upper bounds of the first five categories, in ascending order.
    upper_bounds = (50, 100, 150, 200, 300)
    for position, bound in enumerate(upper_bounds):
        if value <= bound:
            return PH_AQI_LABELS[position], PH_AQI_COLORS[position]
    return PH_AQI_LABELS[5], PH_AQI_COLORS[5]

# --- Gather daily files
# One GeoPackage per day, named "date_<YYYY-MM-DD>.gpkg"; sorted so days are
# processed in chronological (lexicographic) order.
files = sorted(glob.glob(os.path.join(DATA_DIR, "date_2025-*.gpkg")))
if not files:
    raise SystemExit("No daily GPKG files found.")

# --- Build tidy daily table: date, station, pm25
rows_daily = []
# Also gather potential hourly rows if datetime exists
rows_hourly = []

for f in files:
    basename = os.path.basename(f)
    # The day is encoded in the file name: strip the extension and "date_" prefix.
    day_from_name = os.path.splitext(basename)[0].replace("date_","")
    g = gpd.read_file(f)

    pm_col = pick(g.columns, PM_CANDS)
    if pm_col is None:
        print("Skip (no PM2.5):", f); continue

    # Station name; fallbacks: "lat,lon" from attribute columns, then from the
    # geometry, then the row index.
    st_col = pick(g.columns, STATION_CANDS)
    if st_col is None:
        lon_col = pick(g.columns, LON_CANDS)
        lat_col = pick(g.columns, LAT_CANDS)
        if lon_col and lat_col:
            st_series = (g[lat_col].round(5).astype(str) + "," + g[lon_col].round(5).astype(str))
        elif "geometry" in g and not g.geometry.is_empty.all():
            # NOTE(review): .x/.y assume point geometries — confirm for all inputs.
            st_series = (g.geometry.y.round(5).astype(str) + "," + g.geometry.x.round(5).astype(str))
        else:
            st_series = g.index.astype(str)
    else:
        st_series = g[st_col].astype(str)

    # Numeric PM (clip negatives)
    pm = pd.to_numeric(g[pm_col], errors="coerce").clip(lower=0)

    # Try to build datetime if columns exist; else fall back to filename date
    dt_col = None
    # Direct datetime column?
    for cand in ("datetime","date_time","DateTime","DATETIME"):
        if cand in g.columns:
            dt_col = pd.to_datetime(g[cand], errors="coerce")
            break
    if dt_col is None:
        # Otherwise combine separate date and time columns as "<date> <time>".
        dcol = pick(g.columns, DATE_CANDS)
        tcol = pick(g.columns, TIME_CANDS)
        if dcol is not None and tcol is not None:
            dt_col = pd.to_datetime(g[dcol].astype(str) + " " + g[tcol].astype(str), errors="coerce")

    # The filename date is the same for every row of this file: parse it once.
    file_date = pd.to_datetime(day_from_name, errors="coerce")

    # Pair each row with its timestamp by position. Iterating the series
    # directly avoids using index *labels* as positions for .iloc, which is
    # only correct when the frame has a default 0..n-1 index.
    stamps = dt_col if dt_col is not None else [pd.NaT] * len(g)

    # Rows
    for s, v, dt in zip(st_series, pm, stamps):
        if pd.isna(v):
            continue
        if pd.notna(dt):
            # Timestamped reading: keep it for the diurnal analysis and also
            # feed its calendar day into the daily table.
            rows_hourly.append({"station": s, "datetime": dt, "pm25": float(v)})
            rows_daily.append({"station": s, "date": dt.normalize(), "pm25": float(v)})
        else:
            # use date from filename
            rows_daily.append({
                "station": s,
                "date": file_date,
                "pm25": float(v)
            })

# --- DAILY tidy
# The columns are named explicitly so the frame is well-formed even when
# `rows_daily` is empty (e.g. every file was skipped). Without them,
# dropna(subset=...) raises KeyError on the column-less frame and the clean
# exit below is never reached.
df = pd.DataFrame(rows_daily, columns=["station","date","pm25"]).dropna(subset=["date","pm25"])
if df.empty:
    raise SystemExit("No usable station PM data after cleaning.")

# Average duplicates (same station & day)
df = (df.groupby(["station","date"], as_index=False)["pm25"]
        .mean()
        .sort_values(["station","date"]))

# Compute PH AQI & category/color
# The index is rounded to a whole number before it is categorised.
df["aqi_ph"] = df["pm25"].apply(pm25_to_ph_aqi).round(0)
# Each element of cat_col is a (label, color) tuple; split it into two columns.
cat_col = df["aqi_ph"].apply(aqi_to_ph_cat_color)
df["aqi_label"] = [c[0] for c in cat_col]
df["aqi_color"] = [c[1] for c in cat_col]

# Save DAILY audit CSV (the plot color column is intentionally left out)
csv_out = os.path.join(OUT_DIR, "timelines_per_station_PM25_PH_AQI_daily.csv")
df[["station","date","pm25","aqi_ph","aqi_label"]].to_csv(csv_out, index=False)
print("Saved CSV:", csv_out)

# Global y-limits (PM timeline), shared by every station figure
pm_global_max = float(df["pm25"].max())
pm_ymax = np.ceil(pm_global_max / 5.0) * 5.0  # round up to nearest 5

# --- Plot station timelines (two subplots: PM vs PH-AQI)
# Pieces that are identical for every station are prepared once, up front.
legend_handles = [
    mpatches.Patch(color=color, label=label)
    for label, color in zip(PH_AQI_LABELS, PH_AQI_COLORS)
]
pm_axis_top = max(5, pm_ymax)  # same PM y-range on every figure, never below 5
grid_style = dict(alpha=0.3, linestyle="--", linewidth=0.6)

for station, station_df in df.groupby("station", sort=False):
    station_df = station_df.sort_values("date")
    dates = station_df["date"]

    fig, (ax_pm, ax_aqi) = plt.subplots(1, 2, figsize=(12, 3.6), dpi=150, sharex=True)

    # Left panel: raw PM2.5 concentration
    ax_pm.plot(dates, station_df["pm25"], color="#4C78A8", linewidth=1.6, zorder=1)
    ax_pm.scatter(dates, station_df["pm25"], color="#4C78A8", s=22,
                  edgecolors="white", linewidths=0.6, zorder=2)
    ax_pm.set_title(f"PM₂.₅ (µg/m³) — {station}")
    ax_pm.set_ylabel("µg/m³")
    ax_pm.set_xlabel("Date")
    ax_pm.set_ylim(0, pm_axis_top)
    ax_pm.grid(**grid_style)

    # Right panel: numeric PH AQI, markers colored by category
    ax_aqi.plot(dates, station_df["aqi_ph"], color="#666666", linewidth=1.2, zorder=1)
    ax_aqi.scatter(dates, station_df["aqi_ph"], c=station_df["aqi_color"], s=24,
                   edgecolors="black", linewidths=0.4, zorder=2)
    ax_aqi.set_title("PH AQI (PM₂.₅, 24-hr)")
    ax_aqi.set_ylabel("AQI")
    ax_aqi.set_xlabel("Date")
    ax_aqi.set_ylim(0, 500)
    ax_aqi.grid(**grid_style)

    # Category legend, anchored outside the right panel
    ax_aqi.legend(handles=legend_handles, bbox_to_anchor=(1.02, 1),
                  loc='upper left', borderaxespad=0., fontsize=8, title="PH AQI Category")

    out_png = os.path.join(OUT_DIR, f"timeline_PM25_and_PHAQI_{safe_filename(station)}.png")
    fig.tight_layout()
    fig.savefig(out_png, bbox_inches="tight")
    plt.close(fig)  # free the figure; many are created in this loop
    print("Saved:", out_png)

# ==============================
# DIURNAL profiles (Weekday vs Weekend) — if datetime exists
# ==============================
def ensure_full_hours(df_in, val_col):
    """Reindex one station's hourly means onto every (grp, hour) combination.

    `df_in` needs the columns "grp", "hour" and `val_col`. The result has one
    row per group present in `df_in` and per hour 0..23; hours without data
    carry NaN in `val_col`.
    """
    full_index = pd.MultiIndex.from_product(
        [sorted(df_in["grp"].unique()), range(24)],
        names=["grp","hour"]
    )
    values = df_in.set_index(["grp","hour"])[val_col]
    return values.reindex(full_index).reset_index()


if rows_hourly:
    df_hour = pd.DataFrame(rows_hourly).dropna(subset=["datetime","pm25"])
else:
    df_hour = pd.DataFrame()

if df_hour.empty:
    print("[info] No timestamped/hourly data found in files. Skipping diurnal plots.")
else:
    # Station names are used as-is (same as in the daily table).

    # AQI per timestamped reading (not rounded here)
    df_hour["aqi_ph"] = df_hour["pm25"].apply(pm25_to_ph_aqi)

    # Hour of day and weekday/weekend flag (weekday numbers 5 and 6 = Sat, Sun)
    dt_series = pd.to_datetime(df_hour["datetime"], errors="coerce")
    df_hour["hour"] = dt_series.dt.hour
    df_hour["is_weekend"] = dt_series.dt.weekday >= 5
    df_hour["grp"] = np.where(df_hour["is_weekend"], "Weekend", "Weekday")

    # Mean per station, group and hour for each quantity
    group_keys = ["station","grp","hour"]
    hourly_groups = df_hour.groupby(group_keys, as_index=False)
    agg_pm = hourly_groups["pm25"].mean().sort_values(group_keys)
    agg_aqi = hourly_groups["aqi_ph"].mean().sort_values(group_keys)

    colors = {"Weekday":"#1f77b4", "Weekend":"#d62728"}  # blue vs red

    # One two-panel figure per station
    for station in sorted(set(agg_pm["station"])):
        # Fill hours with no data with NaN so every line spans 0..23
        pm_full = ensure_full_hours(agg_pm[agg_pm["station"] == station], "pm25")
        aqi_full = ensure_full_hours(agg_aqi[agg_aqi["station"] == station], "aqi_ph")

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 3.6), dpi=150, sharey=False)

        # (axis, table, value column, title, y label, fixed y-limits or None)
        panels = [
            (ax1, pm_full, "pm25", f"Diurnal PM₂.₅ — {station}", "µg/m³", None),
            (ax2, aqi_full, "aqi_ph", "Diurnal PH AQI (PM₂.₅, 24-hr)", "AQI", (0, 500)),
        ]
        for ax, table, value_col, title, y_label, y_limits in panels:
            for grp in ("Weekday", "Weekend"):
                subset = table[table["grp"] == grp]
                ax.plot(subset["hour"], subset[value_col], label=grp, linewidth=2.0,
                        marker="o", markersize=4.5, color=colors.get(grp, "#555555"))
            ax.set_title(title)
            ax.set_xlabel("Hour (local)")
            ax.set_ylabel(y_label)
            ax.set_xticks(range(0,24,2))
            if y_limits is not None:
                ax.set_ylim(*y_limits)
            ax.grid(alpha=0.3, linestyle="--", linewidth=0.6)
            ax.legend(frameon=False)

        out_png = os.path.join(OUT_DIR, f"diurnal_PM25_and_PHAQI_{safe_filename(station)}.png")
        fig.tight_layout()
        fig.savefig(out_png, bbox_inches="tight")
        plt.close(fig)  # free the figure; many are created in this loop
        print("Saved:", out_png)


Saved CSV: C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI\timelines_per_station_PM25_PH_AQI_daily.csv
Saved: C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_PM25_and_PHAQI_Brgy_San_Antonio_Fire_and_Rescue_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_PM25_and_PHAQI_Country_Lodge_Pasig_EMBNCR.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_PM25_and_PHAQI_Dela_Paz_Barangay_Hall_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_PM25_and_PHAQI_ICE_Pasig.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_PM25_and_PHAQI_Manggahan_Barangay_Hall_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_PM25_and_PHAQI_Maybunga_Barangay_Hall_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Outputs\timelines_AQI\timeline_PM25_and_PHAQI_Maybunga_Rainforest_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Outputs\timeli