In [2]:
# H1_hourly_timeseries_aqi.ipynb

import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# =============================
# USER SETTINGS
# =============================
# Location of the CSV that is read below and of the folder that receives the PNG figures.
# NOTE(review): both are absolute, machine-specific Windows paths — adjust before running elsewhere.
CSV_PATH = r"C:\Users\HP\Desktop\SpatialCARE\Hourly\pasig_hourly_corrected.csv"
OUT_DIR = r"C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries"
# Create the output folder (and any missing parents); exist_ok keeps re-runs from failing.
os.makedirs(OUT_DIR, exist_ok=True)

# =============================
# AQI Categories (PM2.5 breakpoints µg/m³)
# =============================
# Each entry is (lower bound, upper bound, legend label, band colour); the bounds
# are PM2.5 concentrations and are used as the edges of the shaded bands.
# The bands are contiguous: every lower bound equals the previous upper bound, so
# no concentration (e.g. 12.5) falls between two bands and no unshaded stripe is
# left in the plots.
# NOTE(review): the labels look like Philippine AQI category names while the
# concentrations resemble US EPA PM2.5 breakpoints — confirm which scale is intended.
AQI_BREAKS = [
    (0, 12, "Good", "limegreen"),
    (12, 35, "Fair", "yellow"),
    (35, 55, "Unhealthy for Sensitive Groups", "orange"),
    (55, 150, "Very Unhealthy", "red"),
    (150, 250, "Acutely Unhealthy", "purple"),
    (250, 500, "Emergency", "maroon")
]

# =============================
# LOAD CSV
# =============================
# Read the CSV, join the "Date" and "Time" text columns into one parsed
# "datetime" column, and order the rows chronologically.
df = (
    pd.read_csv(CSV_PATH)
    .assign(datetime=lambda frame: pd.to_datetime(frame["Date"] + " " + frame["Time"]))
    .sort_values("datetime")
)

# =============================
# PLOTTING TIMESERIES
# =============================
for loc, g in df.groupby("location_name"):
    # One figure per location; explicit fig/ax handles avoid depending on
    # pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(12,4))

    # Add AQI shaded bands (full-width horizontal spans, one per category)
    for (low, high, label, color) in AQI_BREAKS:
        ax.axhspan(low, high, color=color, alpha=0.15)

    # Plot PM2.5 and keep the line handle so it can be listed in the legend
    (pm_line,) = ax.plot(g["datetime"], g["pm25"], label="PM₂.₅", color="black", linewidth=1)

    ax.set_title(f"Hourly PM₂.₅ — {loc}")
    ax.set_ylabel("µg/m³")
    ax.set_xlabel("DateTime")

    # The shaded bands reach up to 500 and take part in autoscaling, which would
    # stretch the y-axis to 0–500 and flatten the series. Fit the axis to the
    # data instead: next multiple of 5 above the largest reading.
    pm_max = pd.to_numeric(g["pm25"], errors="coerce").max()
    if pd.notna(pm_max) and pm_max > 0:
        ax.set_ylim(0, 5.0 * (pm_max // 5 + 1))

    # Legend: the PM2.5 line first, then one coloured patch per AQI category
    handles = [pm_line] + [Rectangle((0,0),1,1, color=c, alpha=0.5, label=l) for _,_,l,c in AQI_BREAKS]
    ax.legend(handles=handles, bbox_to_anchor=(1.05,1), loc='upper left')

    # Build a filesystem-safe name: accept non-string location values and replace
    # spaces plus characters that are illegal in file names with underscores.
    safe_loc = "".join("_" if ch in ' <>:"/\\|?*' else ch for ch in str(loc))

    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, f"timeseries_{safe_loc}_AQI.png"), dpi=300)
    plt.close(fig)  # release the figure so memory does not grow with the number of locations

# =============================
# DIURNAL AVERAGE PLOT
# =============================
# Hour of day (0–23) of every reading, then mean and standard deviation of
# PM2.5 per location and hour.
df["hour"] = df["datetime"].dt.hour
diurnal = df.groupby(["location_name","hour"])["pm25"].agg(["mean","std"]).reset_index()

for loc, g in diurnal.groupby("location_name"):
    # One figure per location; explicit fig/ax handles avoid depending on
    # pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(8,4))

    # Add AQI bands
    for (low, high, label, color) in AQI_BREAKS:
        ax.axhspan(low, high, color=color, alpha=0.15)

    # Hourly mean with ±1 standard deviation error bars
    ax.errorbar(g["hour"], g["mean"], yerr=g["std"], fmt='-o', color="black")
    ax.set_title(f"Diurnal Cycle of PM₂.₅ — {loc}")
    ax.set_xlabel("Hour of Day")
    ax.set_ylabel("µg/m³")

    # The shaded bands reach up to 500 and take part in autoscaling, which would
    # stretch the y-axis to 0–500 and flatten the curve. Fit the axis to the top
    # of the error bars instead (std is NaN for single-sample hours, hence fillna).
    top = (g["mean"] + g["std"].fillna(0)).max()
    if pd.notna(top) and top > 0:
        ax.set_ylim(0, 5.0 * (top // 5 + 1))

    # Legend built manually: one coloured patch per AQI category
    handles = [Rectangle((0,0),1,1, color=c, alpha=0.5, label=l) for _,_,l,c in AQI_BREAKS]
    ax.legend(handles=handles, bbox_to_anchor=(1.05,1), loc='upper left')

    # Build a filesystem-safe name: accept non-string location values and replace
    # spaces plus characters that are illegal in file names with underscores.
    safe_loc = "".join("_" if ch in ' <>:"/\\|?*' else ch for ch in str(loc))

    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, f"diurnal_{safe_loc}_AQI.png"), dpi=300)
    plt.close(fig)  # release the figure so memory does not grow with the number of locations

In [5]:
# H1_hourly_timeseries_aqi.ipynb

import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# =============================
# USER SETTINGS
# =============================
# Location of the CSV that is read below and of the folder that receives the PNG figures.
# NOTE(review): both are absolute, machine-specific Windows paths — adjust before running elsewhere.
CSV_PATH = r"C:\Users\krish\Desktop\SpatialCARE\Hourly\pasig_hourly_corrected.csv"
OUT_DIR = r"C:\Users\krish\Desktop\SpatialCARE\Hourly\Outputs\timeseries"
# Create the output folder (and any missing parents); exist_ok keeps re-runs from failing.
os.makedirs(OUT_DIR, exist_ok=True)

# =============================
# AQI Categories (PM2.5 breakpoints µg/m³)
# =============================
# Each entry is (lower bound, upper bound, legend label, band colour); the bounds
# are PM2.5 concentrations and are used as the edges of the shaded bands.
# The bands are contiguous: every lower bound equals the previous upper bound, so
# no concentration (e.g. 12.5) falls between two bands and no unshaded stripe is
# left in the plots.
# NOTE(review): the labels look like Philippine AQI category names while the
# concentrations resemble US EPA PM2.5 breakpoints — confirm which scale is intended.
AQI_BREAKS = [
    (0, 12, "Good", "limegreen"),
    (12, 35, "Fair", "yellow"),
    (35, 55, "Unhealthy for Sensitive Groups", "orange"),
    (55, 150, "Very Unhealthy", "red"),
    (150, 250, "Acutely Unhealthy", "purple"),
    (250, 500, "Emergency", "maroon")
]

# =============================
# LOAD CSV
# =============================
# Read the CSV, join the "Date" and "Time" text columns into one parsed
# "datetime" column, and order the rows chronologically.
df = (
    pd.read_csv(CSV_PATH)
    .assign(datetime=lambda frame: pd.to_datetime(frame["Date"] + " " + frame["Time"]))
    .sort_values("datetime")
)

# =============================
# PLOTTING TIMESERIES
# =============================
for loc, g in df.groupby("location_name"):
    # One figure per location; explicit fig/ax handles avoid depending on
    # pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(12,4))

    # Add AQI shaded bands (full-width horizontal spans, one per category)
    for (low, high, label, color) in AQI_BREAKS:
        ax.axhspan(low, high, color=color, alpha=0.15)

    # Plot PM2.5 and keep the line handle so it can be listed in the legend
    (pm_line,) = ax.plot(g["datetime"], g["pm25"], label="PM₂.₅", color="black", linewidth=1)

    ax.set_title(f"Hourly PM₂.₅ — {loc}")
    ax.set_ylabel("µg/m³")
    ax.set_xlabel("DateTime")

    # The shaded bands reach up to 500 and take part in autoscaling, which would
    # stretch the y-axis to 0–500 and flatten the series. Fit the axis to the
    # data instead: next multiple of 5 above the largest reading.
    pm_max = pd.to_numeric(g["pm25"], errors="coerce").max()
    if pd.notna(pm_max) and pm_max > 0:
        ax.set_ylim(0, 5.0 * (pm_max // 5 + 1))

    # Legend: the PM2.5 line first, then one coloured patch per AQI category
    handles = [pm_line] + [Rectangle((0,0),1,1, color=c, alpha=0.5, label=l) for _,_,l,c in AQI_BREAKS]
    ax.legend(handles=handles, bbox_to_anchor=(1.05,1), loc='upper left')

    # Build a filesystem-safe name: accept non-string location values and replace
    # spaces plus characters that are illegal in file names with underscores.
    safe_loc = "".join("_" if ch in ' <>:"/\\|?*' else ch for ch in str(loc))

    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, f"timeseries_{safe_loc}_AQI.png"), dpi=300)
    plt.close(fig)  # release the figure so memory does not grow with the number of locations

# =============================
# DIURNAL AVERAGE PLOT
# =============================
# Hour of day (0–23) of every reading, then mean and standard deviation of
# PM2.5 per location and hour.
df["hour"] = df["datetime"].dt.hour
diurnal = df.groupby(["location_name","hour"])["pm25"].agg(["mean","std"]).reset_index()

for loc, g in diurnal.groupby("location_name"):
    # One figure per location; explicit fig/ax handles avoid depending on
    # pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(8,4))

    # Add AQI bands
    for (low, high, label, color) in AQI_BREAKS:
        ax.axhspan(low, high, color=color, alpha=0.15)

    # Hourly mean with ±1 standard deviation error bars
    ax.errorbar(g["hour"], g["mean"], yerr=g["std"], fmt='-o', color="black")
    ax.set_title(f"Diurnal Cycle of PM₂.₅ — {loc}")
    ax.set_xlabel("Hour of Day")
    ax.set_ylabel("µg/m³")

    # The shaded bands reach up to 500 and take part in autoscaling, which would
    # stretch the y-axis to 0–500 and flatten the curve. Fit the axis to the top
    # of the error bars instead (std is NaN for single-sample hours, hence fillna).
    top = (g["mean"] + g["std"].fillna(0)).max()
    if pd.notna(top) and top > 0:
        ax.set_ylim(0, 5.0 * (top // 5 + 1))

    # Legend built manually: one coloured patch per AQI category
    handles = [Rectangle((0,0),1,1, color=c, alpha=0.5, label=l) for _,_,l,c in AQI_BREAKS]
    ax.legend(handles=handles, bbox_to_anchor=(1.05,1), loc='upper left')

    # Build a filesystem-safe name: accept non-string location values and replace
    # spaces plus characters that are illegal in file names with underscores.
    safe_loc = "".join("_" if ch in ' <>:"/\\|?*' else ch for ch in str(loc))

    fig.tight_layout()
    fig.savefig(os.path.join(OUT_DIR, f"diurnal_{safe_loc}_AQI.png"), dpi=300)
    plt.close(fig)  # release the figure so memory does not grow with the number of locations


## Updated version: daily PM₂.₅ / Philippine AQI timelines and weekday-vs-weekend diurnal profiles

In [3]:
# timelines_pm25_and_ph_aqi_with_diurnal_HOURLY.py

# --- Imports
import os, re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

# --- Paths (HOURLY)
# Input CSV that is read below and the folder that receives the CSV/PNG files this cell writes.
# NOTE(review): both are absolute, machine-specific Windows paths — adjust before running elsewhere.
CSV_PATH = r"C:\Users\HP\Desktop\SpatialCARE\Hourly\pasig_hourly_corrected.csv"
OUT_DIR  = r"C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries"
# Create the output folder (and any missing parents); exist_ok keeps re-runs from failing.
os.makedirs(OUT_DIR, exist_ok=True)

# --- Column helpers
# Accepted spellings for each logical column, listed in priority order
# (the first spelling found among the CSV columns is the one used).

# PM2.5 concentration column
PM_CANDS = [
    "pm25", "PM25", "PM_25", "PM2_5", "PM2.5",
]
# Station / site name column
STATION_CANDS = [
    "stations", "station", "Station", "STATION",
    "location_name", "Location", "site", "Site",
]
# Date-only column
DATE_CANDS = [
    "date", "Date", "DATE",
]
# Time-of-day (or hour) column
TIME_CANDS = [
    "time", "Time", "TIME",
    "hour", "Hour", "HOUR", "HH",
]
# Combined date-and-time column
DT_CANDS = [
    "datetime", "date_time", "DateTime", "DATETIME",
    "timestamp", "Timestamp",
]

def pick(cols, cands):
    """Return the first entry of ``cands`` that is contained in ``cols``.

    Candidates are tried in the order given, so earlier entries take priority.
    Returns ``None`` when no candidate is present.
    """
    return next((name for name in cands if name in cols), None)

def safe_filename(s):
    """Turn an arbitrary label into a string that is safe to embed in a file name.

    Steps, in order:
      1. convert ``s`` to ``str``;
      2. drop every character that is not a word character, whitespace or ``-``;
      3. strip leading/trailing whitespace;
      4. replace each remaining whitespace character (spaces, but also tabs and
         newlines, which would otherwise survive into the file name) with ``_``.

    Returns ``"unnamed"`` when nothing is left, so the caller never builds a
    file name with an empty label part.
    """
    cleaned = re.sub(r"[^\w\s\-]+", "", str(s)).strip()
    # One underscore per whitespace character keeps the result identical to the
    # plain space-replacement for labels that only contain ordinary spaces.
    cleaned = re.sub(r"\s", "_", cleaned)
    return cleaned or "unnamed"

# ==============================
# Philippine AQI for PM2.5 (24-hr) — DAO 2020-14
# ==============================
# Legend text and marker colour of each AQI category, in ascending severity.
# Kept as one table so a label and its colour cannot drift apart.
_PH_AQI_LEGEND = [
    ("Good (0–50)", "#00E400"),
    ("Fair (51–100)", "#FFFF00"),
    ("Unhealthy for sensitive groups (101–150)", "#FF7E00"),
    ("Very unhealthy (151–200)", "#FF0000"),
    ("Acutely unhealthy (201–300)", "#8F3F97"),
    ("Emergency (301–500)", "#7E0023"),
]
PH_AQI_LABELS = [text for text, _ in _PH_AQI_LEGEND]
PH_AQI_COLORS = [shade for _, shade in _PH_AQI_LEGEND]

# Label and colour used when no AQI value is available.
NO_DATA_LABEL = "No data"
NO_DATA_COLOR = "#bdbdbd"

# AQI index range of each category, in ascending severity.
_PH_AQI_BANDS = [(0,50),(51,100),(101,150),(151,200),(201,300),(301,500)]
# PM2.5 concentration range (µg/m³) paired position-by-position with the AQI
# ranges above; the last band is open-ended.
# The table leaves a hole between 90.0 and 91.0; pm25_to_ph_aqi bridges it
# instead of returning NaN.
_PH_PM25_BREAKS = [(0.0,25.0),(25.1,35.0),(35.1,45.0),(45.1,55.0),(55.1,90.0),(91.0, float("inf"))]
# Concentration at which the open-ended top band reaches its maximum AQI
# (a practical ceiling chosen by this script; values above it are capped).
_PH_PM25_CEILING = 150.0

def _trunc01(x):  # truncate to 0.1 µg/m³ before converting
    """Return ``x`` as a float floored to one decimal place."""
    return (float(x)*10)//1 / 10.0

def pm25_to_ph_aqi(x):
    """Convert one PM2.5 concentration (µg/m³) to an AQI value.

    The input is coerced to a number, floored to 0.1 µg/m³ and clamped at 0,
    then linearly interpolated inside the matching concentration band of
    ``_PH_PM25_BREAKS`` onto the corresponding range of ``_PH_AQI_BANDS``.

    * A value that falls between two bands (the 90.0–91.0 hole in the table)
      is interpolated between the neighbouring AQI endpoints, so it yields a
      value between 300 and 301 rather than NaN ("No data").
    * In the open-ended top band the AQI rises linearly up to
      ``_PH_PM25_CEILING`` and is capped at the band's upper AQI beyond it.

    Returns ``np.nan`` for missing, non-numeric or non-finite input.
    """
    v = pd.to_numeric(x, errors="coerce")
    if v is None or not np.isfinite(v):
        return np.nan
    v = max(0.0, _trunc01(v))

    prev_c_hi = prev_i_hi = None  # upper edges of the previously visited band
    for (c_lo, c_hi), (i_lo, i_hi) in zip(_PH_PM25_BREAKS, _PH_AQI_BANDS):
        if v < c_lo:
            # v lies in a hole between the previous band and this one.
            if prev_c_hi is None:
                return np.nan
            return prev_i_hi + (i_lo - prev_i_hi) * (v - prev_c_hi) / (c_lo - prev_c_hi)
        if v <= c_hi:
            if np.isfinite(c_hi):
                return (i_hi - i_lo) / (c_hi - c_lo) * (v - c_lo) + i_lo
            # open-ended emergency band; interpolate up to a practical ceiling then cap
            capped = min(v, _PH_PM25_CEILING)
            aqi = (i_hi - i_lo) / (_PH_PM25_CEILING - c_lo) * (capped - c_lo) + i_lo
            return min(float(i_hi), aqi)
        prev_c_hi, prev_i_hi = c_hi, i_hi
    return np.nan

def aqi_to_ph_cat_color(aqi):
    """Return the ``(label, colour)`` pair of the category that ``aqi`` falls in.

    Non-finite input (NaN/inf) maps to the no-data label and colour. Otherwise
    the first category whose upper AQI bound is >= ``aqi`` is chosen; anything
    above 300 falls into the last category.
    """
    if not np.isfinite(aqi):
        return NO_DATA_LABEL, NO_DATA_COLOR

    value = float(aqi)
    # Upper AQI bound of the first five categories, in ascending order.
    upper_bounds = (50, 100, 150, 200, 300)
    for idx, bound in enumerate(upper_bounds):
        if value <= bound:
            return PH_AQI_LABELS[idx], PH_AQI_COLORS[idx]
    return PH_AQI_LABELS[5], PH_AQI_COLORS[5]

# ==============================
# Load HOURLY CSV
# ==============================
# Stop early with a readable message when the input file is missing.
if not os.path.exists(CSV_PATH):
    raise SystemExit(f"CSV not found: {CSV_PATH}")

raw = pd.read_csv(CSV_PATH)

# Resolve which spelling of each logical column this CSV actually uses.
pm_col, st_col, dt_col0 = (
    pick(raw.columns, cands) for cands in (PM_CANDS, STATION_CANDS, DT_CANDS)
)

# PM2.5 is mandatory; the station column is optional.
if pm_col is None:
    raise SystemExit("No PM2.5 column found in CSV.")
if st_col is None:
    # No station column: label every row with one generic station.
    st_col = "_station_fallback_"
    raw[st_col] = "Station"

# Build datetime: prefer a combined column, else date + time, else date alone.
# Unparseable values become NaT (errors="coerce").
if dt_col0 is not None:
    dt = pd.to_datetime(raw[dt_col0], errors="coerce")
else:
    dcol = pick(raw.columns, DATE_CANDS)
    tcol = pick(raw.columns, TIME_CANDS)
    if dcol is None:
        if tcol is None:
            raise SystemExit("No datetime/date/time columns found.")
        # time without a date is not usable—fail clearly
        raise SystemExit("Found time column but no date column to build datetime.")
    if tcol is None:
        dt = pd.to_datetime(raw[dcol], errors="coerce")
    else:
        stamp_text = raw[dcol].astype(str) + " " + raw[tcol].astype(str)
        dt = pd.to_datetime(stamp_text, errors="coerce")

# Tidy hourly dataframe
# Tidy hourly frame with exactly three columns: station, datetime, pm25.
# astype(str) alone would turn a missing station name into the literal text
# "nan" and later produce a bogus "nan" station; keep missing names as NaN so
# those rows are dropped together with rows lacking a timestamp or a reading.
station_names = raw[st_col]
df_hour = pd.DataFrame({
    "station": station_names.astype(str).where(station_names.notna()),
    "datetime": dt,
    # non-numeric readings become NaN; negative readings are raised to 0
    "pm25": pd.to_numeric(raw[pm_col], errors="coerce").clip(lower=0)
}).dropna(subset=["station","datetime","pm25"])

if df_hour.empty:
    raise SystemExit("No usable hourly rows after cleaning.")

# ==============================
# DAILY aggregation from hourly
# ==============================
# Calendar day of every hourly reading (timestamp with the time part zeroed).
df_hour["date"] = df_hour["datetime"].dt.normalize()

# Daily mean PM2.5 per station, ordered by station and then date.
day_keys = ["station","date"]
df_daily = df_hour.groupby(day_keys, as_index=False)["pm25"].mean()
df_daily = df_daily.sort_values(day_keys)

# Compute PH AQI (daily), rounded to a whole number, and its category/colour.
df_daily["aqi_ph"] = df_daily["pm25"].apply(pm25_to_ph_aqi).round(0)
cat_col = df_daily["aqi_ph"].apply(aqi_to_ph_cat_color)  # Series of (label, colour) tuples
df_daily["aqi_label"] = [label for label, _ in cat_col]
df_daily["aqi_color"] = [shade for _, shade in cat_col]

# Save DAILY audit CSV (the colour column is only needed for plotting, so it is left out).
csv_out = os.path.join(OUT_DIR, "timelines_per_station_PM25_PH_AQI_daily.csv")
audit_cols = ["station","date","pm25","aqi_ph","aqi_label"]
df_daily[audit_cols].to_csv(csv_out, index=False)
print("Saved CSV:", csv_out)

# ==============================
# Plot daily timelines (PM vs PH AQI)
# ==============================
# Shared upper limit for every PM2.5 panel: the largest daily mean across all
# stations, rounded up to a multiple of 5 (5 when there is no positive value).
if df_daily.empty:
    pm_global_max = 0.0
else:
    pm_global_max = float(df_daily["pm25"].max())

if pm_global_max > 0:
    pm_ymax = np.ceil(pm_global_max / 5.0) * 5.0
else:
    pm_ymax = 5.0

for station, d in df_daily.groupby("station", sort=False):
    d = d.sort_values("date")

    # Two side-by-side panels sharing the date axis: PM2.5 (left), AQI (right).
    fig, axes = plt.subplots(1, 2, figsize=(12, 3.6), dpi=150, sharex=True)
    ax_pm, ax_aqi = axes

    # Left: PM2.5 — line with white-edged markers on top
    pm_blue = "#4C78A8"
    ax_pm.plot(d["date"], d["pm25"], color=pm_blue, linewidth=1.6, zorder=1)
    ax_pm.scatter(d["date"], d["pm25"], color=pm_blue, s=22,
                  edgecolors="white", linewidths=0.6, zorder=2)
    ax_pm.set_title(f"PM₂.₅ (µg/m³) — {station}")
    ax_pm.set_ylabel("µg/m³")
    ax_pm.set_xlabel("Date")
    ax_pm.set_ylim(0, max(5, pm_ymax))
    ax_pm.grid(alpha=0.3, linestyle="--", linewidth=0.6)

    # Right: PH-AQI — grey line, markers coloured by the day's category
    ax_aqi.plot(d["date"], d["aqi_ph"], color="#666666", linewidth=1.2, zorder=1)
    ax_aqi.scatter(d["date"], d["aqi_ph"], c=d["aqi_color"], s=24,
                   edgecolors="black", linewidths=0.4, zorder=2)
    ax_aqi.set_title("PH AQI (PM₂.₅, 24-hr)")
    ax_aqi.set_ylabel("AQI")
    ax_aqi.set_xlabel("Date")
    ax_aqi.set_ylim(0, 500)
    ax_aqi.grid(alpha=0.3, linestyle="--", linewidth=0.6)

    # AQI legend: one patch per category, placed outside the right panel
    patches = [
        mpatches.Patch(color=shade, label=text)
        for text, shade in zip(PH_AQI_LABELS, PH_AQI_COLORS)
    ]
    ax_aqi.legend(handles=patches, bbox_to_anchor=(1.02, 1),
                  loc='upper left', borderaxespad=0., fontsize=8, title="PH AQI Category")

    # Write the figure, then close it so open figures do not accumulate.
    out_png = os.path.join(OUT_DIR, f"timeline_PM25_and_PHAQI_{safe_filename(station)}.png")
    fig.tight_layout()
    fig.savefig(out_png, bbox_inches="tight")
    plt.close(fig)
    print("Saved:", out_png)

# ==============================
# DIURNAL profiles (Weekday vs Weekend) — from hourly
# ==============================
# AQI of every individual hourly reading (not rounded).
df_hour["aqi_ph"] = df_hour["pm25"].apply(pm25_to_ph_aqi)

# Hour of day and weekday/weekend flag (weekday numbers 5 and 6 are Saturday/Sunday).
dt_series = pd.to_datetime(df_hour["datetime"], errors="coerce")
df_hour["hour"] = dt_series.dt.hour
df_hour["is_weekend"] = dt_series.dt.weekday >= 5
df_hour["grp"] = np.where(df_hour["is_weekend"], "Weekend", "Weekday")

# Aggregate to mean per hour per group per station, once for PM2.5 and once for AQI.
diurnal_keys = ["station","grp","hour"]
hourly_groups = df_hour.groupby(diurnal_keys, as_index=False)
agg_pm = hourly_groups["pm25"].mean().sort_values(diurnal_keys)
agg_aqi = hourly_groups["aqi_ph"].mean().sort_values(diurnal_keys)

def ensure_full_hours(df_in, val_col):
    """Expand one station's diurnal table to all 24 hours for each group.

    ``df_in`` must have ``grp`` and ``hour`` columns plus ``val_col``. The
    result has the columns ``grp``, ``hour`` and ``val_col`` with one row per
    (group present in ``df_in``, hour 0–23), groups in sorted order; hours
    without data carry NaN in ``val_col``.
    """
    groups_present = sorted(df_in["grp"].unique())
    full_index = pd.MultiIndex.from_product(
        [groups_present, range(24)],
        names=["grp","hour"],
    )
    indexed_values = df_in.set_index(["grp","hour"])[val_col]
    completed = indexed_values.reindex(full_index)
    return completed.reset_index()

for station in sorted(set(agg_pm["station"])):
    # This station's hourly means, padded so every group has all 24 hours.
    pm_sub  = agg_pm[agg_pm["station"] == station]
    aqi_sub = agg_aqi[agg_aqi["station"] == station]

    pm_full  = ensure_full_hours(pm_sub, "pm25")
    aqi_full = ensure_full_hours(aqi_sub, "aqi_ph")

    colors = {"Weekday":"#1f77b4", "Weekend":"#d62728"}  # blue vs red

    fig, axes = plt.subplots(1, 2, figsize=(12, 3.6), dpi=150, sharey=False)
    ax1, ax2 = axes

    # Panel descriptions: (axes, data, value column, title, y label, fixed y-limits or None).
    # Left: PM2.5 diurnal (autoscaled); right: PH-AQI diurnal (fixed 0–500).
    panels = [
        (ax1, pm_full, "pm25", f"Diurnal PM₂.₅ — {station}", "µg/m³", None),
        (ax2, aqi_full, "aqi_ph", "Diurnal PH AQI (PM₂.₅, 24-hr)", "AQI", (0, 500)),
    ]
    for ax, full, value_col, title, y_label, y_limits in panels:
        # One line per group; a group without data for this station plots nothing.
        for grp in ["Weekday","Weekend"]:
            dd = full[full["grp"] == grp]
            ax.plot(dd["hour"], dd[value_col], label=grp, linewidth=2.0, marker="o",
                    markersize=4.5, color=colors.get(grp, "#555555"))
        ax.set_title(title)
        ax.set_xlabel("Hour (local)")
        ax.set_ylabel(y_label)
        ax.set_xticks(range(0,24,2))
        if y_limits is not None:
            ax.set_ylim(*y_limits)
        ax.grid(alpha=0.3, linestyle="--", linewidth=0.6)
        ax.legend(frameon=False)

    # Write the figure, then close it so open figures do not accumulate.
    out_png = os.path.join(OUT_DIR, f"diurnal_PM25_and_PHAQI_{safe_filename(station)}.png")
    fig.tight_layout()
    fig.savefig(out_png, bbox_inches="tight")
    plt.close(fig)
    print("Saved:", out_png)


Saved CSV: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries\timelines_per_station_PM25_PH_AQI_daily.csv
Saved: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries\timeline_PM25_and_PHAQI_Brgy_San_Antonio_Fire_and_Rescue_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries\timeline_PM25_and_PHAQI_Country_Lodge_Pasig_EMBNCR.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries\timeline_PM25_and_PHAQI_Dela_Paz_Barangay_Hall_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries\timeline_PM25_and_PHAQI_ICE_Pasig.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries\timeline_PM25_and_PHAQI_Manggahan_Barangay_Hall_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries\timeline_PM25_and_PHAQI_Maybunga_Barangay_Hall_Pasig_City.png
Saved: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\timeseries\timeline_PM25_and_PHAQI_Maybunga