In [4]:
import os, glob
import numpy as np
import numpy.ma as ma
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.patches as mpatches

# ---------------- Paths (edit if needed)
# DATA_DIR: folder that is globbed for the daily "date_2025-*.gpkg" files.
DATA_DIR  = r"C:\Users\krish\Desktop\SpatialCARE\DailyGPKG"
# OUT_DIR: folder the PNG figures are written to.
OUT_DIR   = r"C:\Users\krish\Desktop\SpatialCARE\Outputs\figures\temporal_heatmaps_guideline"
# Create the output folder (and any missing parents); no error if it already exists.
os.makedirs(OUT_DIR, exist_ok=True)

# ---------------- Figure settings
FIG_DPI         = 150                 # dpi passed to plt.subplots for both figures
FIG_SIZE_STRIP  = (10, 2.8)           # city mean strip
BASE_HEIGHT_STN = 0.28                # inches per station (auto height)

# ---------------- PM2.5 column helper
# Accepted spellings of the PM2.5 column, in priority order.
PM_CANDS = ["pm25","PM25","PM_25","PM2_5","PM2.5"]
def pick_pm(cols):
    """Return the name of the PM2.5 column found in *cols*, or None.

    An exact match against PM_CANDS (in priority order) is tried first.
    If none is found, the comparison is repeated ignoring letter case, so
    a column such as "Pm2_5" is still recognised; the column name is
    returned exactly as it appears in *cols*.

    Parameters
    ----------
    cols : iterable of column labels (e.g. a list or a DataFrame's columns).

    Returns
    -------
    The matching column label, or None when no candidate matches.
    """
    # Pass 1: exact spelling, honouring the candidate priority.
    for c in PM_CANDS:
        if c in cols:
            return c

    # Pass 2: case-insensitive fallback. Keep the first column seen for each
    # lowercased name so the result is deterministic.
    by_lower = {}
    for col in cols:
        by_lower.setdefault(str(col).lower(), col)
    for c in PM_CANDS:
        hit = by_lower.get(c.lower())
        if hit is not None:
            return hit
    return None

# ---------------- Local PM2.5 guideline (µg/m³, 24-hr) + colors
# Good: 0–25.0 | Fair: 25.1–35.0 | Unhealthy-SG: 35.1–45.0 | Very unhealthy: 45.1–55.0
# Acutely unhealthy: 55.1–90.0 | Emergency: ≥91
# BINS lists the lower edge of the first category followed by the upper limit
# of each category; the final edge is a very large sentinel so that any high
# reading still falls in the top category.
BINS    = [0.0, 25.0, 35.0, 45.0, 55.0, 90.0, 1e9]   # last bin very high for safety
LABELS  = [
    "Good (0–25.0)",
    "Fair (25.1–35.0)",
    "Unhealthy (sensitive) (35.1–45.0)",
    "Very unhealthy (45.1–55.0)",
    "Acutely unhealthy (55.1–90.0)",
    "Emergency (≥91)"
]
COLORS  = ["#00E400","#FFFF00","#FF7E00","#FF0000","#8F3F97","#7E0023"]
NO_DATA_COLOR = "#e0e0e0"

# One colour per category; masked (missing) cells are drawn in NO_DATA_COLOR.
cmap = mpl.colors.ListedColormap(COLORS)
cmap.set_bad(NO_DATA_COLOR)

# BoundaryNorm assigns a value to bin i when edge[i] <= value < edge[i+1], so
# with the raw BINS a reading of exactly 25.0 would be coloured "Fair" even
# though the label says "Good (0–25.0)". Moving every interior edge up to the
# next representable float makes each category's upper limit inclusive while
# leaving all other values in the bin they had before.
# Note: readings strictly above 90.0 (including 90.0–91) map to "Emergency".
NORM_EDGES = (
    [BINS[0]]
    + [float(np.nextafter(edge, np.inf)) for edge in BINS[1:-1]]
    + [BINS[-1]]
)
norm = mpl.colors.BoundaryNorm(NORM_EDGES, ncolors=len(COLORS), clip=False)

# ---------------- Gather daily city mean and station-day table
# One GeoPackage per day is expected, named "date_<YYYY-MM-DD>.gpkg".
files = sorted(glob.glob(os.path.join(DATA_DIR, "date_2025-*.gpkg")))
if not files:
    raise SystemExit("No daily GPKG files found.")

city_rows = []   # one dict per day: date + mean PM2.5 over all rows in the file
sd_rows   = []   # one dict per (station, day): date, station id, PM2.5
skipped   = []   # files without a recognisable PM2.5 column

for f in files:
    # The date is taken from the file name, e.g. "date_2025-01-05.gpkg".
    date = os.path.splitext(os.path.basename(f))[0].replace("date_","")
    g = gpd.read_file(f)

    pm_col = pick_pm(g.columns)
    if pm_col is None:
        # Remember the file so the omission is reported instead of silent.
        skipped.append(os.path.basename(f))
        continue

    # Parse the date once per file rather than once per station row.
    day = pd.to_datetime(date)

    # numeric PM (clean), clip negatives to 0
    pm = pd.to_numeric(g[pm_col], errors="coerce").clip(lower=0)

    # citywide mean for the strip
    city_rows.append({"date": day, "city_mean": pm.mean(skipna=True)})

    # station id: prefer 'stations' if present; else lat,lon; else fallback index
    if "stations" in g.columns:
        sid = g["stations"].astype(str)
    else:
        lon_col = next((c for c in g.columns if str(c).lower() in ("longitude","lon","x")), None)
        lat_col = next((c for c in g.columns if str(c).lower() in ("latitude","lat","y")), None)
        if lon_col and lat_col:
            # Rounded "lat,lon" string used as the station key.
            sid = (g[lat_col].round(5).astype(str) + "," + g[lon_col].round(5).astype(str))
        else:
            sid = g.index.astype(str)

    sd_rows.extend({"date": day, "station": s, "pm25": v} for s, v in zip(sid, pm))

if skipped:
    print(f"Skipped {len(skipped)} file(s) without a PM2.5 column: " + ", ".join(skipped))

# Without this guard an empty result fails later with an opaque KeyError('date').
if not city_rows:
    raise SystemExit("No daily GPKG file contains a recognised PM2.5 column.")

city = pd.DataFrame(city_rows).sort_values("date").reset_index(drop=True)
sd   = pd.DataFrame(sd_rows)

# station × date matrix
# (duplicate station/date rows are averaged)
sd_p = (sd.pivot_table(index="station", columns="date", values="pm25", aggfunc="mean")
          .sort_index(axis=0)  # stations alphabetical
          .sort_index(axis=1)) # dates ascending

# ---------------- Plot 1: City mean heatstrip (categorical by guideline)
# A 1 x N image: one cell per day present in `city`, coloured by category.
strip_data = city["city_mean"].to_numpy()     # 1D array of means
strip_img  = strip_data[None, :]              # 1 x N (prevents pandas indexing issue)
strip_mask = ma.masked_invalid(strip_img)     # mask NaNs for 'set_bad' to work

fig, ax = plt.subplots(figsize=FIG_SIZE_STRIP, dpi=FIG_DPI)
# interpolation="nearest": the data are categorical, so cells must never be
# blended with their neighbours (blending could produce the colour of a
# category that is not present in the data).
im = ax.imshow(strip_mask, aspect="auto", cmap=cmap, norm=norm,
               interpolation="nearest")
ax.set_yticks([])

# De-clutter x-axis: show about 14 evenly spaced labels
target_labels = 14
step = max(1, len(city) // target_labels)
ax.set_xticks(range(0, len(city), step))
ax.set_xticklabels(city["date"].dt.strftime("%m-%d").iloc[::step].tolist(),
                   rotation=45, ha="right", fontsize=8)

ax.set_title("Citywide Daily Mean PM₂.₅ — colored by local guideline")

# Category legend (placed outside the axes, to the right). The grey used for
# masked cells gets its own entry so it is not mistaken for a category.
patches = [mpatches.Patch(color=c, label=l) for c, l in zip(COLORS, LABELS)]
patches.append(mpatches.Patch(color=NO_DATA_COLOR, label="No data"))
ax.legend(handles=patches, loc="upper left", bbox_to_anchor=(1.01, 1.0),
          fontsize=8, frameon=True)

out1 = os.path.join(OUT_DIR, "city_daily_mean_heatstrip_categorical.png")
# Use the figure's own methods so the save does not depend on which figure
# pyplot currently considers "active".
fig.tight_layout()
fig.savefig(out1, bbox_inches="tight")
plt.close(fig)

# ---------------- Plot 2: Station × Date heatmap (categorical)
# Rows are stations, columns are dates; each cell is coloured by category.
arr = sd_p.to_numpy(dtype=float)     # matrix with NaN for gaps
masked = ma.masked_invalid(arr)      # gaps are drawn with the colormap's 'bad' colour

# Auto figure height: BASE_HEIGHT_STN inch per station (min 3 inches)
h_inches = max(3.0, BASE_HEIGHT_STN * sd_p.shape[0])
fig2, ax2 = plt.subplots(figsize=(10, h_inches), dpi=FIG_DPI)

# interpolation="nearest": the data are categorical, so cells must never be
# blended with their neighbours or with adjacent gaps.
im2 = ax2.imshow(masked, aspect="auto", cmap=cmap, norm=norm,
                 interpolation="nearest")

# Y labels: all stations
ax2.set_yticks(range(len(sd_p.index)))
ax2.set_yticklabels(sd_p.index, fontsize=7)

# X labels: de-clutter (about 15 labels)
target_labels = 15
n_dates = len(sd_p.columns)
step = max(1, n_dates // target_labels)
date_labels = pd.to_datetime(sd_p.columns).strftime("%m-%d")
ax2.set_xticks(range(0, n_dates, step))
ax2.set_xticklabels(date_labels[::step], rotation=45, ha="right", fontsize=7)

ax2.set_title("PM₂.₅ by Station and Date — colored by local guideline")

# Category legend (placed outside the axes, to the right). The grey used for
# missing station-days gets its own entry so it is not mistaken for a category.
patches2 = [mpatches.Patch(color=c, label=l) for c, l in zip(COLORS, LABELS)]
patches2.append(mpatches.Patch(color=NO_DATA_COLOR, label="No data"))
ax2.legend(handles=patches2, loc="upper left", bbox_to_anchor=(1.01, 1.0),
           fontsize=8, frameon=True)

out2 = os.path.join(OUT_DIR, "station_x_date_heatmap_categorical.png")
# Use the figure's own methods so the save does not depend on which figure
# pyplot currently considers "active".
fig2.tight_layout()
fig2.savefig(out2, bbox_inches="tight")
plt.close(fig2)

print("Saved:")
print(" -", out1)
print(" -", out2)


Saved:
 - C:\Users\krish\Desktop\SpatialCARE\Outputs\figures\temporal_heatmaps_guideline\city_daily_mean_heatstrip_categorical.png
 - C:\Users\krish\Desktop\SpatialCARE\Outputs\figures\temporal_heatmaps_guideline\station_x_date_heatmap_categorical.png
