### Make an animated hourly PM2.5 map over Pasig from your hourly CSV.

In [7]:
# animated_hourly_pm25_map_progress.py
# MP4 preferred (needs ffmpeg). Falls back to GIF. Shows % progress while saving.

import os, re, shutil
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation, FFMpegWriter, PillowWriter
from matplotlib.colors import Normalize

# ---------------- PATHS ----------------
# Input table of PM2.5 readings. It must contain latitude/longitude columns, a
# PM2.5 column and a datetime (or date [+ time]) column; accepted header names
# are listed in the *_CANDS options.
CSV_PATH  = r"C:\Users\HP\Desktop\SpatialCARE\Hourly\pasig_hourly_corrected.csv"
# Vector layer drawn as the outline; its bounding box also sets the map extent.
PASIG_SHP = r"C:\Users\HP\Desktop\SpatialCARE\Pasig\Pasig.shp"
# Folder that receives the rendered animation; created up front if missing.
OUT_DIR   = r"C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\animated"
os.makedirs(OUT_DIR, exist_ok=True)
# Only one of these is written per run: the MP4 when ffmpeg is available,
# otherwise the GIF.
OUT_MP4   = os.path.join(OUT_DIR, "animated_hourly_PM25.mp4")
OUT_GIF   = os.path.join(OUT_DIR, "animated_hourly_PM25.gif")

# ---------------- USER OPTIONS ----------------
# Accepted CSV header spellings for each logical field. Matching is exact
# (case-sensitive) and the first entry found in the CSV wins.
STATION_CANDS = ["stations","station","Station","STATION","location_name","Location","site","Site"]
PM_CANDS      = ["pm25","PM25","PM_25","PM2_5","PM2.5"]
LAT_CANDS     = ["latitude","lat","LAT","y","Y","Lat"]
LON_CANDS     = ["longitude","lon","LONG","x","X","Lon"]
# A combined datetime column is preferred; a date column (optionally joined
# with a time column) is the fallback.
DT_CANDS      = ["datetime","date_time","DateTime","DATETIME","timestamp","Timestamp"]
DATE_CANDS    = ["date","Date","DATE"]
TIME_CANDS    = ["time","Time","TIME","hour","Hour","HOUR","HH"]

# Optional bounds on the rows that are animated; None (or "") disables a bound.
# Values are parsed with pd.to_datetime.
DATE_START = None   # e.g., "2025-01-01"
DATE_END   = None   # e.g., "2025-01-07"

FIGSIZE      = (7.0, 7.0)   # passed to plt.subplots(figsize=...)
DPI          = 150          # passed to plt.subplots(dpi=...)
POINT_SIZE   = 55           # scatter marker size (`s`)
POINT_EDGE   = 0.6          # line width of the white marker outline
CMAP         = "viridis"    # colormap shared by the points and the IDW layer
FPS          = 5            # MP4 frame rate; the GIF fallback uses max(1, FPS//2)
INTERVAL_MS  = 200          # FuncAnimation `interval` argument
VMIN_VMAX_PADDING = 0.05    # fraction of the data range added at each end of the colour scale

DO_IDW    = False           # True: draw an inverse-distance-weighted surface under the points
GRID_RES  = 250             # IDW grid step, in units of the projected CRS (EPSG:32651)
IDW_POWER = 2.0             # exponent applied to distance in the IDW weights

# ---------- Helpers ----------
def pick(cols, cands):
    """Return the first entry of *cands* that is contained in *cols*.

    Candidates are tested in the order given, so earlier entries take
    priority. Returns ``None`` when no candidate is present.
    """
    return next((name for name in cands if name in cols), None)

def to_datetime(df):
    """Build a timestamp Series for *df* from whichever time columns it has.

    Resolution order:
      1. a combined datetime column (names in ``DT_CANDS``);
      2. a date column joined with a time column by a single space;
      3. a date column on its own.

    Values that cannot be parsed become ``NaT`` (``errors="coerce"``).

    Raises:
        SystemExit: if there is neither a datetime column nor a date column.
    """
    columns = df.columns

    stamp_col = pick(columns, DT_CANDS)
    if stamp_col is not None:
        return pd.to_datetime(df[stamp_col], errors="coerce")

    date_col = pick(columns, DATE_CANDS)
    if date_col is None:
        # A time column alone is not enough to place a reading in time.
        raise SystemExit("No datetime/date/time columns found in CSV.")

    time_col = pick(columns, TIME_CANDS)
    if time_col is None:
        return pd.to_datetime(df[date_col], errors="coerce")

    combined = df[date_col].astype(str) + " " + df[time_col].astype(str)
    return pd.to_datetime(combined, errors="coerce")

def make_idw_surface(xy, z, grid_x, grid_y, power=2.0):
    """Interpolate scattered values onto a grid by inverse-distance weighting.

    Args:
        xy: array of sample coordinates, one ``(x, y)`` row per sample.
        z: 1-D array of sample values, aligned with the rows of *xy*.
        grid_x, grid_y: coordinate arrays of identical shape (e.g. from
            ``np.meshgrid``) giving the nodes to evaluate.
        power: exponent applied to the distance when forming the weights.

    Returns:
        Array shaped like *grid_x* holding the weighted mean of *z* at each
        node, with weights ``1 / distance**power``.
    """
    tiny = 1e-12  # keeps the weight finite when a node coincides with a sample
    nodes_x = grid_x.ravel()
    nodes_y = grid_y.ravel()

    # (n_nodes, n_samples) offsets from every grid node to every sample.
    off_x = np.subtract.outer(nodes_x, xy[:, 0])
    off_y = np.subtract.outer(nodes_y, xy[:, 1])
    separation = np.sqrt(off_x * off_x + off_y * off_y) + tiny

    weights = 1.0 / (separation ** power)
    weighted_total = (weights * z[None, :]).sum(axis=1)
    surface = weighted_total / weights.sum(axis=1)
    return surface.reshape(grid_x.shape)

def progress_cb(i, n):
    """Print a one-line, carriage-return-updated progress indicator.

    Intended as the ``progress_callback`` of ``Animation.save``.

    Args:
        i: zero-based index of the frame that was just rendered.
        n: total number of frames. May be ``None`` or 0 when the total is not
            known; in that case only the running count is shown, because a
            percentage cannot be computed.
    """
    done = i + 1
    if n:
        pct = done * 100.0 / n
        msg = f"\rRendering frames: {done}/{n} ({pct:5.1f}%)"
    else:
        # Unknown total: avoid dividing by None/0 and aborting the save.
        msg = f"\rRendering frames: {done}"
    print(msg, end="", flush=True)

# ---------- Load data ----------
# Read the CSV, resolve the actual header names, and build a tidy frame with
# fixed column names (station, datetime, lat, lon, pm25). Rows missing any of
# datetime/lat/lon/pm25 are dropped; negative PM2.5 values are clipped to 0.
if not os.path.exists(CSV_PATH):
    raise SystemExit(f"CSV not found: {CSV_PATH}")
raw = pd.read_csv(CSV_PATH)

pm_col = pick(raw.columns, PM_CANDS)
if pm_col is None:
    raise SystemExit("No PM2.5 column found.")
lat_col = pick(raw.columns, LAT_CANDS)
lon_col = pick(raw.columns, LON_CANDS)
if lat_col is None or lon_col is None:
    raise SystemExit("CSV must include latitude and longitude columns for stations.")
# No station column: fall back to a single placeholder label for every row.
st_col = pick(raw.columns, STATION_CANDS) or "station"
if st_col not in raw.columns:
    raw[st_col] = "Station"

dt = to_datetime(raw)
df = pd.DataFrame({
    "station": raw[st_col].astype(str),
    "datetime": dt,
    "lat": pd.to_numeric(raw[lat_col], errors="coerce"),
    "lon": pd.to_numeric(raw[lon_col], errors="coerce"),
    "pm25": pd.to_numeric(raw[pm_col], errors="coerce").clip(lower=0)
}).dropna(subset=["datetime","lat","lon","pm25"])

# Date filter (optional)
if DATE_START:
    df = df[df["datetime"] >= pd.to_datetime(DATE_START)]
if DATE_END:
    end_ts = pd.to_datetime(DATE_END)
    if end_ts == end_ts.normalize():
        # A bound that falls on midnight (what a date-only string such as
        # "2025-01-07" parses to) is treated as "through the end of that day".
        # A plain `<=` would keep only readings stamped exactly 00:00:00 and
        # silently drop the rest of the final day.
        df = df[df["datetime"] < end_ts + pd.Timedelta(days=1)]
    else:
        df = df[df["datetime"] <= end_ts]
if df.empty:
    raise SystemExit("No rows after cleaning/date filtering.")

# Round to the hour (use 'h' to avoid deprecation)
df["datetime"] = df["datetime"].dt.floor("h")

# ---------- Pasig boundary ----------
# Everything is drawn in EPSG:32651 so that distances (padding, IDW grid step)
# are expressed in projected units rather than degrees.
g_pasig = gpd.read_file(PASIG_SHP)
if g_pasig.crs is None:
    # No CRS recorded in the file: assume geographic WGS 84 coordinates.
    g_pasig = g_pasig.set_crs(4326)
g_pasig = g_pasig.to_crs(32651)

# ---------- Geo pts ----------
# Station readings as points: built from lon/lat (EPSG:4326), then reprojected.
g_pts = gpd.GeoDataFrame(df.copy(),
                         geometry=gpd.points_from_xy(df["lon"], df["lat"]),
                         crs=4326).to_crs(32651)

# Map window: boundary bounding box grown by a fixed margin on every side.
xmin, ymin, xmax, ymax = g_pasig.total_bounds
pad = 300
xmin, ymin, xmax, ymax = xmin - pad, ymin - pad, xmax + pad, ymax + pad

# One colour scale for the whole animation so frames are comparable.
vmin = g_pts["pm25"].min()
vmax = g_pts["pm25"].max()
vrange = vmax - vmin
if not np.isfinite(vrange) or vrange <= 0:
    # All readings identical (or a non-finite extreme): a zero-width range
    # would make the padding a no-op and hand Normalize vmin == vmax.
    vrange = 1.0
vmin = vmin - VMIN_VMAX_PADDING * vrange
vmax = vmax + VMIN_VMAX_PADDING * vrange
norm = Normalize(vmin=vmin, vmax=vmax)

# One animation frame per distinct (hour-floored) timestamp, in time order.
times = np.sort(g_pts["datetime"].unique())

if DO_IDW:
    # Regular grid of nodes covering the map window for the IDW surface.
    xs = np.arange(xmin, xmax, GRID_RES)
    ys = np.arange(ymin, ymax, GRID_RES)
    grid_x, grid_y = np.meshgrid(xs, ys)

# ---------- Figure ----------
# Static parts of the plot (outline, axes window, colour bar) plus the artists
# that the per-frame callback mutates: `sc` (points) and `idw_artist` (surface).
fig, ax = plt.subplots(figsize=FIGSIZE, dpi=DPI)
ax.set_aspect("equal")
g_pasig.plot(ax=ax, facecolor="none", edgecolor="#111111", linewidth=1.0)
ax.set_xlim(xmin, xmax); ax.set_ylim(ymin, ymax)
ax.set_title("")

# Seed the scatter with the first hour so the artist and colour bar exist.
first_t = times[0]
sub0 = g_pts[g_pts["datetime"] == first_t]
sc = ax.scatter(sub0.geometry.x, sub0.geometry.y,
                c=sub0["pm25"], s=POINT_SIZE, cmap=CMAP, norm=norm,
                edgecolors="white", linewidths=POINT_EDGE, zorder=3)

idw_artist = None
if DO_IDW:
    if len(sub0) >= 3:
        xy0 = np.c_[sub0.geometry.x.values, sub0.geometry.y.values]
        z0  = sub0["pm25"].values
        zi0 = make_idw_surface(xy0, z0, grid_x, grid_y, power=IDW_POWER)
        alpha0 = 0.75
    else:
        # Too few stations in the first hour: still create the image (blank
        # and fully transparent) so later frames have an artist to update.
        # Leaving it as None would disable the surface for the whole run.
        zi0 = np.full(grid_x.shape, np.nan)
        alpha0 = 0.0
    # `extent` gives the outer pixel edges, while xs/ys are the node positions
    # where values were evaluated; widen by half a cell so each pixel is
    # centred on its node.
    half = GRID_RES / 2.0
    idw_artist = ax.imshow(zi0,
                           extent=[xs.min() - half, xs.max() + half,
                                   ys.min() - half, ys.max() + half],
                           origin="lower", cmap=CMAP, norm=norm, alpha=alpha0, zorder=1)

cbar = fig.colorbar(sc, ax=ax, shrink=0.8, pad=0.02)
cbar.set_label("PM₂.₅ (µg/m³)")

def title_for(t):
    """Return the two-line plot title for timestamp *t*.

    *t* is converted with ``pd.to_datetime``; the second line shows its date
    and hour, with the minutes always printed as ``00``.
    """
    stamp = pd.to_datetime(t)
    hour_label = stamp.strftime("%Y-%m-%d %H:00")
    return "Hourly PM₂.₅ — Pasig City\n" + hour_label

def update(i):
    """Redraw the animated artists for frame *i*.

    Selects the rows of ``g_pts`` whose timestamp equals ``times[i]``, moves
    and recolours the scatter points, refreshes the title, and, when
    ``DO_IDW`` is on and an image artist exists, either replaces the surface
    data (3 or more points) or hides the surface (fewer than 3 points).

    Returns:
        A one-element tuple containing the scatter artist.
    """
    frame_time = times[i]
    frame = g_pts[g_pts["datetime"] == frame_time]
    coords = np.c_[frame.geometry.x.values, frame.geometry.y.values]
    values = frame["pm25"].values

    sc.set_offsets(coords)
    sc.set_array(values)

    if DO_IDW and idw_artist is not None:
        if len(frame) < 3:
            # Not enough stations for a surface in this hour: hide the layer.
            idw_artist.set_alpha(0.0)
        else:
            surface = make_idw_surface(coords, values, grid_x, grid_y, power=IDW_POWER)
            idw_artist.set_data(surface)
            idw_artist.set_alpha(0.75)

    ax.set_title(title_for(frame_time))
    return (sc,)

# Keep animation alive
# (the FuncAnimation object must stay referenced until saving has finished)
anim = FuncAnimation(fig, update, frames=len(times), interval=INTERVAL_MS, blit=False)

# ---------- Save with progress ----------
# Ask the writer class itself whether its executable can be found. This checks
# the binary matplotlib will actually run (rcParams["animation.ffmpeg_path"]),
# which a bare PATH lookup for "ffmpeg" can disagree with.
has_ffmpeg = FFMpegWriter.isAvailable()

try:
    if has_ffmpeg:
        print("Saving MP4 with ffmpeg ...")
        writer = FFMpegWriter(fps=FPS, bitrate=1800)
        anim.save(OUT_MP4, writer=writer, progress_callback=progress_cb)
        print("\nSaved:", OUT_MP4)
    else:
        print("[info] ffmpeg not found — falling back to GIF.")
        # The GIF is written at half the MP4 frame rate (never below 1 fps).
        anim.save(OUT_GIF, writer=PillowWriter(fps=max(1, FPS//2)), progress_callback=progress_cb)
        print("\nSaved:", OUT_GIF)
finally:
    # Release the figure even if saving fails, so repeated runs (e.g. in a
    # notebook) do not accumulate open figures.
    plt.close(fig)

[info] ffmpeg not found — falling back to GIF.
Rendering frames: 5047/5047 (100.0%)
Saved: C:\Users\HP\Desktop\SpatialCARE\Hourly\HourlyOutputs\animated\animated_hourly_PM25.gif
