In [15]:
# ======================================
# Notebook H2: Hourly Spatial Maps (CSV + AQI colors + All stations)
# ======================================

import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from shapely.geometry import Point
import os

# ==============================
# User settings
# ==============================
# Input: the hourly CSV that is loaded with pd.read_csv further down.
DATA_FILE = r"C:\Users\krish\Desktop\SpatialCARE\Hourly\pasig_hourly_corrected.csv"

# Output folders: figures are written to OUT_FIG_DIR; OUT_MAP_DIR is a
# "spatial_maps" sub-folder inside it.
OUT_FIG_DIR = r"C:\Users\krish\Desktop\SpatialCARE\Hourly\Outputs\figures"
OUT_MAP_DIR = os.path.join(OUT_FIG_DIR, "spatial_maps")

# Create both output folders up front; exist_ok=True makes re-runs harmless.
for _out_dir in (OUT_FIG_DIR, OUT_MAP_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# ==============================
# Load CSV
# ==============================
df = pd.read_csv(DATA_FILE)

# Create datetime column from the separate "Date" and "Time" columns.
# errors="coerce" turns values that cannot be parsed into NaT instead of raising.
df["datetime"] = pd.to_datetime(
    df["Date"].astype(str) + " " + df["Time"].astype(str),
    errors="coerce"
)

# Coercion is silent, so report how many rows ended up without a timestamp;
# otherwise bad Date/Time values would go unnoticed in the plots below.
n_bad_datetime = int(df["datetime"].isna().sum())
if n_bad_datetime:
    print(f"Warning: {n_bad_datetime} rows have an unparseable Date/Time (datetime is NaT)")

df = df.sort_values("datetime")

# Convert to GeoDataFrame (EPSG:4326, x = longitude, y = latitude).
# points_from_xy builds the whole geometry column in one vectorised call
# instead of constructing one shapely Point per row in a Python loop.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["longitude"], df["latitude"]),
    crs="EPSG:4326"
)

print("Loaded rows:", len(gdf))
print("Stations:", gdf["location_name"].nunique())

# ==============================
# Define AQI categories and colors
# ==============================
# PM2.5 breakpoints. With right=True every bin is closed on its upper edge:
# (0, 25], (25, 35], (35, 45], (45, 55], (55, 90], (90, inf).
aqi_bins = [0, 25, 35, 45, 55, 90, float("inf")]

# One label per bin, in the same order as the bins above.
aqi_labels = [
    "Good",
    "Fair",
    "Unhealthy (Sensitive)",
    "Very Unhealthy",
    "Acutely Unhealthy",
    "Emergency"
]

# Display colour (hex RGB) for each category label.
aqi_colors = {
    "Good": "#00E400",              # Green
    "Fair": "#FFFF00",              # Yellow
    "Unhealthy (Sensitive)": "#FF7E00", # Orange
    "Very Unhealthy": "#FF0000",    # Red
    "Acutely Unhealthy": "#8F3F97", # Purple
    "Emergency": "#7E0023"          # Maroon
}

# Assign AQI category.
# include_lowest=True also closes the first bin on the left, so a reading of
# exactly 0 is labelled "Good" instead of falling outside every bin (NaN).
# Negative or missing readings still map to NaN.
gdf["AQI_Category"] = pd.cut(
    gdf["pm25"],
    bins=aqi_bins,
    labels=aqi_labels,
    right=True,
    include_lowest=True,
)

# ==============================
# Plot 1: Timeseries for ALL stations
# ==============================
# Station name -> file-name fragment, applied in a single translate() pass.
# Spaces become "_" and square brackets are dropped (same result as before);
# in addition, characters that Windows does not allow in file names are
# replaced with "_" so that such a station name cannot make savefig fail.
_FILENAME_TABLE = str.maketrans({
    " ": "_", "[": None, "]": None,
    "<": "_", ">": "_", ":": "_", '"': "_",
    "/": "_", "\\": "_", "|": "_", "?": "_", "*": "_",
})

# Rows without a station name are skipped: they cannot be matched by an
# equality filter and a NaN name has no .replace()/.translate() method.
stations = gdf["location_name"].dropna().unique()

print(f"Generating timeseries plots for {len(stations)} stations...")

# A single groupby pass replaces one full-frame boolean filter per station.
# sort=False keeps stations in order of first appearance, like unique().
for station, sub in gdf.groupby("location_name", sort=False):
    fig_ts, ax_ts = plt.subplots(figsize=(14, 5))
    ax_ts.plot(sub["datetime"], sub["pm25"], marker="o", linestyle="-", markersize=2)
    ax_ts.set_title(f"PM2.5 Hourly Timeseries — {station}")
    ax_ts.set_ylabel("PM2.5 (µg/m³)")
    ax_ts.set_xlabel("Date")
    ax_ts.tick_params(axis="x", labelrotation=45)
    fig_ts.tight_layout()

    out_path = os.path.join(
        OUT_FIG_DIR,
        f"timeseries_{str(station).translate(_FILENAME_TABLE)}.png"
    )
    fig_ts.savefig(out_path)
    # Close explicitly so figures do not accumulate in memory across stations.
    plt.close(fig_ts)

print("✅ Timeseries plots saved for all stations.")

# ==============================
# Plot 2: Diurnal pattern (average across all stations)
# ==============================
# Hour of day taken from each row's timestamp; stored as a new gdf column.
gdf["hour"] = gdf["datetime"].dt.hour

# Mean PM2.5 per hour of day, pooling the rows of every station together.
diurnal = gdf.groupby("hour", as_index=False)["pm25"].mean()

fig_diurnal, ax_diurnal = plt.subplots(figsize=(10, 4))
sns.lineplot(data=diurnal, x="hour", y="pm25", marker="o", ax=ax_diurnal)
ax_diurnal.set_title("Average Diurnal PM2.5 across all stations")
ax_diurnal.set_ylabel("PM2.5 (µg/m³)")
ax_diurnal.set_xlabel("Hour of Day")
ax_diurnal.grid(True)
fig_diurnal.tight_layout()
fig_diurnal.savefig(os.path.join(OUT_FIG_DIR, "diurnal_pattern.png"))
plt.close(fig_diurnal)

print("✅ Diurnal pattern saved.")

# ======================================
# 04_AQI_Temporal_Heatmaps_Hourly.ipynb
# ======================================

import os, numpy as np, numpy.ma as ma
import pandas as pd, geopandas as gpd
import matplotlib.pyplot as plt, matplotlib as mpl
import matplotlib.patches as mpatches

# ---------------- Paths
# Input CSV and the folder that receives both heatmap PNGs.
DATA_FILE = r"C:\Users\krish\Desktop\SpatialCARE\Hourly\pasig_hourly_corrected.csv"
OUT_DIR = r"C:\Users\krish\Desktop\SpatialCARE\Hourly\Outputs\figures\temporal_heatmaps_hourly"

# Make sure the output folder exists before anything is saved into it.
os.makedirs(OUT_DIR, exist_ok=True)

# ---------------- Figure settings
# Resolution passed as dpi= when the figures are created.
FIG_DPI = 150
# (width, height) in inches of the city-mean strip figure.
FIG_SIZE_STRIP = (12, 2.8)
# Inches of figure height per station row (height is computed from this).
BASE_HEIGHT_STN = 0.28

# ---------------- AQI bins + colors
# PM2.5 category edges. The labels below treat every category as closed on
# its UPPER edge (e.g. 25.0 is still "Good", 35.0 is still "Fair").
BINS    = [0.0, 25.0, 35.0, 45.0, 55.0, 90.0, 1e9]   # safe high bound
LABELS  = [
    "Good (0–25.0)",
    "Fair (25.1–35.0)",
    "Unhealthy (sensitive) (35.1–45.0)",
    "Very unhealthy (45.1–55.0)",
    "Acutely unhealthy (55.1–90.0)",
    "Emergency (≥91)"
]
# One colour per category, in the same order as LABELS.
COLORS  = ["#00E400","#FFFF00","#FF7E00","#FF0000","#8F3F97","#7E0023"]
# Colour used for masked (missing) cells.
NO_DATA_COLOR = "#e0e0e0"

cmap = mpl.colors.ListedColormap(COLORS)
cmap.set_bad(NO_DATA_COLOR)

# BoundaryNorm assigns a value to the bin whose LOWER edge it reaches
# (edge <= x < next edge), so passing BINS unchanged would colour a reading of
# exactly 25.0 as "Fair" although the label says "Good (0–25.0)".
# Moving each interior edge up by one floating-point step makes the bins
# closed on the upper edge instead, matching the labels; the outer edges stay
# as they are. BINS itself is left untouched for any other user of it.
_norm_edges = (
    [BINS[0]]
    + [float(np.nextafter(edge, np.inf)) for edge in BINS[1:-1]]
    + [BINS[-1]]
)
norm = mpl.colors.BoundaryNorm(_norm_edges, ncolors=len(COLORS), clip=False)

# ---------------- Load hourly CSV
df = pd.read_csv(DATA_FILE)

# Timestamp per row from the "Date" and "Time" columns; values that cannot be
# parsed become NaT (errors="coerce").
timestamp_text = df["Date"].astype(str) + " " + df["Time"].astype(str)
df["datetime"] = pd.to_datetime(timestamp_text, errors="coerce")

# PM2.5 as numbers: non-numeric entries become NaN, negatives are raised to 0.
pm25_numeric = pd.to_numeric(df["pm25"], errors="coerce")
df["pm25"] = pm25_numeric.clip(lower=0)

# City mean: average of all rows sharing a timestamp, as columns
# "datetime" and "city_mean".
city = df.groupby("datetime")["pm25"].mean().rename("city_mean").reset_index()

# Station-hour records, with the station column renamed to "station".
sd = df[["datetime", "location_name", "pm25"]].rename(
    columns={"location_name": "station"}
)

# Station (rows) × datetime (columns) table of mean PM2.5,
# with stations and timestamps both sorted ascending.
sd_p = sd.pivot_table(
    index="station",
    columns="datetime",
    values="pm25",
    aggfunc="mean",
)
sd_p = sd_p.sort_index(axis=0)
sd_p = sd_p.sort_index(axis=1)

# ---------------- Plot 1: City mean heatstrip
# 1 × N image with one column per timestamp in `city`; NaN means are masked so
# they are drawn in the colormap's "bad" (no-data) colour.
strip_data = city["city_mean"].to_numpy()
strip_img  = strip_data[None, :]         # 1 × N array
strip_mask = ma.masked_invalid(strip_img)

fig, ax = plt.subplots(figsize=FIG_SIZE_STRIP, dpi=FIG_DPI)
# interpolation="nearest": every pixel shows the colour of one real cell.
# Left to the default, Matplotlib may smooth the image when it is resampled to
# screen pixels, which can blend neighbouring cells into values/colours that
# belong to no cell and to no legend entry.
im = ax.imshow(strip_mask, aspect="auto", cmap=cmap, norm=norm,
               interpolation="nearest")
ax.set_yticks([])

# X axis tick spacing (at most target_labels labels).
# Ceiling division: floor division would give step=1 for e.g. 39 timestamps
# and therefore 39 labels, almost twice the target.
target_labels = 20
step = max(1, -(-len(city) // target_labels))
ax.set_xticks(range(0, len(city), step))
ax.set_xticklabels(city["datetime"].dt.strftime("%m-%d %H:%M")[::step],
                   rotation=45, ha="right", fontsize=7)

ax.set_title("Citywide Hourly Mean PM₂.₅ — AQI colored")

# Legend: one patch per AQI category plus one explaining the no-data colour.
patches = [mpatches.Patch(color=c, label=l) for c, l in zip(COLORS, LABELS)]
patches.append(mpatches.Patch(color=NO_DATA_COLOR, label="No data"))
ax.legend(handles=patches, loc="upper left", bbox_to_anchor=(1.01, 1.0),
          fontsize=8, frameon=True)

# Save through the figure object rather than pyplot's "current figure" state.
out1 = os.path.join(OUT_DIR, "city_hourly_mean_heatstrip_AQI.png")
fig.tight_layout()
fig.savefig(out1, bbox_inches="tight")
plt.close(fig)

# ---------------- Plot 2: Station × Hour heatmap
# Rows = stations, columns = timestamps (the pivot table `sd_p`). Cells without
# a reading are NaN and are masked so they are drawn in the no-data colour.
arr = sd_p.to_numpy(dtype=float)
masked = ma.masked_invalid(arr)

# Auto height scaling: BASE_HEIGHT_STN inches per station, never below 3 in.
h_inches = max(3.0, BASE_HEIGHT_STN * sd_p.shape[0])
fig2, ax2 = plt.subplots(figsize=(12, h_inches), dpi=FIG_DPI)

# interpolation="nearest": every pixel shows the colour of one real cell.
# Left to the default, Matplotlib may smooth the image when it is resampled to
# screen pixels, which can blend neighbouring hours/stations into colours that
# belong to no cell and to no legend entry.
im2 = ax2.imshow(masked, aspect="auto", cmap=cmap, norm=norm,
                 interpolation="nearest")

# Y labels = stations
ax2.set_yticks(range(len(sd_p.index)))
ax2.set_yticklabels(sd_p.index, fontsize=7)

# X labels (at most target_labels labels; target_labels is set in the
# heatstrip section above). Ceiling division keeps the count at or below the
# target, where floor division could produce almost twice as many.
n_times = len(sd_p.columns)
step = max(1, -(-n_times // target_labels))
time_labels = pd.to_datetime(sd_p.columns).strftime("%m-%d %H:%M")
ax2.set_xticks(range(0, n_times, step))
ax2.set_xticklabels(time_labels[::step], rotation=45, ha="right", fontsize=6)

ax2.set_title("PM₂.₅ by Station and Hour — AQI colored")

# Legend: one patch per AQI category plus one explaining the no-data colour.
patches2 = [mpatches.Patch(color=c, label=l) for c, l in zip(COLORS, LABELS)]
patches2.append(mpatches.Patch(color=NO_DATA_COLOR, label="No data"))
ax2.legend(handles=patches2, loc="upper left", bbox_to_anchor=(1.01, 1.0),
           fontsize=8, frameon=True)

# Save through the figure object rather than pyplot's "current figure" state.
out2 = os.path.join(OUT_DIR, "station_x_hour_heatmap_AQI.png")
fig2.tight_layout()
fig2.savefig(out2, bbox_inches="tight")
plt.close(fig2)

print("Saved:")
print(" -", out1)
print(" -", out2)


Loaded rows: 36673
Stations: 12
Generating timeseries plots for 12 stations...
✅ Timeseries plots saved for all stations.
✅ Diurnal pattern saved.
Saved:
 - C:\Users\krish\Desktop\SpatialCARE\Hourly\Outputs\figures\temporal_heatmaps_hourly\city_hourly_mean_heatstrip_AQI.png
 - C:\Users\krish\Desktop\SpatialCARE\Hourly\Outputs\figures\temporal_heatmaps_hourly\station_x_hour_heatmap_AQI.png
