In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

# ---- Paths
# Root output folder; the three candidate summary CSVs below are looked up inside it.
BASE_OUT = r"C:\Users\krish\Desktop\SpatialCARE\Outputs"
CSV_PRIMARY = os.path.join(BASE_OUT, "exceedance_local_summary.csv")  # from Notebook 7
CSV_ALT1    = os.path.join(BASE_OUT, "exceedance_summary.csv")        # older WHO version
CSV_ALT2    = os.path.join(BASE_OUT, "kriging_daily_metrics.csv")     # legacy

# Every figure/CSV produced by this cell is written here (created on demand).
OUT_DIR = os.path.join(BASE_OUT, "figures", "summary")
os.makedirs(OUT_DIR, exist_ok=True)

# ---- Figure settings
FIG_DPI     = 150
FIG_SIZE_TS = (9, 3.2)   # timeline
FIG_SIZE_T5 = (7, 5.0)   # top-5
# "RdBu" runs red (low) -> blue (high); the reversed map "RdBu_r" is needed so that
# high exceedance percentages are drawn red, i.e. redder = worse.
CMAP        = mpl.colormaps["RdBu_r"]
VMIN, VMAX  = 0.0, 100.0  # colour-scale limits, in percent

# ---- Load whichever CSV exists
# Candidates are probed in priority order and the first file found wins:
#   CSV_PRIMARY expected: date, stations, pct_gt25, pct_gt35, pct_gt55, pct_ge91, pct_good, ..., total_area_km2
#   CSV_ALT1    expected: date, stations, pct_area_gt15, pct_area_gt35, total_area_km2
#   CSV_ALT2    legacy file
for csv_candidate in (CSV_PRIMARY, CSV_ALT1, CSV_ALT2):
    if os.path.exists(csv_candidate):
        df = pd.read_csv(csv_candidate)
        break
else:
    # Loop finished without a break: none of the candidate files exist.
    raise SystemExit("No exceedance CSV found. Run Notebook 7 first.")

# ---- Normalize columns
# Drop stray leading/trailing whitespace from the header names.
df.columns = list(map(str.strip, df.columns))

def pick(df_cols, name):
    """Return the first entry of ``df_cols`` that equals ``name`` ignoring case.

    The entry is returned with its original spelling; ``None`` is returned
    when nothing matches.
    """
    return next((col for col in df_cols if col.lower() == name.lower()), None)

# Resolve the actual column names (case-insensitive). The %>35 column may be
# called either "pct_gt35" or "pct_area_gt35".
c_date = pick(df.columns, "date")
c_stn  = pick(df.columns, "stations")
c_p35  = pick(df.columns, "pct_gt35") or pick(df.columns, "pct_area_gt35")

if c_date is None or c_p35 is None:
    raise SystemExit("Required columns not found (need 'date' and a %>35 column).")

# errors="coerce" turns unparseable entries into NaT/NaN; those rows are dropped below.
df[c_date] = pd.to_datetime(df[c_date], errors="coerce")
df[c_p35]  = pd.to_numeric(df[c_p35], errors="coerce")

if c_stn is None:
    # No station-count column in this file: add an all-NaN one so later code
    # can always refer to c_stn.
    df["stations"] = np.nan
    c_stn = "stations"
else:
    df[c_stn] = pd.to_numeric(df[c_stn], errors="coerce")

# Keep only rows with a valid date and percentage, in chronological order.
df = df.dropna(subset=[c_date, c_p35]).sort_values(c_date).reset_index(drop=True)

# Stop here with a clear message instead of failing later inside np.nanmax
# on an empty column ("zero-size array").
if df.empty:
    raise SystemExit("No valid rows left after cleaning (every 'date' or %>35 value was missing or unparseable).")

# ---- Save a cleaned copy
clean_csv = os.path.join(OUT_DIR, "exceedance_clean.csv")
df.to_csv(clean_csv, index=False)

# ---- Timeline (% area >35 µg/m³) with decluttered x-axis
fig_ts, ax_ts = plt.subplots(figsize=FIG_SIZE_TS, dpi=FIG_DPI)
ax_ts.plot(df[c_date], df[c_p35], marker="o", linewidth=1.2)
ax_ts.set(
    title="% of Pasig Area > 35 µg/m³ (daily)",
    xlabel="Date",
    ylabel="Percent of city (%)",
)

# y-axis top: 15% headroom above the peak, never below 5 and never above 100.
ts_peak = float(np.nanmax(df[c_p35]))
ts_ymax = max(5, min(100, ts_peak * 1.15))
ax_ts.set_ylim(0, ts_ymax)
ax_ts.grid(alpha=0.3)

# x-axis: label every `step`-th date (step derived from a target of 14 labels), rotated
target_labels = 14
step = max(1, len(df) // target_labels)
tick_dates = df[c_date].iloc[::step]
ax_ts.set_xticks(tick_dates)
ax_ts.set_xticklabels(
    tick_dates.dt.strftime("%Y-%m-%d"),
    rotation=45,
    ha="right",
    fontsize=8,
)

out_ts = os.path.join(OUT_DIR, "timeline_pct_gt35.png")
fig_ts.tight_layout()
fig_ts.savefig(out_ts)
plt.close(fig_ts)  # release the figure once it is on disk

# ---- Top-5 by % area >35
# The five rows with the largest %>35 value, highest first.
top5 = df.sort_values(c_p35, ascending=False).head(5).copy()
top5["date_str"] = top5[c_date].dt.strftime("%Y-%m-%d")
top5["pct_label"] = top5[c_p35].round(1).astype(str) + "%"

# Bar colour is the bar's value mapped onto the fixed VMIN..VMAX scale.
norm = mpl.colors.Normalize(vmin=VMIN, vmax=VMAX)
bar_colors = [CMAP(norm(v)) for v in top5[c_p35]]

fig_t5, ax_t5 = plt.subplots(figsize=FIG_SIZE_T5, dpi=FIG_DPI)
bars = ax_t5.bar(top5["date_str"], top5[c_p35], color=bar_colors, edgecolor="black", linewidth=0.6)
ax_t5.set_ylim(0, max(5, min(100, float(np.nanmax(df[c_p35]))*1.15)))
ax_t5.set_ylabel("% of city above 35 µg/m³")
ax_t5.set_title("Top 5 Days by Exceedance Area (> 35 µg/m³)")

# Value label just above each bar.
for rect, label in zip(bars, top5["pct_label"]):
    ax_t5.text(rect.get_x() + rect.get_width()/2, rect.get_height() + 1,
               label, ha="center", va="bottom", fontsize=10)

# station counts in subtitle (if present)
if top5[c_stn].notna().any():
    # Format each count on its own: a missing count is shown as "-".
    # (Filling NaN with "-" and then casting the whole column to int raises
    # ValueError as soon as one of the five counts is missing.)
    station_labels = [str(int(v)) if pd.notna(v) else "-" for v in top5[c_stn]]
    sub = "Stations used: " + ", ".join(station_labels)
    ax_t5.text(0.01, 1.02, sub, transform=ax_t5.transAxes, fontsize=9)

# add a small colorbar to show the 0–100 reference
cbar = plt.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=CMAP), ax=ax_t5, fraction=0.04, pad=0.02)
cbar.set_label("Color scale reference (0–100%)")

out_t5 = os.path.join(OUT_DIR, "top5_area_gt35.png")
plt.tight_layout(); plt.savefig(out_t5); plt.close(fig_t5)

# ---- Save the top-5 rows
top_csv = os.path.join(OUT_DIR, "top5_exceedance.csv")

# Date and percentage are always exported; the station column only if top5 has it.
cols_to_save = [c_date, c_p35]
if c_stn in top5.columns:
    cols_to_save.append(c_stn)
top5[cols_to_save].to_csv(top_csv, index=False)

# Report every file written by this cell.
print("Saved:")
for saved_path in (out_ts, out_t5, top_csv, clean_csv):
    print(" -", saved_path)


Saved:
 - C:\Users\krish\Desktop\SpatialCARE\Outputs\figures\summary\timeline_pct_gt35.png
 - C:\Users\krish\Desktop\SpatialCARE\Outputs\figures\summary\top5_area_gt35.png
 - C:\Users\krish\Desktop\SpatialCARE\Outputs\figures\summary\top5_exceedance.csv
 - C:\Users\krish\Desktop\SpatialCARE\Outputs\figures\summary\exceedance_clean.csv
