In [5]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

BASE_OUT = r"C:\Users\HP\Desktop\SpatialCARE\Outputs"
CSV_PRIMARY = os.path.join(BASE_OUT, "exceedance_local_summary.csv")  # from Notebook 7
CSV_ALT1    = os.path.join(BASE_OUT, "exceedance_summary.csv")        # older WHO version
CSV_ALT2    = os.path.join(BASE_OUT, "kriging_daily_metrics.csv")     # legacy

OUT_DIR = os.path.join(BASE_OUT, "figures", "summary")
os.makedirs(OUT_DIR, exist_ok=True)

FIG_DPI     = 150
FIG_SIZE_TS = (9, 3.2)    # timeline
FIG_SIZE_T5 = (7.6, 5.2)  # top-5 (slightly taller than before for headroom)
CMAP        = mpl.colormaps["RdBu_r"]  # redder = worse
VMIN, VMAX  = 0.0, 100.0

# ---- Load whichever CSV exists
if os.path.exists(CSV_PRIMARY):
    df = pd.read_csv(CSV_PRIMARY)
elif os.path.exists(CSV_ALT1):
    df = pd.read_csv(CSV_ALT1)
elif os.path.exists(CSV_ALT2):
    df = pd.read_csv(CSV_ALT2)
else:
    raise SystemExit("No exceedance CSV found. Run Notebook 7 first.")

# ---- Normalize columns
df.columns = [c.strip() for c in df.columns]

def pick(df_cols, name):
    for c in df_cols:
        if c.lower() == name.lower():
            return c
    return None

c_date = pick(df.columns, "date")
c_stn  = pick(df.columns, "stations")
c_p35  = pick(df.columns, "pct_gt35") or pick(df.columns, "pct_area_gt35")

if c_date is None or c_p35 is None:
    raise SystemExit("Required columns not found (need 'date' and a %>35 column).")

df[c_date] = pd.to_datetime(df[c_date], errors="coerce")
df[c_p35]  = pd.to_numeric(df[c_p35], errors="coerce")

if c_stn is None:
    df["stations"] = np.nan
    c_stn = "stations"
else:
    df[c_stn] = pd.to_numeric(df[c_stn], errors="coerce")

df = df.dropna(subset=[c_date, c_p35]).sort_values(c_date).reset_index(drop=True)

# ---- Save a cleaned copy
clean_csv = os.path.join(OUT_DIR, "exceedance_clean.csv")
df.to_csv(clean_csv, index=False)

# ======================
# Timeline (% area >35) 
# ======================
fig_ts, ax_ts = plt.subplots(figsize=FIG_SIZE_TS, dpi=FIG_DPI)
ax_ts.plot(df[c_date], df[c_p35], marker="o", linewidth=1.2)
ax_ts.set_title("% of Pasig Area > 35 µg/m³ (daily)")
ax_ts.set_ylabel("Percent of city (%)")
ax_ts.set_xlabel("Date")
ax_ts.set_ylim(0, max(5, min(100, float(np.nanmax(df[c_p35])) * 1.15)))
ax_ts.grid(alpha=0.3)

# x-axis: ~14 labels, rotated
target_labels = 14
step = max(1, len(df) // target_labels)
ax_ts.set_xticks(df[c_date].iloc[::step])
ax_ts.set_xticklabels(df[c_date].dt.strftime("%Y-%m-%d").iloc[::step],
                      rotation=45, ha="right", fontsize=8)

out_ts = os.path.join(OUT_DIR, "timeline_pct_gt35.png")
plt.tight_layout()
plt.savefig(out_ts, bbox_inches="tight")
plt.close(fig_ts)

# =========================
# Top-5 by % area > 35 µg/m³
# =========================
top5 = df.sort_values(c_p35, ascending=False).head(5).copy()
top5["date_str"] = top5[c_date].dt.strftime("%Y-%m-%d")
top5["pct_val"]  = top5[c_p35].round(1)
top5["pct_label"] = top5["pct_val"].astype(str) + "%"

norm = mpl.colors.Normalize(vmin=VMIN, vmax=VMAX)
bar_colors = [CMAP(norm(v)) for v in top5[c_p35]]

# Use constrained layout to help spacing + manual fine-tuning
fig_t5, ax_t5 = plt.subplots(figsize=FIG_SIZE_T5, dpi=FIG_DPI, constrained_layout=False)

bars = ax_t5.bar(top5["date_str"], top5[c_p35], color=bar_colors, edgecolor="black", linewidth=0.6)

# --- Headroom and margins to prevent overlap
max_val = float(np.nanmax(df[c_p35])) if len(df) else 0.0
ymax = max(5, min(100, max_val * 1.25))   # more headroom than before (1.25x)
ax_t5.set_ylim(0, ymax)
ax_t5.margins(y=0.10)  # add a bit of top margin within axes

ax_t5.set_ylabel("% of city above 35 µg/m³")

# Title and subtitle separated; extra pad to move title away from bars
main_title = "Top 5 Days by Exceedance Area (> 35 µg/m³)"
ax_t5.set_title(main_title, pad=14)

# Stations subtitle rendered as a suptitle line (smaller) above the plot area
if c_stn in top5.columns and top5[c_stn].notna().any():
    sub = "Stations used: " + ", ".join(top5[c_stn].fillna("-").astype(int).astype(str).tolist())
    fig_t5.suptitle(sub, y=0.99, fontsize=9)  # sits at very top, away from bar labels

# --- Smart value labels: inside tall bars; above short bars with cap
span = ax_t5.get_ylim()[1] - ax_t5.get_ylim()[0]
above_offset = 0.015 * span          # offset when placing above
inside_offset = 0.02 * span          # padding from top when placing inside
inside_cut = 0.85 * ymax             # threshold to decide inside vs above

for rect, pct, label in zip(bars, top5[c_p35], top5["pct_label"]):
    h = rect.get_height()
    x = rect.get_x() + rect.get_width() / 2.0
    if h >= inside_cut:
        # place inside bar near the top, with contrasting color
        y = h - inside_offset
        ax_t5.text(x, y, label, ha="center", va="top", fontsize=10,
                   color="white", fontweight="bold", clip_on=False)
    else:
        # place above bar but cap so it won't collide with the title
        y = min(h + above_offset, ymax * 0.92)
        ax_t5.text(x, y, label, ha="center", va="bottom", fontsize=10,
                   color="#222222", clip_on=False)

# Keep x labels readable
ax_t5.set_xticklabels(top5["date_str"], rotation=0)

# Compact colorbar (right), but make sure it doesn't crowd the title
cbar = plt.colorbar(mpl.cm.ScalarMappable(norm=norm, cmap=CMAP),
                    ax=ax_t5, fraction=0.04, pad=0.03)
cbar.set_label("Color scale reference (0–100%)")

# Final spacing: leave generous top margin for title + labels
plt.subplots_adjust(top=0.86, bottom=0.14)  # top lowered to create more space inside axes

out_t5 = os.path.join(OUT_DIR, "top5_area_gt35.png")
plt.savefig(out_t5, bbox_inches="tight")
plt.close(fig_t5)

# ---- Save the top-5 rows
top_csv = os.path.join(OUT_DIR, "top5_exceedance.csv")
cols_to_save = [c_date, c_p35, c_stn] if c_stn in top5.columns else [c_date, c_p35]
top5[cols_to_save].to_csv(top_csv, index=False)

print("Saved:")
print(" -", out_ts)
print(" -", out_t5)
print(" -", top_csv)
print(" -", clean_csv)

  ax_t5.set_xticklabels(top5["date_str"], rotation=0)


Saved:
 - C:\Users\HP\Desktop\SpatialCARE\Outputs\figures\summary\timeline_pct_gt35.png
 - C:\Users\HP\Desktop\SpatialCARE\Outputs\figures\summary\top5_area_gt35.png
 - C:\Users\HP\Desktop\SpatialCARE\Outputs\figures\summary\top5_exceedance.csv
 - C:\Users\HP\Desktop\SpatialCARE\Outputs\figures\summary\exceedance_clean.csv
