Figure unique 3 rangées: F10.7, Kp, Dst

In [None]:
# ===== Composite figure (3 rows): F10.7, Kp, Dst (2010–2025) with TEC period highlighted =====
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime, timezone
from pathlib import Path

# --------- Plot settings ---------
# Global matplotlib defaults applied to every figure created after this point.
matplotlib.rcParams.update(
    {
        "savefig.dpi": 300,
        "figure.figsize": (10, 9),
        "font.size": 8,
    }
)

# --------- Paths (adapt as needed) ---------
# F10.7 table; the first three columns are read by position as JD, F_obs, F_adj
CSV_PATH = Path("data/indices/F10_7_2010_2025.csv")
# GFZ "all indices" text file (source of the 3-hourly Kp values)
Kp_TXT = Path("data/indices/GFZ_all_indices_2010-2025.txt")
# Kyoto Dst index text file
Dst_TXT = Path("data/indices/Kyoto_DST_index_2010-2025.txt")
# Destination of the composite figure
OUT_ALL_PNG = Path("figures/F107_Kp_Dst_3rows_2010-2025.png")

# --------- Time windows ---------
# Full plotted range, and the TEC study sub-range that gets highlighted.
plot_start, plot_end = pd.Timestamp("2010-01-01"), pd.Timestamp("2025-09-26")
tec_start, tec_end = pd.Timestamp("2015-10-01"), pd.Timestamp("2025-09-26")


# --------- Helpers ---------
def jd_to_datetime_utc(jd: float) -> datetime:
    """
    Convert a Julian Date (JD) to a timezone-aware UTC datetime.

    The calendar date is obtained with the classic integer algorithm
    (Gregorian correction applied from JD 2299161 onwards); the time of day
    is the fractional day rounded to the nearest whole second.

    Parameters
    ----------
    jd : float
        Julian Date (anything accepted by ``float()``).

    Returns
    -------
    datetime
        UTC-aware datetime with one-second resolution.

    Raises
    ------
    ValueError
        If ``jd`` cannot be converted to float or maps outside the range
        supported by ``datetime``.
    """
    # Local import: the surrounding file only imports `datetime` and `timezone`.
    from datetime import timedelta

    jd = float(jd)

    # Julian days start at noon; shifting by half a day makes the integer
    # part a civil day number and the remainder the fraction since midnight.
    J = int(jd + 0.5)
    f = jd + 0.5 - J

    if J >= 2299161:
        # Gregorian calendar correction
        a = int((J - 1867216.25) / 36524.25)
        A = J + 1 + a - int(a / 4)
    else:
        A = J
    B = A + 1524
    C = int((B - 122.1) / 365.25)
    D = int(365.25 * C)
    E = int((B - D) / 30.6001)

    day = B - D - int(30.6001 * E)
    month = E - 1 if E < 14 else E - 13
    year = C - 4716 if month > 2 else C - 4715

    # Adding the rounded seconds as a timedelta lets a fraction that rounds up
    # to 24:00:00 roll over into the next day (and month/year) instead of
    # producing an invalid hour value of 24.
    midnight = datetime(year, month, day, tzinfo=timezone.utc)
    return midnight + timedelta(seconds=round(f * 86400.0))


def load_kp_gfz_daily(path: Path) -> pd.DataFrame:
    """
    Parse a whitespace-separated GFZ "all indices" file into daily Kp records.

    A line is used only if it is not blank, does not start with '#', has at
    least 28 whitespace-separated fields, and its date fields (0-2) and Kp
    fields (7-14) parse as numbers. Everything else is skipped silently.

    Returns a DataFrame sorted by date with columns:
    - date      (Timestamp)
    - kp_daily  (mean of the eight 3-hour values, negative entries ignored;
                 NaN when no finite value remains)
    - kp8       (the eight values exactly as read, negatives included)
    """
    stamps, daily_means, raw_kp = [], [], []

    with path.open("r", errors="ignore") as handle:
        for raw in handle:
            stripped = raw.strip()
            if stripped == "" or stripped.startswith("#"):
                continue

            tokens = raw.split()
            if len(tokens) < 28:
                continue

            try:
                year, month, day = (int(tok) for tok in tokens[:3])
                eight = list(map(float, tokens[7:15]))
            except Exception:
                continue

            # Negative entries are treated as missing for the daily mean only;
            # the stored per-day list keeps the values as read.
            values = np.asarray(eight, dtype=float)
            usable = np.where(values < 0, np.nan, values)
            if np.isfinite(usable).any():
                mean_kp = float(np.nanmean(usable))
            else:
                mean_kp = np.nan

            stamps.append(pd.Timestamp(year=year, month=month, day=day))
            daily_means.append(mean_kp)
            raw_kp.append(eight)

    table = pd.DataFrame({"date": stamps, "kp_daily": daily_means, "kp8": raw_kp})
    return table.sort_values(by="date", ignore_index=True)


def load_dst_kyoto_daily(path: Path) -> pd.DataFrame:
    """
    Parse a fixed-width Kyoto Dst file into daily records.

    Only lines beginning with "DST" are read. Character positions used
    (0-based): 3-4 two-digit year, 5-6 month, 8-9 day, 14-15 century digits
    (optional), then 24 four-character hourly fields starting at position 20.
    Hourly fields that are blank, non-integer or equal to 9999 become NaN.

    Returns a DataFrame sorted by date with columns:
    - date       (Timestamp)
    - dst_daily  (mean of the available hourly values, NaN if none)
    - dst24      (list of 24 hourly values, NaN where missing)
    """

    def _hour_value(field: str) -> float:
        """Turn one 4-character hourly field into a float, or NaN if unusable."""
        try:
            number = int(field.strip())
        except Exception:
            return np.nan
        return np.nan if number == 9999 else float(number)

    stamps, means, hourly_rows = [], [], []

    with path.open("r", errors="ignore") as handle:
        for record in handle:
            # Blank lines and '#' comments never start with "DST", so this
            # single test rejects them as well.
            if not record.startswith("DST"):
                continue

            try:
                yy, month, day = int(record[3:5]), int(record[5:7]), int(record[8:10])
            except Exception:
                continue

            # Use the explicit century digits when present; otherwise pivot
            # two-digit years at 50 (>= 50 -> 19xx, < 50 -> 20xx).
            century = record[14:16].strip()
            if century.isdigit():
                year = int(f"{century}{yy:02d}")
            elif yy >= 50:
                year = 1900 + yy
            else:
                year = 2000 + yy

            hourly = [_hour_value(record[start:start + 4]) for start in range(20, 116, 4)]

            if np.isfinite(hourly).any():
                day_mean = float(np.nanmean(hourly))
            else:
                day_mean = np.nan

            stamps.append(pd.Timestamp(year=year, month=month, day=day))
            means.append(day_mean)
            hourly_rows.append(hourly)

    table = pd.DataFrame({"date": stamps, "dst_daily": means, "dst24": hourly_rows})
    return table.sort_values(by="date", ignore_index=True)


# --------- Load F10.7 (CSV: JD, F_obs, F_adj) ---------
df = pd.read_csv(CSV_PATH)
cols = list(df.columns)
if len(cols) < 3:
    raise ValueError("F10.7 CSV must have at least 3 columns: JD, F_obs, F_adj.")

# The three leading columns are taken by position, whatever their headers say.
jd_col, fobs_col, fadj_col = cols[:3]

# Julian Date -> tz-aware UTC timestamp -> naive calendar day (x-axis value)
df["datetime_utc"] = pd.to_datetime(
    list(map(jd_to_datetime_utc, df[jd_col])), utc=True
)
df["date"] = pd.to_datetime(df["datetime_utc"].dt.date)

# Rows whose adjusted flux exceeds 500 are discarded as outliers
df_no_out = df.drop(index=df.loc[df[fadj_col] > 500].index)

# Restrict to the plotted window (both bounds inclusive)
mask_window = df_no_out["date"].between(plot_start, plot_end)
dfw = df_no_out.loc[mask_window].copy()

# --------- Load Kp and Dst ---------
kp_df = load_kp_gfz_daily(Kp_TXT)
dst_df = load_dst_kyoto_daily(Dst_TXT)

kp_df = kp_df[kp_df["date"].between(plot_start, plot_end)].copy()
dst_df = dst_df[dst_df["date"].between(plot_start, plot_end)].copy()

# --------- Masks for TEC study period ---------
m_f_tec = dfw["date"].between(tec_start, tec_end)
m_kp_tec = kp_df["date"].between(tec_start, tec_end)
m_dst_tec = dst_df["date"].between(tec_start, tec_end)

# --------- Figure: 3 stacked panels, shared x-axis ---------
fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 9), dpi=300)

# One entry per row, top to bottom:
# (data frame, value column, TEC-period mask, legend label, y-axis label)
panels = [
    (dfw, fadj_col, m_f_tec, "F10.7 (1-AU adjusted, Penticton)", r"$F_{10.7}$ [sfu]"),
    (kp_df, "kp_daily", m_kp_tec, "Daily Kp", "Kp"),
    (dst_df, "dst_daily", m_dst_tec, "Daily Dst", "Dst [nT]"),
]
for ax, (frame, column, tec_mask, series_label, y_label) in zip(axes, panels):
    # full series first, then the TEC study period redrawn thicker in red on top
    ax.plot(frame["date"], frame[column], lw=1.2, label=series_label)
    ax.plot(
        frame.loc[tec_mask, "date"],
        frame.loc[tec_mask, column],
        lw=2.0,
        color="red",
        label="TEC study period",
    )
    ax.set_ylabel(y_label, fontsize=12, weight="bold")
    ax.grid(True, alpha=0.35)
    ax.legend(loc="best", prop={"size": 8})

# --------- Common formatting ---------
for ax in axes:
    ax.set_xlim(plot_start, plot_end)
    plt.setp(ax.yaxis.get_ticklabels(), fontsize=12, color="black", weight="bold")

# Only the bottom panel keeps its x tick labels
for ax in axes[:-1]:
    ax.tick_params(axis="x", labelbottom=False)

plt.setp(axes[-1].xaxis.get_ticklabels(), fontsize=12, color="black", weight="bold")

# Figure-level title; the panels are stacked without vertical gaps (hspace=0)
fig.text(
    0.5,
    0.965,
    "F10.7, Kp, and Dst — 2010–2025 | TEC study period: Oct 2015–Sep 2025",
    ha="center",
    fontsize=12,
    weight="bold",
)
fig.subplots_adjust(left=0.12, right=0.97, top=0.93, bottom=0.06, hspace=0.0)
fig.text(0.5, 0.01, "Date", ha="center", fontsize=12, weight="bold")

OUT_ALL_PNG.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(OUT_ALL_PNG, dpi=300)
plt.close(fig)

print(f"Combined F10.7–Kp–Dst figure saved to: {OUT_ALL_PNG}")


Figure unique 3 rangées avec ombre: F10.7, Kp, Dst

In [None]:
# ===== Composite figure (3 rows): F10.7, Kp, Dst (2010–2025) with shaded TEC period =====
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime, timezone
from pathlib import Path

# --------- Plot settings ---------
# Global matplotlib defaults applied to every figure created after this point.
matplotlib.rcParams.update(
    {
        "savefig.dpi": 300,
        "figure.figsize": (10, 9),
        "font.size": 8,
    }
)

# --------- Paths (adapt as needed) ---------
# F10.7 table; the first three columns are read by position as JD, F_obs, F_adj
CSV_PATH = Path("data/indices/F10_7_2010_2025.csv")
# GFZ "all indices" text file (source of the 3-hourly Kp values)
Kp_TXT = Path("data/indices/GFZ_all_indices_2010-2025.txt")
# Kyoto Dst index text file
Dst_TXT = Path("data/indices/Kyoto_DST_index_2010-2025.txt")
# Destination of the shaded variant of the composite figure
OUT_ALL_PNG = Path("figures/F107_Kp_Dst_3rows_2010-2025_shadowed.png")

# --------- Time windows ---------
# Full plotted range, and the TEC study sub-range that gets shaded.
plot_start, plot_end = pd.Timestamp("2010-01-01"), pd.Timestamp("2025-09-26")
tec_start, tec_end = pd.Timestamp("2015-10-01"), pd.Timestamp("2025-09-26")


# --------- Helpers ---------
def jd_to_datetime_utc(jd: float) -> datetime:
    """
    Convert a Julian Date (JD) to a timezone-aware UTC datetime.

    The calendar date is obtained with the classic integer algorithm
    (Gregorian correction applied from JD 2299161 onwards); the time of day
    is the fractional day rounded to the nearest whole second.

    Parameters
    ----------
    jd : float
        Julian Date (anything accepted by ``float()``).

    Returns
    -------
    datetime
        UTC-aware datetime with one-second resolution.

    Raises
    ------
    ValueError
        If ``jd`` cannot be converted to float or maps outside the range
        supported by ``datetime``.
    """
    # Local import: the surrounding file only imports `datetime` and `timezone`.
    from datetime import timedelta

    jd = float(jd)

    # Julian days start at noon; shifting by half a day makes the integer
    # part a civil day number and the remainder the fraction since midnight.
    J = int(jd + 0.5)
    f = jd + 0.5 - J

    if J >= 2299161:
        # Gregorian calendar correction
        a = int((J - 1867216.25) / 36524.25)
        A = J + 1 + a - int(a / 4)
    else:
        A = J
    B = A + 1524
    C = int((B - 122.1) / 365.25)
    D = int(365.25 * C)
    E = int((B - D) / 30.6001)

    day = B - D - int(30.6001 * E)
    month = E - 1 if E < 14 else E - 13
    year = C - 4716 if month > 2 else C - 4715

    # Adding the rounded seconds as a timedelta lets a fraction that rounds up
    # to 24:00:00 roll over into the next day (and month/year) instead of
    # producing an invalid hour value of 24.
    midnight = datetime(year, month, day, tzinfo=timezone.utc)
    return midnight + timedelta(seconds=round(f * 86400.0))


def load_kp_gfz_daily(path: Path) -> pd.DataFrame:
    """
    Parse a whitespace-separated GFZ "all indices" file into daily Kp records.

    A line is used only if it is not blank, does not start with '#', has at
    least 28 whitespace-separated fields, and its date fields (0-2) and Kp
    fields (7-14) parse as numbers. Everything else is skipped silently.

    Returns a DataFrame sorted by date with columns:
    - date      (Timestamp)
    - kp_daily  (mean of the eight 3-hour values, negative entries ignored;
                 NaN when no finite value remains)
    - kp8       (the eight values exactly as read, negatives included)
    """
    stamps, daily_means, raw_kp = [], [], []

    with path.open("r", errors="ignore") as handle:
        for raw in handle:
            stripped = raw.strip()
            if stripped == "" or stripped.startswith("#"):
                continue

            tokens = raw.split()
            if len(tokens) < 28:
                continue

            try:
                year, month, day = (int(tok) for tok in tokens[:3])
                eight = list(map(float, tokens[7:15]))
            except Exception:
                continue

            # Negative entries are treated as missing for the daily mean only;
            # the stored per-day list keeps the values as read.
            values = np.asarray(eight, dtype=float)
            usable = np.where(values < 0, np.nan, values)
            if np.isfinite(usable).any():
                mean_kp = float(np.nanmean(usable))
            else:
                mean_kp = np.nan

            stamps.append(pd.Timestamp(year=year, month=month, day=day))
            daily_means.append(mean_kp)
            raw_kp.append(eight)

    table = pd.DataFrame({"date": stamps, "kp_daily": daily_means, "kp8": raw_kp})
    return table.sort_values(by="date", ignore_index=True)


def load_dst_kyoto_daily(path: Path) -> pd.DataFrame:
    """
    Parse a fixed-width Kyoto Dst file into daily records.

    Only lines beginning with "DST" are read. Character positions used
    (0-based): 3-4 two-digit year, 5-6 month, 8-9 day, 14-15 century digits
    (optional), then 24 four-character hourly fields starting at position 20.
    Hourly fields that are blank, non-integer or equal to 9999 become NaN.

    Returns a DataFrame sorted by date with columns:
    - date       (Timestamp)
    - dst_daily  (mean of the available hourly values, NaN if none)
    - dst24      (list of 24 hourly values, NaN where missing)
    """

    def _hour_value(field: str) -> float:
        """Turn one 4-character hourly field into a float, or NaN if unusable."""
        try:
            number = int(field.strip())
        except Exception:
            return np.nan
        return np.nan if number == 9999 else float(number)

    stamps, means, hourly_rows = [], [], []

    with path.open("r", errors="ignore") as handle:
        for record in handle:
            # Blank lines and '#' comments never start with "DST", so this
            # single test rejects them as well.
            if not record.startswith("DST"):
                continue

            try:
                yy, month, day = int(record[3:5]), int(record[5:7]), int(record[8:10])
            except Exception:
                continue

            # Use the explicit century digits when present; otherwise pivot
            # two-digit years at 50 (>= 50 -> 19xx, < 50 -> 20xx).
            century = record[14:16].strip()
            if century.isdigit():
                year = int(f"{century}{yy:02d}")
            elif yy >= 50:
                year = 1900 + yy
            else:
                year = 2000 + yy

            hourly = [_hour_value(record[start:start + 4]) for start in range(20, 116, 4)]

            if np.isfinite(hourly).any():
                day_mean = float(np.nanmean(hourly))
            else:
                day_mean = np.nan

            stamps.append(pd.Timestamp(year=year, month=month, day=day))
            means.append(day_mean)
            hourly_rows.append(hourly)

    table = pd.DataFrame({"date": stamps, "dst_daily": means, "dst24": hourly_rows})
    return table.sort_values(by="date", ignore_index=True)


# --------- Load F10.7 (CSV: JD, F_obs, F_adj) ---------
df = pd.read_csv(CSV_PATH)
cols = list(df.columns)
if len(cols) < 3:
    raise ValueError("F10.7 CSV must have at least 3 columns: JD, F_obs, F_adj.")

# The three leading columns are taken by position, whatever their headers say.
jd_col, fobs_col, fadj_col = cols[:3]

# Julian Date -> tz-aware UTC timestamp -> naive calendar day (x-axis value)
df["datetime_utc"] = pd.to_datetime(
    list(map(jd_to_datetime_utc, df[jd_col])), utc=True
)
df["date"] = pd.to_datetime(df["datetime_utc"].dt.date)

# Rows whose adjusted flux exceeds 500 are discarded as outliers
df_no_out = df.drop(index=df.loc[df[fadj_col] > 500].index)

# Restrict to the plotted window (both bounds inclusive)
mask_window = df_no_out["date"].between(plot_start, plot_end)
dfw = df_no_out.loc[mask_window].copy()

# --------- Load Kp and Dst ---------
kp_df = load_kp_gfz_daily(Kp_TXT)
dst_df = load_dst_kyoto_daily(Dst_TXT)

kp_df = kp_df[kp_df["date"].between(plot_start, plot_end)].copy()
dst_df = dst_df[dst_df["date"].between(plot_start, plot_end)].copy()

# --------- Masks for TEC study period ---------
# Not used by the shaded panels below (the span is drawn from tec_start/tec_end).
m_f_tec = dfw["date"].between(tec_start, tec_end)
m_kp_tec = kp_df["date"].between(tec_start, tec_end)
m_dst_tec = dst_df["date"].between(tec_start, tec_end)

# --------- Figure: 3 stacked panels, shared x-axis ---------
fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 9), dpi=300)

# One entry per row, top to bottom:
# (x values, y values, legend label, shading colour, legend location)
panels = [
    (dfw["date"], dfw[fadj_col], "F10.7 (1-AU adjusted, Penticton)", "red", "upper center"),
    (kp_df["date"], kp_df["kp_daily"], "Daily Kp (GFZ)", "green", "upper center"),
    (dst_df["date"], dst_df["dst_daily"], "Daily Dst (Kyoto)", "orange", "lower center"),
]
for ax, (day_values, series_values, series_label, shade, legend_loc) in zip(axes, panels):
    ax.plot(day_values, series_values, lw=1.2, label=series_label)
    # translucent band marking the TEC study period
    ax.axvspan(
        tec_start,
        tec_end,
        color=shade,
        alpha=0.2,
        linewidth=0,
        label="TEC study period",
    )
    ax.grid(True, alpha=0.35)
    ax.legend(loc=legend_loc, prop={"size": 8})

# --------- Common formatting ---------
for ax in axes:
    ax.set_xlim(plot_start, plot_end)
    plt.setp(ax.yaxis.get_ticklabels(), fontsize=12, color="black", weight="bold")

# Only the bottom panel keeps its x tick labels
for ax in axes[:-1]:
    ax.tick_params(axis="x", labelbottom=False)

plt.setp(axes[-1].xaxis.get_ticklabels(), fontsize=12, color="black", weight="bold")

fig.text(
    0.5,
    0.965,
    "F10.7, Kp, and Dst — 2010–2025 | TEC study period: Oct 2015–Sep 2025 (shaded)",
    ha="center",
    fontsize=12,
    weight="bold",
)
fig.subplots_adjust(left=0.12, right=0.97, top=0.93, bottom=0.06, hspace=0.0)
fig.text(0.5, 0.01, "Date", ha="center", fontsize=12, weight="bold")

# Y-axis captions are placed in figure coordinates rather than via set_ylabel
for y_pos, caption in [(0.75, r"$F_{10.7}$ [sfu]"), (0.50, "Kp"), (0.15, "Dst [nT]")]:
    fig.text(0.05, y_pos, caption, ha="center", rotation=90, fontsize=12, weight="bold")

OUT_ALL_PNG.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(OUT_ALL_PNG, dpi=300)
plt.close(fig)

print(f"Combined shaded F10.7–Kp–Dst figure saved to: {OUT_ALL_PNG}")


Generate four daily TEC figures per year, with GIM and offset

In [None]:
# ========================
# DAILY 2×2 FIGURES PER YEAR
# ========================
from datetime import date
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# --- input CSV files (already enriched with indices / GIM) ---
# 30-minute VTEC statistics; loaded by _load_30min_csv
CSV_30MIN_WITH_GIM = Path("data/2015_2025_MS_VTEC_30min_stats.csv")
# Daily statistics; optional — callers check .exists() before loading it
CSV_DAILY_WITH_OFF = Path(
    "data/2015_2025_MS_VTEC_daily_stats_UTC_with_indices_and_max_with_GFZlabels.csv"
)

# --- TEC/GIM column names in the 30-min CSV ---
VTEC_MED_COL  = "VTEC_median"   # main 30-min VTEC statistic (median)
VTEC_MEAN_COL = "VTEC_mean"     # optional; only plotted/coerced when the column exists
GIM_COL       = "vtec_gim"      # collocated GIM VTEC
# Column looked up in the *daily* table by get_daily_offset (per-day offset value)
GIM_OFFSET_COL = "gim_offset_tecu"

# --- output directory for figures ---
# Created immediately (including parents) as a side effect of running this cell.
FIG_DIR = Path("figures/figs_quiet/MS_FIGURES_GIM")
FIG_DIR.mkdir(parents=True, exist_ok=True)


In [None]:
def _set_xticks_30min(ax: plt.Axes) -> None:
    """
    Configure the hour axis of a daily panel.

    Sets the x-range to 0–24 h and places a labelled tick every 2 hours
    (00, 02, …, 22).

    The range previously started at -1, which overrode the caller's
    ``set_xlim(0, 24)`` and left an empty hour before midnight.
    """
    ax.set_xlim(0, 24)
    ax.set_xticks(np.arange(0, 24, 2))
    ax.set_xticklabels([f"{h:02d}" for h in range(0, 24, 2)], rotation=0)


# --- I/O helpers for 30-min and daily CSVs ---
def _load_30min_csv(path: Path) -> pd.DataFrame:
    """
    Read the 30-min VTEC CSV and normalise its time and value columns.

    After loading, the frame is guaranteed to carry:
      - ts_utc   : UTC-aware timestamps (taken from "ts_utc", else from "time")
      - date_utc : calendar dates (taken from "date_utc", else derived from ts_utc)
    and the VTEC median / VTEC mean / GIM columns, when present, are coerced
    to numeric. Unparseable entries become NaT / NaN.
    """
    frame = pd.read_csv(path)

    # Timestamp: prefer an existing "ts_utc" column, fall back to "time"
    ts_source = "ts_utc" if "ts_utc" in frame.columns else "time"
    frame["ts_utc"] = pd.to_datetime(frame[ts_source], utc=True, errors="coerce")

    # Calendar date: parse the existing column, or floor the timestamp to the day
    if "date_utc" in frame.columns:
        day_stamps = pd.to_datetime(frame["date_utc"], utc=True, errors="coerce")
    else:
        day_stamps = frame["ts_utc"].dt.floor("D")
    frame["date_utc"] = day_stamps.dt.date

    # Value columns are optional; coerce only the ones that exist
    for name in (VTEC_MED_COL, VTEC_MEAN_COL, GIM_COL):
        if name not in frame.columns:
            continue
        frame[name] = pd.to_numeric(frame[name], errors="coerce")

    return frame


def _load_daily_csv(path: Path) -> pd.DataFrame:
    """
    Load the daily statistics file, ensuring a date_utc column.
    """
    df = pd.read_csv(path)
    if "date_utc" in df.columns:
        df["date_utc"] = pd.to_datetime(df["date_utc"], utc=True, errors="coerce").dt.date
    elif "date" in df.columns:
        df["date_utc"] = pd.to_datetime(df["date"], utc=True, errors="coerce").dt.date
    return df


# --- load 30-min and daily inputs ---
s30 = _load_30min_csv(CSV_30MIN_WITH_GIM)

# Re-apply the time/date normalisation so both columns have a known type
s30["ts_utc"] = pd.to_datetime(s30["ts_utc"], utc=True, errors="coerce")
s30["date_utc"] = pd.to_datetime(s30["date_utc"], utc=True, errors="coerce").dt.date

# The daily table is optional: without the file, fall back to an empty frame
# that still exposes the two columns get_daily_offset looks at.
if Path(CSV_DAILY_WITH_OFF).exists():
    daily_off = _load_daily_csv(CSV_DAILY_WITH_OFF)
else:
    daily_off = pd.DataFrame(columns=["date_utc", GIM_OFFSET_COL])


# --- per-day GIM offset: from daily CSV if present, otherwise from 30-min median difference ---
def get_daily_offset(day, s30_df: pd.DataFrame, daily_df: pd.DataFrame | None = None) -> float:
    """
    Look up, or compute, the VTEC-minus-GIM offset (TECU) for one day.

    Resolution order:
      1) the first row of `daily_df` matching `day`, if the offset column
         exists and that first value is not NaN;
      2) otherwise the median of (VTEC median − GIM) over the day's 30-min
         rows in `s30_df` that have both values.

    Returns NaN when neither source yields a value.
    """
    # 1) stored value from the daily table
    daily_usable = daily_df is not None and GIM_OFFSET_COL in daily_df.columns
    if daily_usable:
        matches = daily_df.loc[daily_df["date_utc"] == day, GIM_OFFSET_COL]
        if len(matches) > 0:
            first = matches.iloc[0]
            if pd.notna(first):
                return float(first)

    # 2) recompute from the 30-min series
    same_day = s30_df[s30_df["date_utc"] == day]
    vtec = pd.to_numeric(same_day[VTEC_MED_COL], errors="coerce")
    gim = pd.to_numeric(same_day[GIM_COL], errors="coerce")
    paired = pd.concat([vtec, gim], axis=1).dropna()
    if len(paired) == 0:
        return float("nan")

    difference = paired.iloc[:, 0] - paired.iloc[:, 1]
    return float(difference.median())


# --- selection of 4 representative days per year ---
# Default targets: ~15 Jan / 15 Apr / 15 Jul / 15 Nov.
# Each entry is a (month, day) pair; pick_days_for_year combines it with the
# year and snaps the result to the nearest date that has 30-min data.
DEFAULT_MONTH_DAY = [(1, 15), (4, 15), (7, 15), (11, 15)]


def nearest_available_date(year: int, target_date, available_dates) -> date | None:
    """
    Return the available date in `available_dates` closest to `target_date`.
    All dates treated as naïve (no timezone).
    """
    target = pd.Timestamp(target_date)  # naïve

    avail_ts = pd.to_datetime(available_dates, errors="coerce")
    avail_ts = avail_ts[~pd.isna(avail_ts)]
    if len(avail_ts) == 0:
        return None

    diff = (avail_ts - target)
    try:
        deltas = diff.to_numpy()
    except Exception:
        deltas = np.asarray(diff)

    i_min = int(np.nanargmin(np.abs(deltas)))
    return pd.Timestamp(avail_ts[i_min]).date()


def pick_days_for_year(year: int, s30: pd.DataFrame) -> list[date]:
    """
    Choose the representative days of `year` used for the daily panels.

    For every (month, day) target in DEFAULT_MONTH_DAY the closest date of
    that year present in `s30["date_utc"]` is selected. Targets for which no
    date can be found are dropped, so fewer than four days may be returned.
    """
    # Reduce whatever is stored in date_utc to plain calendar dates
    calendar_days = pd.to_datetime(s30["date_utc"], errors="coerce").dt.date
    in_year = pd.to_datetime(calendar_days).dt.year == year
    dates_in_year = (
        pd.to_datetime(calendar_days[in_year], errors="coerce").dropna().unique()
    )

    targets = (pd.Timestamp(year=year, month=m, day=d) for m, d in DEFAULT_MONTH_DAY)
    nearest = (nearest_available_date(year, tgt, dates_in_year) for tgt in targets)
    return [found for found in nearest if found is not None]


# --- plot 2×2 daily panels for a given year ---
def plot_year_grid(year: int, s30: pd.DataFrame, days: list[date] | None = None) -> Path | None:
    """
    Produce a 2×2 daily panel figure for one year, comparing 30-min VTEC
    (median / mean) with CODG GIM, and annotate the daily median offset.

    Parameters
    ----------
    year : int
        Year shown in the figure title and used in the output file name.
    s30 : pd.DataFrame
        30-min table; rows are selected per day through its "date_utc"
        column and positioned in time through "ts_utc".
    days : list[date] | None
        Days to plot (at most the first four are used). When empty or None
        they are chosen with pick_days_for_year.

    Returns
    -------
    Path | None
        Path of the PNG written under FIG_DIR, or None when there is no day
        to plot.

    Notes
    -----
    Reads the module-level `daily_off` table for the per-day offset and
    writes the figure to disk as a side effect.
    """
    # selected days
    if not days:
        days = pick_days_for_year(year, s30)
    days = list(days)

    if len(days) == 0:
        print(f"[INFO] {year}: no data available.")
        return None
    if len(days) < 4:
        print(f"[INFO] {year}: only {len(days)} day(s) available; 2×2 grid will be partial.")

    fig, axes = plt.subplots(2, 2, figsize=(12, 4), sharex=True, sharey=False, constrained_layout=False)
    fig.subplots_adjust(hspace=0.0, wspace=0.12)

    # The mean curve is optional: drawn only if the column exists in the input
    has_mean = VTEC_MEAN_COL in s30.columns
    line_kw = dict(lw=1.6)

    for k in range(4):
        # panel k -> (row, column) of the 2×2 grid, filled row by row
        r, c = divmod(k, 2)
        ax = axes[r, c]
        if k >= len(days):
            # fewer than four days: blank out the unused panel
            ax.axis("off")
            continue

        day = days[k]
        day0 = pd.Timestamp(day, tz="UTC")
        day1 = day0 + pd.Timedelta(days=1)  # NOTE(review): not used below

        sd = s30.loc[(s30["date_utc"] == day)].copy()
        if sd.empty:
            ax.text(
                0.5, 0.5, "No data",
                transform=ax.transAxes, ha="center", va="center"
            )
            continue

        # UT hours since midnight
        hours = (sd["ts_utc"] - day0).dt.total_seconds() / 3600.0

        # plots
        if VTEC_MED_COL in sd.columns:
            ax.plot(hours, sd[VTEC_MED_COL], label="VTEC median (30 min)", **line_kw)
        if has_mean:
            ax.plot(hours, sd[VTEC_MEAN_COL], label="VTEC mean (30 min)", ls="--", **line_kw)
        if GIM_COL in sd.columns:
            ax.plot(hours, sd[GIM_COL], label="CODG GIM (30 min)", ls=":", **line_kw)

        # daily GIM offset (only this day's rows are passed as the 30-min source)
        off = get_daily_offset(day, sd, daily_off)
        if np.isfinite(off):
            off_txt = f"median offset = {off:.2f} TECU"
        else:
            off_txt = "median offset = n/a"

        ax.set_title(f"{day} — {off_txt}", fontsize=10, loc="center", weight="bold")
        ax.set_xlim(0, 24)
        # NOTE(review): ticks at 45 lie above the y-limit set on the next line
        ax.set_yticks([5, 15, 25, 35, 45])
        ax.set_ylim(0, 35)
        ax.grid(True, alpha=0.25)

        # The helper sets the x-limits as well as the ticks, so its limits are
        # the ones in effect after this call.
        _set_xticks_30min(ax)

        for t in ax.yaxis.get_ticklabels():
            t.set_fontsize(10)
            t.set_color("black")
            t.set_weight("bold")
        for t in ax.xaxis.get_ticklabels():
            t.set_fontsize(10)
            t.set_color("black")
            t.set_weight("bold")

    # common legend from first panel
    # (if the first panel drew no lines, the shared legend is empty)
    handles, labels = [], []
    for line in axes[0, 0].lines:
        handles.append(line)
        labels.append(line.get_label())

    # hide x-labels on top row
    plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)

    fig.subplots_adjust(hspace=0)
    fig.legend(
        handles, labels,
        loc="upper center", ncol=3, frameon=False, bbox_to_anchor=(0.5, 1.02)
    )

    # Title and legend sit above the figure area (y > 1); they are kept in the
    # saved file by bbox_inches="tight" below.
    fig.text(
        0.5, 1.06,
        f"OUCA Observatory — 30-min VTEC vs CODG GIM (year {year})",
        ha="center", fontsize=12, weight="bold"
    )
    fig.text(0.5, 0.02, "UTC hour", ha="center", fontsize=12, weight="bold")
    fig.text(0.08, 0.5, "TEC [TECU]", va="center", rotation="vertical", fontsize=12, weight="bold")

    out = FIG_DIR / f"VTEC_GIM_daily_2x2_{year}.png"
    fig.savefig(out, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"✅ Figure {year} → {out}")
    return out


# --- run for all years available in the 30-min CSV ---
# Distinct calendar years present in date_utc (unparseable dates ignored), ascending.
years = sorted(
    pd.to_datetime(s30["date_utc"], errors="coerce").dt.year.dropna().astype(int).unique()
)

# One 2×2 figure per year
for y in years:
    plot_year_grid(y, s30)


Generate observed VTEC and GIM figures for quiet days

In [None]:
# === Target period ===
# Inclusive bounds, stored as plain dates so they compare directly with the
# date objects held in the "date_utc" columns.
PERIOD_START = pd.Timestamp("2015-10-01", tz="UTC").date()
PERIOD_END   = pd.Timestamp("2025-09-26", tz="UTC").date()

# NOTE: this rebinds FIG_DIR (defined earlier in the notebook with a relative
# path) to a placeholder absolute path, and creates that directory right away.
# Adjust it before running.
FIG_DIR = Path(r"C:/path/to/output/figures/figs_quiet/MS_FIGURES_GIM")  # <-- adjust
FIG_DIR.mkdir(parents=True, exist_ok=True)

# Column names used by the quiet-day helpers below
VTEC_COL   = "VTEC_median"  # 30-min VTEC median
MEAN_COL   = "VTEC_mean"    # 30-min VTEC mean (plotted only when present)
GIM_COL    = "vtec_gim"     # GIM VTEC in the 30-min table
GIM_OFFSET_COL = "gim_offset_tecu"  # per-day offset column in the daily table


def _set_xticks_30min(ax: plt.Axes) -> None:
    """Limit the x-axis to 0–24 h and label a tick every 2 hours (00 … 22)."""
    tick_positions = np.arange(0, 24, 2)
    tick_labels = [f"{h:02d}" for h in range(0, 24, 2)]

    ax.set_xlim(0, 24)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels, rotation=0)


# --- Ensure time/date types are clean in s30 and daily ---
# Works on the `s30` frame loaded in an earlier cell and modifies it in place:
# ts_utc becomes UTC-aware timestamps, date_utc plain date objects
# (unparseable entries turn into NaT).
s30["ts_utc"]   = pd.to_datetime(s30["ts_utc"], utc=True, errors="coerce")
s30["date_utc"] = pd.to_datetime(s30["date_utc"], errors="coerce").dt.date

# Daily table, loaded unconditionally here (no existence check on the file);
# its date_utc column is reduced to plain dates as well.
daily = _load_daily_csv(CSV_DAILY_WITH_OFF)
daily["date_utc"] = pd.to_datetime(daily["date_utc"], errors="coerce").dt.date


# --- Per-day GIM offset: daily CSV if present, otherwise from 30-min series ---
def get_daily_offset(day, s30_df: pd.DataFrame, daily_df: pd.DataFrame | None = None) -> float:
    """
    Look up, or compute, the VTEC-minus-GIM offset (TECU) for one day.

    Resolution order:
      1) the first non-NaN offset stored for `day` in `daily_df`, when that
         table is given and has the offset column;
      2) otherwise the median of (VTEC median − GIM) over the day's 30-min
         rows of `s30_df` that have both values.

    Returns NaN when neither source yields a value.
    """
    # 1) stored value from the daily table (if available)
    daily_usable = daily_df is not None and GIM_OFFSET_COL in daily_df.columns
    if daily_usable:
        on_day = daily_df["date_utc"] == day
        stored = daily_df.loc[on_day, GIM_OFFSET_COL].dropna()
        if not stored.empty:
            return float(stored.iloc[0])

    # 2) recompute from the 30-min data
    same_day = s30_df["date_utc"] == day
    pairs = s30_df.loc[same_day, [VTEC_COL, GIM_COL]].dropna()
    if len(pairs) == 0:
        return float("nan")

    residual = pairs[VTEC_COL] - pairs[GIM_COL]
    return float(residual.median())


# --- Quietness metrics computed from the 30-min VTEC series ---
def daily_quiet_metrics(s30_df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise each day of the 30-min series with the metrics used to rank
    "quiet" days.

    One output row per `date_utc` value, with:
      - cov_vtec / cov_gim : number of non-missing VTEC / GIM samples divided
        by 48 (the number of 30-min slots in a day)
      - iqr    : 75th minus 25th percentile of VTEC
      - std    : standard deviation of VTEC
      - dV_med : median absolute difference between consecutive VTEC samples
    The three VTEC metrics are NaN for days without any VTEC value; dV_med
    also needs at least two valid samples.
    """

    def _summarise(day_rows: pd.DataFrame) -> pd.Series:
        vtec = pd.to_numeric(day_rows[VTEC_COL], errors="coerce")
        gim = pd.to_numeric(day_rows[GIM_COL], errors="coerce")

        coverage_vtec = vtec.notna().sum() / 48.0
        coverage_gim = gim.notna().sum() / 48.0

        spread = deviation = step_median = np.nan
        if vtec.notna().any():
            upper = np.nanpercentile(vtec, 75)
            lower = np.nanpercentile(vtec, 25)
            spread = upper - lower
            deviation = np.nanstd(vtec)

            samples = vtec.to_numpy()
            if np.count_nonzero(~np.isnan(samples)) > 1:
                step_median = np.nanmedian(np.abs(np.diff(samples)))
            else:
                step_median = np.nan

        return pd.Series(
            {
                "cov_vtec": coverage_vtec,
                "cov_gim": coverage_gim,
                "iqr": spread,
                "std": deviation,
                "dV_med": step_median,
            }
        )

    per_day = s30_df.groupby("date_utc", as_index=False).apply(_summarise)
    return per_day.reset_index(drop=True)


def pick_10_quiet_days_geomagQ(s30_df: pd.DataFrame, daily_df: pd.DataFrame) -> list:
    """
    Choose up to 10 geomagnetically quiet days inside the target period.

    A day qualifies when
      - its label in daily_df["geomag_label_gfz_QDNQ"] is 'Q' (compared after
        stripping whitespace and upper-casing), and
      - its 30-min data reach cov_vtec >= 0.85 and cov_gim >= 0.70.
    Qualifying days are ranked by ascending iqr, then dV_med, then std, and
    the first 10 are returned. A message is printed when fewer are found.

    Raises
    ------
    RuntimeError
        If the label column is missing from `daily_df`.
    """
    if "geomag_label_gfz_QDNQ" not in daily_df.columns:
        raise RuntimeError("Column 'geomag_label_gfz_QDNQ' is missing from the daily CSV.")

    # Daily rows inside the target period (bounds inclusive)
    daily_in_period = daily_df[
        daily_df["date_utc"].between(PERIOD_START, PERIOD_END)
    ].copy()

    # Days labelled 'Q' (quiet) by GFZ
    labels = daily_in_period["geomag_label_gfz_QDNQ"].astype(str).str.strip().str.upper()
    q_days = daily_in_period.loc[labels == "Q", ["date_utc"]].dropna().drop_duplicates()

    # Per-day metrics from the 30-min data of the same period
    s30_in_period = s30_df[s30_df["date_utc"].between(PERIOD_START, PERIOD_END)].copy()
    metrics = daily_quiet_metrics(s30_in_period)

    # Keep Q days only, then apply the data-coverage thresholds
    candidates = metrics.merge(q_days, on="date_utc", how="inner")
    well_covered = (candidates["cov_vtec"] >= 0.85) & (candidates["cov_gim"] >= 0.70)
    candidates = candidates[well_covered].copy()

    # Quietest first: low IQR, then low median |ΔV|, then low std
    ranked = candidates.sort_values(["iqr", "dV_med", "std"], ascending=True)

    days = list(ranked["date_utc"].head(10))
    if len(days) < 10:
        print(
            f"[INFO] Only {len(days)} 'Q' day(s) pass coverage/quality filters in the target period."
        )
    return days


def plot_quiet_days_5x2(
    days: list,
    s30_df: pd.DataFrame,
    daily_df: pd.DataFrame | None = None,
    title_suffix: str = "geomagnetically quiet days (GFZ label Q)",
) -> Path | None:
    """
    Plot up to 10 quiet days in a 5×2 panel grid and save the figure as a PNG.

    Each panel shows, for one day:
      - VTEC median (30-min)
      - optional VTEC mean (30-min), drawn only when MEAN_COL is a column
      - CODG GIM VTEC (30-min)

    Parameters
    ----------
    days : list
        Day keys compared for equality with ``s30_df["date_utc"]``. Only the
        first 10 entries are drawn (the grid has 10 panels); shorter lists
        leave the remaining panels blank.
    s30_df : pd.DataFrame
        30-min samples; the columns read here are ``date_utc``, ``ts_utc``,
        VTEC_COL, GIM_COL and, if present, MEAN_COL.
    daily_df : pd.DataFrame | None
        Forwarded unchanged to ``get_daily_offset``.
    title_suffix : str
        Accepted for backward compatibility; the figure title is fixed and
        this value is currently not rendered.

    Returns
    -------
    Path
        Location of the saved PNG inside FIG_DIR.

    Notes
    -----
    NOTE(review): the x values are hours taken from ``ts_utc`` while the axis
    caption reads "Local Time (LT)" — confirm that UTC and LT coincide for
    this station.
    NOTE(review): ``_set_xticks_30min`` overrides the x-limits set here; this
    file contains both a 0–48 slot-based and a 0–24 hour-based definition of
    that helper, and only the hour-based one matches the hour-valued x data.
    """
    n_panels = 10
    # Never index past the 5×2 grid, and pad short lists so every panel is visited.
    days = list(days)[:n_panels]
    days += [None] * (n_panels - len(days))

    fig, axes = plt.subplots(
        5, 2, figsize=(10, 10), sharex=True, sharey=False, constrained_layout=False
    )
    # Panels are stacked without vertical gaps.
    fig.subplots_adjust(hspace=0.0, wspace=0.12)

    handles_global = None

    for k, day in enumerate(days):
        r, c = divmod(k, 2)
        ax = axes[r, c]

        if day is None:
            ax.axis("off")
            continue

        day_df = s30_df.loc[s30_df["date_utc"] == day].copy().sort_values("ts_utc")
        if day_df.empty:
            ax.text(
                0.5, 0.5, "No data",
                transform=ax.transAxes, ha="center", va="center"
            )
            continue

        # Decimal hour of each sample, taken from the UTC timestamp.
        t_hours = day_df["ts_utc"].dt.hour + day_df["ts_utc"].dt.minute / 60.0

        ax.plot(
            t_hours, day_df[VTEC_COL],
            lw=2.0, label="VTEC median (30 min)"
        )

        if MEAN_COL in day_df.columns:
            ax.plot(
                t_hours, day_df[MEAN_COL],
                lw=1.8, ls="--", label="VTEC mean (30 min)"
            )

        ax.plot(
            t_hours, day_df[GIM_COL],
            lw=1.8, ls="--", label="CODG GIM (30 min)"
        )

        off = get_daily_offset(day, s30_df, daily_df)
        off_txt = (
            f"median offset = {off:.2f} TECU"
            if np.isfinite(off) else "median offset = n/a"
        )

        # The title is placed inside the axes (y=0.80) because panels touch vertically.
        ax.set_title(
            f"{day} — {off_txt}",
            fontsize=10, loc="center", weight="bold", y=0.80
        )
        ax.set_xlim(0, 24)
        ax.set_ylim(0, 21)
        ax.set_yticks([5, 10, 15, 20])
        ax.grid(True, alpha=0.25)

        _set_xticks_30min(ax)

        for tick_label in (*ax.yaxis.get_ticklabels(), *ax.xaxis.get_ticklabels()):
            tick_label.set_fontsize(10)
            tick_label.set_color("black")
            tick_label.set_weight("bold")

        # The shared legend is built from the first panel that actually has curves.
        if handles_global is None:
            handles_global = list(ax.lines)

    # Common legend
    if handles_global:
        labels_global = [h.get_label() for h in handles_global]
        fig.legend(
            handles_global, labels_global,
            loc="upper center", ncol=3, frameon=False,
            bbox_to_anchor=(0.5, 0.93)
        )

    # Hide x labels on top row
    plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)

    fig.text(
        0.5, 0.94,
        "OUCA Observatory — Quiet-time VTEC vs CODG GIM",
        ha="center", fontsize=12, weight="bold"
    )
    fig.text(0.5, 0.06, "Local Time (LT)", ha="center", fontsize=12, weight="bold")
    fig.text(0.08, 0.5, "TEC [TECU]", va="center", rotation="vertical",
             fontsize=12, weight="bold")

    out = FIG_DIR / "VTEC_GIM_10days_geomagQ_5x2_2015-2025.png"
    fig.savefig(out, dpi=300)
    plt.close(fig)
    print(f"✅ Quiet 10-day figure saved → {out}")
    return out


# === Selection + plot (based ONLY on geomag_label_gfz_QDNQ == 'Q') ===
# Pick at most ten 'Q' days from the 30-min table (s30) and the daily table (daily).
quiet_days_Q = pick_10_quiet_days_geomagQ(s30, daily)
print("Selected 'Q' days:", quiet_days_Q)

# Draw the selected days as a 5×2 panel figure; the callee saves the PNG itself,
# so its return value is not needed here.
plot_quiet_days_5x2(
    quiet_days_Q, s30, daily,
    title_suffix="geomagnetically quiet days (GFZ label Q)"
)


Generate Final figures

In [None]:
import re
from pathlib import Path
from typing import Union, List
import numpy as np
from datetime import datetime, timezone, date, timedelta
import pandas as pd
import matplotlib.pyplot as plt

# =========================
# CELL 0 — SETUP & HELPERS
# =========================

# ------- Paths & parameters (ADAPT TO YOUR SETUP) -------
CSV_DAILY = Path("/path/to/2015_2025_MS_VTEC_daily_stats_UTC_with_indices_and_max_with_GFZlabels.csv")
CSV_30MIN = Path("/path/to/2015_2025_MS_VTEC_30min_stats.csv")
OUTDIR_FIG = Path("/path/to/figures/figs_quiet")
OUTDIR_FIG.mkdir(parents=True, exist_ok=True)

TIME_COL   = "date_utc"
OFFSET_COL = "gim_offset_tecu"
QUIET_COL  = "geomag_label_gfz_QDNQ"   # 'Q', 'D', 'NQ'
SOLAR_COL  = "solar_label"            # 'Low', 'High', 'NA'
LOCAL_TZ   = "Africa/Casablanca"      # used in other cells for local-time plots
VTEC_COL   = "VTEC_median"            # VTEC column in the 30-min CSV

START = pd.Timestamp("2015-10-01", tz="UTC")
END   = pd.Timestamp("2025-09-26", tz="UTC")
# --------------------------------------------------------


# ===== Load 30-min series (for possible later use) =====
s30 = pd.read_csv(CSV_30MIN)

t_utc = pd.to_datetime(s30["time"], utc=True, errors="coerce")

# errors="coerce" turns unparseable timestamps into NaT. Such rows have no
# hour/minute, so the integer slot below could not be computed for them
# (casting NaN to int raises); drop them explicitly and say so.
bad_ts = t_utc.isna()
if bad_ts.any():
    print(f"[WARN] Dropping {int(bad_ts.sum())} 30-min row(s) with an unparseable 'time' value.")
    s30 = s30.loc[~bad_ts].copy()
    t_utc = t_utc.loc[~bad_ts]

s30 = s30.assign(
    ts_utc=t_utc,
    date_utc=t_utc.dt.floor("D").dt.date,
    slot=(t_utc.dt.hour * 2 + (t_utc.dt.minute // 30)).astype(int)  # 0..47
)
s30[VTEC_COL] = pd.to_numeric(s30[VTEC_COL], errors="coerce")


# ===== Load daily stats + restrict time window =====
df = pd.read_csv(CSV_DAILY)
df[TIME_COL] = pd.to_datetime(df[TIME_COL], utc=True, errors="coerce")
# Rows whose date failed to parse (NaT) compare False on both sides and drop out here.
df = df[(df[TIME_COL] >= START) & (df[TIME_COL] <= END)].copy()

# Basic masks
offset_abs = pd.to_numeric(df[OFFSET_COL], errors="coerce").abs()
m_off   = offset_abs < 20.0                      # |GIM offset| < 20 TECU
# Labels are compared after trimming whitespace and normalising case, the same
# way pick_10_quiet_days_geomagQ treats the GFZ label, so ' Q' or 'q' still count.
m_quiet = (df[QUIET_COL].astype(str).str.strip().str.upper() == "Q")   # GFZ geomagnetic label = Q
m_slow  = (df[SOLAR_COL].astype(str).str.strip().str.lower() == "low")  # solar_label = Low

# Year column for grouping
df["year"] = df[TIME_COL].dt.year


# =========================
# 1) Global summary (Oct 2015–Sep 2025)
# =========================
N = len(df)

# One (label, boolean mask) pair per row of the summary, after the "total days" row.
_global_rows = [
    ("|offset| < 20 TECU",                m_off),
    ("geomagnetic quiet (Q)",             m_quiet),
    ("solar low",                         m_slow),
    ("|offset| < 20 ∩ quiet",             m_off & m_quiet),
    ("|offset| < 20 ∩ solar low",         m_off & m_slow),
    ("quiet ∩ solar low",                 m_quiet & m_slow),
    ("|offset| < 20 ∩ quiet ∩ solar low", m_off & m_quiet & m_slow),
]

summary_global = pd.DataFrame({
    "metric": ["total days"] + [row_label for row_label, _ in _global_rows],
    "count":  [N] + [int(row_mask.sum()) for _, row_mask in _global_rows],
})

# Share of each count relative to all days in the window.
summary_global["pct_of_total_%"] = (100.0 * summary_global["count"] / N).round(2)

print("=== Global summary (Oct 2015 → Sep 2025) ===")
print(summary_global.to_string(index=False))


# =========================
# 2) Per-year summary — simple filters
# =========================
# Attach the three masks as columns so they can be summed per year.
_flags_simple = df.assign(off=m_off, quiet=m_quiet, solar_low=m_slow)

by_year_simple = _flags_simple.groupby("year").agg(
    total=("year", "size"),
    off_sm20=("off", "sum"),
    quiet_Q=("quiet", "sum"),
    solar_low=("solar_low", "sum"),
)

# Percentage columns are appended after the counts, in this order.
for _pct_name, _count_name in (
    ("pct_off",   "off_sm20"),
    ("pct_quiet", "quiet_Q"),
    ("pct_slow",  "solar_low"),
):
    by_year_simple[_pct_name] = (
        100.0 * by_year_simple[_count_name] / by_year_simple["total"]
    ).round(2)

print("\n=== Per-year summary — simple filters ===")
print(by_year_simple.to_string())


# =========================
# 3) Per-year summary — intersections
# =========================
_flags_inter = df.assign(
    inter_off_quiet=(m_off & m_quiet),
    inter_off_slow=(m_off & m_slow),
    inter_quiet_slow=(m_quiet & m_slow),
    inter_all=(m_off & m_quiet & m_slow),
)

by_year_inter = _flags_inter.groupby("year").agg(
    total=("year", "size"),
    off_quiet=("inter_off_quiet", "sum"),
    off_slow=("inter_off_slow", "sum"),
    quiet_slow=("inter_quiet_slow", "sum"),
    all_three=("inter_all", "sum"),
)

for _count_name in ("off_quiet", "off_slow", "quiet_slow", "all_three"):
    by_year_inter[f"pct_{_count_name}"] = (
        100.0 * by_year_inter[_count_name] / by_year_inter["total"]
    ).round(2)

print("\n=== Per-year summary — intersections ===")
print(by_year_inter.to_string())

In [None]:
# =====================================================
# CELL — Restrict to the three-filter intersection
#        (|offset|<20, quiet, solar-low) and collect
#        the matching 30-min samples
# =====================================================

# Requires df, s30, VTEC_COL, m_off, m_quiet and m_slow from the setup cell.
N = len(df)

# --------- Days that satisfy all three filters at once ---------
keep_mask = m_off & m_quiet & m_slow
df_kept = df.loc[keep_mask].copy()
print(f"\nRows kept (intersection of three filters): {len(df_kept)}/{N}")

# Re-express the day as a plain date object, reading it from "date_utc"
# when that column exists and from "date" otherwise.
date_source = "date_utc" if "date_utc" in df_kept.columns else "date"
d = pd.to_datetime(df_kept[date_source], utc=True, errors="coerce")

df_kept["date_utc"] = d.dt.floor("D").dt.date

# 30-min samples that fall on one of the kept days, re-indexed from 0.
on_kept_day = s30["date_utc"].isin(df_kept["date_utc"])
S = s30.loc[on_kept_day].copy().reset_index(drop=True)

# --------- Helper functions for diurnal aggregation/plotting ---------
def _set_xticks_30min(ax):
    """Span the x-axis over 48 half-hour slots and label every 8th slot with its hour (00, 04, …, 20)."""
    n_slots = 48
    slots_per_tick = 8  # 8 half-hour slots = 4 hours between ticks
    ax.set_xlim(0, n_slots)
    ax.set_xticks(np.arange(0, n_slots, slots_per_tick))
    hour_labels = ["{:02d}".format(hour) for hour in range(0, 24, 4)]
    ax.set_xticklabels(hour_labels, rotation=0)


def _clean_slot(df_in: pd.DataFrame, name: str = "slot") -> pd.DataFrame:
    """
    Ensure ``name`` is a regular column (not an index level) and that column
    labels are unique. Useful before grouping and reindexing by slot.

    Parameters
    ----------
    df_in : pd.DataFrame
        Input frame; it is not modified in place.
    name : str
        Label that must end up as a column (default "slot").

    Returns
    -------
    pd.DataFrame
        ``df_in`` itself when nothing needs fixing, otherwise a new frame in
        which the index level ``name`` has become a column and, for every
        repeated column label, only the first occurrence is kept.
    """
    out = df_in
    if name in (out.index.names or []):
        # reset_index() refuses to insert a label that already exists as a
        # column, so a same-named column is removed first; the index level is
        # the version that is kept.
        if name in out.columns:
            out = out.drop(columns=name)
        out = out.reset_index()
    repeated = out.columns.duplicated()
    if repeated.any():
        out = out.loc[:, ~repeated]
    return out


def agg_diurnal(df_in: pd.DataFrame, val: str = VTEC_COL) -> pd.DataFrame:
    """
    Summarise column ``val`` per diurnal slot on a fixed 0..47 slot axis.

    The input is first passed through ``_clean_slot`` and then grouped by its
    ``slot`` column. Slots with no rows in the input still appear in the
    result, with missing values in the statistic columns.

    Returns a 48-row DataFrame with columns:
      slot, median, mean, q25, q75, std (ddof=1), N (non-null count)
    """
    frame = _clean_slot(df_in, "slot")
    all_slots = pd.Index(range(48), name="slot")
    by_slot = frame.groupby("slot")[val]

    per_slot_stats = {
        "median": by_slot.median(),
        "mean":   by_slot.mean(),
        "q25":    by_slot.quantile(0.25),
        "q75":    by_slot.quantile(0.75),
        "std":    by_slot.std(ddof=1),
        "N":      by_slot.count(),
    }

    # Put every statistic on the full 48-slot axis before assembling the table.
    table = {"slot": all_slots.values}
    for stat_name, stat_series in per_slot_stats.items():
        table[stat_name] = stat_series.rename_axis("slot").reindex(all_slots)

    return pd.DataFrame(table).reset_index(drop=True)


def slot_ticks_2h(ax):
    """Label a 48-slot x-axis every 4 slots (2 hours), with limits -1..48."""
    tick_slots = np.arange(0, 48, 4)
    tick_text = ["{:02d}".format(hour) for hour in range(0, 24, 2)]
    ax.set_xlim(-1, 48)
    ax.set_xticks(tick_slots)
    ax.set_xticklabels(tick_text, rotation=0)


def nice_y(ax, vmin: float = 0.0, vmax: float = 50.0, step: float = 10.0):
    """Set the y-limits to [vmin, vmax] and place ticks every ``step`` from ``vmin`` (arange stop is vmax + step)."""
    tick_positions = np.arange(vmin, vmax + step, step)
    ax.set_ylim(vmin, vmax)
    ax.set_yticks(tick_positions)


# Palette indexed by grid row; it wraps around after six rows.
row_colors = ["green", "red", "blue", "orange", "purple", "brown"]

def _panel_color(idx: int) -> str:
    """Return the colour of panel ``idx`` in a 2-column grid: both panels of a row share one colour."""
    grid_row = idx // 2
    return row_colors[grid_row % len(row_colors)]


# Report how many days passed the three filters and how many 30-min samples fall on them.
print(f"Quiet+low-solar days kept: {len(df_kept)} — 30-min samples: {len(S)}")


In [None]:
# ==========================================
# CELL 1 — DIURNAL CURVES PER YEAR (N×2 grid)
# median + IQR (q25–q75) + mean — quiet days
# ==========================================
years = sorted({pd.Timestamp(d).year for d in df_kept['date_utc']})
n = len(years)
ylim = (0, 50)

if n == 0:
    # plt.subplots() rejects a grid with zero rows, so there is nothing to draw.
    print("No quiet days kept — per-year diurnal figure skipped.")
else:
    # Exactly as many rows as the years need: no blank bottom rows, and no
    # overflow when more than 12 years are present.
    cols = 2
    rows = int(np.ceil(n / cols))
    fig, axes = plt.subplots(rows, cols, figsize=(12, 2*rows), squeeze=False, sharex='col')

    # Parse the sample dates once instead of once per year.
    year_of_sample = pd.to_datetime(S["date_utc"]).dt.year

    for i, y in enumerate(years):
        ax = axes[i//2, i%2]
        color = _panel_color(i)
        Sy = S[year_of_sample == y]
        D = agg_diurnal(Sy)

        ax.plot(D["slot"].values, D["median"].values, '-', color=color, lw=2.2, label='median')
        ax.fill_between(D["slot"].values, D["q25"].values, D["q75"].values, color=color, alpha=0.2, label='IQR (25–75)')
        ax.plot(D["slot"].values, D["mean"].values, '--', color='k', lw=1.6, label='mean')

        ax.set_ylim(*ylim)
        ax.set_yticks([5, 15, 25, 35, 45])

        # The year is shown as the panel's y-label instead of a title.
        label = str(y)
        ax.set_ylabel(label, weight='bold')

        _set_xticks_30min(ax)
        ax.grid(alpha=0.3)

        # Tick-label styling
        for t in ax.yaxis.get_ticklabels():
            t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
        for t in ax.xaxis.get_ticklabels():
            t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
        if i == 0:
            ax.legend(loc='upper left', fontsize=8, frameon=False)

    # Hide the top row's x tick labels only when another row sits below it;
    # with a single row these are the only x labels of the figure.
    if rows > 1:
        plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)

    # Blank the unused cells. With sharex='col' only the bottom row carries
    # x tick labels, so when a bottom cell is blank the labels are switched
    # back on for the panel directly above it.
    for j in range(n, rows*cols):
        r_blank, c_blank = divmod(j, cols)
        axes[r_blank, c_blank].axis('off')
        if r_blank > 0:
            axes[r_blank - 1, c_blank].tick_params(labelbottom=True)

    fig.subplots_adjust(hspace=0)

    fig.text(0.5, 0.89, 'Annually Diurnal Quiet time VTEC', ha='center', fontsize=12, weight='bold')
    fig.text(0.5, 0.06, 'Local Time (LT)', ha='center', fontsize=12, weight='bold')
    fig.text(0.06, 0.5, 'TEC [TECU]', va='center', rotation='vertical', fontsize=12, weight='bold')

    out = OUTDIR_FIG / "Low_solar_activity_QUIET_YEARS_diurnal_mean_median_IQR_MS.png"
    fig.savefig(out, dpi=300, bbox_inches='tight')
    plt.close(fig)
    print("Saved:", out)


In [None]:
# ==========================================
# CELL 2 — DIURNAL CURVES PER MONTH (6×2)
# median + IQR + mean over all quiet days, all years
# ==========================================
ylim = (0, 50)
fig, axes = plt.subplots(6, 2, figsize=(12, 12), squeeze=False, sharex='col')

# Parse the sample dates once instead of once per month.
month_of_sample = pd.to_datetime(S["date_utc"]).dt.month

for m in range(1, 13):
    ax = axes[(m-1)//2, (m-1)%2]
    Sm = S[month_of_sample == m]
    D = agg_diurnal(Sm)
    color = _panel_color(m-1)
    ax.plot(D["slot"], D["median"], '-', color=color, lw=2.2, label='median')
    ax.fill_between(D["slot"], D["q25"], D["q75"], color=color, alpha=0.2, label='IQR')
    ax.plot(D["slot"], D["mean"], '--', color='k', lw=1.6, label='mean')
    ax.set_ylim(*ylim)
    ax.set_yticks([5, 15, 25, 35, 45])

    # The month name is shown as the panel's y-label (set once).
    label = pd.Timestamp(2000, m, 1).strftime('%B')
    ax.set_ylabel(label, weight='bold')

    _set_xticks_30min(ax)
    ax.grid(alpha=0.3)

    # Tick-label styling
    for t in ax.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
    for t in ax.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

axes[0, 0].legend(frameon=False, fontsize=9, loc='upper left')

plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)
fig.subplots_adjust(hspace=0)

fig.text(0.5, 0.89, 'Monthly Diurnal Quiet time VTEC', ha='center', fontsize=12, weight='bold')
fig.text(0.5, 0.06, 'Local Time (LT)', ha='center', fontsize=12, weight='bold')
fig.text(0.06, 0.5, 'TEC [TECU]', va='center', rotation='vertical', fontsize=12, weight='bold')

out = OUTDIR_FIG/"Low_solar_activity_QUIET_MONTHS_diurnal_mean_median_IQR_MS.png"
fig.savefig(out, dpi=300, bbox_inches='tight')
plt.close(fig)
print("Saved:", out)


In [None]:
# ==========================================
# CELL A — STATS CLÉS COURBES DIURNES PAR MOIS
# (à partir de S et agg_diurnal)
# ==========================================
import numpy as np
import pandas as pd

def _slot_to_lt_h(slot: int) -> float:
    """Convert a half-hour slot number (0..47) to a decimal hour of day (0.0–23.5)."""
    return float(slot) / 2.0

def _slot_to_lt_str(slot: int) -> str:
    """Convert a half-hour slot number (0..47) to a zero-padded 'HH:MM' string."""
    decimal_hours = float(slot) / 2.0
    whole_hours = int(decimal_hours)
    minutes = int(round(60 * (decimal_hours - whole_hours)))
    return "{:02d}:{:02d}".format(whole_hours, minutes)

rows = []

# Parse the sample dates once instead of once per month.
month_of_sample = pd.to_datetime(S["date_utc"]).dt.month

for m in range(1, 13):
    Sm = S[month_of_sample == m].copy()
    if Sm.empty:
        continue

    D = agg_diurnal(Sm).copy()   # must provide at least: 'slot', 'median'
    D = D.dropna(subset=["median"])
    if D.empty:
        continue

    # Diurnal peak of the month (maximum of the per-slot median)
    idx_max = D["median"].idxmax()
    peak_val = float(D.loc[idx_max, "median"])
    peak_slot = int(D.loc[idx_max, "slot"])
    peak_lt   = _slot_to_lt_str(peak_slot)

    # Diurnal minimum (often pre-dawn / night)
    idx_min = D["median"].idxmin()
    min_val = float(D.loc[idx_min, "median"])
    min_slot = int(D.loc[idx_min, "slot"])
    min_lt   = _slot_to_lt_str(min_slot)

    # Day/night contrast (intra-day amplitude)
    day_night_range = peak_val - min_val

    # For information: mean of the slot medians in a pre-dawn window (00–04 LT)
    # and in an afternoon window (12–18 LT)
    D["lt_hour"] = D["slot"].apply(_slot_to_lt_h)
    pre_dawn = D[(D["lt_hour"] >= 0.0) & (D["lt_hour"] < 4.0)]
    afternoon = D[(D["lt_hour"] >= 12.0) & (D["lt_hour"] < 18.0)]

    pre_dawn_med = float(pre_dawn["median"].mean()) if not pre_dawn.empty else np.nan
    aft_med      = float(afternoon["median"].mean()) if not afternoon.empty else np.nan

    rows.append({
        "month": m,
        "month_name": pd.Timestamp(2000, m, 1).strftime("%b"),
        "peak_median_TECU": peak_val,
        "lt_of_peak": peak_lt,
        "min_median_TECU": min_val,
        "lt_of_min": min_lt,
        "day_night_range_TECU": day_night_range,
        "pre_dawn_median_00_04_TECU": pre_dawn_med,
        "afternoon_median_12_18_TECU": aft_med,
    })

# Naming the columns explicitly keeps the table well-formed (and sortable by
# "month") even when no month produced a row.
_monthly_stat_cols = [
    "month", "month_name",
    "peak_median_TECU", "lt_of_peak",
    "min_median_TECU", "lt_of_min",
    "day_night_range_TECU",
    "pre_dawn_median_00_04_TECU", "afternoon_median_12_18_TECU",
]
monthly_diurnal_stats = pd.DataFrame(rows, columns=_monthly_stat_cols).sort_values("month")

print("=== Monthly diurnal median VTEC stats (quiet QSL–GIM20) ===")
if monthly_diurnal_stats.empty:
    print("No monthly statistics available (no valid quiet-day samples).")
else:
    print(monthly_diurnal_stats.to_string(index=False,
          float_format=lambda x: f"{x:6.2f}"))

    # Quick summaries for the text
    print("\n--- Key diagnostics for text ---")
    # Month with the largest peak
    i_max = monthly_diurnal_stats["peak_median_TECU"].idxmax()
    print("Largest monthly diurnal median peak:",
          f"{monthly_diurnal_stats.loc[i_max, 'peak_median_TECU']:.2f} TECU",
          f"in {monthly_diurnal_stats.loc[i_max, 'month_name']} at",
          monthly_diurnal_stats.loc[i_max, "lt_of_peak"])

    # Month with the smallest peak
    i_min = monthly_diurnal_stats["peak_median_TECU"].idxmin()
    print("Smallest monthly diurnal median peak:",
          f"{monthly_diurnal_stats.loc[i_min, 'peak_median_TECU']:.2f} TECU",
          f"in {monthly_diurnal_stats.loc[i_min, 'month_name']} at",
          monthly_diurnal_stats.loc[i_min, "lt_of_peak"])

    # Range of the peak times: earliest to latest. The strings are zero-padded
    # 'HH:MM', so their lexicographic order is their chronological order.
    lt_peaks = monthly_diurnal_stats["lt_of_peak"].tolist()
    print("Monthly peak times (LT) from",
          min(lt_peaks), "to", max(lt_peaks),
          "for", ", ".join(monthly_diurnal_stats['month_name'].tolist()))


In [None]:
# ==========================================
# CELL 3 — COURBES DIURNES PAR SAISON (2×2)
# median + IQR + mean — jours quiet
# ==========================================
def season_of_month(m):
    """Map a month number to its season code: DJF, MAM, JJA, and SON for every other value."""
    if m in (12, 1, 2):
        return "DJF"
    if m in (3, 4, 5):
        return "MAM"
    if m in (6, 7, 8):
        return "JJA"
    return "SON"

# Tag every 30-min sample with the season of its date.
S["season"] = pd.to_datetime(S["date_utc"]).dt.month.map(season_of_month)

order = ["DJF", "MAM", "JJA", "SON"]
# Defined here so this cell does not depend on another plotting cell having run first.
ylim = (0, 50)
fig, axes = plt.subplots(2, 2, figsize=(12, 5), squeeze=False, sharex='col')

for i, sname in enumerate(order):
    color = _panel_color(i)
    ax = axes[i//2, i%2]
    Ss = S[S["season"] == sname]
    D = agg_diurnal(Ss)

    ax.plot(D["slot"], D["median"], '-', color=color, lw=2.2, label='median')
    ax.fill_between(D["slot"], D["q25"], D["q75"], color=color, alpha=0.2, label='IQR')
    ax.plot(D["slot"], D["mean"], '--', color='k', lw=1.6, label='mean')

    ax.set_ylim(*ylim)
    ax.set_yticks([5, 15, 25, 35, 45])

    # The season code is shown as the panel's y-label.
    label = sname
    ax.set_ylabel(label, weight='bold')

    _set_xticks_30min(ax)

    # Tick-label styling
    for t in ax.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
    for t in ax.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

    ax.grid(alpha=0.3)


axes[0, 0].legend(frameon=False, fontsize=9, loc='upper left')

plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)
fig.subplots_adjust(hspace=0)

fig.text(0.5, 0.89, 'Seasonal Diurnal Quiet time VTEC', ha='center', fontsize=12, weight='bold')
fig.text(0.5, 0.03, 'Local Time (LT)', ha='center', fontsize=12, weight='bold')
fig.text(0.06, 0.5, 'TEC [TECU]', va='center', rotation='vertical', fontsize=12, weight='bold')

out = OUTDIR_FIG/"Low_solar_activity_QUIET_SEASONS_diurnal_mean_median_IQR_MS.png"
fig.savefig(out, dpi=300, bbox_inches='tight')
plt.close(fig)
print("Saved:", out)

In [None]:
# ==========================================
# CELL B — STATS CLÉS COURBES DIURNES PAR SAISON
# (à partir de S, season_of_month et agg_diurnal)
# ==========================================
import numpy as np
import pandas as pd

def _slot_to_lt_h(slot: int) -> float:
    """Return the decimal hour of day for a half-hour slot index (slot 0 -> 0.0, slot 47 -> 23.5)."""
    half_hours = float(slot)
    return half_hours * 0.5

def _slot_to_lt_str(slot: int) -> str:
    """Format a half-hour slot index as 'HH:MM' (slot 0 -> '00:00', slot 47 -> '23:30')."""
    hour_value = float(slot) * 0.5
    hh = int(hour_value)
    # The fractional part of the hour becomes the minutes.
    mm = int(round((hour_value - hh) * 60))
    return "%02d:%02d" % (hh, mm)

def season_of_month(m):
    """Return 'DJF', 'MAM' or 'JJA' for the months of those seasons and 'SON' for any other value."""
    season_months = (
        ("DJF", (12, 1, 2)),
        ("MAM", (3, 4, 5)),
        ("JJA", (6, 7, 8)),
    )
    for code, months in season_months:
        if m in months:
            return code
    return "SON"

# Work on a copy and tag every sample with its month and season.
S = S.copy()
S["month"] = pd.to_datetime(S["date_utc"]).dt.month
S["season"] = S["month"].map(season_of_month)

rows = []
order = ["DJF", "MAM", "JJA", "SON"]

for sname in order:
    Ss = S[S["season"] == sname].copy()
    if Ss.empty:
        continue

    D = agg_diurnal(Ss).copy()
    D = D.dropna(subset=["median"])
    if D.empty:
        continue

    # Seasonal diurnal peak (maximum of the per-slot median)
    idx_max = D["median"].idxmax()
    peak_val = float(D.loc[idx_max, "median"])
    peak_slot = int(D.loc[idx_max, "slot"])
    peak_lt   = _slot_to_lt_str(peak_slot)

    # Seasonal minimum
    idx_min = D["median"].idxmin()
    min_val = float(D.loc[idx_min, "median"])
    min_slot = int(D.loc[idx_min, "slot"])
    min_lt   = _slot_to_lt_str(min_slot)

    day_night_range = peak_val - min_val

    # Mean of the slot medians in the 00–04 and 12–18 hour windows.
    D["lt_hour"] = D["slot"].apply(_slot_to_lt_h)
    pre_dawn = D[(D["lt_hour"] >= 0.0) & (D["lt_hour"] < 4.0)]
    afternoon = D[(D["lt_hour"] >= 12.0) & (D["lt_hour"] < 18.0)]

    pre_dawn_med = float(pre_dawn["median"].mean()) if not pre_dawn.empty else np.nan
    aft_med      = float(afternoon["median"].mean()) if not afternoon.empty else np.nan

    rows.append({
        "season": sname,
        "peak_median_TECU": peak_val,
        "lt_of_peak": peak_lt,
        "min_median_TECU": min_val,
        "lt_of_min": min_lt,
        "day_night_range_TECU": day_night_range,
        "pre_dawn_median_00_04_TECU": pre_dawn_med,
        "afternoon_median_12_18_TECU": aft_med,
    })

# The rows were appended while iterating `order`, so they are already in
# DJF/MAM/JJA/SON order. Building the table directly (instead of selecting
# every season label by name) also works when a season was skipped above.
_seasonal_stat_cols = [
    "season",
    "peak_median_TECU", "lt_of_peak",
    "min_median_TECU", "lt_of_min",
    "day_night_range_TECU",
    "pre_dawn_median_00_04_TECU", "afternoon_median_12_18_TECU",
]
seasonal_diurnal_stats = pd.DataFrame(rows, columns=_seasonal_stat_cols)

print("=== Seasonal diurnal median VTEC stats (quiet QSL–GIM20) ===")
if seasonal_diurnal_stats.empty:
    print("No seasonal statistics available (no valid quiet-day samples).")
else:
    print(seasonal_diurnal_stats.to_string(index=False,
          float_format=lambda x: f"{x:6.2f}"))

    print("\n--- Key diagnostics for text ---")
    for _, r in seasonal_diurnal_stats.iterrows():
        print(f"{r['season']}: peak median ≈ {r['peak_median_TECU']:.2f} TECU at {r['lt_of_peak']} LT; "
              f"night min ≈ {r['min_median_TECU']:.2f} TECU at {r['lt_of_min']} LT; "
              f"day–night range ≈ {r['day_night_range_TECU']:.2f} TECU; "
              f"pre-dawn (00–04 LT) median ≈ {r['pre_dawn_median_00_04_TECU']:.2f} TECU; "
              f"afternoon (12–18 LT) median ≈ {r['afternoon_median_12_18_TECU']:.2f} TECU.")


In [None]:
# ==========================================
# CELL 4 — TIME OF THE DAILY MAXIMUM (HISTOGRAM, LT)
# ==========================================
# For each quiet day: slot of the maximum. On ties idxmax returns the FIRST
# occurrence. Rows without a VTEC value are removed beforehand so that a day
# with no valid sample cannot produce a missing label for the .loc lookup.
S_valid = S.dropna(subset=[VTEC_COL])
idx = S_valid.groupby("date_utc")[VTEC_COL].idxmax()
daily_max = S_valid.loc[idx, ["date_utc", "slot", VTEC_COL]].sort_values("date_utc")

# Convert the half-hour slot to a decimal hour (0..23.5).
# NOTE(review): `slot` is derived from the UTC timestamp in the setup cell,
# while the axis is labelled "Local Time (LT)" — confirm that UTC and LT
# coincide for this station.
hours = daily_max["slot"] / 2.0

fig, ax = plt.subplots(figsize=(10, 5))
# Half-hour-wide bins centred on each slot hour.
ax.hist(hours, bins=np.arange(-0.25, 24.75, 0.5), edgecolor='k', alpha=0.7)


# NOTE(review): this rebinds the module-level name `_set_xticks_30min` to an
# HOUR-based axis (0–24). The earlier definition in this file is SLOT-based
# (0–48); re-running a slot-based plotting cell after this one would pick up
# this version. The name is kept unchanged here because later cells may rely on it.
def _set_xticks_30min(ax):
    """Set an hour-valued x-axis: limits -1..24 with a labelled tick every 4 hours."""
    ax.set_xlim(-1, 24)
    ax.set_xticks(np.arange(0, 24, 4))
    ax.set_xticklabels([f"{h:02d}" for h in range(0, 24, 4)], rotation=0)


ax.set_ylabel("Count of days", weight='bold')

_set_xticks_30min(ax)

# Tick-label styling
for t in ax.yaxis.get_ticklabels():
    t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
for t in ax.xaxis.get_ticklabels():
    t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

ax.grid(alpha=0.3)
ax.set_title("Distribution of Local-Time of the Daily VTEC Maximum on Quiet Days", weight='bold')
ax.set_xlabel("Local Time (LT)", weight='bold')

out = OUTDIR_FIG/"Low_solar_activity_QUIET_hist_hour_of_daily_max_LT_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import pytz

# ============================================
# Histogram + seasonal diagnostics of the local
# time of the daily VTEC maximum (quiet/low-solar subset)
# ============================================

# ---------------- CONFIG ----------------
# Time zone name used to convert the UTC timestamp of the daily maximum.
LOCAL_TZ     = "Africa/Casablanca"
# NOTE(review): a later cell of this file sets VTEC_MAX_COL to
# "VTEC_max_from_30min" — confirm which column name the daily CSV provides.
VTEC_MAX_COL = "VTEC_max"      # amplitude of daily max on the 30-min grid
TIME_COL_UTC = "max_ts_utc"    # timestamp of daily max in UTC (tz-aware)

# Output directory (created if missing, including parents) and figure path.
FIG_DIR = Path("figures/quiet")
FIG_DIR.mkdir(parents=True, exist_ok=True)
OUT_FIG = FIG_DIR / "TEC09_low_solar_quiet_hist_hour_of_daily_max_LT_MS.png"
# ----------------------------------------


def hist_fwhm(centers: np.ndarray, counts: np.ndarray) -> float:
    """
    Approximate the full width at half-maximum (FWHM) of a 1D histogram.

    The width is the distance between the first and the last bin centre whose
    height is at least half of the tallest bin; bins in between are not
    inspected.

    Parameters
    ----------
    centers : array-like
        Bin centres, one per entry of `counts`.
    counts : array-like
        Bin heights (e.g., from np.histogram or plt.hist).

    Returns
    -------
    float
        Width in the units of `centers`; 0.0 when a single bin reaches the
        half-maximum; NaN when `counts` is empty, when its maximum is not
        positive, or when no bin reaches the half-maximum.
    """
    heights = np.asarray(counts, float)
    positions = np.asarray(centers, float)

    # No bins at all → width undefined.
    if heights.size == 0:
        return np.nan

    tallest = heights.max()
    if tallest <= 0:
        return np.nan

    at_least_half = heights >= tallest / 2.0
    if not np.any(at_least_half):
        return np.nan

    selected = positions[at_least_half]
    # A lone bin above the threshold has no measurable extent.
    if selected.size == 1:
        return 0.0

    return selected[-1] - selected[0]


# ====== PREPARE DAILY DATAFRAME (df_kept MUST EXIST) ======
# df_kept is assumed to contain the "quiet & low-solar & |offset|<20" subset
df_daily = df_kept.copy()

# Month abbreviations used as x tick labels by the two monthly panels.
_MONTH_ABBR = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]


def _bold_tick_labels(ax, fontsize: int = 10) -> None:
    """Render the x and y tick labels of `ax` in bold black at `fontsize`."""
    for tick_label in (*ax.xaxis.get_ticklabels(), *ax.yaxis.get_ticklabels()):
        tick_label.set_fontsize(fontsize)
        tick_label.set_color("black")
        tick_label.set_weight("bold")


# Coerce the amplitude to numeric (as the later diagnostics cell does) so the
# np.isfinite filter below cannot fail on a non-numeric column.
df_daily[VTEC_MAX_COL] = pd.to_numeric(df_daily[VTEC_MAX_COL], errors="coerce")

# Ensure UTC timestamp column for the daily max is tz-aware UTC
df_daily[TIME_COL_UTC] = pd.to_datetime(df_daily[TIME_COL_UTC], utc=True, errors="coerce")

# Convert daily max timestamp to local time and compute local hour
tz = pytz.timezone(LOCAL_TZ)
df_daily["max_ts_lt"] = df_daily[TIME_COL_UTC].dt.tz_convert(tz)
df_daily["max_hour_lt"] = (
    df_daily["max_ts_lt"].dt.hour
    + df_daily["max_ts_lt"].dt.minute / 60.0
)

# Month and DOY (optional), both taken from the local-time timestamp
df_daily["month"] = df_daily["max_ts_lt"].dt.month
df_daily["doy"] = df_daily["max_ts_lt"].dt.dayofyear

# Basic NaN filtering: keep days with a valid peak time AND a valid amplitude
df_daily = df_daily[
    np.isfinite(df_daily["max_hour_lt"]) &
    np.isfinite(df_daily[VTEC_MAX_COL])
].copy()

if df_daily.empty:
    print("df_daily is empty after filtering (no valid daily maxima).")
else:
    # ====== 3-PANEL FIGURE ======
    fig, (ax1, ax2, ax3) = plt.subplots(
        3, 1, figsize=(10, 9),
        gridspec_kw={"height_ratios": [2.0, 1.3, 1.3]}
    )
    month_axis = np.arange(1, 13)

    # --------------------------------------------------
    # 1) Histogram of local time of daily max VTEC (LT)
    # --------------------------------------------------
    h = df_daily["max_hour_lt"].values

    # 30-min bins from 0 to 24h
    bins = np.arange(0.0, 24.0 + 0.5, 0.5)
    counts, bin_edges, _ = ax1.hist(
        h,
        bins=bins,
        edgecolor="black",
        alpha=0.7,
        label="Daily max count",
    )
    centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    # Line connecting bin tops
    ax1.plot(
        centers,
        counts,
        "-o",
        linewidth=1.5,
        markersize=4,
        label="Bin-top curve",
    )

    # Mode, FWHM, peak amplitude
    if counts.sum() > 0:
        idx_peak = int(np.argmax(counts))
        mode_hour = centers[idx_peak]
        median_hour = df_daily["max_hour_lt"].median()
        peak_days = int(counts[idx_peak])
        fwhm = hist_fwhm(centers, counts)

        print(
            f"Mode ≈ {mode_hour:.2f} LT, "
            f"median ≈ {median_hour:.2f} LT, "
            f"FWHM ≈ {fwhm:.2f} h, "
            f"peak = {peak_days} days."
        )

        # The summary statistics are displayed as the legend title.
        stats_label = (
            f"Mode ≈ {mode_hour:4.1f} h LT, "
            f"FWHM ≈ {fwhm:3.1f} h, "
            f"peak = {peak_days} days"
        )
        ax1.legend(title=stats_label, loc="upper left", fontsize=10)
    else:
        ax1.legend(loc="upper left", fontsize=9)

    ax1.set_xlim(0, 24)
    ax1.set_xticks(np.arange(0, 25, 2))
    ax1.set_ylabel("Number of days", fontweight="bold")
    ax1.set_xlabel("Local Time (LT)", fontweight="bold")
    ax1.set_title(
        "Local time of daily VTEC maximum (quiet, low-solar subset)",
        fontweight="bold",
    )
    ax1.grid(True, alpha=0.3)
    _bold_tick_labels(ax1)

    # --------------------------------------------------
    # 2) Daily max amplitude vs month (scatter + median)
    # --------------------------------------------------
    x_month = df_daily["month"].values
    y_vtec = df_daily[VTEC_MAX_COL].values

    ax2.scatter(
        x_month,
        y_vtec,
        color="k",
        alpha=0.4,
        s=15,
        label="Daily maxima",
    )

    # Reindexing on 1..12 leaves a gap (NaN) for months without any day.
    month_median_vtec = (
        df_daily.groupby("month")[VTEC_MAX_COL]
        .median()
        .reindex(month_axis)
    )

    ax2.plot(
        month_axis,
        month_median_vtec.values,
        "-o",
        linewidth=2,
        label="Monthly median max",
    )

    ax2.set_xlim(0.5, 12.5)
    ax2.set_xticks(month_axis)
    ax2.set_xticklabels(_MONTH_ABBR)
    ax2.set_ylabel("Daily max VTEC [TECU]", fontweight="bold")
    ax2.set_xlabel("Month of year", fontweight="bold")
    ax2.grid(True, alpha=0.3)
    ax2.legend(loc="upper center", fontsize=9)
    _bold_tick_labels(ax2)

    # --------------------------------------------------
    # 3) Local time of daily max vs month (scatter + median)
    # --------------------------------------------------
    y_hour = df_daily["max_hour_lt"].values

    ax3.scatter(
        x_month,
        y_hour,
        color="k",
        alpha=0.4,
        s=15,
        label="Daily peak time",
    )

    month_median_hour = (
        df_daily.groupby("month")["max_hour_lt"]
        .median()
        .reindex(month_axis)
    )

    ax3.plot(
        month_axis,
        month_median_hour.values,
        "-o",
        linewidth=2,
        label="Monthly median time",
    )

    ax3.set_xlim(0.5, 12.5)
    ax3.set_xticks(month_axis)
    ax3.set_xticklabels(_MONTH_ABBR)
    ax3.set_xlabel("Month of year", fontweight="bold")
    ax3.set_ylabel("Local time of daily max [LT]", fontweight="bold")
    ax3.set_ylim(0, 24)
    ax3.grid(True, alpha=0.3)
    ax3.legend(loc="lower center", fontsize=9)
    _bold_tick_labels(ax3)

    fig.tight_layout()
    fig.savefig(OUT_FIG, dpi=300)
    plt.close(fig)
    print("Saved:", OUT_FIG)


In [None]:
# Seasonal variability of the peak hour: work on a copy of the daily-maximum
# table and take the month from its "max_ts_lt" timestamp column.
df_season = df_daily.copy()
df_season["month"] = df_season["max_ts_lt"].dt.month

def month_to_season(m):
    """Return the three-letter season code for month number *m*.

    12, 1, 2 -> "DJF"; 3, 4, 5 -> "MAM"; 6, 7, 8 -> "JJA".
    Every other value (September-November included) yields "SON".
    """
    season_table = (
        ("DJF", (12, 1, 2)),
        ("MAM", (3, 4, 5)),
        ("JJA", (6, 7, 8)),
    )
    for code, months in season_table:
        if m in months:
            return code
    return "SON"

# Element-wise mapping of the month number to its season code
df_season["season"] = df_season["month"].map(month_to_season)

# Median and IQR of peak time per season
def iqr(x):
    """Return the interquartile range (75th minus 25th percentile) of *x*.

    NaN entries are ignored. When *x* is empty or contains only NaNs the
    result is NaN, returned directly so that NumPy does not emit an
    "empty slice" / "All-NaN slice" RuntimeWarning for such groups.
    """
    values = np.asarray(x, float)
    values = values[~np.isnan(values)]
    if values.size == 0:
        return np.nan
    q25, q75 = np.percentile(values, [25, 75])
    return q75 - q25

# Sample count, median and IQR of the daily peak hour within each season
peak_hour_by_season = df_season.groupby("season")["max_hour_lt"]
season_stats = peak_hour_by_season.agg(["count", "median", iqr]).reset_index()
print(season_stats)


In [None]:
# ============================================
# Daily peak VTEC amplitude diagnostics
#  - Works on df_kept (quiet, low-solar, |offset|<20 subset)
#  - Prints global and monthly statistics for VTEC maxima
# ============================================

import numpy as np
import pandas as pd

# --------- CONFIG ---------
VTEC_MAX_COL = "VTEC_max_from_30min"   # daily amplitude of the VTEC maximum
TIME_COL_UTC = "max_ts_utc"            # UTC timestamp of the daily maximum
# ---------------------------

# df_kept must already exist (computed upstream):
# "quiet & low-solar & |gim_offset_tecu| < 20" subset.
# Copy it while coercing the amplitude to numeric and the timestamp to
# timezone-aware UTC; unparseable entries become NaN / NaT.
df_daily = df_kept.assign(
    **{
        VTEC_MAX_COL: pd.to_numeric(df_kept[VTEC_MAX_COL], errors="coerce"),
        TIME_COL_UTC: pd.to_datetime(
            df_kept[TIME_COL_UTC], utc=True, errors="coerce"
        ),
    }
)

# Only derive the month when the input does not already carry one
if "month" not in df_daily.columns:
    df_daily["month"] = df_daily[TIME_COL_UTC].dt.month

# Keep rows whose amplitude is a finite number
finite_peak = np.isfinite(df_daily[VTEC_MAX_COL])
df_daily = df_daily.loc[finite_peak].copy()

if df_daily.empty:
    print("df_daily is empty after filtering valid daily VTEC maxima.")
else:
    # ----- 1) Global range of daily maxima -----
    vtec_min = df_daily[VTEC_MAX_COL].min()
    vtec_max = df_daily[VTEC_MAX_COL].max()
    vtec_p05, vtec_p95 = np.nanpercentile(df_daily[VTEC_MAX_COL], [5, 95])

    print("Global daily peak VTEC range (quiet, low-solar subset):")
    print(f"  min       = {vtec_min:.2f} TECU")
    print(f"  max       = {vtec_max:.2f} TECU")
    print(f"  5th perc  = {vtec_p05:.2f} TECU  (robust lower bound)")
    print(f"  95th perc = {vtec_p95:.2f} TECU  (robust upper bound)")
    print("")

    # ----- 2) Monthly medians of daily maxima -----
    all_months_idx = np.arange(1, 13)
    peaks_by_month = df_daily.groupby("month")[VTEC_MAX_COL]

    # Months without any day get a NaN median ...
    month_median = peaks_by_month.median().reindex(all_months_idx)
    # ... and a day count of 0 (not NaN), which also keeps n_days integer
    month_count = peaks_by_month.count().reindex(all_months_idx, fill_value=0)

    month_stats = pd.DataFrame({
        "month": all_months_idx,
        "n_days": month_count.values,
        "median_VTEC_max": month_median.values,
    })

    month_name = {
        1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr",
        5: "May", 6: "Jun", 7: "Jul", 8: "Aug",
        9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec",
    }
    month_stats["name"] = month_stats["month"].map(month_name)

    print("Monthly median of daily VTEC maxima (quiet, low-solar subset):")
    print(month_stats.to_string(index=False, float_format=lambda x: f"{x:6.2f}"))
    print("")

    # ----- 3) Months / seasons with largest and smallest medians -----

    # idxmax() cannot pick a month when every monthly median is NaN
    # (e.g. no row carries a usable month), so guard the lookup.
    has_monthly_median = bool(month_median.notna().any())

    if has_monthly_median:
        # Month with largest median daily maximum (typically equinox/summer)
        idx_max = month_median.idxmax()
        vtec_equinox_summer = month_median.max()
        print("Largest monthly median daily-peak VTEC:")
        print(f"  month = {idx_max} ({month_name[idx_max]}), "
              f"median = {vtec_equinox_summer:.2f} TECU")
    else:
        print("No monthly median available: no row has a valid month.")
    print("")

    # Winter: DJF (Dec–Jan–Feb) → minimum median among these months
    winter_months = [12, 1, 2]
    winter_medians = month_median.loc[winter_months].dropna()

    if not winter_medians.empty:
        w_idx_min = winter_medians.idxmin()
        vtec_winter = winter_medians.min()
        print("Winter (DJF) monthly median daily peaks:")
        for m in winter_months:
            if not np.isnan(month_median.loc[m]):
                print(f"  {m:2d} ({month_name[m]}): "
                      f"median = {month_median.loc[m]:.2f} TECU")
        print("Lowest winter monthly median peak VTEC:")
        print(f"  month = {w_idx_min} ({month_name[w_idx_min]}), "
              f"median = {vtec_winter:.2f} TECU")
    else:
        print("No data for winter months (DJF) in monthly medians.")

    # ----- 4) Ready-to-use numbers for the text (e.g. LaTeX paragraph) -----
    print("\n--- Summary numbers for the manuscript ---")
    print(f"Use VTEC_min ≈ {vtec_p05:.1f} TECU and "
          f"VTEC_max ≈ {vtec_p95:.1f} TECU as a robust daily-peak range.")
    if has_monthly_median:
        print(f"Use VTEC_equinox/summer ≈ {vtec_equinox_summer:.1f} TECU "
              f"in {month_name[idx_max]} (month with largest median peak).")
    if not winter_medians.empty:
        print(f"Use VTEC_winter ≈ {vtec_winter:.1f} TECU "
              f"in {month_name[w_idx_min]} for the lowest winter monthly median.")


In [None]:
# ==========================================
# CELL 6b — Local-time boxplots (2 h bins)
# Quiet days (QSL–GIM20 subset)
# ==========================================
import numpy as np
import matplotlib.pyplot as plt

# Inputs expected from earlier cells:
#   - S          : 30-min VTEC time series (quiet / low-solar / |offset|<20 subset)
#   - VTEC_COL   : column name for VTEC in S
#   - OUTDIR_FIG : base output directory for figures

# ---- Assign every 30-min slot to the start hour of its 2 h bin ----
# Four consecutive slots share a bin: slots 0-3 -> 0, 4-7 -> 2, ...
S_hour = S.copy()
S_hour["hour2"] = 2 * (S_hour["slot"] // 4)

hours2 = np.arange(0, 24, 2)  # bin start hours: 0, 2, ..., 22

# Non-NaN VTEC samples of each bin, in bin order
data_h = []
for h in hours2:
    in_bin = S_hour["hour2"] == h
    data_h.append(S_hour.loc[in_bin, VTEC_COL].dropna().to_numpy())
has_data = [v.size > 0 for v in data_h]

# Mean VTEC per bin, indexed by bin start hour (NaN where a bin is absent);
# drawn below as the black curve
mh = S_hour.groupby("hour2")[[VTEC_COL]].mean().reindex(hours2)

fig, ax = plt.subplots(figsize=(10, 4))

# Only bins holding samples get a box; bin number i sits at x = i + 1
filled = [i for i, ok in enumerate(has_data) if ok]
pos = [i + 1 for i in filled]
dat = [data_h[i] for i in filled]
labs = [hours2[i] for i in filled]

# Appearance of the boxes (2 h LT bins); whiskers span the 5–95 % range
box_style = {
    "patch_artist": True,
    "showmeans": True,
    "whis": (5, 95),
    "meanprops": {
        "marker": "D",
        "markersize": 4,
        "markerfacecolor": "black",
        "markeredgecolor": "white",
    },
    "boxprops": {"facecolor": "#c7e9ff", "edgecolor": "#1f4d7a", "linewidth": 1.2},
    "medianprops": {"color": "crimson", "linewidth": 1.8},
    "whiskerprops": {"color": "#1f4d7a", "linewidth": 1.0, "linestyle": "--"},
    "capprops": {"color": "#1f4d7a", "linewidth": 1.0},
    "flierprops": {
        "marker": "x",
        "markersize": 3,
        "markeredgecolor": "gray",
        "markerfacecolor": "none",
        "alpha": 0.5,
    },
}
if dat:
    ax.boxplot(dat, positions=pos, **box_style)

# Mean curve drawn at the same x positions as the box centres
box_centres = np.arange(1, len(hours2) + 1)
ax.plot(
    box_centres,
    mh[VTEC_COL].to_numpy(),
    color="black",
    marker="o",
    linewidth=2,
    markersize=5,
    label="Mean",
)

# X axis: one tick per bin, labelled with the two-digit start hour
ax.set_xlim(0.5, len(hours2) + 0.5)
ax.set_xticks(box_centres)
ax.set_xticklabels([f"{h:02d}" for h in hours2])

# Bold black 12 pt tick labels on both axes
for tick in [*ax.yaxis.get_ticklabels(), *ax.xaxis.get_ticklabels()]:
    tick.set_fontsize(12)
    tick.set_color("black")
    tick.set_weight("bold")

ax.grid(True, alpha=0.3)
ax.legend(frameon=False, loc="upper left")

# Figure-level title and axis captions (consistent with seasonal/monthly boxplots)
caption_font = {"fontsize": 12, "fontweight": "bold"}
fig.text(
    0.5, 0.92,
    "Quiet-time VTEC distribution by 2 h local-time bins (QSL–GIM20)",
    ha="center", **caption_font
)
fig.text(0.5, 0.01, "Local time (LT)", ha="center", **caption_font)
fig.text(
    0.04, 0.5, "TEC [TECU]",
    va="center", rotation="vertical", **caption_font
)

out = OUTDIR_FIG / "TEC08_Low_solar_activity_QUIET_boxplot_by_2h_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)


In [None]:
# ==========================================
# CELL X — VTEC stats by 2 h local-time bins (QSL–GIM20)
# ==========================================
import numpy as np
import pandas as pd
import pytz

# -------- CONFIG --------
TIME_COL       = "time"          # 30-min UTC timestamp column in the input dataframe
VTEC_COL       = "VTEC_median"   # 30-min VTEC column
QUIET_FLAG_COL = None            # e.g. "quiet_QSL_GIM20" if you have such a boolean flag; otherwise leave as None
LOCAL_TZ       = "Africa/Casablanca"
# ------------------------

# Start from the 30-min climatology dataframe built earlier
# (replace `S` by your actual 30-min dataframe if needed)
df = S.copy()

# Optionally restrict to quiet QSL–GIM20 days if a boolean flag column exists
if QUIET_FLAG_COL is not None and QUIET_FLAG_COL in df.columns:
    df = df[df[QUIET_FLAG_COL].eq(True)].copy()

# Coerce both inputs first: unparseable timestamps become NaT and
# non-numeric VTEC values become NaN.
df[TIME_COL] = pd.to_datetime(df[TIME_COL], utc=True, errors="coerce")
df[VTEC_COL] = pd.to_numeric(df[VTEC_COL], errors="coerce")

# Drop unusable rows BEFORE building the integer bin column: a NaT timestamp
# gives a NaN local hour, and casting NaN to int raises instead of being
# filtered out afterwards.
usable = df[TIME_COL].notna() & np.isfinite(df[VTEC_COL])
df = df[usable].copy()

# Convert the UTC timestamps to local time
tz = pytz.timezone(LOCAL_TZ)
df["ts_lt"] = df[TIME_COL].dt.tz_convert(tz)

# Local time in hours (float), always in [0, 24)
df["lt_hour"] = df["ts_lt"].dt.hour + df["ts_lt"].dt.minute / 60.0

# Start hour of the 2 h LT bin: 0, 2, ..., 22
# (lt_hour < 24, so the largest possible bin start is 22)
df["lt_bin_start"] = (2 * np.floor(df["lt_hour"] / 2)).astype(int)

# Helper: interquartile range
def iqr(x: pd.Series | np.ndarray) -> float:
    x = np.asarray(x, float)
    if x.size == 0:
        return np.nan
    return np.nanpercentile(x, 75) - np.nanpercentile(x, 25)

# Per-bin sample count, median and IQR of VTEC, ordered by bin start hour
vtec_by_bin = df.groupby("lt_bin_start")[VTEC_COL]
stats = vtec_by_bin.agg(count="count", median="median", iqr=iqr)
stats = stats.reset_index().sort_values("lt_bin_start")

# Human-readable bin labels like "00–02", "02–04", ...
def bin_label(h: int) -> str:
    """Format the 2 h bin starting at hour *h* as "HH–HH".

    The end hour wraps modulo 24, so the bin starting at 22 is "22–00".
    """
    end_hour = (h + 2) % 24
    return "{:02d}–{:02d}".format(h, end_hour)

# Attach the readable label of each bin, then print the table
stats["LT_bin"] = stats["lt_bin_start"].map(bin_label)

print("VTEC distribution by 2-hour local-time bins (QSL–GIM20 subset):")
stats_table = stats[["LT_bin", "count", "median", "iqr"]]
print(stats_table.to_string(index=False, float_format="{:6.2f}".format))

# ---- Extract a few key bins for text / paper use ----
def get_bin_value(h_start: int) -> tuple[float, float]:
    """Look up (median, iqr) for the bin whose start hour is *h_start*.

    Reads the module-level ``stats`` table; when no row has that
    ``lt_bin_start`` the result is ``(nan, nan)``.
    """
    matches = stats[stats["lt_bin_start"] == h_start]
    if len(matches) == 0:
        return np.nan, np.nan
    first = matches.iloc[0]
    return float(first["median"]), float(first["iqr"])

# Example bins (adapt as needed): 06–08, 14–16, 00–02 and 02–04 LT
(
    (med_06_08, iqr_06_08),
    (med_14_16, iqr_14_16),
    (med_00_02, iqr_00_02),
    (med_02_04, iqr_02_04),
) = (get_bin_value(start) for start in (6, 14, 0, 2))

print("\n--- Key values for text ---")
key_bins = (
    ("06–08", med_06_08, iqr_06_08),
    ("14–16", med_14_16, iqr_14_16),
    ("00–02", med_00_02, iqr_00_02),
    ("02–04", med_02_04, iqr_02_04),
)
for span, med, spread in key_bins:
    print(f"Median VTEC {span} LT  ≈ {med:.2f} TECU (IQR ≈ {spread:.2f} TECU)")


In [None]:
# ==========================================
# CELL N — Combined ECDF (3 local times) + exceedance (20/30/40 TECU)
# Requires:
#   - S: 30-min VTEC climatology dataframe (QSL–GIM20 subset)
#   - VTEC_COL: name of the VTEC 30-min column in S
#   - OUTDIR_FIG: output directory (Path)
# ==========================================
import numpy as np
import matplotlib.pyplot as plt

def ecdf(x: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """
    Empirical cumulative distribution of a sample.

    NaN entries of ``x`` are discarded. The result is the pair
    (values in ascending order, F) with F = i/N for i = 1..N, where N is
    the number of remaining values. Two empty arrays are returned when
    nothing remains.
    """
    sample = np.asarray(x, float)
    ordered = np.sort(sample[~np.isnan(sample)])
    n_valid = ordered.size
    if n_valid == 0:
        return np.array([]), np.array([])
    return ordered, np.arange(1, n_valid + 1) / n_valid

# ---------- CONFIG ----------
# Curve label -> slot index for 09:00, 12:00, 15:00 LT (30-min grid: slot = 2*hour)
LABELS_ECDF = {"09:00": 18, "12:00": 24, "15:00": 30}
# VTEC exceedance thresholds (TECU)
THRESHOLDS  = [20, 30, 40]
OUT_FIG     = OUTDIR_FIG / "Low_solar_activity_QUIET_ECDF_exceedance_MS.png"
# -----------------------------

# Two stacked panels, the upper one slightly taller
fig, (ax1, ax2) = plt.subplots(
    nrows=2, ncols=1, figsize=(8, 8),
    gridspec_kw={"height_ratios": [1.1, 1.0]},
)

# ===================== TOP PANEL: ECDF ======================
quantiles = {}  # label -> (P50, P75, P90), kept for later use in the text

for label, slot in LABELS_ECDF.items():
    vals = S.loc[S["slot"] == slot, VTEC_COL].to_numpy()
    x, y = ecdf(vals)
    if not x.size:
        continue  # no non-NaN sample for this slot: no curve, no quantiles
    ax1.plot(x, y, lw=3, label=label)
    p50, p75, p90 = np.nanpercentile(vals, [50, 75, 90])
    quantiles[label] = (p50, p75, p90)

# Upper x-limit: 99th percentile of all VTEC values, but never below 50
x_max = max(50, float(np.nanpercentile(S[VTEC_COL], 99)))
ax1.set_xlim(0, x_max)
ax1.set_ylim(0, 1)
ax1.grid(alpha=0.3)
ax1.legend(frameon=True, title="Local time")

# Bold black 10 pt tick labels on both axes
for t in [*ax1.yaxis.get_ticklabels(), *ax1.xaxis.get_ticklabels()]:
    t.set_fontsize(10)
    t.set_color("black")
    t.set_weight("bold")

ax1.set_ylabel("CDF of VTEC", weight="bold")
ax1.set_title(
    "(a) Empirical CDF of VTEC at 09:00, 12:00, and 15:00 LT",
    loc="left", weight="bold",
)

# ================= BOTTOM PANEL: EXCEEDANCE ==================
# Number of non-NaN samples per slot: denominator of the probabilities
g   = S.groupby("slot")[VTEC_COL]
den = g.count().rename("N")

def slot_ticks_2h_local(ax):
    """
    Label the x-axis of *ax* every 2 h when x is a 30-min slot index.

    Sets the x-range to 0..47 and puts a tick at every fourth slot
    (0, 4, ..., 44), labelled "HH:00" with HH = slot / 2.
    """
    tick_slots = np.arange(0, 48, 4)  # every 4th slot = every 2 h
    tick_text = ["%02d:00" % int(0.5 * s) for s in tick_slots]
    ax.set_xlim(0, 47)
    ax.set_xticks(tick_slots)
    ax.set_xticklabels(tick_text)

# One exceedance curve per threshold: per slot, the number of samples at or
# above the threshold divided by the number of non-NaN samples (den)
all_slots = np.arange(48)
for thr in THRESHOLDS:
    hits = S.assign(hit=S[VTEC_COL] >= thr).groupby("slot")["hit"].sum()
    p = (hits / den).reindex(all_slots).values  # probability per slot
    ax2.plot(all_slots, p, "-o", ms=3, lw=2, label=f"VTEC ≥ {thr} TECU")

slot_ticks_2h_local(ax2)
ax2.set_ylim(0, 0.4)
ax2.grid(alpha=0.3)

# Bold black 10 pt tick labels on both axes
for t in [*ax2.yaxis.get_ticklabels(), *ax2.xaxis.get_ticklabels()]:
    t.set_fontsize(10)
    t.set_color("black")
    t.set_weight("bold")

ax2.set_xlabel("Local Time (LT)", weight="bold")
ax2.set_ylabel("Probability",    weight="bold")
ax2.legend(frameon=True)
ax2.set_title(
    "(b) Probability of VTEC exceeding fixed thresholds",
    loc="left", weight="bold",
)

# ===== Global layout and save =====
# tight_layout is confined to a rectangle that leaves room for the suptitle
fig.tight_layout(rect=[0.03, 0.03, 0.97, 0.95])
fig.suptitle(
    "VTEC distributions and exceedance probabilities at Oukaimeden (QSL–GIM20 subset)",
    y=0.98, fontsize=12, weight="bold",
)

fig.savefig(OUT_FIG, dpi=300)
plt.close(fig)
print("Saved combined figure:", OUT_FIG)


In [None]:
# ==========================================
# CELL N — Yearly anomalies (diurnal median – quiet-time climatology)
# Requires:
#   - S: 30-min VTEC dataframe (QSL–GIM20 subset) with columns ["slot","date_utc", VTEC_COL]
#   - df_kept: daily subset used to select years (has "date_utc")
#   - agg_diurnal(): function returning diurnal statistics per slot (0..47)
#   - slot_ticks_2h(ax): helper to put 2-hour LT ticks on x-axis (0..47 → 0–24 h)
#   - OUTDIR_FIG: output directory (Path)
# ==========================================
# Multi-year diurnal climatology: median per slot, renamed so it can be
# merged next to each year's own median
CLIM = (
    agg_diurnal(S)[["slot", "median"]]
    .rename(columns={"median": "clim_median"})
)

# Unique years present in df_kept
years = sorted({pd.Timestamp(d).year for d in df_kept["date_utc"]})

# Size the panel grid from the number of years instead of a fixed 6 x 2:
# a fixed grid overflows beyond 12 years and, with exactly 12 years, the
# unconditional removal of the bottom-right panel would delete real data.
N_COLS = 2
n_rows = max(1, -(-len(years) // N_COLS))  # ceiling division, at least 1 row
fig, axes = plt.subplots(
    n_rows, N_COLS,
    figsize=(12, max(8.0, 8.0 * n_rows / 6)),  # 12 x 8 for up to 6 rows
    squeeze=False,
)

# Calendar year of every 30-min sample (loop-invariant, computed once)
sample_year = pd.to_datetime(S["date_utc"]).dt.year

# Panels are filled row by row: year i goes to row i // 2, column i % 2
for ax, y in zip(axes.flat, years):
    # Subset S for year y
    Sy = S[sample_year == y]

    ax.axhline(0, color="k", lw=3, ls="--")

    # A year listed in df_kept may have no 30-min sample in S; leave its
    # panel without a curve rather than aggregating an empty frame
    if not Sy.empty:
        Dy = agg_diurnal(Sy)[["slot", "median"]].merge(CLIM, on="slot", how="left")

        # Anomaly = yearly diurnal median – quiet-time climatological median
        Dy["anom"] = Dy["median"] - Dy["clim_median"]
        ax.plot(Dy["slot"], Dy["anom"], "-", lw=2)

    # Style ticks
    for t in ax.yaxis.get_ticklabels():
        t.set_fontsize(10)
        t.set_color("black")
        t.set_weight("bold")
    for t in ax.xaxis.get_ticklabels():
        t.set_fontsize(10)
        t.set_color("black")
        t.set_weight("bold")

    slot_ticks_2h(ax)
    ax.grid(alpha=0.3)

    ax.set_title(str(y), fontweight="bold", y=0.75)
    ax.set_ylim(-5, 50)
    ax.set_yticks(np.arange(0, 49, 15))

# Remove only the panels that received no year
for unused_ax in axes.flat[len(years):]:
    fig.delaxes(unused_ax)

# ----- X-tick visibility logic -----
rows, cols = axes.shape

# 1) Hide all x labels by default
for r in range(rows):
    for c in range(cols):
        axes[r, c].tick_params(axis="x", labelbottom=False)

# 2) In each column, show x labels on the lowest panel still in the figure
for c in range(cols):
    for r in range(rows - 1, -1, -1):
        if axes[r, c] in fig.axes:  # panel not removed
            axes[r, c].tick_params(axis="x", labelbottom=True)
            break

# Global layout and labels (panels stacked without vertical gap)
fig.subplots_adjust(hspace=0)

fig.text(
    0.5, 0.9,
    "VTEC anomaly of diurnal median relative to quiet-time climatology",
    ha="center", fontsize=12, weight="bold"
)
fig.text(
    0.5, 0.05,
    "Local Time (LT)",
    ha="center", fontsize=12, weight="bold"
)
fig.text(
    0.07, 0.5,
    "VTEC anomaly [TECU]",
    va="center", rotation="vertical",
    fontsize=12, weight="bold"
)

out = OUTDIR_FIG / "Low_solar_activity_QUIET_yearly_anomaly_vs_climatology_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)


In [None]:
import numpy as np
import pandas as pd

# ==========================================
# CELL N — Yearly diagnostics of diurnal anomalies
# (diurnal median – multi-year quiet-time climatology)
#
# Requirements:
#   - S: 30-min VTEC dataframe (QSL–GIM20 subset) with columns:
#       ["slot", "date_utc", VTEC_COL, ...]
#   - df_kept: daily subset (quiet, low solar, |offset|<20) with "date_utc"
#   - agg_diurnal(S): returns diurnal statistics per slot (0..47),
#       including a "median" column
# ==========================================

# Multi-year diurnal climatology: the "median" column of agg_diurnal(S),
# kept per slot and renamed to "clim_median"
diurnal_all_years = agg_diurnal(S)
CLIM = diurnal_all_years[["slot", "median"]].rename(
    columns={"median": "clim_median"}
)

# Distinct calendar years found in df_kept["date_utc"], ascending
years = sorted(set(pd.Timestamp(d).year for d in df_kept["date_utc"]))


def slot_to_lt(slot: int) -> str:
    """
    Format a 30-min slot index as a clock string "HH:MM".

    The slot is read as ``slot / 2`` hours after midnight, so even slots
    fall on the hour and odd slots on the half hour (0..47 covers one day).
    """
    hours_since_midnight = slot * 0.5
    whole_hours = int(hours_since_midnight)
    minutes = int(round(60 * (hours_since_midnight - whole_hours)))
    return "{:02d}:{:02d}".format(whole_hours, minutes)


rows = []

# Calendar year of every 30-min sample (loop-invariant, computed once)
sample_year = pd.to_datetime(S["date_utc"]).dt.year

for y in years:
    # Subset S for year y
    Sy = S[sample_year == y].copy()
    if Sy.empty:
        continue

    # Yearly diurnal median and anomaly vs climatology
    Dy = (
        agg_diurnal(Sy)[["slot", "median"]]
        .merge(CLIM, on="slot", how="left")
    )
    Dy["anom"] = Dy["median"] - Dy["clim_median"]

    # idxmax()/idxmin() cannot locate an extremum without at least one
    # non-NaN anomaly, so such a year contributes no row
    if Dy["anom"].isna().all():
        continue

    # 1) Maximum anomaly and its local time
    max_anom = float(Dy["anom"].max())
    idx_max = Dy["anom"].idxmax()
    slot_max = int(Dy.loc[idx_max, "slot"])
    lt_max = slot_to_lt(slot_max)

    # 2) Minimum anomaly and its local time
    min_anom = float(Dy["anom"].min())
    idx_min = Dy["anom"].idxmin()
    slot_min = int(Dy.loc[idx_min, "slot"])
    lt_min = slot_to_lt(slot_min)

    # 3) Mean anomaly over all slots of the diurnal cycle
    mean_anom_all = float(Dy["anom"].mean())

    # 4) Mean anomaly over the afternoon window (slots 24..36 inclusive)
    aft = Dy[(Dy["slot"] >= 24) & (Dy["slot"] <= 36)]
    mean_anom_aft = float(aft["anom"].mean()) if not aft.empty else np.nan

    rows.append({
        "year": y,
        "max_anom_TECU": max_anom,
        "lt_of_max": lt_max,
        "min_anom_TECU": min_anom,
        "lt_of_min": lt_min,
        "mean_anom_all_TECU": mean_anom_all,
        "mean_anom_12_18LT_TECU": mean_anom_aft,
    })

# Naming the columns explicitly keeps "year" present even when no year
# produced a row, so the sort below cannot fail on a column-less frame
ANOM_STAT_COLUMNS = [
    "year",
    "max_anom_TECU",
    "lt_of_max",
    "min_anom_TECU",
    "lt_of_min",
    "mean_anom_all_TECU",
    "mean_anom_12_18LT_TECU",
]
anom_stats = pd.DataFrame(rows, columns=ANOM_STAT_COLUMNS).sort_values("year")

if anom_stats.empty:
    print("No yearly anomaly statistics available: no year has usable data.")
else:
    # Nicely formatted output for the paper
    print("Yearly diurnal VTEC anomalies relative to quiet-time climatology:")
    print(
        anom_stats.to_string(
            index=False,
            float_format=lambda x: f"{x:6.2f}"
        )
    )

    # Compact summary for LaTeX text
    print("\n--- Summary for LaTeX paragraph ---")

    # Row holding the largest yearly maximum
    row_of_max = anom_stats["max_anom_TECU"].idxmax()
    y_max_global = anom_stats.loc[row_of_max, "year"]
    max_global = anom_stats["max_anom_TECU"].max()
    lt_max_global = anom_stats.loc[row_of_max, "lt_of_max"]
    print(
        f"Largest positive anomaly: {max_global:.2f} TECU in {y_max_global} "
        f"(at local time {lt_max_global})."
    )

    # Row holding the smallest yearly minimum
    row_of_min = anom_stats["min_anom_TECU"].idxmin()
    y_min_global = anom_stats.loc[row_of_min, "year"]
    min_global = anom_stats["min_anom_TECU"].min()
    lt_min_global = anom_stats.loc[row_of_min, "lt_of_min"]
    print(
        f"Largest negative anomaly: {min_global:.2f} TECU in {y_min_global} "
        f"(at local time {lt_min_global})."
    )

    print("\nAfternoon (12–18 LT) mean anomalies by year:")
    for _, r in anom_stats.iterrows():
        print(
            f"  {int(r['year'])}: mean anomaly 12–18 LT "
            f"≈ {r['mean_anom_12_18LT_TECU']:.2f} TECU"
        )


In [None]:
# ==========================================
# CELL 10 — Monthly & seasonal boxplots + mean (quiet days)
# ==========================================
import calendar
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# --------- PARAMS ---------
TIME_COL  = "date_utc"
VTEC_COL  = "VTEC_median"
LABEL_COL = "geomag_label_gfz_QDNQ"   # 'Q', 'D', 'NQ'
START     = pd.Timestamp("2015-10-01", tz="UTC")
END       = pd.Timestamp("2025-09-26", tz="UTC")
# --------------------------

# Work on a copy of the dataframe currently bound to `df`
# NOTE(review): `df` is whatever an earlier cell left behind — confirm it is
# the daily dataframe before running this cell.
daily_q = df.copy()

# Parse the time column as UTC (bad values -> NaT) and keep START..END inclusive
daily_q[TIME_COL] = pd.to_datetime(daily_q[TIME_COL], utc=True, errors="coerce")
in_window = daily_q[TIME_COL].between(START, END)
daily_q = daily_q[in_window].copy()

# If you want to enforce QSL–GIM20 here, uncomment:
# daily_q = daily_q[daily_q[LABEL_COL] == "Q"].copy()
# daily_q = daily_q[pd.to_numeric(daily_q["gim_offset_tecu"], errors="coerce").abs() <= 20].copy()

# Calendar month (1..12) of each row
daily_q["month"] = daily_q[TIME_COL].dt.month

# -------- MONTHLY (all years combined) --------
all_months = np.arange(1, 13)
month_labels = calendar.month_abbr[1:13]

# Non-NaN VTEC samples of each month, January first
data_bp = []
for m in all_months:
    in_month = daily_q["month"] == m
    data_bp.append(daily_q.loc[in_month, VTEC_COL].dropna().to_numpy())
has_data = [v.size > 0 for v in data_bp]

# Monthly mean (NaN for absent months), drawn as the black curve
mm = daily_q.groupby("month")[[VTEC_COL]].mean().reindex(all_months)

fig, ax = plt.subplots(figsize=(10, 4))

# Boxes only for months with samples, positioned at the month number
filled = [i for i, ok in enumerate(has_data) if ok]
pos = [all_months[i] for i in filled]
dat = [data_bp[i] for i in filled]

# Box appearance; whiskers span the 5–95 % range
box_style = {
    "patch_artist": True,
    "showmeans": True,
    "whis": (5, 95),
    "meanprops": {
        "marker": "D",
        "markersize": 4,
        "markerfacecolor": "black",
        "markeredgecolor": "white",
    },
    "boxprops": {"facecolor": "#c7e9ff", "edgecolor": "#1f4d7a", "linewidth": 1.2},
    "medianprops": {"color": "crimson", "linewidth": 1.8},
    "whiskerprops": {"color": "#1f4d7a", "linewidth": 1.0, "linestyle": "--"},
    "capprops": {"color": "#1f4d7a", "linewidth": 1.0},
    "flierprops": {
        "marker": "x",
        "markersize": 3,
        "markeredgecolor": "gray",
        "markerfacecolor": "none",
        "alpha": 0.5,
    },
}
if dat:
    ax.boxplot(dat, positions=pos, **box_style)

# Monthly mean curve on top of the boxes
ax.plot(
    all_months,
    mm[VTEC_COL].to_numpy(),
    color="black",
    marker="o",
    linewidth=2,
    markersize=5,
    label="Mean",
)

# Styling: bold black 12 pt tick labels on both axes
for tick in [*ax.yaxis.get_ticklabels(), *ax.xaxis.get_ticklabels()]:
    tick.set_fontsize(12)
    tick.set_color("black")
    tick.set_weight("bold")

ax.set_xlim(0.5, 12.5)
ax.set_xticks(all_months)
ax.set_xticklabels(month_labels)
ax.grid(True, alpha=0.3)
ax.legend(frameon=False, loc="upper left")

# Figure-level title and axis captions
fig.text(
    0.5, 0.92,
    "Quiet days monthly median and mean from Oct 2015 to Sep 2025",
    ha="center", fontsize=12, fontweight="bold",
)
fig.text(0.5, 0.00, "Month", ha="center", fontsize=14, fontweight="bold")
fig.text(
    0.04, 0.5, "TEC [TECU]",
    va="center", rotation="vertical", fontsize=14, fontweight="bold",
)

out = OUTDIR_FIG / "Low_solar_activity_QUIET_monthly_median_boxplot_means_curve_by_LT_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)

# -------- SEASONAL (all years combined) --------
def month_to_season(m: int) -> str:
    """Map a month number to "DJF", "MAM", "JJA" or "SON".

    Any value outside December-August (September-November included)
    falls through to "SON".
    """
    if m in (12, 1, 2):
        season = "DJF"
    elif m in (3, 4, 5):
        season = "MAM"
    elif m in (6, 7, 8):
        season = "JJA"
    else:
        season = "SON"
    return season

# Season code of each row, derived from its month
daily_q["season"] = daily_q["month"].map(month_to_season)

season_order = ["DJF", "MAM", "JJA", "SON"]

# Non-NaN VTEC samples of each season, in season_order
data_s = []
for s in season_order:
    in_season = daily_q["season"] == s
    data_s.append(daily_q.loc[in_season, VTEC_COL].dropna().to_numpy())
has_s = [v.size > 0 for v in data_s]

# Seasonal mean (NaN for absent seasons), drawn as the black curve
ms = daily_q.groupby("season")[[VTEC_COL]].mean().reindex(season_order)

fig, ax = plt.subplots(figsize=(8, 4))

# Boxes only for seasons with samples; season number i sits at x = i + 1
filled = [i for i, ok in enumerate(has_s) if ok]
pos = [i + 1 for i in filled]
dat = [data_s[i] for i in filled]

# Box appearance; whiskers span the 5–95 % range
box_style = {
    "patch_artist": True,
    "showmeans": True,
    "whis": (5, 95),
    "meanprops": {
        "marker": "D",
        "markersize": 4,
        "markerfacecolor": "black",
        "markeredgecolor": "white",
    },
    "boxprops": {"facecolor": "#c7e9ff", "edgecolor": "#1f4d7a", "linewidth": 1.2},
    "medianprops": {"color": "crimson", "linewidth": 1.8},
    "whiskerprops": {"color": "#1f4d7a", "linewidth": 1.0, "linestyle": "--"},
    "capprops": {"color": "#1f4d7a", "linewidth": 1.0},
    "flierprops": {
        "marker": "x",
        "markersize": 3,
        "markeredgecolor": "gray",
        "markerfacecolor": "none",
        "alpha": 0.5,
    },
}
if dat:
    ax.boxplot(dat, positions=pos, **box_style)

# Seasonal mean curve
season_x = np.arange(1, 5)
ax.plot(
    season_x,
    ms[VTEC_COL].to_numpy(),
    color="black",
    marker="o",
    linewidth=3,
    markersize=5,
    label="Mean",
)

# Styling: bold black 12 pt tick labels on both axes
for tick in [*ax.yaxis.get_ticklabels(), *ax.xaxis.get_ticklabels()]:
    tick.set_fontsize(12)
    tick.set_color("black")
    tick.set_weight("bold")

ax.set_xlim(0.5, 4.5)
ax.set_xticks(season_x)
ax.set_xticklabels(season_order)
ax.legend(frameon=False, loc="upper left")
ax.grid(True, alpha=0.3)

# Figure-level title and axis captions
fig.text(
    0.5, 0.92,
    "Quiet days seasonal median and mean from Oct 2015 to Sep 2025",
    ha="center", fontsize=12, fontweight="bold",
)
fig.text(0.5, 0.001, "Season", ha="center", fontsize=14, fontweight="bold")
fig.text(
    0.04, 0.5, "TEC [TECU]",
    va="center", rotation="vertical", fontsize=15, fontweight="bold",
)

out = OUTDIR_FIG / "Low_solar_activity_QUIET_seasonal_median_boxplot_means_curve_by_LT_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)


In [None]:
# ==========================================
# CELL 10b — Numeric stats for monthly & seasonal boxplots (quiet days)
# ==========================================
import numpy as np
import pandas as pd
import calendar

VTEC_COL = "VTEC_median"  # consistent with the previous cell

# Independent copy of the dataframe prepared in CELL 10
daily_stats = daily_q.copy()

# Make "date_utc" a UTC datetime column, read from "date_utc" when present
# and otherwise from "date"; with neither column nothing is changed
date_source = next(
    (name for name in ("date_utc", "date") if name in daily_stats.columns),
    None,
)
if date_source is not None:
    daily_stats["date_utc"] = pd.to_datetime(
        daily_stats[date_source], utc=True, errors="coerce"
    )

# Derive the month only when the copy does not already carry it
if "month" not in daily_stats.columns:
    daily_stats["month"] = daily_stats["date_utc"].dt.month

def month_to_season(m: int) -> str:
    """Return the season code ("DJF", "MAM", "JJA", "SON") of month *m*.

    "SON" is the fallback for every value not listed under the other
    three seasons.
    """
    return (
        "DJF" if m in (12, 1, 2)
        else "MAM" if m in (3, 4, 5)
        else "JJA" if m in (6, 7, 8)
        else "SON"
    )

# Add the season code only when the copy does not already have one
if "season" not in daily_stats:
    daily_stats["season"] = daily_stats["month"].apply(month_to_season)

# ---------- 1) MONTHLY STATS ----------
def iqr(x):
    """Return the interquartile range (75th minus 25th percentile) of *x*, ignoring NaNs."""
    values = np.asarray(x, float)
    q25, q75 = np.nanpercentile(values, [25, 75])
    return q75 - q25

# One row per calendar month: sample count, central values and spread of the
# daily VTEC statistic stored in VTEC_COL.
monthly_stats = (
    daily_stats.groupby("month")[VTEC_COL]
    .agg(
        n_days="count",
        median="median",
        mean="mean",
        iqr=iqr,
        p5=lambda x: np.nanpercentile(x, 5),
        p95=lambda x: np.nanpercentile(x, 95),
    )
    .reset_index()
)

# The month keys come back as floats (1.0, 2.0, ...) whenever the month column
# contains NaN (e.g. a date that was coerced to NaT); calendar.month_abbr only
# accepts integer indices, so cast explicitly before the lookup.
monthly_stats["month_name"] = monthly_stats["month"].apply(
    lambda m: calendar.month_abbr[int(m)]
)

print("\n=== Monthly stats of daily median VTEC (quiet QSL–GIM20) ===")
print(monthly_stats.to_string(index=False, float_format=lambda v: f"{v:5.2f}"))

# Month with maximum / minimum median
# (idxmax/idxmin return row labels of monthly_stats; .loc then fetches the full row)
idx_max_m = monthly_stats["median"].idxmax()
idx_min_m = monthly_stats["median"].idxmin()

m_max_row = monthly_stats.loc[idx_max_m]
m_min_row = monthly_stats.loc[idx_min_m]

print("\n--- Key monthly values for LaTeX paragraph ---")
print(
    f"Highest monthly median VTEC: {m_max_row['median']:.2f} TECU in {m_max_row['month_name']} "
    f"(mean ≈ {m_max_row['mean']:.2f} TECU, IQR ≈ {m_max_row['iqr']:.2f} TECU)."
)
print(
    f"Lowest monthly median VTEC:  {m_min_row['median']:.2f} TECU in {m_min_row['month_name']} "
    f"(mean ≈ {m_min_row['mean']:.2f} TECU, IQR ≈ {m_min_row['iqr']:.2f} TECU)."
)

# Months with largest / smallest IQR
idx_max_iqr_m = monthly_stats["iqr"].idxmax()
idx_min_iqr_m = monthly_stats["iqr"].idxmin()
iqr_max_row = monthly_stats.loc[idx_max_iqr_m]
iqr_min_row = monthly_stats.loc[idx_min_iqr_m]

print(
    f"Largest monthly spread (IQR): {iqr_max_row['iqr']:.2f} TECU in {iqr_max_row['month_name']}."
)
print(
    f"Smallest monthly spread (IQR): {iqr_min_row['iqr']:.2f} TECU in {iqr_min_row['month_name']}."
)

# Simple skewness proxy (mean–median) to identify tails:
# positive -> mean pulled above the median, negative -> mean pulled below it
monthly_stats["skew_proxy"] = monthly_stats["mean"] - monthly_stats["median"]
idx_max_skew = monthly_stats["skew_proxy"].idxmax()
idx_min_skew = monthly_stats["skew_proxy"].idxmin()

skew_max_row = monthly_stats.loc[idx_max_skew]
skew_min_row = monthly_stats.loc[idx_min_skew]

# NOTE(review): the labels below assume the extreme differences are respectively
# positive and negative; the sign of the printed value shows whether that holds.
print(
    "Largest positive mean–median difference (right tail): "
    f"{skew_max_row['skew_proxy']:.2f} TECU in {skew_max_row['month_name']}."
)
print(
    "Largest negative mean–median difference (left tail): "
    f"{skew_min_row['skew_proxy']:.2f} TECU in {skew_min_row['month_name']}."
)

# ---------- 2) SEASONAL STATS ----------
# Same aggregation as the monthly table, grouped by season label instead.
seasonal_stats = (
    daily_stats.groupby("season")[VTEC_COL]
    .agg(
        n_days="count",
        median="median",
        mean="mean",
        iqr=iqr,
        p5=lambda x: np.nanpercentile(x, 5),
        p95=lambda x: np.nanpercentile(x, 95),
    )
    .reindex(["DJF", "MAM", "JJA", "SON"])  # canonical order; a season absent from the data becomes an all-NaN row
    .reset_index()
)

print("\n=== Seasonal stats of daily median VTEC (quiet QSL–GIM20) ===")
print(seasonal_stats.to_string(index=False, float_format=lambda v: f"{v:5.2f}"))

# Seasons with the highest / lowest median
idx_max_s = seasonal_stats["median"].idxmax()
idx_min_s = seasonal_stats["median"].idxmin()
s_max_row = seasonal_stats.loc[idx_max_s]
s_min_row = seasonal_stats.loc[idx_min_s]

print("\n--- Key seasonal values for LaTeX paragraph ---")
print(
    f"Highest seasonal median VTEC: {s_max_row['median']:.2f} TECU in {s_max_row['season']} "
    f"(mean ≈ {s_max_row['mean']:.2f} TECU, IQR ≈ {s_max_row['iqr']:.2f} TECU)."
)
print(
    f"Lowest seasonal median VTEC:  {s_min_row['median']:.2f} TECU in {s_min_row['season']} "
    f"(mean ≈ {s_min_row['mean']:.2f} TECU, IQR ≈ {s_min_row['iqr']:.2f} TECU)."
)

# Skewness proxy by season (same mean-minus-median definition as for months)
seasonal_stats["skew_proxy"] = seasonal_stats["mean"] - seasonal_stats["median"]
idx_max_skew_s = seasonal_stats["skew_proxy"].idxmax()
idx_min_skew_s = seasonal_stats["skew_proxy"].idxmin()

sskew_max_row = seasonal_stats.loc[idx_max_skew_s]
sskew_min_row = seasonal_stats.loc[idx_min_skew_s]

print(
    f"Largest seasonal mean–median difference: {sskew_max_row['skew_proxy']:.2f} TECU in {sskew_max_row['season']}."
)
print(
    f"Smallest seasonal mean–median difference: {sskew_min_row['skew_proxy']:.2f} TECU in {sskew_min_row['season']}."
)


In [None]:
"""
Robust extraction of the northern EIA crest latitude around OUCA
using the custom read_ionex() reader.
"""

# ================== CELL 0 — IMPORTS & CONFIG ==================
import io
import subprocess
from datetime import date, datetime, timedelta, timezone
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz

# ---- Paths ----
# Raw strings: "\d", "\I" and "\o" are not valid escape sequences, so the plain
# literals only worked by accident (and emit a warning on recent Python versions).
# The resulting path values are unchanged.
DEC_DIR = Path(r"\data\IONEX\IONEX_decompressed")  # single directory containing all decompressed IONEX files
OUT_DAILY = Path(r"\output_csv\daily_eia_crest_latitude.csv")   # daily crest-latitude table
OUT_FIG = Path(r"\output_figs\daily_eia_crest_latitude.png")    # quicklook time series

# ---- OUCA and search geometry ----
# Station coordinates in degrees (latitude, longitude); negative longitude = West
OUCA_LAT, OUCA_LON = 31.206, -7.866
LON_BAND_DEG = 5.0                      # average VTEC in [lon - 5°, lon + 5°]
SEARCH_LAT_MIN, SEARCH_LAT_MAX = 0.0, 50.0  # crest search window (North)

# ---- Local-time filtering ----
# NOTE(review): the flag is named "AFTERNOON" but the window below spans
# 04–20 LT; confirm which of the two is intended before enabling it.
ONLY_AFTERNOON_LT = False      # set True to restrict to LT_HOURS_WINDOW
LT_TZ = pytz.timezone("Africa/Casablanca")
LT_HOURS_WINDOW = (4, 20)      # local-time window [start, end) in hours, used if ONLY_AFTERNOON_LT is True

# ---- Date range ----
# Both bounds are inclusive in the main day loop.
START = date(2015, 10, 1)
END = date(2025, 9, 26)

# ==============================================================
# CELL 1 — IONEX UTILITIES (unchanged logic)
# ==============================================================

def _open_text(p: Path) -> io.StringIO:
    """
    Open an IONEX file (plain, .gz, or .Z) and return a text stream.
    """
    p = Path(p)
    if not p.exists():
        raise FileNotFoundError(p)

    ext = p.suffix.lower()
    if ext == ".gz":
        out = subprocess.run(["gzip", "-dc", str(p)], capture_output=True)
        if out.returncode != 0:
            raise RuntimeError("gzip -dc failed")
        return io.StringIO(out.stdout.decode("ascii", "ignore"))

    if ext == ".z":
        # try gzip -dc first
        gz = subprocess.run(["gzip", "-dc", str(p)], capture_output=True)
        if gz.returncode == 0 and gz.stdout:
            return io.StringIO(gz.stdout.decode("ascii", "ignore"))
        # fallback: 7z
        sz = subprocess.run(["7z", "e", "-so", str(p)], capture_output=True)
        if sz.returncode != 0:
            raise RuntimeError("7z -so failed")
        return io.StringIO(sz.stdout.decode("ascii", "ignore"))

    # plain text
    return io.StringIO(p.read_text(encoding="ascii", errors="ignore"))


def ionex_first_epoch_date(path: Path) -> date | None:
    """
    Return the date of the first IONEX epoch (EPOCH OF FIRST MAP) in the file,
    or None if it cannot be read.

    "Cannot be read" covers a file that cannot be opened or decompressed, a
    header without the record in its first 400 lines, and a record whose
    year/month/day fields are missing or invalid.
    """
    try:
        f = _open_text(path)
    except (OSError, RuntimeError):
        # Best effort: this is used to probe many candidate files, so an
        # unreadable one simply does not match.
        return None

    # The record is part of the header; stop at END OF HEADER or after 400 lines.
    for _ in range(400):
        line = f.readline()
        if not line:
            break
        if "EPOCH OF FIRST MAP" in line:
            try:
                yr, mo, dy = map(int, line[:60].split()[:3])
                return date(yr, mo, dy)
            except ValueError:
                # fewer than three fields, non-integer text or an impossible date
                return None
        if "END OF HEADER" in line:
            break
    return None


def read_ionex(path: Path):
    """
    Read a single IONEX file and return:
        times (UTC, tz-aware), latitudes, longitudes, TEC[time, lat, lon]
    TEC values are in TECU (scaling by 10^EXPONENT).

    Returned axes are normalised: longitudes given on a 0..360 grid are
    re-ordered into [-180, 180) and latitudes are made ascending; the TEC
    array is re-ordered accordingly.

    A TEC map that is truncated or malformed is dropped, and parsing resumes
    at the next map (previously the following, healthy map was lost as well).

    Raises
    ------
    ValueError
        If the header is incomplete, the grid definition is missing or has a
        zero step, or the file contains no usable TEC map.
    """
    f = _open_text(path)

    exp = -1  # fallback exponent when the header has no usable EXPONENT record
    lat1 = lat2 = dlat = None
    lon1 = lon2 = dlon = None

    # ---- header ----
    while True:
        line = f.readline()
        if not line:
            raise ValueError("Incomplete IONEX header")

        if "EXPONENT" in line:
            s = line[:8].strip()
            exp = int(s) if s else -1

        if "LAT1 / LAT2 / DLAT" in line:
            lat1, lat2, dlat = map(float, line[:60].split()[:3])

        if "LON1 / LON2 / DLON" in line:
            lon1, lon2, dlon = map(float, line[:60].split()[:3])

        if "END OF HEADER" in line:
            break

    if None in (lat1, lat2, dlat, lon1, lon2, dlon):
        raise ValueError("Grid definition missing in IONEX header")
    if dlat == 0 or dlon == 0:
        raise ValueError("Zero grid step in IONEX header")

    nlat = int(round((lat2 - lat1) / dlat)) + 1
    nlon = int(round((lon2 - lon1) / dlon)) + 1
    lats = np.linspace(lat1, lat2, nlat)
    lons = np.linspace(lon1, lon2, nlon)
    scale = 10.0 ** exp  # constant for the whole file, so computed once

    times = []
    maps = []
    pending = None  # a "START OF TEC MAP" line already consumed while skipping a bad map

    # ---- TEC maps ----
    while True:
        if pending is None:
            line = f.readline()
        else:
            line, pending = pending, None
        if not line:
            break
        if "START OF TEC MAP" not in line:
            continue

        # epoch line
        line = f.readline()
        while line and "EPOCH OF CURRENT MAP" not in line:
            line = f.readline()
        if not line:
            break

        yr, mo, dy, hh, mm, ss = map(int, line[:60].split()[:6])
        t = pd.Timestamp(datetime(yr, mo, dy, hh, mm, ss, tzinfo=timezone.utc))

        tec = np.full((nlat, nlon), np.nan)
        bad = False
        last = line  # most recent line read; this is where skipping must resume

        for ilat in range(nlat):
            last = f.readline()
            if not last or "LAT/LON1/LON2/DLON/H" not in last:
                bad = True
                break

            vals = []
            while len(vals) < nlon:
                last = f.readline()
                if (
                    not last
                    or ("START OF" in last)
                    or ("END OF" in last)
                    or ("LAT/LON1" in last)
                ):
                    # the row ended before nlon values were read
                    bad = True
                    break

                # values are stored in fixed-width 5-character fields
                for i in range(0, len(last.rstrip()), 5):
                    c = last[i : i + 5].strip().upper()
                    if c == "" or c == "9999":
                        # blank field or the 9999 "no value" marker
                        vals.append(np.nan)
                    else:
                        try:
                            vals.append(float(c) * scale)
                        except ValueError:
                            vals.append(np.nan)
                    if len(vals) == nlon:
                        break

            if bad:
                break

            tec[ilat, :] = vals

        if bad:
            # Skip the remainder of this TEC map, starting from the line that
            # broke it. If that line (or a later one) already opens the next
            # map, hand it back to the outer loop instead of discarding it.
            x = last
            while x:
                if "START OF TEC MAP" in x:
                    pending = x
                    break
                if "END OF TEC MAP" in x:
                    break
                x = f.readline()
            continue

        times.append(t)
        maps.append(tec)

    if not maps:
        raise ValueError("No TEC map found in file")

    TEC = np.stack(maps, axis=0)

    # normalize longitude to [-180, 180)
    if lons.min() >= 0 and lons.max() > 180:
        order = np.argsort(((lons + 180) % 360) - 180)
        lons = (((lons + 180) % 360) - 180)[order]
        TEC = TEC[:, :, order]

    # ensure latitudes are ascending
    if lats[0] > lats[-1]:
        lats = lats[::-1]
        TEC = TEC[:, ::-1, :]

    times = pd.to_datetime(times, utc=True)
    return times, lats, lons, TEC


def product_window(day: date) -> str:
    """
    Classify a day by the IONEX product family used for it:
        - 'OLD'    : legacy CODG products   (up to and including 2022-11-27)
        - 'OPSFIN' : final OPSFIN GIM       (2022-11-28 .. 2025-09-20)
        - 'OPSRAP' : rapid OPSRAP GIM       (after 2025-09-20)

    NOTE(review): 2022-11-27 is day-of-year 331, whereas the original inline
    note on this cutoff read "DOY <= 330"; confirm which boundary is intended.
    """
    last_legacy_day = date(2022, 11, 27)
    last_opsfin_day = date(2025, 9, 20)

    if day > last_opsfin_day:
        return "OPSRAP"
    return "OLD" if day <= last_legacy_day else "OPSFIN"


def pick_ionex_for_day(day: date) -> Path | None:
    """
    Locate the IONEX file in DEC_DIR to use for *day*.

    - OLD window: the canonical CODG names (lower case, then upper case) are
      tried first, then any CODG file of that two-digit year.
    - OPSFIN / OPSRAP windows: COD0OPSFIN and COD0OPSRAP GIM files for that
      year and day-of-year, the product of the current window first.

    A candidate is accepted only if its first map epoch falls on *day*.
    Returns the matching Path, or None if nothing is found.
    """
    year2 = f"{day.year % 100:02d}"
    doy = f"{pd.Timestamp(day).dayofyear:03d}"

    def first_matching(candidates):
        # first candidate whose header epoch is the requested day
        for candidate in candidates:
            if ionex_first_epoch_date(candidate) == day:
                return candidate
        return None

    def legacy_candidates():
        # canonical CODG names, only if present on disk
        for name in (f"codg{doy}0.{year2}i", f"CODG{doy}0.{year2}I"):
            canonical = DEC_DIR / name
            if canonical.exists():
                yield canonical
        # fallback: any CODG* file for that year
        for pattern in (f"codg*{year2}i", f"CODG*{year2}I"):
            yield from DEC_DIR.glob(pattern)

    window = product_window(day)
    if window == "OLD":
        return first_matching(legacy_candidates())

    # New OPSFIN / OPSRAP products: the window decides which one has priority
    if window == "OPSFIN":
        product_order = ("OPSFIN", "OPSRAP")
    else:
        product_order = ("OPSRAP", "OPSFIN")

    patterns = [f"COD0{product}_*{day.year}{doy}*_GIM.INX" for product in product_order]
    return first_matching(q for pat in patterns for q in DEC_DIR.glob(pat))


# ==============================================================
# CELL 2 — EIA CREST UTILITIES
# ==============================================================

def lon_wrap(x: np.ndarray | float) -> np.ndarray | float:
    """
    Wrap longitude(s) into the [-180, 180) range.
    """
    return (x + 180.0) % 360.0 - 180.0


def lon_band_indices(lons: np.ndarray, lon0: float, band_deg: float) -> np.ndarray:
    """
    Return indices of longitudes within ±band_deg of lon0 (all in wrapped [-180, 180) frame).
    """
    l = lon_wrap(lons.copy())
    lon0 = lon_wrap(lon0)
    return np.where(np.abs(l - lon0) <= band_deg)[0]


def crest_lat_from_slice(
    lat: np.ndarray,
    vtec_lat: np.ndarray,
    lat_min: float,
    lat_max: float,
) -> float:
    """
    From a latitudinal slice vtec_lat(lat), return the latitude of the
    maximum (northern EIA crest) within [lat_min, lat_max].

    NaN samples are ignored. NaN is returned when the window contains no
    latitude at all or only NaN values (``np.nanargmax`` would raise on an
    all-NaN window and abort the caller's loop).
    """
    lat = np.asarray(lat, dtype=float)
    vtec_lat = np.asarray(vtec_lat, dtype=float)

    # keep only in-window latitudes that actually carry a value
    usable = (lat >= lat_min) & (lat <= lat_max) & ~np.isnan(vtec_lat)
    if not np.any(usable):
        return np.nan

    peak = np.argmax(vtec_lat[usable])
    return float(lat[usable][peak])


# ==============================================================
# CELL 3 — MAIN LOOP: DAILY EIA CREST LATITUDE AROUND OUCA
# ==============================================================

# One dict per retained IONEX map epoch; turned into a DataFrame in CELL 4.
rows: list[dict] = []
current_day = START
n_days = 0  # number of days whose IONEX file was read and had grid points in the longitude band

# Walk day by day over [START, END] (both inclusive).
while current_day <= END:
    ionex_file = pick_ionex_for_day(current_day)
    if ionex_file is None:
        # no product available for this day
        current_day += timedelta(days=1)
        continue

    try:
        times, lats, lons, TEC = read_ionex(ionex_file)
    except Exception as exc:
        # an unreadable file only costs that day; keep going
        print(f"[WARN] {ionex_file.name}: could not be read -> {exc}")
        current_day += timedelta(days=1)
        continue

    # longitude band indices ±LON_BAND_DEG around OUCA longitude
    j_idx = lon_band_indices(lons, OUCA_LON, LON_BAND_DEG)
    if j_idx.size == 0:
        current_day += timedelta(days=1)
        continue

    for k, t in enumerate(times):
        t_utc = t.to_pydatetime()

        # We allow maps that might spill into previous/next day; the daily
        # aggregation will be made in local time (date_lt).
        # NOTE(review): if consecutive daily files both contain the same
        # midnight epoch, that epoch is appended once per file — confirm
        # whether de-duplication on epoch_utc is wanted.
        lt = t_utc.astimezone(LT_TZ)
        if ONLY_AFTERNOON_LT and not (
            LT_HOURS_WINDOW[0] <= lt.hour < LT_HOURS_WINDOW[1]
        ):
            continue

        # mean VTEC over the longitude band -> one value per latitude
        vlat = np.nanmean(TEC[k][:, j_idx], axis=1)

        # latitude of the VTEC maximum inside the search window
        crest_lat = crest_lat_from_slice(
            lats,
            vlat,
            SEARCH_LAT_MIN,
            SEARCH_LAT_MAX,
        )

        rows.append(
            {
                "epoch_utc": t_utc,
                "epoch_lt": lt,
                "date_lt": lt.date(),
                "crest_lat": crest_lat,
                "ouca_lat": OUCA_LAT,
                "crest_minus_ouca_deg": crest_lat - OUCA_LAT,
                "src_file": ionex_file.name,
            }
        )

    n_days += 1
    current_day += timedelta(days=1)

# ==============================================================
# CELL 4 — DAILY AGGREGATION & QUICKLOOK FIGURE
# ==============================================================

if not rows:
    print("[INFO] No eligible epochs found in the requested date/LT window.")
else:
    df = pd.DataFrame(rows)

    # daily (local date) median crest latitude and IQR; n_epochs counts the
    # non-NaN crest latitudes that went into each day
    daily = (
        df.groupby("date_lt", as_index=False)
        .agg(
            crest_lat_med=("crest_lat", "median"),
            crest_lat_iqr=(
                "crest_lat",
                lambda x: np.nanpercentile(x, 75) - np.nanpercentile(x, 25),
            ),
            crest_minus_ouca_med=("crest_minus_ouca_deg", "median"),
            n_epochs=("crest_lat", "count"),
        )
    )

    # Create the output folders up front: the IONEX loop is long, and a missing
    # folder would otherwise make the write fail after all the work is done.
    OUT_DAILY.parent.mkdir(parents=True, exist_ok=True)
    OUT_FIG.parent.mkdir(parents=True, exist_ok=True)

    daily.to_csv(OUT_DAILY, index=False)
    print(
        f"[OK] Daily EIA-crest CSV written -> {OUT_DAILY}  | "
        f"days = {len(daily)}  | IONEX files read = {n_days}"
    )

    # quicklook time series of daily median crest latitude
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(
        pd.to_datetime(daily["date_lt"]),
        daily["crest_lat_med"],
        lw=1.2,
        label="Daily median crest latitude",
    )
    # reference line at the station latitude
    ax.axhline(
        OUCA_LAT,
        ls="--",
        color="k",
        lw=1.0,
        label="OUCA latitude",
    )

    ax.set_ylabel("Daily median crest latitude [deg]")
    ax.set_title("Northern EIA crest latitude near OUCA longitude")
    ax.grid(True, alpha=0.3)
    ax.legend()
    fig.tight_layout()
    fig.savefig(OUT_FIG, dpi=200)
    plt.close(fig)

    print(f"[OK] Quicklook figure written -> {OUT_FIG}")


In [None]:
"""
Compute robust diurnal VTEC slopes (morning and evening) from 30-min
VTEC data at OUCA using Theil–Sen regression, with local-time windows
that vary by month (seasonal windows).

This cell/script assumes that you already have a 30-min dataframe
`S` in memory with at least:

    - a UTC timestamp column named TIME_COL (default: "time")
    - a 30-min VTEC column named VTEC_COL (default: "VTEC_median")

Optionally, you may also have a boolean flag column (QUIET_FLAG_COL)
marking “quiet” QSL–GIM20 samples, and set KEEP_ONLY_QUIET = True.
"""

import numpy as np
import pandas as pd
from pathlib import Path
import pytz
import matplotlib.pyplot as plt

# ---------------- CONFIG ----------------
TIME_COL = "time"            # 30-min timestamp column (UTC, or convertible to UTC)
VTEC_COL = "VTEC_median"     # 30-min VTEC column
LOCAL_TZ = "Africa/Casablanca"  # time zone used to express samples in local time

# Default windows (kept for reference, main logic uses seasonal_windows_for_month)
MORNING_LT = (6.0, 12.0)     # [start, end) in local time
EVENING_LT = (17.0, 22.0)

KEEP_ONLY_QUIET = False          # True -> keep only samples flagged in QUIET_FLAG_COL
QUIET_FLAG_COL = "is_QSL_GIM20"  # optional boolean quiet flag in S

# Raw strings: "\o" and "\m" are not valid escape sequences, so the plain
# literals only worked by accident (and emit a warning on recent Python versions).
# The resulting path values are unchanged.
OUT_CSV = Path(r"\output_csv\monthly_diurnal_slopes_seasonal_windows.csv")
OUT_PNG = Path(r"\output_figs\monthly_diurnal_slopes_seasonal_windows.png")
# ----------------------------------------

def theil_sen(x, y):
    """
    Theil–Sen slope of y against x: the median of the slopes of all point pairs.

    Parameters
    ----------
    x, y : array-like
        1D arrays of the same length. Pairs where either value is not finite
        are discarded before the estimate.

    Returns
    -------
    float
        The median pairwise slope. NaN if fewer than 3 valid points remain or
        if no pair of points has distinct x values (|dx| > 1e-12).
    """
    xs = np.asarray(x, float)
    ys = np.asarray(y, float)
    finite = np.isfinite(xs) & np.isfinite(ys)
    xs, ys = xs[finite], ys[finite]
    if xs.size < 3:
        return np.nan

    # every pair (i, j) with i < j, built in one shot
    first, second = np.triu_indices(xs.size, k=1)
    run = xs[second] - xs[first]
    rise = ys[second] - ys[first]

    # pairs sharing the same abscissa have no defined slope
    distinct = np.abs(run) > 1e-12
    if not np.any(distinct):
        return np.nan

    return np.nanmedian(rise[distinct] / run[distinct])


def window_slope(df_lt, start_hour, end_hour):
    """
    Theil–Sen slope of VTEC against local-time hour inside one LT window.

    Parameters
    ----------
    df_lt : DataFrame
        Must contain columns 'lt_hour_float' and VTEC_COL.
    start_hour, end_hour : float
        Bounds of the half-open window [start_hour, end_hour), in hours.

    Returns
    -------
    float
        Slope d(VTEC)/dt in TECU/hour, or NaN when fewer than 3 complete
        samples fall inside the window.
    """
    hours = df_lt["lt_hour_float"]
    in_window = hours.ge(start_hour) & hours.lt(end_hour)

    # keep only rows where both the hour and the VTEC value are present
    samples = df_lt.loc[in_window, ["lt_hour_float", VTEC_COL]].dropna()
    if len(samples) >= 3:
        return theil_sen(samples["lt_hour_float"].values, samples[VTEC_COL].values)
    return np.nan


def seasonal_windows_for_month(month: int):
    """
    Return the “morning” and “evening” local-time windows used for *month*.

    The windows are tuned to approximate seasonal changes in sunrise/sunset
    (hours in LT, each window given as (start, end)):

        month(s)          morning      evening
        Nov, Dec, Jan     07–12        16–20
        Feb, Mar          06–12        18–21
        Apr               06–14        19–22
        May               06–15        19–22
        Jun               06–15        18.5–23
        Jul, Aug          06–16        19–23
        Sep               06–14        18.5–22
        Oct               07–14        16–21

    Any value that is not one of the months listed before October gets the
    October windows.

    Returns
    -------
    (morning_start, morning_end), (evening_start, evening_end)
    """
    extended_winter = ((7.0, 12.0), (16.0, 20.0))
    late_winter = ((6.0, 12.0), (18.0, 21.0))
    high_summer = ((6.0, 16.0), (19.0, 23.0))
    october = ((7.0, 14.0), (16.0, 21.0))

    windows_by_month = {
        11: extended_winter,
        12: extended_winter,
        1: extended_winter,
        2: late_winter,
        3: late_winter,
        4: ((6.0, 14.0), (19.0, 22.0)),
        5: ((6.0, 15.0), (19.0, 22.0)),
        6: ((6.0, 15.0), (18.5, 23.0)),
        7: high_summer,
        8: high_summer,
        9: ((6.0, 14.0), (18.5, 22.0)),
    }

    morning, evening = windows_by_month.get(month, october)
    return morning, evening


def main():
    """
    Main driver: compute daily Theil–Sen slopes (morning/evening) in local time,
    then aggregate to monthly medians and save both CSV and a quicklook figure.

    This assumes a 30-min dataframe `S` is already defined in the notebook/session.

    Steps
    -----
    1. Copy `S`, parse TIME_COL as UTC and, if KEEP_ONLY_QUIET is set, keep only
       rows flagged in QUIET_FLAG_COL (a warning is printed when that column is
       missing, because the data then stay unfiltered).
    2. Convert to LOCAL_TZ and, for each local date, compute the morning and
       evening slopes in the windows given by seasonal_windows_for_month().
       Days where neither slope is finite are dropped.
    3. Aggregate per calendar month (median and IQR of each slope), write
       OUT_CSV and OUT_PNG; their parent folders are created if needed.
    """
    # Start from existing 30-min dataframe S (already filtered if needed)
    df = S.copy()

    # Parse UTC timestamps (unparseable entries become NaT)
    df[TIME_COL] = pd.to_datetime(df[TIME_COL], utc=True, errors="coerce")

    # Optional quiet-day filter
    if KEEP_ONLY_QUIET:
        if QUIET_FLAG_COL in df.columns:
            df = df[df[QUIET_FLAG_COL] == True].copy()  # noqa: E712 (element-wise comparison)
        else:
            # Do not silently analyse unfiltered data under a "quiet" label.
            print(
                f"[WARN] KEEP_ONLY_QUIET is True but column '{QUIET_FLAG_COL}' "
                "is missing; no quiet filter applied."
            )

    # Local time conversion
    tz = pytz.timezone(LOCAL_TZ)
    df["ts_lt"] = df[TIME_COL].dt.tz_convert(tz)
    df["date_lt"] = df["ts_lt"].dt.date
    df["lt_hour_float"] = df["ts_lt"].dt.hour + df["ts_lt"].dt.minute / 60.0
    df["month"] = df["ts_lt"].dt.month
    df["year"] = df["ts_lt"].dt.year

    # --- Daily slopes with month-dependent windows ---
    daily_rows = []

    for d, g in df.groupby("date_lt"):
        # all samples of a local date share the same local month
        month = int(g["month"].iloc[0])
        (m_start, m_end), (e_start, e_end) = seasonal_windows_for_month(month)

        s_m = window_slope(g, m_start, m_end)
        s_e = window_slope(g, e_start, e_end)

        # keep the day as soon as at least one of the two slopes exists
        if not np.isfinite(s_m) and not np.isfinite(s_e):
            continue

        daily_rows.append(
            {
                "date_lt": d,
                "month": month,
                "year": g["year"].iloc[0],
                "morning_start_lt": m_start,
                "morning_end_lt": m_end,
                "evening_start_lt": e_start,
                "evening_end_lt": e_end,
                "slope_morning_tec_per_hour": s_m,
                "slope_evening_tec_per_hour": s_e,
            }
        )

    daily = pd.DataFrame(daily_rows)
    if daily.empty:
        print("No daily slopes computed.")
        return

    # --- Monthly aggregates of slopes ---
    def iqr(x):
        # interquartile range, ignoring NaN slopes
        x = np.asarray(x, float)
        return np.nanpercentile(x, 75) - np.nanpercentile(x, 25)

    monthly = (
        daily.groupby("month", as_index=False)
        .agg(
            n_days=("date_lt", "count"),
            m_med=("slope_morning_tec_per_hour", "median"),
            m_iqr=("slope_morning_tec_per_hour", iqr),
            e_med=("slope_evening_tec_per_hour", "median"),
            e_iqr=("slope_evening_tec_per_hour", iqr),
        )
        .sort_values("month")
    )

    # Make sure the destination folders exist before writing anything.
    OUT_CSV.parent.mkdir(parents=True, exist_ok=True)
    OUT_PNG.parent.mkdir(parents=True, exist_ok=True)

    monthly.to_csv(OUT_CSV, index=False)
    print(f"Saved {OUT_CSV}")

    # --- Quicklook figure ---
    x = monthly["month"].values
    fig, ax = plt.subplots(figsize=(9, 4))

    ax.plot(x, monthly["m_med"], "-o", label="Morning slope (median)")
    ax.plot(x, monthly["e_med"], "-o", label="Evening slope (median)")
    ax.axhline(0, color="k", lw=1.2, ls="--")

    ax.set_yticks([-3, -2, -1, 0, 1, 2, 3])
    ax.set_xticks(np.arange(1, 13))
    ax.set_xlim(0.5, 12.5)

    # Tick styling (same look on both axes)
    for axis in (ax.yaxis, ax.xaxis):
        for t in axis.get_ticklabels():
            t.set_fontsize(10)
            t.set_color("black")
            t.set_weight("bold")

    ax.grid(True, alpha=0.3)
    ax.legend()

    # Title and axis labels are placed as figure-level text.
    fig.text(
        0.5,
        0.89,
        "Quiet monthly diurnal slopes at OUCA (seasonal LT windows)",
        ha="center",
        fontsize=12,
        weight="bold",
    )
    fig.text(
        0.5,
        0.03,
        "Month of year",
        ha="center",
        fontsize=12,
        weight="bold",
    )
    fig.text(
        0.06,
        0.5,
        "dVTEC/dt [TECU/hour]",
        va="center",
        rotation="vertical",
        fontsize=12,
        weight="bold",
    )

    fig.savefig(OUT_PNG, dpi=300)
    plt.close(fig)
    print(f"Saved {OUT_PNG}")


if __name__ == "__main__":
    main()


In [None]:
"""
Combined figure: EIA crest latitude vs. monthly diurnal VTEC slopes

This cell builds a 2-panel figure:

  (1) Time series of the daily median latitude of the northern EIA crest
      near the OUCA longitude.
  (2) Monthly median diurnal slopes (morning/evening) of VTEC at OUCA,
      previously computed with diurnal_slopes_from_S30.py.

Inputs
------
- CSV_EIA:
    CSV produced by eia_crest_from_ionex_using_my_reader.py
    Must contain at least:
        * date_lt          (local-date of the crest)
        * crest_lat_med    (daily median crest latitude in degrees)

- CSV_SLOPE:
    CSV produced by diurnal_slopes_from_S30.py
    Must contain at least:
        * month            (1..12)
        * m_med            (morning slope median, TECU/hour)
        * e_med            (evening slope median, TECU/hour)

Output
------
- OUT_PNG_COMBO:
    PNG file with the two-panel figure, ready for publication.
"""

import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import calendar

# --------- CONFIG: update paths if needed ---------
# Raw strings: "\o", "\d", "\m" and "\E" are not valid escape sequences, so the
# plain literals only worked by accident (and emit a warning on recent Python
# versions). The resulting path values are unchanged.
CSV_EIA = Path(r"\output_csv\daily_eia_crest_latitude.csv")
CSV_SLOPE = Path(r"\output_csv\monthly_diurnal_slopes_seasonal_windows.csv")
# NOTE(review): this figure is written to the CSV folder, while the other
# figures in this file go to "\output_figs" — confirm that this is intended.
OUT_PNG_COMBO = Path(r"\output_csv\EIAcrest_vs_monthly_slopes_MS.png")

OUCA_LAT = 31.206  # OUCA geodetic latitude [deg]

# --------- READ INPUT DATA ---------
# 1) EIA crest (daily)
crest = pd.read_csv(CSV_EIA)

# Both columns are used below, so check both up front (previously only
# 'date_lt' was validated and a missing 'crest_lat_med' surfaced as a KeyError).
for col in ["date_lt", "crest_lat_med"]:
    if col not in crest.columns:
        raise ValueError(f"CSV_EIA must contain a '{col}' column.")

# Unparseable dates become NaT and are dropped together with missing latitudes.
crest["date_lt"] = pd.to_datetime(crest["date_lt"], errors="coerce")
crest = crest.dropna(subset=["date_lt", "crest_lat_med"])

# 2) Monthly diurnal slopes
monthly = pd.read_csv(CSV_SLOPE)
for col in ["month", "m_med", "e_med"]:
    if col not in monthly.columns:
        raise ValueError(f"CSV_SLOPE must contain the '{col}' column.")

monthly = monthly.sort_values("month")
x = monthly["month"].values
# int(): calendar.month_abbr needs an integer index, whatever dtype the CSV column was read as
month_labels = [calendar.month_abbr[int(m)] for m in x]  # Jan, Feb, ...

# --------- BUILD 2-PANEL FIGURE ---------
# Two stacked panels with independent x-axes: the top one is a date axis,
# the bottom one is indexed by month number.
fig, (ax_top, ax_bot) = plt.subplots(2, 1, figsize=(12, 10), sharex=False)

# =======================
# (1) TOP PANEL: EIA CREST LATITUDE
# =======================
ax_top.plot(
    crest["date_lt"],
    crest["crest_lat_med"],
    lw=1.2,
    label="Daily median crest latitude",
)
# dashed reference line at the station latitude
ax_top.axhline(
    OUCA_LAT,
    ls="--",
    color="k",
    lw=1.0,
    label="OUCA latitude",
)

ax_top.set_ylabel("Latitude [deg]", fontsize=12, fontweight="bold")
ax_top.grid(True, alpha=0.3)
ax_top.legend(loc="best", frameon=True)

# Tick styling
for t in ax_top.yaxis.get_ticklabels():
    t.set_fontsize(12)
    t.set_color("black")
    t.set_weight("bold")
for t in ax_top.xaxis.get_ticklabels():
    t.set_fontsize(12)
    t.set_color("black")
    t.set_weight("bold")

ax_top.set_title(
    "Northern EIA crest latitude near OUCA longitude",
    fontsize=14,
    fontweight="bold",
    loc="center",
)

# =======================
# (2) BOTTOM PANEL: MONTHLY DIURNAL SLOPES
# =======================
ax_bot.plot(
    x,
    monthly["m_med"],
    "-o",
    label="Morning slope (median)",
)
ax_bot.plot(
    x,
    monthly["e_med"],
    "-o",
    label="Evening slope (median)",
)
# zero line separates increasing (positive) from decreasing (negative) slopes
ax_bot.axhline(0, color="k", lw=1.2, ls="--")

ax_bot.set_yticks([-3, -2, -1, 0, 1, 2, 3])
# one tick per month present in the CSV, labelled with the month abbreviation
ax_bot.set_xticks(x)
ax_bot.set_xticklabels(month_labels)
ax_bot.set_xlim(0.5, 12.5)

# Tick styling
for t in ax_bot.yaxis.get_ticklabels():
    t.set_fontsize(12)
    t.set_color("black")
    t.set_weight("bold")
for t in ax_bot.xaxis.get_ticklabels():
    t.set_fontsize(12)
    t.set_color("black")
    t.set_weight("bold")

ax_bot.grid(True, alpha=0.3)
ax_bot.legend(loc="best", frameon=True)

ax_bot.set_ylabel("dVTEC/dt [TECU/hour]", fontsize=12, fontweight="bold")
ax_bot.set_title(
    "Quiet monthly diurnal slopes at OUCA (seasonal LT windows)",
    fontsize=14,
    fontweight="bold",
    loc="center",
)

# Global x-axis label, placed as figure-level text near the bottom edge
# (it describes the bottom panel; the top panel's x-axis is a date axis)
fig.text(
    0.5,
    0.03,
    "Month of year",
    ha="center",
    fontsize=12,
    weight="bold",
)

fig.savefig(OUT_PNG_COMBO, dpi=300)
plt.close(fig)
print(f"Saved combined figure -> {OUT_PNG_COMBO}")
