Figure unique 3 rangées: F10.7, Kp, Dst

In [None]:
# ===== Figure unique 3 rangées: F10.7, Kp, Dst (2010–2025) avec période TEC surlignée =====
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime, timezone
from pathlib import Path

# --------- Display settings ---------
matplotlib.rcParams.update({
    'savefig.dpi': 300,
    'figure.figsize': (10, 9),
    'font.size': 8,
})

# --------- Input/output paths (adapt to the local machine if needed) ---------
CSV_PATH = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/Indices/F10_7_2010_2025.csv")
Kp_TXT = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/Indices/GFZ_all_indices_2010-2025.txt")
Dst_TXT = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/Indices/Kyoto_DST_index_2010-2025.txt")
OUT_ALL_PNG = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/figures/F107_Kp_Dst_3rows_2010-2025.png")

# --------- Time windows ---------
# Full plotted range, then the sub-range covered by the TEC study.
plot_start, plot_end = pd.Timestamp("2010-01-01"), pd.Timestamp("2025-09-26")
tec_start, tec_end = pd.Timestamp("2015-10-01"), pd.Timestamp("2025-09-26")

# --------- Outils ---------
def jd_to_datetime_utc(jd):
    """Convert a Julian Date to a timezone-aware UTC ``datetime``.

    The calendar date is derived with integer arithmetic on the Julian Day
    Number; the fractional day is then added as a whole number of seconds.

    Parameters
    ----------
    jd : float or anything accepted by ``float()``
        Julian Date (days, with the day boundary at noon).

    Returns
    -------
    datetime
        UTC datetime rounded to the nearest second.
    """
    # Local import: the surrounding cell only imports ``datetime`` and ``timezone``.
    from datetime import timedelta

    jd = float(jd)
    J = int(jd + 0.5)          # day number of the civil day (starts at midnight)
    f = jd + 0.5 - J           # fraction of that civil day already elapsed
    if J >= 2299161:
        # Day numbers from this threshold on get the Gregorian-calendar correction.
        a = int((J - 1867216.25) / 36524.25)
        A = J + 1 + a - int(a / 4)
    else:
        A = J
    B = A + 1524
    C = int((B - 122.1) / 365.25)
    D = int(365.25 * C)
    E = int((B - D) / 30.6001)
    day_of_month = B - D - int(30.6001 * E)
    month = E - 1 if E < 14 else E - 13
    year = C - 4716 if month > 2 else C - 4715

    # Add the time of day with timedelta arithmetic so that a value rounding up
    # to 24:00:00 rolls over to the next day instead of building an invalid
    # hour=24 datetime (which raised ValueError in the manual carry logic).
    midnight = datetime(year, month, day_of_month, tzinfo=timezone.utc)
    return midnight + timedelta(seconds=round(f * 86400.0))

def load_kp_gfz_daily(path: Path) -> pd.DataFrame:
    """Load a whitespace-separated GFZ-style index file and compute a daily mean Kp.

    Each data record must have at least 28 whitespace-separated fields; fields
    0-2 are read as year, month, day and fields 7-14 as the eight Kp values of
    the day. Blank lines, ``#`` comment lines, short records and records whose
    date or Kp fields cannot be parsed are skipped.

    Parameters
    ----------
    path : Path
        Text file to read.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (Timestamp), ``kp_daily`` (mean of the non-negative Kp
        values, NaN when none is valid) and ``kp8`` (the eight raw values as
        read), sorted by ``date``.
    """
    dates, kp_daily, kp8_store = [], [], []
    with open(path, "r", errors="ignore") as fh:
        for line in fh:
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            parts = stripped.split()
            if len(parts) < 28:
                continue
            try:
                # Build the date inside the try block: an impossible calendar
                # date (e.g. month 13) must skip the record like any other
                # malformed field instead of aborting the whole load.
                day = pd.Timestamp(year=int(parts[0]), month=int(parts[1]), day=int(parts[2]))
                kp8 = [float(x) for x in parts[7:15]]
            except ValueError:
                continue
            kparr = np.array(kp8, dtype=float)
            kparr[kparr < 0] = np.nan  # negative values are treated as missing
            kp_mean = float(np.nanmean(kparr)) if np.isfinite(kparr).any() else np.nan
            dates.append(day)
            kp_daily.append(kp_mean)
            kp8_store.append(kp8)
    df = pd.DataFrame({"date": dates, "kp_daily": kp_daily, "kp8": kp8_store})
    return df.sort_values("date").reset_index(drop=True)

def load_dst_kyoto_daily(path: Path) -> pd.DataFrame:
    """Load fixed-width ``DST`` records and compute a daily mean Dst.

    Only lines starting with the literal ``DST`` are parsed. Character slices
    used: ``[3:5]`` two-digit year, ``[5:7]`` month, ``[8:10]`` day, ``[14:16]``
    century digits (optional), then 24 four-character hourly fields starting
    at offset 20. Records whose date cannot be parsed are skipped.

    Parameters
    ----------
    path : Path
        Text file to read.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (Timestamp), ``dst_daily`` (mean of the valid hourly
        values, NaN when none is valid) and ``dst24`` (the 24 hourly values
        with NaN for blank, non-numeric or ``9999`` fields), sorted by ``date``.
    """
    dates, daily, hours_list = [], [], []
    with open(path, "r", errors="ignore") as fh:
        for line in fh:
            # A line starting with "DST" is by construction neither blank nor a comment.
            if not line.startswith("DST"):
                continue
            try:
                yy = int(line[3:5])
                mm = int(line[5:7])
                dd = int(line[8:10])
                cent = line[14:16].strip()
                if cent.isdigit():
                    year = int(f"{cent}{yy:02d}")
                else:
                    # No century digits: pivot two-digit years at 50.
                    year = 1900 + yy if yy >= 50 else 2000 + yy
                # Built inside the try block so an impossible calendar date
                # skips the record instead of aborting the whole load.
                day = pd.Timestamp(year=year, month=mm, day=dd)
            except ValueError:
                continue
            vals = []
            for i in range(24):
                field = line[20 + i * 4: 24 + i * 4].strip()
                try:
                    v = int(field)
                except ValueError:
                    # Blank or non-numeric field -> missing hour.
                    vals.append(np.nan)
                    continue
                vals.append(np.nan if v == 9999 else float(v))  # 9999 marks a missing hour
            dst_mean = float(np.nanmean(vals)) if np.isfinite(vals).any() else np.nan
            dates.append(day)
            daily.append(dst_mean)
            hours_list.append(vals)
    df = pd.DataFrame({"date": dates, "dst_daily": daily, "dst24": hours_list})
    return df.sort_values("date").reset_index(drop=True)

# --------- Read F10.7 (Penticton CSV: JD, F_obs, F_adj) ---------
df = pd.read_csv(CSV_PATH)
cols = list(df.columns)
if len(cols) < 3:
    raise ValueError("Le CSV doit contenir ≥3 colonnes (JD, F_obs, F_adj).")
# Columns are taken by position: Julian Date, observed flux, adjusted flux.
jd_col, fobs_col, fadj_col = cols[:3]

df["datetime_utc"] = pd.to_datetime([jd_to_datetime_utc(x) for x in df[jd_col]], utc=True)
df["date"] = pd.to_datetime(df["datetime_utc"].dt.date)

# Outlier filter on the adjusted column: keep every row that is not above 500.
non_outliers_df = df.loc[~(df[fadj_col] > 500)]

m_window = non_outliers_df["date"].between(plot_start, plot_end)
dfw = non_outliers_df.loc[m_window].copy()

# --------- Read Kp and Dst, restricted to the plotted window ---------
kp_df = load_kp_gfz_daily(Kp_TXT)
kp_df = kp_df[kp_df["date"].between(plot_start, plot_end)].copy()

dst_df = load_dst_kyoto_daily(Dst_TXT)
dst_df = dst_df[dst_df["date"].between(plot_start, plot_end)].copy()

# --------- Masks selecting the TEC period ---------
m_f_tec = dfw["date"].between(tec_start, tec_end)
m_kp_tec = kp_df["date"].between(tec_start, tec_end)
m_dst_tec = dst_df["date"].between(tec_start, tec_end)

# --------- 3-row figure with a shared x axis ---------
fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 9), dpi=300)

# One entry per row: (data frame, value column, TEC-period mask, legend label, y label).
#   1) F10.7 adjusted column, 2) daily Kp (mean of the 8 values), 3) daily Dst (mean of 24 h).
panels = [
    (dfw, fadj_col, m_f_tec, "F10.7 (1-AU adjusted) (Penticton)", "$F_{10.7}$ [sfu]"),
    (kp_df, "kp_daily", m_kp_tec, "Kp quotidien", "Kp"),
    (dst_df, "dst_daily", m_dst_tec, "Dst quotidien", "Dst [nT]"),
]
for ax, (frame, col, tec_mask, label, ylabel) in zip(axes, panels):
    ax.plot(frame["date"], frame[col], lw=1.2, label=label)
    # Redraw the TEC study period on top, thicker and in red, to highlight it.
    ax.plot(frame.loc[tec_mask, "date"], frame.loc[tec_mask, col], lw=2.0, color="red",
            label="Période TEC")
    ax.set_ylabel(ylabel, fontsize=12, weight="bold")
    ax.grid(True, alpha=0.35)
    ax.legend(loc="best", prop={'size': 8})

# --------- Common formatting ---------
for ax in axes:
    ax.set_xlim(plot_start, plot_end)
    for tick in ax.yaxis.get_ticklabels():
        tick.set_fontsize(12)
        tick.set_color('black')
        tick.set_weight('bold')

# Only the bottom row shows x tick labels.
for ax in axes[:-1]:
    ax.tick_params(axis='x', labelbottom=False)

for tick in axes[-1].xaxis.get_ticklabels():
    tick.set_fontsize(12)
    tick.set_color('black')
    tick.set_weight('bold')

fig.text(0.5, 0.965, "F10.7, Kp, and Dst — 2010–2025 | TEC study period: Oct 2015–Sep 2025",
         ha="center", fontsize=12, weight="bold")
# hspace=0 stacks the three rows without any vertical gap.
fig.subplots_adjust(left=0.12, right=0.97, top=0.93, bottom=0.06, hspace=0.0)
fig.text(0.5, 0.01, "Date", ha="center", fontsize=12, weight="bold")

# Create the output folder first: savefig does not create missing directories.
OUT_ALL_PNG.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(OUT_ALL_PNG, dpi=300)
plt.close(fig)
print(f"✅ Figure combinée sauvegardée → {OUT_ALL_PNG}")


Figure unique 3 rangées avec ombre: F10.7, Kp, Dst

In [None]:
# ===== Single 3-row figure: F10.7, Kp, Dst (2010–2025) with the TEC period shaded =====
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime, timezone
from pathlib import Path

# --------- Display settings ---------
matplotlib.rcParams.update({
    'savefig.dpi': 300,
    'figure.figsize': (10, 9),
    'font.size': 8,
})

# --------- Input/output paths (adapt to the local machine if needed) ---------
CSV_PATH = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/Indices/F10_7_2010_2025.csv")
Kp_TXT = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/Indices/GFZ_all_indices_2010-2025.txt")
Dst_TXT = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/Indices/Kyoto_DST_index_2010-2025.txt")
OUT_ALL_PNG = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/figures/F107_Kp_Dst_3rows_2010-2025_shadowed.png")

# --------- Time windows ---------
# Full plotted range, then the sub-range covered by the TEC study.
plot_start, plot_end = pd.Timestamp("2010-01-01"), pd.Timestamp("2025-09-26")
tec_start, tec_end = pd.Timestamp("2015-10-01"), pd.Timestamp("2025-09-26")

# --------- Outils ---------
def jd_to_datetime_utc(jd):
    """Convert a Julian Date to a timezone-aware UTC ``datetime``.

    The calendar date is derived with integer arithmetic on the Julian Day
    Number; the fractional day is then added as a whole number of seconds.

    Parameters
    ----------
    jd : float or anything accepted by ``float()``
        Julian Date (days, with the day boundary at noon).

    Returns
    -------
    datetime
        UTC datetime rounded to the nearest second.
    """
    # Local import: the surrounding cell only imports ``datetime`` and ``timezone``.
    from datetime import timedelta

    jd = float(jd)
    J = int(jd + 0.5)          # day number of the civil day (starts at midnight)
    f = jd + 0.5 - J           # fraction of that civil day already elapsed
    if J >= 2299161:
        # Day numbers from this threshold on get the Gregorian-calendar correction.
        a = int((J - 1867216.25) / 36524.25)
        A = J + 1 + a - int(a / 4)
    else:
        A = J
    B = A + 1524
    C = int((B - 122.1) / 365.25)
    D = int(365.25 * C)
    E = int((B - D) / 30.6001)
    day_of_month = B - D - int(30.6001 * E)
    month = E - 1 if E < 14 else E - 13
    year = C - 4716 if month > 2 else C - 4715

    # Add the time of day with timedelta arithmetic so that a value rounding up
    # to 24:00:00 rolls over to the next day instead of building an invalid
    # hour=24 datetime (which raised ValueError in the manual carry logic).
    midnight = datetime(year, month, day_of_month, tzinfo=timezone.utc)
    return midnight + timedelta(seconds=round(f * 86400.0))

def load_kp_gfz_daily(path: Path) -> pd.DataFrame:
    """Load a whitespace-separated GFZ-style index file and compute a daily mean Kp.

    Each data record must have at least 28 whitespace-separated fields; fields
    0-2 are read as year, month, day and fields 7-14 as the eight Kp values of
    the day. Blank lines, ``#`` comment lines, short records and records whose
    date or Kp fields cannot be parsed are skipped.

    Parameters
    ----------
    path : Path
        Text file to read.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (Timestamp), ``kp_daily`` (mean of the non-negative Kp
        values, NaN when none is valid) and ``kp8`` (the eight raw values as
        read), sorted by ``date``.
    """
    dates, kp_daily, kp8_store = [], [], []
    with open(path, "r", errors="ignore") as fh:
        for line in fh:
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            parts = stripped.split()
            if len(parts) < 28:
                continue
            try:
                # Build the date inside the try block: an impossible calendar
                # date (e.g. month 13) must skip the record like any other
                # malformed field instead of aborting the whole load.
                day = pd.Timestamp(year=int(parts[0]), month=int(parts[1]), day=int(parts[2]))
                kp8 = [float(x) for x in parts[7:15]]
            except ValueError:
                continue
            kparr = np.array(kp8, dtype=float)
            kparr[kparr < 0] = np.nan  # negative values are treated as missing
            kp_mean = float(np.nanmean(kparr)) if np.isfinite(kparr).any() else np.nan
            dates.append(day)
            kp_daily.append(kp_mean)
            kp8_store.append(kp8)
    df = pd.DataFrame({"date": dates, "kp_daily": kp_daily, "kp8": kp8_store})
    return df.sort_values("date").reset_index(drop=True)

def load_dst_kyoto_daily(path: Path) -> pd.DataFrame:
    """Load fixed-width ``DST`` records and compute a daily mean Dst.

    Only lines starting with the literal ``DST`` are parsed. Character slices
    used: ``[3:5]`` two-digit year, ``[5:7]`` month, ``[8:10]`` day, ``[14:16]``
    century digits (optional), then 24 four-character hourly fields starting
    at offset 20. Records whose date cannot be parsed are skipped.

    Parameters
    ----------
    path : Path
        Text file to read.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (Timestamp), ``dst_daily`` (mean of the valid hourly
        values, NaN when none is valid) and ``dst24`` (the 24 hourly values
        with NaN for blank, non-numeric or ``9999`` fields), sorted by ``date``.
    """
    dates, daily, hours_list = [], [], []
    with open(path, "r", errors="ignore") as fh:
        for line in fh:
            # A line starting with "DST" is by construction neither blank nor a comment.
            if not line.startswith("DST"):
                continue
            try:
                yy = int(line[3:5])
                mm = int(line[5:7])
                dd = int(line[8:10])
                cent = line[14:16].strip()
                if cent.isdigit():
                    year = int(f"{cent}{yy:02d}")
                else:
                    # No century digits: pivot two-digit years at 50.
                    year = 1900 + yy if yy >= 50 else 2000 + yy
                # Built inside the try block so an impossible calendar date
                # skips the record instead of aborting the whole load.
                day = pd.Timestamp(year=year, month=mm, day=dd)
            except ValueError:
                continue
            vals = []
            for i in range(24):
                field = line[20 + i * 4: 24 + i * 4].strip()
                try:
                    v = int(field)
                except ValueError:
                    # Blank or non-numeric field -> missing hour.
                    vals.append(np.nan)
                    continue
                vals.append(np.nan if v == 9999 else float(v))  # 9999 marks a missing hour
            dst_mean = float(np.nanmean(vals)) if np.isfinite(vals).any() else np.nan
            dates.append(day)
            daily.append(dst_mean)
            hours_list.append(vals)
    df = pd.DataFrame({"date": dates, "dst_daily": daily, "dst24": hours_list})
    return df.sort_values("date").reset_index(drop=True)

# --------- Read F10.7 (Penticton CSV: JD, F_obs, F_adj) ---------
df = pd.read_csv(CSV_PATH)
cols = list(df.columns)
if len(cols) < 3:
    raise ValueError("Le CSV doit contenir ≥3 colonnes (JD, F_obs, F_adj).")
# Columns are taken by position: Julian Date, observed flux, adjusted flux.
jd_col, fobs_col, fadj_col = cols[:3]

df["datetime_utc"] = pd.to_datetime([jd_to_datetime_utc(x) for x in df[jd_col]], utc=True)
df["date"] = pd.to_datetime(df["datetime_utc"].dt.date)

# Outlier filter on the adjusted column: keep every row that is not above 500.
non_outliers_df = df.loc[~(df[fadj_col] > 500)]

m_window = non_outliers_df["date"].between(plot_start, plot_end)
dfw = non_outliers_df.loc[m_window].copy()

# --------- Read Kp and Dst, restricted to the plotted window ---------
kp_df = load_kp_gfz_daily(Kp_TXT)
kp_df = kp_df[kp_df["date"].between(plot_start, plot_end)].copy()

dst_df = load_dst_kyoto_daily(Dst_TXT)
dst_df = dst_df[dst_df["date"].between(plot_start, plot_end)].copy()

# --------- Masks selecting the TEC period ---------
m_f_tec = dfw["date"].between(tec_start, tec_end)
m_kp_tec = kp_df["date"].between(tec_start, tec_end)
m_dst_tec = dst_df["date"].between(tec_start, tec_end)

# --------- 3-row figure with a shared x axis ---------
fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 9), dpi=300)

# One entry per row: (data frame, value column, legend label, shading colour, legend location).
#   1) F10.7 adjusted column, 2) daily Kp (mean of the 8 values), 3) daily Dst (mean of 24 h).
panels = [
    (dfw, fadj_col, "F10.7 (1-AU adjusted) (Penticton)", "red", "upper center"),
    (kp_df, "kp_daily", "Daily Kp (GFZ)", "green", "upper center"),
    (dst_df, "dst_daily", "Daily Dst (Kyoto)", "orange", "lower center"),
]
for ax, (frame, col, label, shade, legend_loc) in zip(axes, panels):
    ax.plot(frame["date"], frame[col], lw=1.2, label=label)
    # Shade the TEC study period across the full height of the row.
    ax.axvspan(tec_start, tec_end, color=shade, alpha=0.2, linewidth=0, label="TEC period")
    ax.grid(True, alpha=0.35)
    ax.legend(loc=legend_loc, prop={'size': 8})

# --------- Common formatting ---------
for ax in axes:
    ax.set_xlim(plot_start, plot_end)
    for tick in ax.yaxis.get_ticklabels():
        tick.set_fontsize(12)
        tick.set_color('black')
        tick.set_weight('bold')

# Only the bottom row shows x tick labels.
for ax in axes[:-1]:
    ax.tick_params(axis='x', labelbottom=False)

for tick in axes[-1].xaxis.get_ticklabels():
    tick.set_fontsize(12)
    tick.set_color('black')
    tick.set_weight('bold')

fig.text(0.5, 0.965, "F10.7, Kp, and Dst — 2010–2025 | TEC study period: Oct 2015–Sep 2025 shadowed",
         ha="center", fontsize=12, weight="bold")
# hspace=0 stacks the three rows without any vertical gap.
fig.subplots_adjust(left=0.12, right=0.97, top=0.93, bottom=0.06, hspace=0.0)
fig.text(0.5, 0.01, "Date", ha="center", fontsize=12, weight="bold")
# The y labels are drawn as figure-level text in the left margin instead of
# per-axes ylabels.
fig.text(0.05, 0.75, "$F_{10.7}$ [sfu]", ha="center", rotation=90, fontsize=12, weight="bold")
fig.text(0.05, 0.5, "Kp", ha="center", rotation=90, fontsize=12, weight="bold")
fig.text(0.05, 0.15, "DST [nT]", ha="center", rotation=90, fontsize=12, weight="bold")

# Create the output folder first: savefig does not create missing directories.
OUT_ALL_PNG.parent.mkdir(parents=True, exist_ok=True)
fig.savefig(OUT_ALL_PNG, dpi=300)
plt.close(fig)
print(f"✅ Figure combinée sauvegardée → {OUT_ALL_PNG}")


Generate four daily TEC figures per year, with GIM and offset

In [None]:
# ========================
# FIGURES JOURNALIERES 2×2 PAR AN
# ========================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

# --- paths of the already-enriched CSV files ---
CSV_30MIN_WITH_GIM = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/2015_2025_MS_VTEC_30min_stats.csv")
CSV_DAILY_WITH_OFF = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/2015_2025_MS_VTEC_daily_stats_UTC_with_indices_and_max_with_GFZlabels.csv")

# --- TEC column names in the 30-min CSV ---
VTEC_MED_COL  = "VTEC_median"        # <- already used elsewhere in the pipeline
VTEC_MEAN_COL = "VTEC_mean"          # <- ignored by the loaders/plots when the column is absent
GIM_COL       = "vtec_gim"
GIM_OFSSET= 'gim_offset_tecu'  # NOTE(review): name is spelled "OFSSET" and is not referenced in the visible cells

# --- output folder for the figures (created on the spot if missing) ---
FIG_DIR = Path(CSV_30MIN_WITH_GIM).parent / "figures/figs_quiet/MS_FIGURES_GIM"
FIG_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
def _set_xticks_30min(ax):
    """Set the x range of *ax* to (-1, 24) with a zero-padded hour tick every two hours."""
    tick_hours = range(0, 24, 2)
    ax.set_xlim(-1, 24)
    ax.set_xticks(np.arange(0, 24, 2))
    ax.set_xticklabels(["%02d" % hour for hour in tick_hours], rotation=0)
# --- utilitaires de lecture ---
def _load_30min_csv(path: Path) -> pd.DataFrame:
    """Read the 30-min CSV and normalise its time, date and numeric columns.

    ``ts_utc`` is parsed as UTC from the ``ts_utc`` column when present, else
    from ``time``. ``date_utc`` becomes a ``datetime.date`` taken from the
    ``date_utc`` column when present, else from ``ts_utc`` floored to the day.
    The columns named by VTEC_MED_COL, VTEC_MEAN_COL and GIM_COL are coerced to
    numeric when they exist (unparseable values become NaN).
    """
    df = pd.read_csv(path)

    # Timestamp column: prefer an existing "ts_utc", fall back to "time".
    time_source = "ts_utc" if "ts_utc" in df.columns else "time"
    df["ts_utc"] = pd.to_datetime(df[time_source], utc=True, errors="coerce")

    # Calendar-day column.
    if "date_utc" in df.columns:
        day_stamps = pd.to_datetime(df["date_utc"], utc=True, errors="coerce")
    else:
        day_stamps = df["ts_utc"].dt.floor("D")
    df["date_utc"] = day_stamps.dt.date

    present = [name for name in (VTEC_MED_COL, VTEC_MEAN_COL, GIM_COL) if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors="coerce")

    return df


def _load_daily_csv(path: Path) -> pd.DataFrame:
    """Read the daily CSV and expose ``date_utc`` as ``datetime.date`` values.

    The date is parsed (as UTC) from ``date_utc`` when that column exists,
    otherwise from ``date``; when neither exists the frame is returned as read.
    """
    df = pd.read_csv(path)
    for source in ("date_utc", "date"):
        if source in df.columns:
            df["date_utc"] = pd.to_datetime(df[source], utc=True, errors="coerce").dt.date
            break
    return df

s30 = _load_30min_csv(CSV_30MIN_WITH_GIM)
# --- normalise the time/date column types ---
s30["ts_utc"] = pd.to_datetime(s30["ts_utc"], utc=True, errors="coerce")
s30["date_utc"] = pd.to_datetime(s30["date_utc"], utc=True, errors="coerce").dt.date

# Daily offsets are optional: fall back to an empty table with the expected columns.
if Path(CSV_DAILY_WITH_OFF).exists():
    daily_off = _load_daily_csv(CSV_DAILY_WITH_OFF)
else:
    daily_off = pd.DataFrame(columns=["date_utc", "gim_offset_tecu"])

# --- offset of the day: from the daily CSV when available, else median(VTEC_median - vtec_gim) over the day ---
def get_daily_offset(day, s30_df, daily_df=None):
    """Return the GIM offset (float) for *day*, or NaN when it cannot be determined.

    The stored value in ``daily_df["gim_offset_tecu"]`` is used when its first
    matching row is not null; otherwise the offset is the median of
    ``VTEC_median - vtec_gim`` over the rows of ``s30_df`` for that day.
    """
    # 1) stored value from the daily table, if one was supplied
    if daily_df is not None and "gim_offset_tecu" in daily_df.columns:
        stored = daily_df.loc[daily_df["date_utc"] == day, "gim_offset_tecu"]
        if len(stored) > 0 and pd.notna(stored.iloc[0]):
            return float(stored.iloc[0])

    # 2) fallback: derive it from the 30-min samples of that day
    same_day = s30_df[s30_df["date_utc"] == day]
    pairs = pd.DataFrame({
        "obs": pd.to_numeric(same_day["VTEC_median"], errors="coerce"),
        "gim": pd.to_numeric(same_day["vtec_gim"], errors="coerce"),
    }).dropna()
    if pairs.empty:
        return float("nan")
    return float((pairs["obs"] - pairs["gim"]).median())


# --- choice of the 4 days per year ---
# Default targets are the (month, day) pairs below: the 15th of January, April,
# July and November. When a target day has no data, the closest available day
# of that year is used instead (see pick_days_for_year).
# NOTE(review): the original comment listed jan/apr/jul/oct, but the list uses
# month 11 — confirm which one is intended.
DEFAULT_MONTH_DAY = [(1, 15), (4, 15), (7, 15), (11, 15)]

def nearest_available_date(year: int, target_date, available_dates):
    """Return the available date closest to *target_date* (everything tz-naive).

    Parameters
    ----------
    year : int
        Unused; kept so existing callers keep working.
    target_date : anything accepted by ``pd.Timestamp``
        The day to approximate.
    available_dates : list, array, Index or Series of date-likes
        Candidate days; entries that cannot be parsed are ignored.

    Returns
    -------
    datetime.date or None
        The closest candidate (the first one on ties), or ``None`` when there
        is no valid candidate.
    """
    target = pd.Timestamp(target_date)  # naive (no tz)

    # Always work on a DatetimeIndex so the lookup below is positional. With a
    # Series input, ``obj[i]`` would be a *label* lookup, which returns the
    # wrong element (or raises) once unparseable rows have been filtered out.
    candidates = pd.DatetimeIndex(pd.to_datetime(available_dates, errors="coerce")).dropna()
    if len(candidates) == 0:
        return None

    gaps = np.abs((candidates - target).to_numpy())
    return candidates[int(np.argmin(gaps))].date()


def pick_days_for_year(year: int, s30: pd.DataFrame) -> list:
    """Pick up to four days of *year*, each the closest available to a DEFAULT_MONTH_DAY target."""
    # Round-trip through ``.dt.date`` so every entry is a plain calendar day.
    calendar_days = pd.to_datetime(s30["date_utc"], errors="coerce").dt.date
    in_year = pd.to_datetime(calendar_days).dt.year == year
    dates_year = pd.to_datetime(calendar_days[in_year], errors="coerce").dropna().unique()

    # One candidate per target; targets are built without any timezone.
    candidates = (
        nearest_available_date(year, pd.Timestamp(year=year, month=month, day=dom), dates_year)
        for month, dom in DEFAULT_MONTH_DAY
    )
    return [found for found in candidates if found is not None]


# --- plot d'une année (2×2) ---
def plot_year_grid(year: int, s30: pd.DataFrame, days: list[object] | None = None,
                   daily_df: pd.DataFrame | None = None) -> Path | None:
    """Draw a 2×2 grid of daily VTEC curves (median, mean, GIM) for *year* and save it.

    Parameters
    ----------
    year : int
        Year shown in the figure title and used in the output file name.
    s30 : pd.DataFrame
        30-min table with ``ts_utc``, ``date_utc`` and the VTEC/GIM columns.
    days : list, optional
        Days to plot (only the first four are used). When empty or omitted,
        they are chosen with ``pick_days_for_year``.
    daily_df : pd.DataFrame, optional
        Daily table forwarded to ``get_daily_offset``. When omitted, the offset
        is always computed from the 30-min samples (the previous behaviour).

    Returns
    -------
    Path or None
        Path of the saved PNG, or ``None`` when there is no day to plot.
    """
    # chosen days
    if not days:
        days = pick_days_for_year(year, s30)
    days = list(days)
    if len(days) == 0:
        print(f"[INFO] {year}: pas de données.")
        return None
    if len(days) < 4:
        print(f"[INFO] {year}: seulement {len(days)} jour(s) disponibles, la grille 2×2 sera partielle.")

    # figure: no vertical gap between the two rows of a column
    fig, axes = plt.subplots(2, 2, figsize=(12, 4), sharex=True, sharey=False, constrained_layout=False)
    fig.subplots_adjust(hspace=0.0, wspace=0.12)

    has_mean = VTEC_MEAN_COL in s30.columns
    line_kw = dict(lw=1.6)
    legend_handles = None

    # fill the four cells in row-major order
    for k in range(4):
        r, c = divmod(k, 2)
        ax = axes[r, c]
        if k >= len(days):
            ax.axis("off")
            continue
        day = days[k]
        day0 = pd.Timestamp(day, tz="UTC")

        sd = s30.loc[(s30["date_utc"] == day)].copy()
        if sd.empty:
            ax.text(0.5, 0.5, "Pas de données", transform=ax.transAxes, ha="center", va="center")
            continue

        # x = hours (UT) elapsed since UTC midnight of that day
        hours = (sd["ts_utc"] - day0).dt.total_seconds() / 3600.0

        # curves
        if VTEC_MED_COL in sd.columns:
            ax.plot(hours, sd[VTEC_MED_COL], label="VTEC median (30 min)", **line_kw)
        if has_mean:
            ax.plot(hours, sd[VTEC_MEAN_COL], label="VTEC mean (30 min)", ls="--", **line_kw)
        if GIM_COL in sd.columns:
            ax.plot(hours, sd[GIM_COL], label="GIM (CODG) 30 min", ls=":", **line_kw)

        # offset of the day (daily table when supplied, else computed from the samples)
        off = get_daily_offset(day, sd, daily_df)
        off_txt = f"median offset = {off:.2f} TECU" if np.isfinite(off) else "offset médian = n/a"

        ax.set_title(f"{day}  —  {off_txt}", fontsize=10, loc="center", weight='bold')
        ax.set_xlim(0, 24)
        ax.set_yticks([5, 15, 25, 35, 45])
        ax.grid(True, alpha=0.25)
        ax.set_ylim(0, 35)
        _set_xticks_30min(ax)
        for t in ax.yaxis.get_ticklabels():
            t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
        for t in ax.xaxis.get_ticklabels():
            t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

        # Take the shared legend from the first panel that actually has curves,
        # so an empty top-left panel does not leave the figure without legend.
        if legend_handles is None and ax.lines:
            legend_handles = list(ax.lines)

    plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)
    if legend_handles:
        fig.legend(legend_handles, [h.get_label() for h in legend_handles],
                   loc="upper center", ncol=3, frameon=False, bbox_to_anchor=(0.5, 1))
    fig.text(0.5, 1, f"OUCA Observatory — VTEC 30 min vs GIM (année {year})", ha='center', fontsize=12, weight='bold')
    # The abscissa is hours since UTC midnight, so it is Universal Time, not local time.
    fig.text(0.5, 0.02, 'Universal Time (UT)', ha='center', fontsize=12, weight='bold')
    fig.text(0.08, 0.5, 'TEC [TECU]', va='center', rotation='vertical', fontsize=12, weight='bold')
    out = FIG_DIR / f"VTEC_GIM_daily_2x2_{year}.png"
    fig.savefig(out, dpi=300, bbox_inches="tight")
    plt.close(fig)
    print(f"✅ Figure {year} → {out}")
    return out

# --- run: one figure for every year present in the 30-min CSV ---
years = sorted(set(
    pd.to_datetime(s30["date_utc"], errors="coerce").dt.year.dropna().astype(int)
))

for y in years:
    # alternatively: plot_year_grid(y, s30, days=[date1, date2, date3, date4])
    plot_year_grid(y, s30)


Generate Observed VTEC and GIM for Quiet days

In [None]:
# === Requested time window (bounds stored as plain datetime.date objects) ===
PERIOD_START = pd.Timestamp("2015-10-01", tz="UTC").date()
PERIOD_END   = pd.Timestamp("2025-09-26", tz="UTC").date()

# Output folder for the figures of this cell; created (with parents) if missing.
FIG_DIR = Path(r"C:\Users\mkmoh\Dropbox\1-DATA\TEC_DATA\New_Data\figures\figs_quiet\MS_FIGURES_GIM")
FIG_DIR.mkdir(parents=True, exist_ok=True)

def _set_xticks_30min(ax):
    """Set the x range of *ax* to (0, 24) with a zero-padded hour tick every two hours."""
    tick_hours = range(0, 24, 2)
    ax.set_xlim(0, 24)
    ax.set_xticks(np.arange(0, 24, 2))
    ax.set_xticklabels(["%02d" % hour for hour in tick_hours], rotation=0)
    
daily = _load_daily_csv(CSV_DAILY_WITH_OFF)

# --- Type safety: UTC timestamps, and plain calendar days in both tables ---
s30["ts_utc"] = pd.to_datetime(s30["ts_utc"], utc=True, errors="coerce")
for _frame in (s30, daily):
    _frame["date_utc"] = pd.to_datetime(_frame["date_utc"], errors="coerce").dt.date

# Column holding the 30-min VTEC median, and the one holding the 30-min mean.
VTEC_COL = "VTEC_median"
MEAN_COL = "VTEC_mean"

def get_daily_offset(day, s30_df, daily_df=None):
    """Return the GIM offset (float) for *day*, or NaN when it cannot be determined.

    The first non-null ``gim_offset_tecu`` stored for that day in ``daily_df``
    is used when available; otherwise the offset is the median of
    ``VTEC_COL - vtec_gim`` over the complete rows of ``s30_df`` for that day.
    """
    if daily_df is not None and "gim_offset_tecu" in daily_df.columns:
        stored = daily_df.loc[daily_df["date_utc"] == day, "gim_offset_tecu"].dropna()
        if len(stored):
            return float(stored.iloc[0])

    pairs = s30_df.loc[s30_df["date_utc"] == day, [VTEC_COL, "vtec_gim"]].dropna()
    if pairs.empty:
        return np.nan
    # The GIM column selected above is "vtec_gim"; reading "GIM_VTEC" here
    # raised KeyError whenever this fallback branch was reached.
    return float((pairs[VTEC_COL] - pairs["vtec_gim"]).median())

def daily_quiet_metrics(s30_df, slots_per_day=48):
    """Compute per-day coverage and variability metrics of the 30-min VTEC series.

    Parameters
    ----------
    s30_df : pd.DataFrame
        Table with ``date_utc``, the ``VTEC_COL`` column and ``vtec_gim``.
    slots_per_day : int, default 48
        Number of samples a complete day contains (48 half-hour slots); used
        as the denominator of the coverage ratios.

    Returns
    -------
    pd.DataFrame
        One row per ``date_utc`` (sorted by day) with the columns
        ``date_utc``, ``cov_vtec``, ``cov_gim`` (fraction of non-null samples),
        ``iqr``, ``std`` (of VTEC) and ``dV_med`` (median absolute difference
        between consecutive VTEC samples). The columns are present even when
        the input is empty.
    """
    columns = ["date_utc", "cov_vtec", "cov_gim", "iqr", "std", "dV_med"]
    rows = []
    # Explicit iteration instead of ``groupby(...).apply``: the result no
    # longer depends on how apply treats the grouping column (behaviour that
    # pandas has deprecated), and an empty input still yields every column.
    for day, grp in s30_df.groupby("date_utc"):
        v = pd.to_numeric(grp[VTEC_COL], errors="coerce")
        g = pd.to_numeric(grp["vtec_gim"], errors="coerce")
        iqr = std = dmed = np.nan
        if v.notna().any():
            vv = v.to_numpy(dtype=float)
            iqr = np.nanpercentile(vv, 75) - np.nanpercentile(vv, 25)
            std = np.nanstd(vv)
            # Differences across a gap are NaN and are left out of the median.
            steps = np.abs(np.diff(vv))
            steps = steps[np.isfinite(steps)]
            if steps.size:
                dmed = np.median(steps)
        rows.append({
            "date_utc": day,
            "cov_vtec": v.notna().sum() / float(slots_per_day),
            "cov_gim": g.notna().sum() / float(slots_per_day),
            "iqr": iqr,
            "std": std,
            "dV_med": dmed,
        })
    return pd.DataFrame(rows, columns=columns)

def pick_10_quiet_days_geomagQ(s30_df, daily_df):
    """Select up to 10 quiet days: daily label 'Q', good coverage, lowest VTEC variability.

    Both tables are restricted to PERIOD_START..PERIOD_END. Days labelled 'Q'
    (after trimming and upper-casing ``geomag_label_gfz_QDNQ``) are kept when
    their VTEC coverage is at least 0.85 and their GIM coverage at least 0.70,
    then ranked by ascending ``iqr``, ``dV_med`` and ``std``.

    Raises RuntimeError when the label column is missing from ``daily_df``.
    """
    if "geomag_label_gfz_QDNQ" not in daily_df.columns:
        raise RuntimeError("La colonne 'geomag_label_gfz_QDNQ' est absente du CSV daily.")

    def _within_period(frame):
        # Independent copy of the rows falling inside the requested window.
        inside = (frame["date_utc"] >= PERIOD_START) & (frame["date_utc"] <= PERIOD_END)
        return frame[inside].copy()

    # Daily table: normalise the label, then list the distinct 'Q' days.
    d = _within_period(daily_df)
    d["geomag_label_norm"] = d["geomag_label_gfz_QDNQ"].astype(str).str.strip().str.upper()
    is_quiet = d["geomag_label_norm"] == "Q"
    q_days = d.loc[is_quiet, ["date_utc"]].dropna().drop_duplicates()

    # Variability metrics of the 30-min series, kept only for the 'Q' days.
    metrics = daily_quiet_metrics(_within_period(s30_df))
    metQ = metrics.merge(q_days, on="date_utc", how="inner")

    # Quality gate: enough samples for both the observed VTEC and the GIM.
    enough_data = (metQ["cov_vtec"] >= 0.85) & (metQ["cov_gim"] >= 0.70)
    metQ = metQ[enough_data].copy()

    # Calmest first: iqr, then median |delta|, then std.
    metQ = metQ.sort_values(["iqr", "dV_med", "std"], ascending=True)

    days = list(metQ["date_utc"].head(10))
    if len(days) < 10:
        print(f"[INFO] Seulement {len(days)} jour(s) 'Q' éligible(s) dans la période (couverture/qualité).")
    return days

def plot_quiet_days_5x2(days, s30_df, daily_df=None, title_suffix="jours Q (2015-10 → 2018-07)"):
    """Plot up to 10 quiet days on a 5×2 grid (VTEC median, mean, GIM) and save the figure.

    Parameters
    ----------
    days : iterable
        Days to plot; only the first 10 are used, missing cells are blanked.
    s30_df : pd.DataFrame
        30-min table with ``ts_utc``, ``date_utc``, ``VTEC_COL``, ``vtec_gim``
        and optionally ``MEAN_COL``.
    daily_df : pd.DataFrame, optional
        Daily table forwarded to ``get_daily_offset``.
    title_suffix : str
        Currently unused; kept for backward compatibility with existing calls.

    Returns
    -------
    Path
        Path of the saved PNG.
    """
    # The grid has exactly 10 cells: drop any extra day (it would index past
    # the last row) and pad with None so unused cells can be switched off.
    days = list(days)[:10]
    days += [None] * (10 - len(days))

    fig, axes = plt.subplots(5, 2, figsize=(10, 10), sharex=True, sharey=False, constrained_layout=False)
    fig.subplots_adjust(hspace=0.0, wspace=0.12)
    handles_global = None

    for k, day in enumerate(days):
        r, c = divmod(k, 2)
        ax = axes[r, c]
        if day is None:
            ax.axis("off")
            continue

        df = s30_df.loc[s30_df["date_utc"] == day].copy().sort_values("ts_utc")
        # Hour of day taken from the UTC timestamps.
        t_hours = df["ts_utc"].dt.hour + df["ts_utc"].dt.minute / 60.0

        ax.plot(t_hours, df[VTEC_COL], lw=2, label="VTEC mediane")
        # The mean curve is optional: skip it when the column is not in the table.
        if MEAN_COL is not None and MEAN_COL in df.columns:
            ax.plot(t_hours, df[MEAN_COL], lw=1.8, ls="--", label="VTEC mean")
        ax.plot(t_hours, df["vtec_gim"], lw=1.8, ls="--", label="VTEC GIM")

        off = get_daily_offset(day, s30_df, daily_df)
        off_txt = f"offset median = {off:.2f} TECU" if np.isfinite(off) else "offset médian = n/a"
        # y=0.8 puts the title inside the axes, since the rows touch each other.
        ax.set_title(f"{day} — {off_txt}", fontsize=10, loc="center", weight='bold', y=0.8)
        ax.set_xlim(0, 24)
        ax.grid(True, alpha=0.25)
        ax.set_ylim(0, 21)
        ax.set_yticks([5, 10, 15, 20])

        _set_xticks_30min(ax)
        for t in ax.yaxis.get_ticklabels():
            t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
        for t in ax.xaxis.get_ticklabels():
            t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

        # The shared legend comes from the first populated panel.
        if handles_global is None:
            handles_global = list(ax.lines)

    if handles_global:
        labels_global = [h.get_label() for h in handles_global]
        fig.legend(handles_global, labels_global, loc="upper center", ncol=3, frameon=False, bbox_to_anchor=(0.5, 0.93))

    plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)
    fig.text(0.5, 0.94, "OUCA Observatory — Quiet time VTEC vs GIM", ha='center', fontsize=12, weight='bold')
    # The abscissa is the hour of the UTC timestamp, so it is Universal Time, not local time.
    fig.text(0.5, 0.06, 'Universal Time (UT)', ha='center', fontsize=12, weight='bold')
    fig.text(0.08, 0.5, 'TEC [TECU]', va='center', rotation='vertical', fontsize=12, weight='bold')
    out = FIG_DIR / "VTEC_GIM_10days_geomagQ_5x2_2015-2025.png"
    fig.savefig(out, dpi=300)
    plt.close(fig)
    print(f"✅ Figure sauvegardée → {out}")
    return out

# === Selection + plot (based ONLY on the geomagnetic label == 'Q') ===
quiet_days_Q = pick_10_quiet_days_geomagQ(s30_df=s30, daily_df=daily)
print("Jours 'Q' sélectionnés:", quiet_days_Q)
plot_quiet_days_5x2(
    days=quiet_days_Q,
    s30_df=s30,
    daily_df=daily,
    title_suffix="jours géomagnétiquement calmes (label Q)",
)


Generate Final figures

In [None]:
import re
from pathlib import Path
from typing import Union, List
import numpy as np
from datetime import datetime, timezone, date, timedelta
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# =========================
# CELL 0 — SETUP & HELPERS
# =========================
# ------- Parameters -------
# CSV is the daily table read into `df` below; CSV_30MIN is the 30-min table
# read into `s30`; OUTDIR_FIG is where the figure cells write their PNG files.
CSV = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/2015_2025_MS_VTEC_daily_stats_UTC_with_indices_and_max_with_GFZlabels.csv")
CSV_30MIN  = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/2015_2025_MS_VTEC_30min_stats.csv")
OUTDIR_FIG = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/figures/figs_quiet/")
# Column names used to filter the daily table.
TIME_COL   = "date_utc"
OFFSET_COL = "gim_offset_tecu"
QUIET_COL  = "geomag_label_gfz_QDNQ"      # 'Q','D','NQ'
SOLAR_COL  = "solar_label"                # 'low','high'
LOCAL_TZ = "Africa/Casablanca"   # for the diurnal plots (LT display)
VTEC_COL = "VTEC_median"         # VTEC column in the 30-min CSV
# Analysis window (inclusive on both ends, see the filter on `df` below).
START      = pd.Timestamp("2015-10-01", tz="UTC")
END        = pd.Timestamp("2025-09-26", tz="UTC")
# -------------------------
# ---- Load the 30-min table ----
s30 = pd.read_csv(CSV_30MIN)
t_utc = pd.to_datetime(s30["time"], utc=True, errors="coerce")

# errors="coerce" turns unparseable timestamps into NaT. Their hour/minute are
# NaN, and casting the slot to int would then raise, so drop those rows first
# (and say so) instead of crashing on the cast.
_bad_ts = t_utc.isna()
if _bad_ts.any():
    print(f"Warning: dropping {int(_bad_ts.sum())} row(s) of {CSV_30MIN.name} with an unparseable 'time'")
    s30 = s30.loc[~_bad_ts]
    t_utc = t_utc.loc[~_bad_ts]

s30 = s30.assign(
    ts_utc=t_utc,
    date_utc=t_utc.dt.floor("D").dt.date,
    # half-hour index within the UTC day: 0..47
    slot=(t_utc.dt.hour*2 + (t_utc.dt.minute//30)).astype(int)
)
# Non-numeric VTEC entries become NaN.
s30[VTEC_COL] = pd.to_numeric(s30[VTEC_COL], errors="coerce")

# Load the daily table and restrict it to the analysis window.
# Dates that fail to parse become NaT, fail both comparisons, and are dropped.
df = pd.read_csv(CSV)
df[TIME_COL] = pd.to_datetime(df[TIME_COL], utc=True, errors="coerce")
df = df[(df[TIME_COL] >= START) & (df[TIME_COL] <= END)].copy()

# Boolean masks (aligned on df's index)
# |offset| < 20; a non-numeric/missing offset becomes NaN and compares False.
m_off   = pd.to_numeric(df[OFFSET_COL], errors="coerce").abs() < 20
# rows labelled 'Q'
m_quiet = (df[QUIET_COL] == "Q")
# rows labelled 'low' (case-insensitive)
m_slow  = (df[SOLAR_COL].str.lower() == "low")

# Calendar year of each row (UTC)
df["year"] = df[TIME_COL].dt.year

# --------- Global recap ---------
# One row per filter (and per intersection of filters): number of daily rows
# that pass it, and the share of all rows in the window.
N = len(df)
summary_global = pd.DataFrame(
    [
        ("total", N),
        ("|offset|<20", int(m_off.sum())),
        ("quiet(Q)", int(m_quiet.sum())),
        ("solar_low", int(m_slow.sum())),
        ("|offset|<20 ∩ quiet", int((m_off & m_quiet).sum())),
        ("|offset|<20 ∩ solar_low", int((m_off & m_slow).sum())),
        ("quiet ∩ solar_low", int((m_quiet & m_slow).sum())),
        ("offset ∩ quiet ∩ solar_low", int((m_off & m_quiet & m_slow).sum())),
    ],
    columns=["metric", "count"],
)
summary_global["pct_of_total_%"] = summary_global["count"].mul(100).div(N).round(2)
print("=== Global (Oct 2015 → Sep 2025) ===")
print(summary_global.to_string(index=False))

# --------- Per year: single filters ---------
# Attach the three masks as columns, count the True values per year, then
# express each count as a percentage of that year's number of rows.
by_year_simple = (
    df.assign(**{"off": m_off, "quiet": m_quiet, "solar_low": m_slow})
    .groupby("year")
    .agg(**{
        "total": ("year", "size"),
        "off_sm20": ("off", "sum"),
        "quiet_Q": ("quiet", "sum"),
        "solar_low": ("solar_low", "sum"),
    })
    .assign(**{
        pct_col: (lambda d, src=count_col: d[src].mul(100).div(d["total"]).round(2))
        for pct_col, count_col in (
            ("pct_off", "off_sm20"),
            ("pct_quiet", "quiet_Q"),
            ("pct_slow", "solar_low"),
        )
    })
)
print("\n=== Par année — filtres simples ===")
print(by_year_simple.to_string())

# --------- Per year: intersections ---------
# Same layout as the single-filter table, for the pairwise intersections of the
# masks and for the intersection of all three.
by_year_inter = (
    df.assign(**{
        "inter_off_quiet": m_off & m_quiet,
        "inter_off_slow": m_off & m_slow,
        "inter_quiet_slow": m_quiet & m_slow,
        "inter_all": m_off & m_quiet & m_slow,
    })
    .groupby("year")
    .agg(**{
        "total": ("year", "size"),
        "off_quiet": ("inter_off_quiet", "sum"),
        "off_slow": ("inter_off_slow", "sum"),
        "quiet_slow": ("inter_quiet_slow", "sum"),
        "all_three": ("inter_all", "sum"),
    })
    .assign(**{
        f"pct_{count_col}": (lambda d, src=count_col: d[src].mul(100).div(d["total"]).round(2))
        for count_col in ("off_quiet", "off_slow", "quiet_slow", "all_three")
    })
)
print("\n=== Par année — intersections ===")
print(by_year_inter.to_string())

In [None]:
# --------- (Optional) keep only the three-filter intersection for the next cells ---------
df_kept = df.loc[m_off & m_quiet & m_slow].copy()
print(f"\nLignes gardées (intersection trois filtres): {len(df_kept)}/{N}")

# Normalise the day key to plain `date` objects, taken from "date_utc" when the
# column exists and from "date" otherwise.
d = pd.to_datetime(
    df_kept["date_utc" if "date_utc" in df_kept.columns else "date"],
    utc=True,
    errors="coerce",
)
df_kept["date_utc"] = d.dt.floor("D").dt.date

# 30-min samples that belong to one of the kept days.
S = s30.loc[s30["date_utc"].isin(df_kept["date_utc"])].copy().reset_index(drop=True)

In [None]:
# ---- utilitaires ----
def _set_xticks_30min(ax):
    """Configure the x axis of `ax` for a 48-slot (30-min) day.

    The axis spans 0..48 slots, with one tick every 8 slots (4 hours) labelled
    with the two-digit hour "00", "04", ..., "20".
    """
    tick_hours = list(range(0, 24, 4))
    ax.set_xlim(0, 48)
    # two slots per hour
    ax.set_xticks(np.array(tick_hours) * 2)
    ax.set_xticklabels(["{:02d}".format(hour) for hour in tick_hours], rotation=0)

def _clean_slot(df, name="slot"):
    """Return `df` with `name` out of the index and without duplicated columns.

    - If `name` is an index level and NOT already a column, the index is reset
      (all levels become columns), as before.
    - If `name` is an index level AND already a column, only that index level
      is dropped. A plain ``reset_index()`` raises ValueError here ("cannot
      insert ..., already exists"), so this case previously crashed.
    - Columns whose label is duplicated are reduced to their first occurrence.

    The input frame is not modified.
    """
    out = df
    if name in (out.index.names or []):
        if name in out.columns:
            # The column already carries the values; discard the index copy.
            out = out.reset_index(level=name, drop=True)
        else:
            out = out.reset_index()
    if out.columns.duplicated().any():
        out = out.loc[:, ~out.columns.duplicated()]
    return out

def agg_diurnal(df, val=VTEC_COL):
    """Aggregate column `val` per 30-min slot on a fixed 0..47 axis.

    Returns a 48-row DataFrame with columns "slot", "median", "mean", "q25",
    "q75", "std" (sample std, ddof=1) and "N" (non-null count). Slots with no
    data in `df` are present with NaN statistics.
    """
    frame = _clean_slot(df, "slot")
    slots = pd.Index(range(48), name="slot")
    grouped = frame.groupby("slot")[val]

    def _on_full_axis(series):
        # Align a per-slot statistic on the complete 0..47 slot axis.
        aligned = series.copy()
        aligned.index.name = "slot"
        return aligned.reindex(slots)

    per_slot = {
        "median": grouped.median(),
        "mean": grouped.mean(),
        "q25": grouped.quantile(0.25),
        "q75": grouped.quantile(0.75),
        "std": grouped.std(ddof=1),
        "N": grouped.count(),
    }
    columns = {"slot": slots.values}
    columns.update((label, _on_full_axis(stat)) for label, stat in per_slot.items())
    return pd.DataFrame(columns).reset_index(drop=True)

def slot_ticks_2h(ax):
    """Configure the x axis of `ax` in slots with one labelled tick every 2 hours.

    The axis spans -1..48 slots; ticks sit every 4 slots and are labelled with
    the two-digit hour "00", "02", ..., "22".
    """
    tick_hours = list(range(0, 24, 2))
    ax.set_xlim(-1, 48)
    # two slots per hour
    ax.set_xticks(np.array(tick_hours) * 2)
    ax.set_xticklabels(["{:02d}".format(hour) for hour in tick_hours], rotation=0)

def nice_y(ax, vmin=0, vmax=50, step=10):
    """Set the y limits of `ax` to (vmin, vmax) with ticks every `step`.

    Ticks are generated from `vmin` up to, but excluding, `vmax + step`.
    """
    tick_stop = vmax + step
    ax.set_ylim(vmin, vmax)
    ax.set_yticks(np.arange(vmin, tick_stop, step))

# One colour per grid row; the palette is reused cyclically after six rows.
row_colors = ['green', 'red', 'blue', 'orange', 'purple', 'brown']


def _panel_color(idx):
    """Return the colour of panel `idx` in a two-column grid.

    Both panels of a row (idx 2k and 2k+1) share the same colour.
    """
    row = idx // 2
    return row_colors[row % len(row_colors)]

# Report how many daily rows were kept and how many 30-min samples fall on those days.
print(f"Quiet days retenus: {len(df_kept)} — échantillons 30min: {len(S)}")

In [None]:
# ==========================================
# CELL 1 — DIURNAL CURVES PER YEAR (2 columns, up to 6 rows)
# median + IQR (q25–q75) + mean — quiet days
# ==========================================
years = sorted({pd.Timestamp(d).year for d in df_kept['date_utc']})
n = len(years)
rows, cols = (6, 2) if n > 6 else (int(np.ceil(n/2)), 2)
fig, axes = plt.subplots(rows, cols, figsize=(12, 2*rows), squeeze=False, sharex='col')
ylim=(0,50)
# Year of every 30-min sample, computed once rather than once per panel.
_S_year = pd.to_datetime(S["date_utc"]).dt.year
for i, y in enumerate(years):
    ax = axes[i//2, i%2]
    color = _panel_color(i)
    Sy = S[_S_year == y]
    D = agg_diurnal(Sy)

    ax.plot(D["slot"].values, D["median"].values, '-',color=color,  lw=2.2, label='median')
    ax.fill_between(D["slot"].values, D["q25"].values, D["q75"].values,color=color, alpha=0.2, label='IQR (25–75)')
    ax.plot(D["slot"].values, D["mean"].values,   '--',color='k', lw=1.6, label='mean')

    ax.set_ylim(*ylim)
    ax.set_yticks([5,15,25,35,45])

    # The year is shown as the panel's y label.
    label = str(y)
    ax.set_ylabel(label, weight='bold')

    _set_xticks_30min(ax)

    ax.grid(alpha=0.3)
    # tick label styling
    for t in ax.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
    for t in ax.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
    if i == 0:
        ax.legend(loc='upper left', fontsize=8, frameon=False)

# blank out unused grid cells
for j in range(n, rows*cols):
    axes[j//2, j%2].axis('off')

plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)
# With sharex='col' only the bottom row keeps its x tick labels. When the
# bottom cell of a column is blanked (odd number of years) that column would
# show no labels at all, so switch them on for the last used panel of each column.
for _c in range(cols):
    _used_rows = [_r for _r in range(rows) if _r*cols + _c < n]
    if _used_rows:
        axes[_used_rows[-1], _c].tick_params(axis='x', labelbottom=True)
fig.subplots_adjust(hspace=0)

fig.text(0.5, 0.89, 'Annually Diurnal Quiet time VTEC', ha='center', fontsize=12, weight='bold')
fig.text(0.5, 0.06, 'Local Time (LT)', ha='center', fontsize=12, weight='bold')
fig.text(0.06, 0.5, 'TEC [TECU]', va='center', rotation='vertical', fontsize=12, weight='bold')

out = OUTDIR_FIG / "Low_solar_activity_QUIET_YEARS_diurnal_mean_median_IQR_MS.png"
fig.savefig(out, dpi=300, bbox_inches='tight')
plt.close(fig)
print("Saved:", out)


In [None]:
# ==========================================
# CELL 2 — DIURNAL CURVES PER MONTH (6×2)
# median + IQR + mean over all quiet days, all years pooled
# ==========================================
ylim=(0,50)
fig, axes = plt.subplots(6,2, figsize=(12,12), squeeze=False, sharex='col')
# Calendar month of every 30-min sample, computed once rather than once per panel.
_S_month = pd.to_datetime(S["date_utc"]).dt.month
for m in range(1,13):
    ax = axes[(m-1)//2, (m-1)%2]
    Sm = S[_S_month == m]
    D = agg_diurnal(Sm)
    color = _panel_color(m-1)
    ax.plot(D["slot"], D["median"], '-',color=color, lw=2.2, label='median')
    ax.fill_between(D["slot"], D["q25"], D["q75"],color=color, alpha=0.2, label='IQR')
    ax.plot(D["slot"], D["mean"], '--',color='k', lw=1.6, label='mean')
    ax.set_ylim(*ylim)
    ax.set_yticks([5,15,25,35,45])

    # The month name is shown as the panel's y label (set once).
    label = pd.Timestamp(2000, m, 1).strftime('%B')
    ax.set_ylabel(label, weight='bold')

    _set_xticks_30min(ax)
    ax.grid(alpha=0.3)

    # tick label styling
    for t in ax.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
    for t in ax.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

axes[0,0].legend(frameon=False, fontsize=9, loc='upper left')

plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)
fig.subplots_adjust(hspace=0)

fig.text(0.5, 0.89, 'Monthly Diurnal Quiet time VTEC', ha='center', fontsize=12, weight='bold')
fig.text(0.5, 0.06, 'Local Time (LT)', ha='center', fontsize=12, weight='bold')
fig.text(0.06, 0.5, 'TEC [TECU]', va='center', rotation='vertical', fontsize=12, weight='bold')

out = OUTDIR_FIG/"Low_solar_activity_QUIET_MONTHS_diurnal_mean_median_IQR_MS.png"
fig.savefig(out, dpi=300, bbox_inches='tight'); plt.close(fig)
print("Saved:", out)


In [None]:
# ==========================================
# CELL A — STATS CLÉS COURBES DIURNES PAR MOIS
# (à partir de S et agg_diurnal)
# ==========================================
import numpy as np
import pandas as pd

def _slot_to_lt_h(slot: int) -> float:
    """Convert a 30-min slot index (0..47) to decimal hours (0.0..23.5)."""
    return float(slot) / 2.0

def _slot_to_lt_str(slot: int) -> str:
    """Format a 30-min slot index (0..47) as a zero-padded 'HH:MM' string."""
    decimal_hours = float(slot) / 2.0
    whole_hours = int(decimal_hours)
    minutes = int(round(60 * (decimal_hours - whole_hours)))
    return "{:02d}:{:02d}".format(whole_hours, minutes)

# One dict per calendar month that has data; turned into a table after the loop.
rows = []

for m in range(1, 13):
    # 30-min samples of the kept days that fall in month m (all years pooled)
    Sm = S[pd.to_datetime(S["date_utc"]).dt.month == m].copy()
    if Sm.empty:
        continue

    D = agg_diurnal(Sm).copy()   # must contain at least: 'slot', 'median'
    # keep only the slots that actually have a median
    D = D.dropna(subset=["median"])
    if D.empty:
        continue

    # diurnal peak of the month (maximum of the per-slot median)
    idx_max = D["median"].idxmax()
    peak_val = float(D.loc[idx_max, "median"])
    peak_slot = int(D.loc[idx_max, "slot"])
    peak_lt   = _slot_to_lt_str(peak_slot)

    # diurnal minimum (often pre-dawn / night)
    idx_min = D["median"].idxmin()
    min_val = float(D.loc[idx_min, "median"])
    min_slot = int(D.loc[idx_min, "slot"])
    min_lt   = _slot_to_lt_str(min_slot)

    # day/night contrast (intra-day amplitude)
    day_night_range = peak_val - min_val

    # for information: level in a pre-dawn window (00–04) and an afternoon window (12–18).
    # Both values are the MEAN of the per-slot medians inside the window.
    D["lt_hour"] = D["slot"].apply(_slot_to_lt_h)
    pre_dawn = D[(D["lt_hour"] >= 0.0) & (D["lt_hour"] < 4.0)]
    afternoon = D[(D["lt_hour"] >= 12.0) & (D["lt_hour"] < 18.0)]

    pre_dawn_med = float(pre_dawn["median"].mean()) if not pre_dawn.empty else np.nan
    aft_med      = float(afternoon["median"].mean()) if not afternoon.empty else np.nan

    rows.append({
        "month": m,
        "month_name": pd.Timestamp(2000, m, 1).strftime("%b"),
        "peak_median_TECU": peak_val,
        "lt_of_peak": peak_lt,
        "min_median_TECU": min_val,
        "lt_of_min": min_lt,
        "day_night_range_TECU": day_night_range,
        "pre_dawn_median_00_04_TECU": pre_dawn_med,
        "afternoon_median_12_18_TECU": aft_med,
    })

# NOTE(review): if no month produced a row, the frame has no "month" column and
# sort_values raises KeyError.
monthly_diurnal_stats = pd.DataFrame(rows).sort_values("month")

print("=== Monthly diurnal median VTEC stats (quiet QSL–GIM20) ===")
print(monthly_diurnal_stats.to_string(index=False,
      float_format=lambda x: f"{x:6.2f}"))

# Quick summaries for the text
print("\n--- Key diagnostics for text ---")
# month with the largest diurnal peak
i_max = monthly_diurnal_stats["peak_median_TECU"].idxmax()
print("Largest monthly diurnal median peak:",
      f"{monthly_diurnal_stats.loc[i_max, 'peak_median_TECU']:.2f} TECU",
      f"in {monthly_diurnal_stats.loc[i_max, 'month_name']} at",
      monthly_diurnal_stats.loc[i_max, "lt_of_peak"])

# month with the smallest diurnal peak
i_min = monthly_diurnal_stats["peak_median_TECU"].idxmin()
print("Smallest monthly diurnal median peak:",
      f"{monthly_diurnal_stats.loc[i_min, 'peak_median_TECU']:.2f} TECU",
      f"in {monthly_diurnal_stats.loc[i_min, 'month_name']} at",
      monthly_diurnal_stats.loc[i_min, "lt_of_peak"])

# range of the peak times: earliest to latest over the months.
# The first/last list entries are only the first and last MONTH's peak times,
# not the bounds of the range. The strings are zero-padded "HH:MM", so their
# lexicographic min/max is also the chronological min/max.
lt_peaks = monthly_diurnal_stats["lt_of_peak"].tolist()
print("Monthly peak times (LT) from",
      min(lt_peaks), "to", max(lt_peaks),
      "for", ", ".join(monthly_diurnal_stats['month_name'].tolist()))


In [None]:
# ==========================================
# CELL 3 — COURBES DIURNES PAR SAISON (2×2)
# median + IQR + mean — jours quiet
# ==========================================
def season_of_month(m):
    """Map a month number to its season code: "DJF", "MAM", "JJA" or "SON".

    Any value that is not one of the months 12, 1..8 falls through to "SON".
    """
    if m in (12, 1, 2):
        return "DJF"
    if m in (3, 4, 5):
        return "MAM"
    if m in (6, 7, 8):
        return "JJA"
    return "SON"

# Season code of every 30-min sample.
S["season"] = pd.to_datetime(S["date_utc"]).dt.month.map(season_of_month)

order = ["DJF","MAM","JJA","SON"]
# Defined here so that this cell does not depend on another plotting cell
# having been run first (same limits as the yearly and monthly figures).
ylim = (0, 50)
fig, axes = plt.subplots(2,2, figsize=(12,5), squeeze=False, sharex='col')

for i, sname in enumerate(order):
    color = _panel_color(i)
    ax = axes[i//2, i%2]
    Ss = S[S["season"]==sname]
    D = agg_diurnal(Ss)

    ax.plot(D["slot"], D["median"], '-', color=color, lw=2.2, label='median')
    ax.fill_between(D["slot"], D["q25"], D["q75"],color=color, alpha=0.2, label='IQR')
    ax.plot(D["slot"], D["mean"], '--',color='k', lw=1.6, label='mean')

    ax.set_ylim(*ylim)
    ax.set_yticks([5,15,25,35,45])

    # The season code is shown as the panel's y label.
    label = sname
    ax.set_ylabel(label, weight='bold')

    _set_xticks_30min(ax)

    # tick label styling
    for t in ax.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
    for t in ax.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

    ax.grid(alpha=0.3)


axes[0,0].legend(frameon=False, fontsize=9, loc='upper left')

plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)
fig.subplots_adjust(hspace=0)

fig.text(0.5, 0.89, 'Seasonal Diurnal Quiet time VTEC', ha='center', fontsize=12, weight='bold')
fig.text(0.5, 0.03, 'Local Time (LT)', ha='center', fontsize=12, weight='bold')
fig.text(0.06, 0.5, 'TEC [TECU]', va='center', rotation='vertical', fontsize=12, weight='bold')

out = OUTDIR_FIG/"Low_solar_activity_QUIET_SEASONS_diurnal_mean_median_IQR_MS.png"
fig.savefig(out, dpi=300, bbox_inches='tight'); plt.close(fig)
print("Saved:", out)

In [None]:
# ==========================================
# CELL B — STATS CLÉS COURBES DIURNES PAR SAISON
# (à partir de S, season_of_month et agg_diurnal)
# ==========================================
import numpy as np
import pandas as pd

def _slot_to_lt_h(slot: int) -> float:
    """Convert a 30-min slot index (0..47) to decimal hours (0.0..23.5)."""
    return float(slot) / 2.0

def _slot_to_lt_str(slot: int) -> str:
    """Format a 30-min slot index (0..47) as a zero-padded 'HH:MM' string."""
    decimal_hours = float(slot) / 2.0
    whole_hours = int(decimal_hours)
    minutes = int(round(60 * (decimal_hours - whole_hours)))
    return "{:02d}:{:02d}".format(whole_hours, minutes)

def season_of_month(m):
    """Map a month number to its season code: "DJF", "MAM", "JJA" or "SON".

    Any value that is not one of the months 12, 1..8 falls through to "SON".
    """
    for months, code in (((12, 1, 2), "DJF"), ((3, 4, 5), "MAM"), ((6, 7, 8), "JJA")):
        if m in months:
            return code
    return "SON"

# Work on a copy of S and (re)derive the month and season of every sample.
S = S.copy()
S["month"] = pd.to_datetime(S["date_utc"]).dt.month
S["season"] = S["month"].map(season_of_month)

# One dict per season that has data, appended in the order given by `order`.
rows = []
order = ["DJF","MAM","JJA","SON"]

for sname in order:
    Ss = S[S["season"] == sname].copy()
    if Ss.empty:
        continue

    D = agg_diurnal(Ss).copy()
    # keep only the slots that actually have a median
    D = D.dropna(subset=["median"])
    if D.empty:
        continue

    # seasonal diurnal peak (maximum of the per-slot median)
    idx_max = D["median"].idxmax()
    peak_val = float(D.loc[idx_max, "median"])
    peak_slot = int(D.loc[idx_max, "slot"])
    peak_lt   = _slot_to_lt_str(peak_slot)

    # seasonal minimum
    idx_min = D["median"].idxmin()
    min_val = float(D.loc[idx_min, "median"])
    min_slot = int(D.loc[idx_min, "slot"])
    min_lt   = _slot_to_lt_str(min_slot)

    day_night_range = peak_val - min_val

    # Level in a 00–04 window and a 12–18 window: each value is the MEAN of the
    # per-slot medians inside the window.
    D["lt_hour"] = D["slot"].apply(_slot_to_lt_h)
    pre_dawn = D[(D["lt_hour"] >= 0.0) & (D["lt_hour"] < 4.0)]
    afternoon = D[(D["lt_hour"] >= 12.0) & (D["lt_hour"] < 18.0)]

    pre_dawn_med = float(pre_dawn["median"].mean()) if not pre_dawn.empty else np.nan
    aft_med      = float(afternoon["median"].mean()) if not afternoon.empty else np.nan

    rows.append({
        "season": sname,
        "peak_median_TECU": peak_val,
        "lt_of_peak": peak_lt,
        "min_median_TECU": min_val,
        "lt_of_min": min_lt,
        "day_night_range_TECU": day_night_range,
        "pre_dawn_median_00_04_TECU": pre_dawn_med,
        "afternoon_median_12_18_TECU": aft_med,
    })

# `rows` was filled in the order DJF, MAM, JJA, SON and seasons without data
# were skipped, so the table can be built directly. Selecting every season with
# .loc raises KeyError as soon as one season is missing (or `rows` is empty);
# naming the columns keeps the frame well-formed in that case too.
seasonal_diurnal_stats = pd.DataFrame(rows, columns=[
    "season",
    "peak_median_TECU",
    "lt_of_peak",
    "min_median_TECU",
    "lt_of_min",
    "day_night_range_TECU",
    "pre_dawn_median_00_04_TECU",
    "afternoon_median_12_18_TECU",
])

print("=== Seasonal diurnal median VTEC stats (quiet QSL–GIM20) ===")
print(seasonal_diurnal_stats.to_string(index=False,
      float_format=lambda x: f"{x:6.2f}"))

# One ready-to-quote sentence per season.
print("\n--- Key diagnostics for text ---")
for _, r in seasonal_diurnal_stats.iterrows():
    print(f"{r['season']}: peak median ≈ {r['peak_median_TECU']:.2f} TECU at {r['lt_of_peak']} LT; "
          f"night min ≈ {r['min_median_TECU']:.2f} TECU at {r['lt_of_min']} LT; "
          f"day–night range ≈ {r['day_night_range_TECU']:.2f} TECU; "
          f"pre-dawn (00–04 LT) median ≈ {r['pre_dawn_median_00_04_TECU']:.2f} TECU; "
          f"afternoon (12–18 LT) median ≈ {r['afternoon_median_12_18_TECU']:.2f} TECU.")


In [None]:
# ==========================================
# CELL 4 — HEURE DU MAX QUOTIDIEN (HISTOGRAMME, LT)
# ==========================================
# For each quiet day: slot of the daily maximum. idxmax returns the FIRST
# occurrence when the maximum is reached in several slots.
# Rows without a VTEC value are dropped beforehand so that a day holding only
# NaNs is skipped instead of yielding a NaN label that S.loc cannot resolve.
idx = S.dropna(subset=[VTEC_COL]).groupby("date_utc")[VTEC_COL].idxmax()
daily_max = S.loc[idx, ["date_utc","slot",VTEC_COL]].sort_values("date_utc")
# slot → decimal hours, 0..23.5
# NOTE(review): `slot` is computed from the UTC timestamp when s30 is loaded, so
# these are UT hours unless the file's times are already local — confirm the
# "Local Time (LT)" wording used below.
hours = daily_max["slot"]/2.0
fig, ax = plt.subplots(figsize=(10,5))
# 30-min bins centred on 0.0, 0.5, ..., 23.5
ax.hist(hours, bins=np.arange(-0.25,24.75,0.5), edgecolor='k', alpha=0.7)
ax.set_ylabel("Count of days", weight='bold')

# Hour-based x axis, set up inline. Defining a function called
# _set_xticks_30min here would overwrite the slot-based helper of the same name
# that the diurnal-curve cells call, breaking them when they are re-run.
ax.set_xlim(-1, 24)
ax.set_xticks(np.arange(0, 24, 4))
ax.set_xticklabels([f"{h:02d}" for h in range(0, 24, 4)], rotation=0)
# tick label styling
for t in ax.yaxis.get_ticklabels():
    t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
for t in ax.xaxis.get_ticklabels():
    t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

ax.grid(alpha=0.3)
ax.set_title("Distribution of Local-Time of the Daily VTEC Maximum on Quiet Days", weight='bold')
ax.set_xlabel("Local Time (LT)", weight='bold')
out = OUTDIR_FIG/"Low_solar_activity_QUIET_hist_hour_of_daily_max_LT_MS.png"
fig.savefig(out, dpi=300); plt.close(fig)
print("Saved:", out)



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import pytz

# ---------------- CONFIG ----------------
LOCAL_TZ = "Africa/Casablanca"
# NOTE(review): a later cell sets VTEC_MAX_COL = "VTEC_max_from_30min"; confirm
# which of the two columns of the daily table is intended here.
VTEC_MAX_COL = "VTEC_max"  # amplitude of the maximum on the 30-min grid
TIME_COL_UTC = "max_ts_utc"           # timestamp of the maximum in UTC (tz-aware)
OUT_FIG = Path(r"C:\Users\mkmoh\Dropbox\1-DATA\TEC_DATA\New_Data\figures\figs_quiet\TEC09_Low_solar_activity_QUIET_hist_hour_of_daily_max_LT_MS.png")
# ----------------------------------------


def hist_fwhm(centers, counts):
    """Simple full-width-at-half-maximum estimate for a histogram.

    The width is the distance between the first and the last bin centre whose
    count is at least half of the highest count.

    Returns NaN for an empty histogram, for a non-positive peak, or when no bin
    reaches the half-maximum; returns 0.0 when a single bin does.
    """
    heights = np.asarray(counts, float)
    positions = np.asarray(centers, float)
    if heights.size == 0:
        return np.nan

    top = heights.max()
    if top <= 0:
        return np.nan

    at_least_half = heights >= top / 2.0
    if not at_least_half.any():
        return np.nan

    selected = positions[at_least_half]
    return 0.0 if selected.size == 1 else selected[-1] - selected[0]


# ====== DATAFRAME PREPARATION ======
df_daily = df_kept.copy()

# make sure max_ts_utc is a UTC datetime (unparseable values become NaT)
df_daily[TIME_COL_UTC] = pd.to_datetime(df_daily[TIME_COL_UTC], utc=True, errors="coerce")
tz = pytz.timezone(LOCAL_TZ)

# Local-time view of the timestamp of the daily maximum, and fields derived from it.
df_daily["max_ts_lt"] = df_daily[TIME_COL_UTC].dt.tz_convert(tz)
# decimal local hour (hours + minutes/60; seconds are not included)
df_daily["max_hour_lt"] = df_daily["max_ts_lt"].dt.hour + df_daily["max_ts_lt"].dt.minute/60.0
df_daily["month"] = df_daily["max_ts_lt"].dt.month
df_daily["doy"] = df_daily["max_ts_lt"].dt.dayofyear  # in case DOY is wanted later

# basic filtering: keep rows where both the hour and the amplitude are finite
# NOTE(review): np.isfinite needs a numeric column — presumably VTEC_MAX_COL is
# already numeric in the CSV; it is not coerced here.
df_daily = df_daily[np.isfinite(df_daily["max_hour_lt"]) & np.isfinite(df_daily[VTEC_MAX_COL])].copy()

if df_daily.empty:
    print("df_daily est vide après filtrage.")
else:
    # ====== THREE-PANEL FIGURE ======
    # 1) histogram of the local hour of the daily maximum
    # 2) amplitude of the daily maximum versus month
    # 3) local hour of the daily maximum versus month
    fig, (ax1, ax2, ax3) = plt.subplots(
        3, 1, figsize=(10, 9),
        gridspec_kw={"height_ratios": [2.0, 1.3, 1.3]}
    )

    # ---------- 1) HISTOGRAM OF THE LOCAL HOUR OF THE MAXIMUM ----------
    h = df_daily["max_hour_lt"].values

    # 30-min bins between 0 and 24 h
    bins = np.arange(0.0, 24.0 + 0.5, 0.5)
    counts, bin_edges, patches = ax1.hist(
        h, bins=bins, edgecolor="black", alpha=0.7,
        label="Daily max count"
    )
    centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    # curve joining the bin tops
    ax1.plot(centers, counts, "-o", linewidth=1.5, markersize=4,
             label="Bin-top curve")

    # stats: mode (centre of the tallest bin), FWHM, height of the peak
    if counts.sum() > 0:
        idx_peak = int(np.argmax(counts))
        mode_hour = centers[idx_peak]
        median_hour = df_daily["max_hour_lt"].median()
        peak_amp = int(counts[idx_peak])
        fwhm = hist_fwhm(centers, counts)

        print(f"Mode ≈ {mode_hour:.2f} LT, median ≈ {median_hour:.2f} LT, FWHM ≈ {fwhm:.2f} h, peak = {peak_amp} days.")

        # The summary is shown as the legend title.
        stats_label = (
            f"Mode ≈ {mode_hour:4.1f} h LT, "
            f"FWHM ≈ {fwhm:3.1f} h, "
            f"peak = {peak_amp} days"
        )
        ax1.legend(title=stats_label, loc="upper left", fontsize=10)
    else:
        ax1.legend(loc="upper left", fontsize=9)

    ax1.set_xlim(0, 24)
    ax1.set_xticks(np.arange(0, 25, 2))
    ax1.set_ylabel("Number of days", weight="bold")
    ax1.set_xlabel("Local Time (LT)", weight="bold")
    ax1.set_title("Local time of daily VTEC maximum (QSL–GIM20)", weight="bold")
    ax1.grid(True, alpha=0.3)
    for t in ax1.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color("black"); t.set_weight("bold")
    for t in ax1.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color("black"); t.set_weight("bold")

    # ---------- 2) SCATTER: AMPLITUDE OF THE MAXIMUM VS MONTH ----------
    x_month = df_daily["month"].values
    y_vtec = df_daily[VTEC_MAX_COL].values

    ax2.scatter(x_month, y_vtec, color='k', alpha=0.4, s=15,
                label="Daily maxima")

    # monthly median as the seasonal trend (NaN for months without data)
    month_median_vtec = (df_daily
                         .groupby("month")[VTEC_MAX_COL]
                         .median()
                         .reindex(np.arange(1, 13)))

    ax2.plot(np.arange(1, 13), month_median_vtec.values, "-o",
             linewidth=2, label="Monthly median max")

    ax2.set_xlim(0.5, 12.5)
    ax2.set_xticks(np.arange(1, 13))
    ax2.set_xticklabels(["Jan","Feb","Mar","Apr","May","Jun",
                         "Jul","Aug","Sep","Oct","Nov","Dec"])
    ax2.set_ylabel("Daily max VTEC [TECU]", weight="bold")
    ax2.set_xlabel("Month of year", weight="bold")
    ax2.grid(True, alpha=0.3)
    ax2.legend(loc="upper center", fontsize=9)

    for t in ax2.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color("black"); t.set_weight("bold")
    for t in ax2.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color("black"); t.set_weight("bold")

    # ---------- 3) SCATTER: HOUR OF THE MAXIMUM VS MONTH ----------
    y_hour = df_daily["max_hour_lt"].values

    ax3.scatter(x_month, y_hour, color='k', alpha=0.4, s=15,
                label="Daily peak time")

    # monthly median of the hour of the maximum (NaN for months without data)
    month_median_hour = (df_daily
                         .groupby("month")["max_hour_lt"]
                         .median()
                         .reindex(np.arange(1, 13)))

    ax3.plot(np.arange(1, 13), month_median_hour.values, "-o",
             linewidth=2, label="Monthly median time")

    ax3.set_xlim(0.5, 12.5)
    ax3.set_xticks(np.arange(1, 13))
    ax3.set_xticklabels(["Jan","Feb","Mar","Apr","May","Jun",
                         "Jul","Aug","Sep","Oct","Nov","Dec"])
    ax3.set_xlabel("Month of year", weight="bold")
    ax3.set_ylabel("Local time \n of daily max [LT]", weight="bold")
    ax3.set_ylim(0, 24)
    ax3.grid(True, alpha=0.3)
    ax3.legend(loc="lower center", fontsize=9)

    for t in ax3.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color("black"); t.set_weight("bold")
    for t in ax3.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color("black"); t.set_weight("bold")

    fig.savefig(OUT_FIG, dpi=300)
    plt.close(fig)
    print("Saved:", OUT_FIG)


In [None]:
# Seasonal variability of the hour of the daily peak.
# Works on a copy of df_daily; the month is taken from the local-time timestamp
# of the daily maximum ("max_ts_lt").
df_season = df_daily.copy()
df_season["month"] = df_season["max_ts_lt"].dt.month

def month_to_season(m):
    """Return the season code of month `m`: "DJF", "MAM", "JJA", otherwise "SON"."""
    for months, code in (((12, 1, 2), "DJF"), ((3, 4, 5), "MAM"), ((6, 7, 8), "JJA")):
        if m in months:
            return code
    return "SON"

# Season code of each day, derived from its month.
df_season["season"] = df_season["month"].apply(month_to_season)

# Median and IQR of peak time per season
def iqr(x):
    """Interquartile range (75th minus 25th percentile) of `x`, ignoring NaNs."""
    values = np.asarray(x, float)
    lower, upper = np.nanpercentile(values, [25, 75])
    return upper - lower

# Per season: number of days, median and IQR of the local hour of the daily maximum.
season_stats = (df_season
                .groupby("season")["max_hour_lt"]
                .agg(["count", "median", iqr])
                .reset_index())
print(season_stats)


In [None]:
# Amplitude of daily peak variability
import numpy as np
import pandas as pd

# --------- CONFIG ---------
VTEC_MAX_COL = "VTEC_max_from_30min"   # daily amplitude of the maximum
TIME_COL_UTC = "max_ts_utc"           # timestamp of the maximum in UTC
# ---------------------------

# Start again from the kept daily rows (this rebinds df_daily).
df_daily = df_kept.copy()

# Enforce the expected types (invalid entries become NaN / NaT)
df_daily[VTEC_MAX_COL] = pd.to_numeric(df_daily[VTEC_MAX_COL], errors="coerce")
df_daily[TIME_COL_UTC] = pd.to_datetime(df_daily[TIME_COL_UTC], utc=True, errors="coerce")

# Derive month if not present
# NOTE(review): this month comes from the UTC timestamp, whereas the peak-time
# figure cell derives it from the local-time timestamp — confirm this is intended.
if "month" not in df_daily.columns:
    df_daily["month"] = df_daily[TIME_COL_UTC].dt.month

# Drop rows whose amplitude is NaN or infinite
df_daily = df_daily[np.isfinite(df_daily[VTEC_MAX_COL])].copy()
if df_daily.empty:
    print("df_daily is empty after filtering.")
else:
    # ----- 1) Overall range of the daily maxima -----
    vtec_min = df_daily[VTEC_MAX_COL].min()
    vtec_max = df_daily[VTEC_MAX_COL].max()
    vtec_p05 = np.nanpercentile(df_daily[VTEC_MAX_COL], 5)
    vtec_p95 = np.nanpercentile(df_daily[VTEC_MAX_COL], 95)

    print("Global daily peak VTEC range:")
    print(f"  min        = {vtec_min:.2f} TECU")
    print(f"  max        = {vtec_max:.2f} TECU")
    print(f"  5th perc   = {vtec_p05:.2f} TECU  (robust lower bound)")
    print(f"  95th perc  = {vtec_p95:.2f} TECU  (robust upper bound)")
    print("")

    # ----- 2) Monthly medians of the daily maxima -----
    # Both series are reindexed on months 1..12, so months without data are NaN.
    month_median = (df_daily
                    .groupby("month")[VTEC_MAX_COL]
                    .median()
                    .reindex(np.arange(1,13)))
    month_count = (df_daily
                   .groupby("month")[VTEC_MAX_COL]
                   .count()
                   .reindex(np.arange(1,13)))

    month_stats = pd.DataFrame({
        "month": np.arange(1,13),
        "n_days": month_count.values,
        "median_VTEC_max": month_median.values
    })

    month_name = {
        1:"Jan", 2:"Feb", 3:"Mar", 4:"Apr", 5:"May", 6:"Jun",
        7:"Jul", 8:"Aug", 9:"Sep", 10:"Oct", 11:"Nov", 12:"Dec"
    }
    month_stats["name"] = month_stats["month"].map(month_name)

    print("Monthly median of daily VTEC maxima:")
    print(month_stats.to_string(index=False, float_format=lambda x: f"{x:6.2f}"))
    print("")

    # ----- 3) Months with the largest and the smallest maxima -----

    # Month with the largest median maximum (over the whole year).
    # The variable name says "equinox_summer", but the value is simply the
    # largest monthly median, whichever month it falls in.
    idx_max = month_median.idxmax()
    vtec_equinox_summer = month_median.max()
    print(f"Largest monthly median peak VTEC:")
    print(f"  month = {idx_max} ({month_name[idx_max]}), median = {vtec_equinox_summer:.2f} TECU")
    print("")

    # Winter: DJF (12,1,2) -> smallest median among these months
    winter_months = [12, 1, 2]
    winter_medians = month_median.loc[winter_months].dropna()
    if not winter_medians.empty:
        w_idx_min = winter_medians.idxmin()
        vtec_winter = winter_medians.min()
        print("Winter (DJF) monthly median peaks:")
        for m in winter_months:
            if not np.isnan(month_median.loc[m]):
                print(f"  {m:2d} ({month_name[m]}): median = {month_median.loc[m]:.2f} TECU")
        print(f"Lowest winter median peak VTEC:")
        print(f"  month = {w_idx_min} ({month_name[w_idx_min]}), median = {vtec_winter:.2f} TECU")
    else:
        print("No data for winter months (DJF) in monthly medians.")

    # ----- 4) Summary ready for the paragraph -----
    print("\n--- Summary for LaTeX paragraph ---")
    print(f"Use VTEC_min ≈ {vtec_p05:.1f} TECU and VTEC_max ≈ {vtec_p95:.1f} TECU as a robust daily peak range.")
    print(f"Use VTEC_equinox/summer ≈ {vtec_equinox_summer:.1f} TECU in {month_name[idx_max]} (month of largest median peak).")
    if not winter_medians.empty:
        print(f"Use VTEC_winter ≈ {vtec_winter:.1f} TECU in {month_name[w_idx_min]} for the lowest winter median.")


In [None]:
# ==========================================
# CELL 6b — BOXplots par heure locale (bins de 2 h) — jours quiet QSL–GIM20
# Style harmonisé avec les boxplots mensuels/saisonniers
# ==========================================
import numpy as np
import matplotlib.pyplot as plt

# Purpose: box plot of VTEC per 2-hour local-time bin, with the per-bin mean overlaid.
# Start from S (30-min samples, described by the original note as already filtered to
# QSL–GIM20) and from the VTEC_COL column name; both are defined in earlier cells.
S_hour = S.copy()

# 2-hour bin derived from the 30-min slot index (0..47): 4 slots per bin -> 0, 2, 4, ..., 22
S_hour["hour2"] = (S_hour["slot"] // 4) * 2

hours2 = np.arange(0, 24, 2)  # 0,2,...,22

# Samples of each 2-hour bin (NaNs dropped), in the same order as hours2
data_h = [S_hour.loc[S_hour["hour2"] == h, VTEC_COL].dropna().to_numpy()
          for h in hours2]
has_data = [len(v) > 0 for v in data_h]

# Mean per 2-hour bin (black curve); reindex keeps a NaN row for bins without samples
mh = (S_hour.groupby("hour2", as_index=False)[VTEC_COL]
          .mean()
          .set_index("hour2")
          .reindex(hours2))

fig, ax = plt.subplots(1, 1, figsize=(10, 4))

# Box positions: 1..N, one per entry of hours2; bins without data are skipped
pos  = [i + 1 for i, ok in enumerate(has_data) if ok]
dat  = [v for v, ok in zip(data_h, has_data) if ok]
labs = [h for h, ok in zip(hours2, has_data) if ok]  # not read again in this cell

if dat:
    # Whiskers span the 5th-95th percentiles (whis=(5, 95)); the diamond marks the mean
    ax.boxplot(
        dat,
        positions=pos,
        patch_artist=True,
        showmeans=True,
        whis=(5, 95),
        meanprops=dict(marker='D', markersize=4,
                       markerfacecolor='black', markeredgecolor='white'),
        boxprops=dict(facecolor='#c7e9ff', edgecolor='#1f4d7a', linewidth=1.2),
        medianprops=dict(color='crimson', linewidth=1.8),
        whiskerprops=dict(color='#1f4d7a', linewidth=1.0, linestyle='--'),
        capprops=dict(color='#1f4d7a', linewidth=1.0),
        flierprops=dict(marker='x', markersize=3,
                        markeredgecolor='gray', markerfacecolor='none', alpha=0.5),
    )

# Mean curve, drawn at x = 1..N so that it lines up with the box centres
ax.plot(
    np.arange(1, len(hours2) + 1),
    mh[VTEC_COL].to_numpy(),
    color="black", marker="o", linewidth=2, markersize=5,
    label="Mean"
)

# Ticks and styling: one tick per bin, labelled with the bin start hour
ax.set_xlim(0.5, len(hours2) + 0.5)
ax.set_xticks(np.arange(1, len(hours2) + 1))
ax.set_xticklabels([f"{h:02d}" for h in hours2])

for tick in ax.yaxis.get_ticklabels():
    tick.set_fontsize(12); tick.set_color('black'); tick.set_weight('bold')
for tick in ax.xaxis.get_ticklabels():
    tick.set_fontsize(12); tick.set_color('black'); tick.set_weight('bold')

ax.grid(True, alpha=0.3)
ax.legend(frameon=False, loc="upper left")

# Title and axis labels are placed with fig.text, as in the monthly/seasonal figures
fig.text(0.5, 0.92,
         "Quiet days VTEC distribution by 2 h local-time bins",
         ha="center", fontsize=12, fontweight="bold")
fig.text(0.5, 0.01,
         "Local time LT", ha="center",
         fontsize=12, fontweight="bold")
fig.text(0.04, 0.5,
         "TEC [TECU]", va="center", rotation="vertical",
         fontsize=12, fontweight="bold")

# OUTDIR_FIG is defined in an earlier cell
out = OUTDIR_FIG / "TEC08_Low_solar_activity_QUIET_boxplot_by_2h_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)


In [None]:
import numpy as np
import pandas as pd
import pytz

# -------- CONFIG --------
TIME_COL = "time"           # 30-min UTC timestamp column
VTEC_COL = "VTEC_median"    # 30-min VTEC
QUIET_FLAG = False          # optional: name (str) of a boolean quiet-day column; False disables the filter
LOCAL_TZ = "Africa/Casablanca"
# ------------------------

# Start from the 30-min dataframe S built in an earlier cell
df = S.copy()

# Keep only flagged rows when a flag column name is configured and present.
# The isinstance guard avoids testing the boolean False as if it were a column label.
if isinstance(QUIET_FLAG, str) and QUIET_FLAG in df.columns:
    df = df[df[QUIET_FLAG] == True].copy()

# Parse timestamps as UTC; unparseable values become NaT
df[TIME_COL] = pd.to_datetime(df[TIME_COL], utc=True, errors="coerce")

# Drop unusable rows BEFORE building the integer bin column: a single NaT would
# otherwise turn into a NaN hour and make .astype(int) raise.
df = df[df[TIME_COL].notna() & np.isfinite(df[VTEC_COL])].copy()

# Convert to local time.
# NOTE(review): LOCAL_TZ is a civil time zone, while other cells derive LT from `slot`
# — confirm which definition of local time is intended here.
tz = pytz.timezone(LOCAL_TZ)
df["ts_lt"] = df[TIME_COL].dt.tz_convert(tz)

# Local hour as a float in [0, 24)
df["lt_hour"] = df["ts_lt"].dt.hour + df["ts_lt"].dt.minute / 60.0

# Start hour of the 2-hour bin: 0, 2, 4, ..., 22 (24 cannot occur because lt_hour < 24)
df["lt_bin_start"] = (2 * np.floor(df["lt_hour"] / 2)).astype(int)

# Aggregate by 2h bin
def iqr(x):
    """Return the interquartile range (75th minus 25th percentile) of x, ignoring NaNs."""
    values = np.asarray(x, float)
    q25, q75 = np.nanpercentile(values, [25, 75])
    return q75 - q25

# Per-bin summary table: number of non-NaN samples, median and IQR of VTEC_COL,
# one row per 2-hour local-time bin, sorted by bin start hour.
stats = (df
         .groupby("lt_bin_start")[VTEC_COL]
         .agg(count="count",
              median="median",
              iqr=iqr)
         .reset_index()
         .sort_values("lt_bin_start"))

# Human-readable labels for the 2-hour bins: "00–02", "02–04", ..., "22–00"
def bin_label(h):
    """Format the bin starting at hour h as 'HH–HH'; the end hour wraps at 24."""
    end_hour = (h + 2) % 24
    return f"{h:02d}–{end_hour:02d}"

# Attach the text label of each bin, then print the table with two-decimal floats
stats["LT_bin"] = stats["lt_bin_start"].apply(bin_label)

print("VTEC distribution by 2-hour local-time bins (QSL--GIM20):")
print(stats[["LT_bin","count","median","iqr"]]
      .to_string(index=False, float_format=lambda x: f"{x:6.2f}"))

# Look up individual bins quoted in the text
def get_bin_value(h_start):
    """Return (median, iqr) of the bin starting at h_start from the global `stats` table.

    Returns (nan, nan) when `stats` has no row for that bin.
    """
    match = stats[stats["lt_bin_start"] == h_start]
    if len(match) == 0:
        return np.nan, np.nan
    first = match.iloc[0]
    return float(first["median"]), float(first["iqr"])

# Bins quoted in the text; each call returns (median, IQR), or (nan, nan) if the bin is absent
med_06_08, iqr_06_08 = get_bin_value(6)   # 06–08 LT
med_14_16, iqr_14_16 = get_bin_value(14)  # 14–16 LT
med_00_02, iqr_00_02 = get_bin_value(0)   # 00–02 LT (night)
med_02_04, iqr_02_04 = get_bin_value(2)   # 02–04 LT (night)

# A missing bin prints as "nan" because of the (nan, nan) fallback above
print("\n--- Key values for text ---")
print(f"Median VTEC 06–08 LT  ≈ {med_06_08:.2f} TECU (IQR ≈ {iqr_06_08:.2f} TECU)")
print(f"Median VTEC 14–16 LT  ≈ {med_14_16:.2f} TECU (IQR ≈ {iqr_14_16:.2f} TECU)")
print(f"Median VTEC 00–02 LT  ≈ {med_00_02:.2f} TECU (IQR ≈ {iqr_00_02:.2f} TECU)")
print(f"Median VTEC 02–04 LT  ≈ {med_02_04:.2f} TECU (IQR ≈ {iqr_02_04:.2f} TECU)")


In [None]:
# ==========================================
# FIGURE COMBINÉE — ECDF (3 heures) + EXCEEDANCE (20/30/40 TECU)
# ==========================================
import numpy as np
import matplotlib.pyplot as plt

def ecdf(x):
    """Return the empirical CDF of x as (sorted_values, cumulative_probabilities).

    Parameters
    ----------
    x : array-like of numbers
        Any sequence convertible to a float array (list, tuple, ndarray, Series).
        NaNs are discarded before sorting.

    Returns
    -------
    (xs, ys) : tuple of 1-D ndarrays
        xs are the finite-or-infinite non-NaN values in ascending order and
        ys[i] = (i + 1) / n. Both arrays are empty when no non-NaN value remains.
    """
    # Coerce first so that plain Python sequences can be boolean-masked too
    x = np.asarray(x, dtype=float).ravel()
    x = np.sort(x[~np.isnan(x)])
    if x.size == 0:
        return np.array([]), np.array([])
    y = np.arange(1, x.size + 1) / x.size
    return x, y

# ---------- CONFIG ----------
labels_ecdf = {"09:00":18, "12:00":24, "15:00":30}  # legend label -> 30-min slot index used for the ECDF
thresholds = [20, 30, 40]                           # exceedance thresholds (TECU, per the legend text)
# -----------------------------

# Two stacked panels: ECDF on top, exceedance probability below
fig, (ax1, ax2) = plt.subplots(
    2, 1, figsize=(8, 8),
    gridspec_kw={"height_ratios": [1.1, 1.0]}
)

# ===================== TOP PANEL: ECDF ======================
quantiles = {}  # label -> (p50, p75, p90); filled below, not read again in this cell

for lab, slot in labels_ecdf.items():
    # All samples of S that fall in this slot (S and VTEC_COL come from earlier cells)
    vals = S.loc[S["slot"] == slot, VTEC_COL].to_numpy()
    x, y = ecdf(vals)
    if x.size:
        ax1.plot(x, y, lw=3, label=lab)
        # quantiles (optional, kept in case they need to be read back)
        p50 = np.nanpercentile(vals, 50)
        p75 = np.nanpercentile(vals, 75)
        p90 = np.nanpercentile(vals, 90)
        quantiles[lab] = (p50, p75, p90)

# ECDF axis style: x range reaches at least 50, or the 99th percentile of all samples if larger
ax1.set_xlim(0, max(50, np.nanpercentile(S[VTEC_COL], 99)))
ax1.set_ylim(0, 1)
ax1.grid(alpha=0.3)
ax1.legend(frameon=True, title="Local time")

for t in ax1.yaxis.get_ticklabels():
    t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
for t in ax1.xaxis.get_ticklabels():
    t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

ax1.set_ylabel("CDF VTEC", weight='bold')
ax1.set_title("(a) Empirical CDF of VTEC at 09:00, 12:00, 15:00 LT",
              loc="left", weight='bold')

# ================= BOTTOM PANEL: EXCEEDANCE =====================
# Denominator of the exceedance probability: number of non-NaN samples per slot
g = S.groupby("slot")[VTEC_COL]
den = g.count().rename("N")

def slot_ticks_2h_local(ax):
    """
    Put one x tick every 2 h of local time on `ax`.

    Assumes the x axis is the slot index 0..47 with 1 slot = 30 min, i.e.
    LT = slot * 0.5 h, so a tick every 4 slots labelled "00:00", "02:00", ..., "22:00".
    The x range is set to [0, 47].
    """
    tick_slots = np.arange(0, 48, 4)  # every 4th slot = every 2 h
    tick_text = [f"{hour:02d}:00" for hour in range(0, 24, 2)]
    ax.set_xlim(0, 47)
    ax.set_xticks(tick_slots)
    ax.set_xticklabels(tick_text)

# One curve per threshold: share of samples in each slot with VTEC >= thr.
# NaN samples compare as False (so they never count as hits) and are excluded from `den`.
for thr in thresholds:
    num = S.assign(hit=S[VTEC_COL] >= thr).groupby("slot")["hit"].sum()
    p = (num / den).reindex(np.arange(48)).values  # slots without data become NaN
    ax2.plot(np.arange(48), p, '-o', ms=3, lw=2, label=f"≥ {thr} TECU")

slot_ticks_2h_local(ax2)
ax2.set_ylim(0, 0.4)  # fixed y range: probabilities above 0.4 would be clipped
ax2.grid(alpha=0.3)

for t in ax2.yaxis.get_ticklabels():
    t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
for t in ax2.xaxis.get_ticklabels():
    t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

ax2.set_xlabel("Local Time (LT)", weight='bold')
ax2.set_ylabel("Probability", weight='bold')
ax2.legend(frameon=True)
ax2.set_title("(b) Probability of VTEC exceeding fixed thresholds",
              loc="left", weight='bold')

# Reserve room at the top for the figure-level title added right after
fig.tight_layout(rect=[0.03, 0.03, 0.97, 0.95])
fig.suptitle("VTEC distributions and exceedance probabilities at Oukaimeden (QSL–GIM20)",
             y=0.98, fontsize=12, weight='bold')

# OUTDIR_FIG is defined in an earlier cell
out = OUTDIR_FIG / "Low_solar_activity_QUIET_ECDF_exceedance_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved combined figure:", out)


In [None]:
# ==========================================
# CELL 9 — ANOMALIES PAR ANNÉE (médiane diurne – climatologie quiet)
# ==========================================
# Yearly anomaly figure: for each year, diurnal median minus the all-years diurnal median.
# agg_diurnal, slot_ticks_2h, S, df_kept and OUTDIR_FIG are defined in earlier cells.
CLIM = agg_diurnal(S)[["slot","median"]].rename(columns={"median":"clim_median"})

# One panel per year found in df_kept. The grid is sized from the data: a fixed
# 6x2 grid fails beyond 12 years and removes a populated panel at exactly 12.
plot_years = sorted({pd.Timestamp(d).year for d in df_kept['date_utc']})
n_cols = 2
n_rows = max(1, -(-len(plot_years) // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12,8), squeeze=False)
for i, y in enumerate(plot_years):
    ax = axes[i // n_cols, i % n_cols]
    Sy = S[pd.to_datetime(S["date_utc"]).dt.year == y]
    Dy = agg_diurnal(Sy)[["slot","median"]].merge(CLIM, on="slot", how="left")
    Dy["anom"] = Dy["median"] - Dy["clim_median"]
    ax.axhline(0, color='k', lw=3, ls='--')  # zero-anomaly reference line
    ax.plot(Dy["slot"], Dy["anom"], '-', lw=2)

    # tick label styling
    for t in ax.yaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')
    for t in ax.xaxis.get_ticklabels():
        t.set_fontsize(10); t.set_color('black'); t.set_weight('bold')

    slot_ticks_2h(ax); ax.grid(alpha=0.3)

    ax.set_title(str(y), fontweight='bold', y=0.75)  # year label drawn inside the panel
    ax.set_ylim(-5,50); ax.set_yticks(np.arange(0,49,15))

# Remove only the panels that received no year (the bottom-right one for 11 years)
for j in range(len(plot_years), n_rows * n_cols):
    fig.delaxes(axes[j // n_cols, j % n_cols])

# Show the hour labels on the bottom-left panel and on the lowest remaining right panel
rows, cols = axes.shape

# 1) Hide all x tick labels by default
for r in range(rows):
    for c in range(cols):
        axes[r, c].tick_params(axis='x', labelbottom=False)

# 2) Bottom-left panel visible
axes[rows-1, 0].tick_params(axis='x', labelbottom=True)

# 3) Find the lowest panel still attached to the figure in the right column
right_row = None
for r in range(rows-1, -1, -1):
    if axes[r, 1] in fig.axes:   # the panel exists (was not deleted)
        right_row = r
        break

# 4) Enable the hour labels on that right-hand panel
if right_row is not None:
    axes[right_row, 1].tick_params(axis='x', labelbottom=True)

# Zero vertical spacing and figure-level texts
#fig.subplots_adjust(hspace=0)
#fig.text(0.5, 0.05, 'Local Time (LT)', ha='center', fontsize=12, weight='bold')


# Hide the top-row tick labels, unless the top row is also the only (bottom) row
if n_rows > 1:
    plt.setp([a.get_xticklabels() for a in axes[0, :]], visible=False)
fig.subplots_adjust(hspace=0)
fig.text(0.5, 0.9, 'VTEC Anomaly (Diurnal Median) vs Quiet-Time Climatology', ha='center', fontsize=12, weight='bold')
fig.text(0.5, 0.05, 'Local Time (LT)', ha='center', fontsize=12, weight='bold')
fig.text(0.07, 0.5, 'VTEC Anomaly', va='center', rotation='vertical', fontsize=12, weight='bold')
#fig.tight_layout(rect=[0.03,0.03,0.98,0.96])
out = OUTDIR_FIG/"Low_solar_activity_QUIET_yearly_anomaly_vs_climatology_MS.png"
#plt.show()
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)


In [None]:
import numpy as np
import pandas as pd

# --- Multi-year diurnal climatology: per-slot median from agg_diurnal(S), renamed to clim_median ---
CLIM = agg_diurnal(S)[["slot", "median"]].rename(columns={"median": "clim_median"})

# --- Sorted list of the distinct years present in df_kept["date_utc"] ---
years = sorted({pd.Timestamp(d).year for d in df_kept["date_utc"]})

def slot_to_lt(slot):
    """Format a 30-minute slot index (0..47) as a local-time string "HH:MM"."""
    hours_float = slot / 2
    whole_hours = int(hours_float)
    minutes = int(round(60 * (hours_float - whole_hours)))
    return f"{whole_hours:02d}:{minutes:02d}"

# Build one summary row per year: extreme anomalies, their local time, and mean anomalies.
rows = []

for y in years:
    # Subset of S for year y
    Sy = S[pd.to_datetime(S["date_utc"]).dt.year == y].copy()
    if Sy.empty:
        continue

    # Diurnal median of year y, joined to the climatology on the slot index
    Dy = agg_diurnal(Sy)[["slot", "median"]].merge(CLIM, on="slot", how="left")
    Dy["anom"] = Dy["median"] - Dy["clim_median"]

    # 1) Largest anomaly and the local time of its slot
    max_anom = float(Dy["anom"].max())
    idx_max = Dy["anom"].idxmax()
    slot_max = int(Dy.loc[idx_max, "slot"])
    lt_max = slot_to_lt(slot_max)

    # 2) Smallest anomaly and the local time of its slot
    min_anom = float(Dy["anom"].min())
    idx_min = Dy["anom"].idxmin()
    slot_min = int(Dy.loc[idx_min, "slot"])
    lt_min = slot_to_lt(slot_min)

    # 3) Mean anomaly over all slots returned for the year
    mean_anom_all = float(Dy["anom"].mean())

    # 4) Mean anomaly over the afternoon window, slots 24..36 inclusive
    # NOTE(review): slot 36 is included; whether that extends past 18:00 LT depends on
    # whether a slot labels the start of a 30-min interval — confirm.
    aft = Dy[(Dy["slot"] >= 24) & (Dy["slot"] <= 36)]
    mean_anom_aft = float(aft["anom"].mean()) if not aft.empty else np.nan

    rows.append({
        "year": y,
        "max_anom_TECU": max_anom,
        "lt_of_max": lt_max,
        "min_anom_TECU": min_anom,
        "lt_of_min": lt_min,
        "mean_anom_all_TECU": mean_anom_all,
        "mean_anom_12_18LT_TECU": mean_anom_aft
    })

# NOTE(review): if every year was skipped, `rows` is empty and the frame has no "year"
# column, so sort_values below would raise.
anom_stats = pd.DataFrame(rows).sort_values("year")

# Table printed with two-decimal floats, for the paper
print("Yearly diurnal VTEC anomalies relative to the quiet-time climatology:")
print(anom_stats.to_string(index=False,
                           float_format=lambda x: f"{x:6.2f}"))

# Global extremes across years, for the text
print("\n--- Summary for LaTeX paragraph ---")
y_max_global = anom_stats.loc[anom_stats["max_anom_TECU"].idxmax(), "year"]
max_global = anom_stats["max_anom_TECU"].max()
print(f"Largest positive anomaly: {max_global:.2f} TECU in {y_max_global} "
      f"(at local time {anom_stats.loc[anom_stats['max_anom_TECU'].idxmax(), 'lt_of_max']}).")

y_min_global = anom_stats.loc[anom_stats["min_anom_TECU"].idxmin(), "year"]
min_global = anom_stats["min_anom_TECU"].min()
print(f"Largest negative anomaly: {min_global:.2f} TECU in {y_min_global} "
      f"(at local time {anom_stats.loc[anom_stats['min_anom_TECU'].idxmin(), 'lt_of_min']}).")

print("\nAfternoon (12–18 LT) mean anomalies by year:")
for _, r in anom_stats.iterrows():
    print(f"  {int(r['year'])}: mean anomaly 12–18 LT ≈ {r['mean_anom_12_18LT_TECU']:.2f} TECU")


In [None]:
# ==========================================
# CELL 10 — Monthly & seasonal box + mean
# ==========================================
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sns, calendar
from pathlib import Path
# Global seaborn theme; it stays in effect for every figure drawn after this line
sns.set_theme(style="whitegrid", context="talk")

# --------- PARAMS ---------
TIME_COL  = "date_utc"
VTEC_COL  = "VTEC_median"
LABEL_COL = "geomag_label_gfz_QDNQ"   # 'Q', 'D', 'NQ' (only referenced by the commented-out filter below)
START     = pd.Timestamp("2015-10-01", tz="UTC")
END       = pd.Timestamp("2025-09-26", tz="UTC")
# --------------------------

# --- make sure the time column is a real (UTC) datetime, then clip to [START, END] ---
# NOTE(review): `df` is whatever an earlier cell left in the notebook namespace —
# confirm it is the intended quiet-day table before running this cell.
df = df.copy()
df[TIME_COL] = pd.to_datetime(df[TIME_COL], utc=True, errors="coerce")
df = df[(df[TIME_COL] >= START) & (df[TIME_COL] <= END)].copy()

# to be certain the data are restricted to QSL–GIM20, these filters can be re-enabled:
# df = df[df[LABEL_COL] == "Q"].copy()
# df = df[pd.to_numeric(df["gim_offset_tecu"], errors="coerce").abs() <= 20].copy()

# the .dt accessor works now that the column is datetime
df["month"] = df[TIME_COL].dt.month

# -------- MONTHLY (all years pooled) --------
mlabels = [calendar.month_abbr[m] for m in range(1,13)]
all_months = np.arange(1,13)

# Samples per calendar month (NaNs dropped) and a flag for months that have data
data_bp = [df.loc[df["month"]==m, VTEC_COL].dropna().to_numpy() for m in all_months]
has_data = [len(v)>0 for v in data_bp]

# Mean per month; reindex keeps a NaN row for months without data
mm = (df.groupby("month", as_index=False)[VTEC_COL]
        .mean()
        .set_index("month")
        .reindex(all_months))

fig, ax = plt.subplots(1,1, figsize=(10,4))
# Boxes sit at x = month number; months without data are skipped
pos = [m for m,ok in zip(all_months,has_data) if ok]
dat = [v for v,ok in zip(data_bp,has_data) if ok]

if dat:
    # Whiskers span the 5th-95th percentiles; the diamond marks the mean
    ax.boxplot(
        dat, positions=pos, patch_artist=True, showmeans=True, whis=(5,95),
        meanprops=dict(marker='D', markersize=4, markerfacecolor='black', markeredgecolor='white'),
        boxprops=dict(facecolor='#c7e9ff', edgecolor='#1f4d7a', linewidth=1.2),
        medianprops=dict(color='crimson', linewidth=1.8),
        whiskerprops=dict(color='#1f4d7a', linewidth=1.0, linestyle='--'),
        capprops=dict(color='#1f4d7a', linewidth=1.0),
        flierprops=dict(marker='x', markersize=3, markeredgecolor='gray', markerfacecolor='none', alpha=0.5),
    )

# Monthly mean curve on top of the boxes
ax.plot(all_months, mm[VTEC_COL].to_numpy(), color="black", marker="o", linewidth=2, markersize=5, label="Mean")
for tick in ax.yaxis.get_ticklabels():
    tick.set_fontsize(12); tick.set_color('black'); tick.set_weight('bold')
for tick in ax.xaxis.get_ticklabels():
    tick.set_fontsize(12); tick.set_color('black'); tick.set_weight('bold')

ax.set_xlim(0.5, 12.5)
ax.set_xticks(all_months)
ax.set_xticklabels(mlabels)
ax.grid(True, alpha=0.3)
ax.legend(frameon=False, loc="upper left")

# Title and axis labels are placed with fig.text
fig.text(0.5, 0.92, "Quiet days monthly median and mean from Oct 2015 to Sep 2025",
         ha="center", fontsize=12, fontweight="bold")
fig.text(0.5, 0.00, "Month", ha="center", fontsize=14, fontweight="bold")
fig.text(0.04, 0.5, "TEC [TECU]", va="center", rotation="vertical", fontsize=14, fontweight="bold")

# OUTDIR_FIG is defined in an earlier cell
out = OUTDIR_FIG/"Low_solar_activity_QUIET_monthly_median_boxplot_means_curve_by_LT_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)

# -------- SEASONAL (all years pooled) --------
def to_season(m):
    """Map a month number to its season code: DJF, MAM, JJA, or SON for anything else."""
    season_months = (
        ((12, 1, 2), "DJF"),
        ((3, 4, 5), "MAM"),
        ((6, 7, 8), "JJA"),
    )
    for members, code in season_months:
        if m in members:
            return code
    return "SON"

# Season code of every row, derived from its month
df["season"] = df["month"].map(to_season)

order = ["DJF","MAM","JJA","SON"]
# Samples per season (NaNs dropped) and a flag for seasons that have data
data_s = [df.loc[df["season"]==s, VTEC_COL].dropna().to_numpy() for s in order]
has_s  = [len(v)>0 for v in data_s]

# Mean per season, in the fixed DJF/MAM/JJA/SON order (NaN row when a season is absent)
ms = (df.groupby("season", as_index=False)[VTEC_COL]
        .mean()
        .set_index("season")
        .reindex(order))

fig, ax = plt.subplots(1,1, figsize=(8,4))
# Boxes sit at x = 1..4 following `order`; seasons without data are skipped
pos = [i+1 for i,ok in enumerate(has_s) if ok]
dat = [v for v,ok in zip(data_s,has_s) if ok]
labs = [s for s,ok in zip(order,has_s) if ok]  # not read again in this cell

if dat:
    # Whiskers span the 5th-95th percentiles; the diamond marks the mean
    ax.boxplot(
        dat, positions=pos, patch_artist=True, showmeans=True, whis=(5,95),
        meanprops=dict(marker='D', markersize=4, markerfacecolor='black', markeredgecolor='white'),
        boxprops=dict(facecolor='#c7e9ff', edgecolor='#1f4d7a', linewidth=1.2),
        medianprops=dict(color='crimson', linewidth=1.8),
        whiskerprops=dict(color='#1f4d7a', linewidth=1.0, linestyle='--'),
        capprops=dict(color='#1f4d7a', linewidth=1.0),
        flierprops=dict(marker='x', markersize=3, markeredgecolor='gray', markerfacecolor='none', alpha=0.5),
    )

# Seasonal mean curve on top of the boxes
ax.plot(np.arange(1,5), ms[VTEC_COL].to_numpy(), color="black", marker="o", linewidth=3, markersize=5, label="Mean")

for tick in ax.yaxis.get_ticklabels():
    tick.set_fontsize(12); tick.set_color('black'); tick.set_weight('bold')
for tick in ax.xaxis.get_ticklabels():
    tick.set_fontsize(12); tick.set_color('black'); tick.set_weight('bold')

ax.set_xlim(0.5, 4.5)
ax.set_xticks(np.arange(1,5))
ax.set_xticklabels(order)
ax.legend(frameon=False, loc="upper left")
ax.grid(True, alpha=0.3)

# Title and axis labels are placed with fig.text
fig.text(0.5, 0.92, "Quiet days seasonal median and mean from Oct 2015 to Sep 2025",
         ha="center", fontsize=12, fontweight="bold")
fig.text(0.5, 0.001, "Season", ha="center", fontsize=14, fontweight="bold")
fig.text(0.04, 0.5, "TEC [TECU]", va="center", rotation="vertical", fontsize=15, fontweight="bold")

# OUTDIR_FIG is defined in an earlier cell
out = OUTDIR_FIG/"Low_solar_activity_QUIET_seasonal_median_boxplot_means_curve_by_LT_MS.png"
fig.savefig(out, dpi=300)
plt.close(fig)
print("Saved:", out)


In [None]:
# ==========================================
# CELL 10b — Stats numériques pour les boxplots mensuels et saisonniers
# ==========================================
import numpy as np
import pandas as pd
import calendar

VTEC_COL = "VTEC_median"  # same column name as in the previous cell

# Make sure the month column exists (in case the previous cell was not run);
# this relies on df["date_utc"] already being a datetime column.
if "month" not in df.columns:
    df["month"] = df["date_utc"].dt.month

def to_season(m):
    """Map a month number to its season code: DJF, MAM, JJA, or SON for anything else."""
    season_months = (
        ((12, 1, 2), "DJF"),
        ((3, 4, 5), "MAM"),
        ((6, 7, 8), "JJA"),
    )
    for members, code in season_months:
        if m in members:
            return code
    return "SON"

# Add the season column only when an earlier cell has not already created it
if "season" not in df.columns:
    df["season"] = df["month"].map(to_season)

# ---------- 1) STATS MENSUELLES ----------
def iqr(x):
    """Return the interquartile range (75th minus 25th percentile) of x, ignoring NaNs."""
    values = np.asarray(x, float)
    q25, q75 = np.nanpercentile(values, [25, 75])
    return q75 - q25

# Per-month summary of VTEC_COL: count of non-NaN rows, median, mean, IQR, 5th/95th percentiles.
# NOTE(review): the count is named n_days, which assumes one row per day — confirm for `df`.
monthly_stats = (
    df.groupby("month")[VTEC_COL]
      .agg(
          n_days   = "count",
          median   = "median",
          mean     = "mean",
          iqr      = iqr,
          p5       = lambda x: np.nanpercentile(x, 5),
          p95      = lambda x: np.nanpercentile(x, 95),
      )
      .reset_index()
)

monthly_stats["month_name"] = monthly_stats["month"].apply(lambda m: calendar.month_abbr[m])

print("\n=== Monthly stats of daily median VTEC (quiet QSL–GIM20) ===")
print(monthly_stats.to_string(index=False, float_format=lambda v: f"{v:5.2f}"))

# Rows of the months with the highest / lowest median
idx_max_m = monthly_stats["median"].idxmax()
idx_min_m = monthly_stats["median"].idxmin()

m_max_row = monthly_stats.loc[idx_max_m]
m_min_row = monthly_stats.loc[idx_min_m]

print("\n--- Key monthly values for LaTeX paragraph ---")
print(f"Highest monthly median VTEC: {m_max_row['median']:.2f} TECU in {m_max_row['month_name']} "
      f"(mean ≈ {m_max_row['mean']:.2f} TECU, IQR ≈ {m_max_row['iqr']:.2f} TECU).")
print(f"Lowest monthly median VTEC:  {m_min_row['median']:.2f} TECU in {m_min_row['month_name']} "
      f"(mean ≈ {m_min_row['mean']:.2f} TECU, IQR ≈ {m_min_row['iqr']:.2f} TECU).")

# Rows of the months with the largest / smallest spread (IQR)
idx_max_iqr_m = monthly_stats["iqr"].idxmax()
idx_min_iqr_m = monthly_stats["iqr"].idxmin()
iqr_max_row   = monthly_stats.loc[idx_max_iqr_m]
iqr_min_row   = monthly_stats.loc[idx_min_iqr_m]

print(f"Largest monthly spread (IQR): {iqr_max_row['iqr']:.2f} TECU in {iqr_max_row['month_name']}.")
print(f"Smallest monthly spread (IQR): {iqr_min_row['iqr']:.2f} TECU in {iqr_min_row['month_name']}.")

# Simple asymmetry proxy: mean minus median (positive suggests a right tail)
monthly_stats["skew_proxy"] = monthly_stats["mean"] - monthly_stats["median"]
idx_max_skew = monthly_stats["skew_proxy"].idxmax()
idx_min_skew = monthly_stats["skew_proxy"].idxmin()

skew_max_row = monthly_stats.loc[idx_max_skew]
skew_min_row = monthly_stats.loc[idx_min_skew]

# The second line reports the minimum of the proxy; it is only "negative" if that minimum is below zero
print(f"Largest positive mean–median difference (right tail): "
      f"{skew_max_row['skew_proxy']:.2f} TECU in {skew_max_row['month_name']}.")
print(f"Largest negative mean–median difference (left tail): "
      f"{skew_min_row['skew_proxy']:.2f} TECU in {skew_min_row['month_name']}.")


# ---------- 2) SEASONAL STATS ----------
# Same summary as the monthly table, grouped by season code.
seasonal_stats = (
    df.groupby("season")[VTEC_COL]
      .agg(
          n_days   = "count",
          median   = "median",
          mean     = "mean",
          iqr      = iqr,
          p5       = lambda x: np.nanpercentile(x, 5),
          p95      = lambda x: np.nanpercentile(x, 95),
      )
      .reindex(["DJF","MAM","JJA","SON"])   # canonical order; a season without data becomes an all-NaN row
      .reset_index()
)

print("\n=== Seasonal stats of daily median VTEC (quiet QSL–GIM20) ===")
print(seasonal_stats.to_string(index=False, float_format=lambda v: f"{v:5.2f}"))

# Rows of the seasons with the highest / lowest median
idx_max_s = seasonal_stats["median"].idxmax()
idx_min_s = seasonal_stats["median"].idxmin()
s_max_row = seasonal_stats.loc[idx_max_s]
s_min_row = seasonal_stats.loc[idx_min_s]

print("\n--- Key seasonal values for LaTeX paragraph ---")
print(f"Highest seasonal median VTEC: {s_max_row['median']:.2f} TECU in {s_max_row['season']} "
      f"(mean ≈ {s_max_row['mean']:.2f} TECU, IQR ≈ {s_max_row['iqr']:.2f} TECU).")
print(f"Lowest seasonal median VTEC:  {s_min_row['median']:.2f} TECU in {s_min_row['season']} "
      f"(mean ≈ {s_min_row['mean']:.2f} TECU, IQR ≈ {s_min_row['iqr']:.2f} TECU).")

# Mean-minus-median asymmetry proxy per season
seasonal_stats["skew_proxy"] = seasonal_stats["mean"] - seasonal_stats["median"]
idx_max_skew_s = seasonal_stats["skew_proxy"].idxmax()
idx_min_skew_s = seasonal_stats["skew_proxy"].idxmin()

sskew_max_row = seasonal_stats.loc[idx_max_skew_s]
sskew_min_row = seasonal_stats.loc[idx_min_skew_s]

print(f"Largest seasonal mean–median difference: {sskew_max_row['skew_proxy']:.2f} TECU in {sskew_max_row['season']}.")
print(f"Smallest seasonal mean–median difference: {sskew_min_row['skew_proxy']:.2f} TECU in {sskew_min_row['season']}.")


In [None]:
# eia_crest_from_ionex_using_my_reader.py
# Extraction robuste de la latitude de la crête EIA autour d’OUCA en s’appuyant sur la fonction read_ionex().

import numpy as np
import pandas as pd
from pathlib import Path
import io, subprocess
from datetime import date, datetime, timedelta, timezone
import matplotlib.pyplot as plt
import pytz

# ================== CONFIG ==================
DEC_DIR   = Path(r"G:\My Drive\14-DATA\TEC_DATA\Raw_Data\IONEX\IONEX_decompressed")  # single directory holding the decompressed IONEX files
OUT_DAILY = Path(r"G:\My Drive\14-DATA\TEC_DATA\daily_eia_crest_latitude.csv")
OUT_FIG   = Path(r"G:\My Drive\14-DATA\TEC_DATA\daily_eia_crest_latitude.png")

OUCA_LAT, OUCA_LON = 31.206, -7.866  # station latitude / longitude in degrees
LON_BAND_DEG = 5.0             # VTEC is averaged over longitudes within [lon-5, lon+5]
SEARCH_LAT_MIN, SEARCH_LAT_MAX = 0.0, 50.0  # latitude window searched for the (northern) crest
ONLY_AFTERNOON_LT = False       # True keeps only epochs whose local hour lies in LT_HOURS_WINDOW
LT_TZ = pytz.timezone("Africa/Casablanca")
# NOTE(review): despite the flag name, this window is 04-20 local hour, not just the afternoon — confirm.
LT_HOURS_WINDOW = (4, 20)

# First and last day (inclusive) processed by the main loop
START     = date(2015, 10, 1)
END       = date(2025, 9, 26)
# ============================================
# ============================================

# ====== TES FONCTIONS (inchangées) ======
def _open_text(p: Path) -> io.StringIO:
    p = Path(p)
    if not p.exists(): raise FileNotFoundError(p)
    ext = p.suffix.lower()
    if ext == ".gz":
        out = subprocess.run(["gzip", "-dc", str(p)], capture_output=True)
        if out.returncode != 0: raise RuntimeError("gzip -dc failed")
        return io.StringIO(out.stdout.decode("ascii","ignore"))
    if ext == ".z":
        gz = subprocess.run(["gzip", "-dc", str(p)], capture_output=True)
        if gz.returncode == 0 and gz.stdout:
            return io.StringIO(gz.stdout.decode("ascii","ignore"))
        sz = subprocess.run(["7z", "e", "-so", str(p)], capture_output=True)
        if sz.returncode != 0: raise RuntimeError("7z -so failed")
        return io.StringIO(sz.stdout.decode("ascii","ignore"))
    return io.StringIO(p.read_text(encoding="ascii", errors="ignore"))

def ionex_first_epoch_date(path: Path):
    """Return the UTC date of the "EPOCH OF FIRST MAP" header record, or None.

    Only the header is scanned: reading stops at "END OF HEADER", at end of
    file, or after 400 lines. The function is best-effort and returns None
    when the file cannot be opened, when the record is absent, or when the
    record cannot be parsed into a valid date/time.
    """
    try:
        f = _open_text(path)
    except Exception:
        # Unreadable file: callers treat None as "not the file for this day"
        return None
    for _ in range(400):
        line = f.readline()
        if not line:
            break
        if "EPOCH OF FIRST MAP" in line:
            try:
                # The six integers (Y M D h m s) are taken from the first 60 columns
                yr, mo, dy, hh, mm, ss = map(int, line[:60].split()[:6])
                return datetime(yr, mo, dy, hh, mm, ss, tzinfo=timezone.utc).date()
            except ValueError:
                # Malformed or truncated record: stay best-effort instead of
                # aborting the caller's directory scan
                return None
        if "END OF HEADER" in line:
            break
    return None

def read_ionex(path: Path):
    """Parse the TEC maps of an IONEX file.

    Returns
    -------
    times : pandas.DatetimeIndex (UTC)
        Epoch of each successfully parsed TEC map.
    lats : numpy.ndarray
        Grid latitudes, in ascending order.
    lons : numpy.ndarray
        Grid longitudes; a 0..360 grid is remapped to [-180, 180) and sorted.
    TEC : numpy.ndarray, shape (n_maps, n_lat, n_lon)
        Raw values multiplied by 10**EXPONENT; blank fields and the 9999
        code become NaN. Axes follow `lats` and `lons`.

    Raises
    ------
    ValueError
        If the header ends prematurely ("Header IONEX incomplet"), the grid
        definition is missing ("Grille absente"), or no TEC map could be
        parsed ("Aucune carte TEC").

    Only "START OF TEC MAP" sections are read. A map whose rows are malformed
    or truncated is skipped as a whole.
    """
    f = _open_text(path)
    exp = -1  # exponent used when the header gives none
    lat1=lat2=dlat=lon1=lon2=dlon=None
    # ---- header: exponent and grid definition ----
    while True:
        line = f.readline()
        if not line: raise ValueError("Header IONEX incomplet")
        if "EXPONENT" in line:
            s = line[:8].strip()
            exp = int(s) if s else -1
        if "LAT1 / LAT2 / DLAT" in line:
            lat1,lat2,dlat = map(float, line[:60].split()[:3])
        if "LON1 / LON2 / DLON" in line:
            lon1,lon2,dlon = map(float, line[:60].split()[:3])
        if "END OF HEADER" in line:
            break
    if None in (lat1,lat2,dlat,lon1,lon2,dlon): raise ValueError("Grille absente")
    nlat = int(round((lat2-lat1)/dlat))+1
    nlon = int(round((lon2-lon1)/dlon))+1
    lats = np.linspace(lat1, lat2, nlat)
    lons = np.linspace(lon1, lon2, nlon)
    scale = 10.0**exp  # the header exponent is fixed for the whole file in this reader

    # ---- body: one (nlat, nlon) array per TEC map ----
    times, maps = [], []
    while True:
        line = f.readline()
        if not line: break
        if "START OF TEC MAP" in line:
            # epoch of the current map
            line = f.readline()
            while line and "EPOCH OF CURRENT MAP" not in line:
                line = f.readline()
            if not line: break
            yr,mo,dy,hh,mm,ss = map(int, line[:60].split()[:6])
            t = pd.Timestamp(datetime(yr,mo,dy,hh,mm,ss, tzinfo=timezone.utc))
            tec = np.full((nlat,nlon), np.nan)
            bad = False
            last = line  # most recent line consumed; the purge below restarts from it
            for ilat in range(nlat):
                hdr = f.readline()
                last = hdr
                if not hdr or "LAT/LON1/LON2/DLON/H" not in hdr:
                    bad=True; break
                vals=[]
                while len(vals) < nlon:
                    data = f.readline()
                    last = data
                    # A section marker or a new latitude header before the row is full
                    # means the row is truncated
                    if not data or ("START OF" in data) or ("END OF" in data) or ("LAT/LON1" in data):
                        bad=True; break
                    # Values are fixed-width fields of 5 characters
                    chunks = [data[i:i+5] for i in range(0, len(data.rstrip()), 5)]
                    for c in chunks:
                        c = c.strip().upper()
                        if c=="" or c=="9999": vals.append(np.nan)
                        else:
                            try: vals.append(float(c)*scale)
                            except ValueError: vals.append(np.nan)
                        if len(vals)==nlon: break
                if bad: break
                tec[ilat, :] = vals
            if bad:
                # Skip to the END OF TEC MAP of the damaged map, starting with the line
                # that revealed the problem: if that line already is the end marker,
                # nothing more is consumed and the following map is not lost.
                x = last
                while x:
                    if "END OF TEC MAP" in x: break
                    x = f.readline()
                continue
            times.append(t)
            maps.append(tec)
    if not maps: raise ValueError("Aucune carte TEC")
    TEC = np.stack(maps, axis=0)
    # ---- normalise longitude range and latitude direction ----
    if lons.min()>=0 and lons.max()>180:
        order = np.argsort(((lons+180)%360)-180)
        lons = (((lons+180)%360)-180)[order]
        TEC  = TEC[:,:,order]
    if lats[0]>lats[-1]:
        lats = lats[::-1]; TEC = TEC[:, ::-1, :]
    times = pd.to_datetime(times, utc=True)
    return times, lats, lons, TEC

def product_window(day: date) -> str:
    """Return the file-naming family expected for `day`.

    * "OLD"    — short names (codgDDD0.YYi), up to and including 2022-11-26 (DOY 330);
    * "OPSFIN" — long names, final product, up to and including 2025-09-20;
    * "OPSRAP" — long names, rapid product, afterwards.

    The first cut-off is DOY 330 as stated by the original comment; the previous
    literal date (2022-11-27) was DOY 331, one day too late.
    """
    if day <= date(2022, 11, 26):  # DOY 330, last day of the short-name scheme
        return "OLD"
    if day <= date(2025, 9, 20):   # OPSFIN window
        return "OPSFIN"
    return "OPSRAP"

def pick_ionex_for_day(day: date) -> Path | None:
    """Return the IONEX file in DEC_DIR whose first map epoch falls on `day`, or None.

    For the "OLD" naming family the exact short names are tried first, then every
    short-named file of that year. For the long-name families the preferred
    product (per product_window) is globbed first and the other one serves as
    fallback. A candidate is accepted only if ionex_first_epoch_date() equals `day`.
    """
    yy = f"{day.year % 100:02d}"
    doy = f"{day.timetuple().tm_yday:03d}"
    family = product_window(day)

    if family == "OLD":
        # Exact file names, lower-case then upper-case
        for fname in (f"codg{doy}0.{yy}i", f"CODG{doy}0.{yy}I"):
            exact = DEC_DIR / fname
            if exact.exists() and ionex_first_epoch_date(exact) == day:
                return exact
        # Otherwise scan every short-named file of the year and check its header
        for pattern in (f"codg*{yy}i", f"CODG*{yy}I"):
            for hit in DEC_DIR.glob(pattern):
                if ionex_first_epoch_date(hit) == day:
                    return hit
        return None

    # Long names: preferred product first, the other one as fallback
    if family == "OPSFIN":
        products = ("OPSFIN", "OPSRAP")
    else:
        products = ("OPSRAP", "OPSFIN")
    for product in products:
        for hit in DEC_DIR.glob(f"COD0{product}_*{day.year}{doy}*_GIM.INX"):
            if ionex_first_epoch_date(hit) == day:
                return hit
    return None
# =========================================

# ====== EIA CREST HELPERS ======
def lon_wrap(x):
    """Map longitude(s) in degrees onto the half-open interval [-180, 180)."""
    return (x + 180.0) % 360.0 - 180.0


def lon_band_indices(lons: np.ndarray, lon0: float, band_deg: float) -> np.ndarray:
    """Return the indices of `lons` lying within `band_deg` degrees of `lon0`.

    The angular offset is wrapped before it is compared, so the band is also
    correct when it straddles the ±180° seam (e.g. lon0 = 179 selects -180).

    Parameters
    ----------
    lons : array-like of float
        Grid longitudes in degrees, in any 360°-periodic convention.
    lon0 : float
        Centre longitude in degrees.
    band_deg : float
        Half-width of the band in degrees (inclusive).

    Returns
    -------
    numpy.ndarray of int
        Matching indices in ascending order; empty when nothing matches.
    """
    offsets = lon_wrap(np.asarray(lons, dtype=float) - lon0)
    return np.where(np.abs(offsets) <= band_deg)[0]

def crest_lat_from_slice(lat: np.ndarray, vtec_lat: np.ndarray,
                         lat_min: float, lat_max: float) -> float:
    """Return the latitude at which *vtec_lat* peaks inside [lat_min, lat_max].

    Args:
        lat: latitudes of the profile.
        vtec_lat: values sampled at *lat* (same length); NaNs are ignored.
        lat_min: lower bound of the search window (inclusive).
        lat_max: upper bound of the search window (inclusive).

    Returns:
        The latitude of the largest non-NaN value in the window, or NaN when
        the window contains no latitude or only NaN values.
    """
    in_window = (lat >= lat_min) & (lat <= lat_max)
    if not np.any(in_window):
        return np.nan
    profile = vtec_lat[in_window]
    # np.nanargmax raises ValueError on an all-NaN slice; report "no crest"
    # instead so one empty profile cannot abort a long batch run.
    if np.all(np.isnan(profile)):
        return np.nan
    return float(lat[in_window][np.nanargmax(profile)])

# ====== MAIN LOOP ======
# For each calendar day in [START, END]: pick that day's IONEX file, average
# VTEC over the longitude band centred on OUCA_LON, and record the latitude of
# the VTEC maximum for every retained epoch.
rows = []
d = START
n_days = 0  # counts the files that were read and processed, not calendar days
while d <= END:
    p = pick_ionex_for_day(d)
    if p is None:
        d += timedelta(days=1)
        continue
    try:
        times, lats, lons, TEC = read_ionex(p)
    except Exception as e:
        # Best effort: report the unreadable file and move on to the next day.
        print(f"[WARN] {p.name}: lecture impossible -> {e}")
        d += timedelta(days=1)
        continue

    # longitude indices inside the +/-LON_BAND_DEG band
    j_idx = lon_band_indices(lons, OUCA_LON, LON_BAND_DEG)
    if j_idx.size == 0:
        d += timedelta(days=1)
        continue

    for k, t in enumerate(times):
        # Epochs outside UTC day `d` are deliberately not filtered out: rows
        # are grouped by local date below, whichever file they came from.
        t_utc = t.to_pydatetime()
        lt = t_utc.astimezone(LT_TZ)
        if ONLY_AFTERNOON_LT and not (LT_HOURS_WINDOW[0] <= lt.hour < LT_HOURS_WINDOW[1]):
            continue

        # mean VTEC over the longitude band, one value per latitude
        vlat = np.nanmean(TEC[k][:, j_idx], axis=1)
        crest_lat = crest_lat_from_slice(lats, vlat, SEARCH_LAT_MIN, SEARCH_LAT_MAX)
        rows.append({
            "epoch_utc": t_utc,
            "epoch_lt": lt,
            "date_lt": lt.date(),
            "crest_lat": crest_lat,
            "ouca_lat": OUCA_LAT,
            "crest_minus_ouca_deg": crest_lat - OUCA_LAT,
            "src_file": p.name
        })
    n_days += 1
    d += timedelta(days=1)

if not rows:
    print("[INFO] Aucun epoch éligible (fenêtre date/LT).")
else:
    df = pd.DataFrame(rows)
    # Since epochs are not restricted to each file's own day, the same UTC
    # epoch can be contributed by more than one file. Keep its first
    # occurrence so it is not counted twice in the daily statistics.
    # NOTE(review): confirm that "first file wins" is the preferred choice.
    df = df.drop_duplicates(subset="epoch_utc", keep="first")
    daily = (df.groupby("date_lt", as_index=False)
               .agg(crest_lat_med=("crest_lat", "median"),
                    crest_lat_iqr=("crest_lat", lambda x: np.nanpercentile(x, 75)-np.nanpercentile(x, 25)),
                    crest_minus_ouca_med=("crest_minus_ouca_deg", "median"),
                    n_epochs=("crest_lat", "count")))
    daily.to_csv(OUT_DAILY, index=False)
    print(f"[OK] CSV écrit -> {OUT_DAILY}  | jours = {len(daily)}  | fichiers lus = {n_days}")

    # quick-look figure
    fig, ax = plt.subplots(figsize=(10,4))
    ax.plot(pd.to_datetime(daily["date_lt"]), daily["crest_lat_med"], lw=1.2)
    ax.axhline(OUCA_LAT, ls="--", color="k", lw=1.0, label="OUCA latitude")
    ax.set_ylabel("Daily median crest latitude [deg]")
    ax.set_title("Northern EIA crest latitude near OUCA longitude")
    ax.grid(True, alpha=0.3)
    ax.legend()
    fig.tight_layout()
    fig.savefig(OUT_FIG, dpi=200)
    print(f"[OK] Figure écrite -> {OUT_FIG}")


In [None]:
# diurnal_slopes_from_S30.py
import numpy as np
import pandas as pd
from pathlib import Path
import pytz
import matplotlib.pyplot as plt

# --------------- CONFIG ----------------
#CSV_30MIN = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/2015_2025_MS_VTEC_30min_stats.csv")
TIME_COL = "time"             # timestamp column (UTC)
VTEC_COL = "VTEC_median"      # your 30-min VTEC column
LOCAL_TZ = "Africa/Casablanca"  # zone name used to convert the UTC timestamps to local time

# default windows (no longer used below, but kept in case they are needed)
MORNING_LT = (6, 12)          # inclusive of start, exclusive of end
EVENING_LT = (17, 22)

# When True, main() keeps only the rows whose QUIET_FLAG_COL equals True
# (the filter is skipped if that column is absent).
KEEP_ONLY_QUIET = False
QUIET_FLAG_COL = "is_QSL_GIM20"  # optional boolean; set KEEP_ONLY_QUIET=False if absent

# Outputs of main(): monthly aggregate table and its quick-look plot.
OUT_CSV = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/monthly_diurnal_slopes_seasonal_windows.csv")
OUT_PNG = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/monthly_diurnal_slopes_seasonal_windows.png")
# ---------------------------------------

def theil_sen(x, y):
    """Return the Theil-Sen slope of *y* against *x*.

    The estimate is the median of the slopes of all point pairs. Points with a
    non-finite coordinate are dropped first, and pairs whose x-values differ
    by at most 1e-12 are ignored. Returns NaN when fewer than three finite
    points remain or when no usable pair exists.
    """
    xs = np.asarray(x, float)
    ys = np.asarray(y, float)
    finite = np.isfinite(xs) & np.isfinite(ys)
    xs, ys = xs[finite], ys[finite]
    if xs.size < 3:
        return np.nan

    # Every unordered pair (i, j) with i < j, built in one shot.
    first, second = np.triu_indices(xs.size, k=1)
    run = xs[second] - xs[first]
    rise = ys[second] - ys[first]

    usable = np.abs(run) > 1e-12
    if not usable.any():
        return np.nan
    return np.nanmedian(rise[usable] / run[usable])

def window_slope(df_lt, start_hour, end_hour):
    """Theil-Sen slope of ``VTEC_COL`` against local hour inside a time window.

    *df_lt* must provide the columns ``'lt_hour_float'`` and ``VTEC_COL``.
    Rows with ``start_hour <= lt_hour_float < end_hour`` are selected and rows
    with a missing value in either column are discarded. Returns NaN when
    fewer than three rows are left.
    """
    hours = df_lt["lt_hour_float"]
    inside = hours.ge(start_hour) & hours.lt(end_hour)
    samples = df_lt.loc[inside, ["lt_hour_float", VTEC_COL]].dropna()
    if len(samples) >= 3:
        return theil_sen(samples["lt_hour_float"].values, samples[VTEC_COL].values)
    return np.nan

# Month number -> ((morning_start, morning_end), (evening_start, evening_end)),
# every bound expressed as a local-time hour (float).
_SEASONAL_LT_WINDOWS = {
    1: ((7.0, 12.0), (16.0, 20.0)),
    2: ((6.0, 12.0), (18.0, 21.0)),
    3: ((6.0, 12.0), (18.0, 21.0)),
    4: ((6.0, 14.0), (19.0, 22.0)),
    5: ((6.0, 15.0), (19.0, 22.0)),
    6: ((6.0, 15.0), (18.5, 23.0)),
    7: ((6.0, 16.0), (19.0, 23.0)),
    8: ((6.0, 16.0), (19.0, 23.0)),
    9: ((6.0, 14.0), (18.5, 22.0)),
    10: ((7.0, 14.0), (16.0, 21.0)),
    11: ((7.0, 12.0), (16.0, 20.0)),
    12: ((7.0, 12.0), (16.0, 20.0)),
}

def seasonal_windows_for_month(month: int):
    """
    Return the local-time windows used to fit the diurnal slopes of a month.

    Windows (start hour, end hour), as applied by the code:

        month     morning      evening
        Nov-Jan   07.0-12.0    16.0-20.0
        Feb-Mar   06.0-12.0    18.0-21.0
        Apr       06.0-14.0    19.0-22.0
        May       06.0-15.0    19.0-22.0
        Jun       06.0-15.0    18.5-23.0
        Jul-Aug   06.0-16.0    19.0-23.0
        Sep       06.0-14.0    18.5-22.0
        Oct       07.0-14.0    16.0-21.0

    Args:
        month: calendar month number (1 = January ... 12 = December).

    Returns:
        ``(morning, evening)`` where each element is a ``(start, end)`` tuple
        of float hours. Any value outside 1..12 falls back to the October
        windows, as it always has.
    """
    return _SEASONAL_LT_WINDOWS.get(month, _SEASONAL_LT_WINDOWS[10])

def main():
    """Compute daily morning/evening VTEC slopes and their monthly statistics.

    Works on a copy of the pre-existing DataFrame ``S``: timestamps are parsed
    as UTC and converted to ``LOCAL_TZ``, one morning and one evening slope is
    fitted per local day inside the windows given by
    ``seasonal_windows_for_month``, and the daily slopes are summarised per
    month (median and interquartile range). The monthly table is written to
    ``OUT_CSV`` and a quick-look plot to ``OUT_PNG``.
    """
    # A DataFrame named S is assumed to exist already (e.g. S = pd.read_csv(...))
    frame = S.copy()

    # Parse the timestamps as UTC; values that cannot be parsed become NaT
    frame[TIME_COL] = pd.to_datetime(frame[TIME_COL], utc=True, errors="coerce")

    # Optional quiet-day filter, applied only when the flag column is present
    apply_quiet_filter = KEEP_ONLY_QUIET and QUIET_FLAG_COL in frame.columns
    if apply_quiet_filter:
        frame = frame[frame[QUIET_FLAG_COL] == True].copy()

    # Local-time helper columns
    local_ts = frame[TIME_COL].dt.tz_convert(pytz.timezone(LOCAL_TZ))
    frame["ts_lt"] = local_ts
    frame["date_lt"] = local_ts.dt.date
    frame["lt_hour_float"] = local_ts.dt.hour + local_ts.dt.minute/60.0
    frame["month"] = local_ts.dt.month
    frame["year"] = local_ts.dt.year

    # One record per local day, with windows that depend on the month
    records = []
    for day, group in frame.groupby("date_lt"):
        month = int(group["month"].iloc[0])
        morning, evening = seasonal_windows_for_month(month)

        slope_am = window_slope(group, *morning)
        slope_pm = window_slope(group, *evening)

        # Skip days on which neither window produced a usable slope
        if not (np.isfinite(slope_am) or np.isfinite(slope_pm)):
            continue

        records.append({
            "date_lt": day,
            "month": month,
            "year": group["year"].iloc[0],
            "morning_start_lt": morning[0],
            "morning_end_lt": morning[1],
            "evening_start_lt": evening[0],
            "evening_end_lt": evening[1],
            "slope_morning_tec_per_hour": slope_am,
            "slope_evening_tec_per_hour": slope_pm
        })

    daily = pd.DataFrame(records)
    if daily.empty:
        print("No daily slopes computed.")
        return

    # Monthly aggregates
    def iqr(values):
        values = np.asarray(values, float)
        upper = np.nanpercentile(values, 75)
        lower = np.nanpercentile(values, 25)
        return upper - lower

    monthly = (
        daily.groupby("month", as_index=False)
        .agg(
            n_days=("date_lt", "count"),
            m_med=("slope_morning_tec_per_hour", "median"),
            m_iqr=("slope_morning_tec_per_hour", iqr),
            e_med=("slope_evening_tec_per_hour", "median"),
            e_iqr=("slope_evening_tec_per_hour", iqr),
        )
        .sort_values("month")
    )

    monthly.to_csv(OUT_CSV, index=False)
    print(f"Saved {OUT_CSV}")

    # Quick plot
    months = monthly["month"].values
    fig, ax = plt.subplots(figsize=(9,4))
    ax.plot(months, monthly["m_med"], "-o", label="Morning slope (median)")
    ax.plot(months, monthly["e_med"], "-o", label="Evening slope (median)")
    ax.axhline(0, color="k", lw=1.2, ls="--")
    ax.set_yticks([-3,-2,-1, 0, 1, 2, 3])

    # Tick-label styling: y axis first, then x axis
    for axis in (ax.yaxis, ax.xaxis):
        for tick_label in axis.get_ticklabels():
            tick_label.set_fontsize(10)
            tick_label.set_color('black')
            tick_label.set_weight('bold')
    ax.set_xticks(np.arange(1,13))
    ax.set_xlim(0.5, 12.5)

    ax.grid(True, alpha=0.3)
    ax.legend()

    # Title and axis captions are placed as figure-level text
    fig.text(0.5, 0.89, "Quiet monthly diurnal slopes at OUCA (seasonal LT windows)",
             ha='center', fontsize=12, weight='bold')
    fig.text(0.5, 0.03, 'Month of year', ha='center', fontsize=12, weight='bold')
    fig.text(0.06, 0.5, 'dVTEC/dt [TECU/hour]', va='center', rotation='vertical',
             fontsize=12, weight='bold')

    fig.savefig(OUT_PNG, dpi=300)
    print(f"Saved {OUT_PNG}")

# Run the analysis only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


In [None]:
# ==========================================
# CELL — Two-row figure:
#   (1) Daily median EIA crest latitude near OUCA longitude
#   (2) Monthly diurnal slopes (morning / evening)
# ==========================================
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import calendar

# --------- CONFIG: adjust the paths if needed ---------
CSV_EIA   = Path(r"G:\My Drive\14-DATA\TEC_DATA\daily_eia_crest_latitude.csv")
CSV_SLOPE = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/monthly_diurnal_slopes_seasonal_windows.csv")

OUT_PNG_COMBO = Path(r"C:/Users/mkmoh/Dropbox/1-DATA/TEC_DATA/New_Data/EIAcrest_vs_monthly_slopes_MS.png")

OUCA_LAT = 31.206


# --------- PLOTTING HELPERS ---------
def _bold_tick_labels(ax, size):
    """Style the y- then x-axis tick labels of *ax*: bold, black, font size *size*."""
    for axis in (ax.yaxis, ax.xaxis):
        for tick_label in axis.get_ticklabels():
            tick_label.set_fontsize(size)
            tick_label.set_color('black')
            tick_label.set_weight('bold')


def _draw_crest_panel(ax, crest_df):
    """Top panel: daily median crest latitude with the OUCA latitude as reference."""
    ax.plot(crest_df["date_lt"], crest_df["crest_lat_med"], lw=1.2,
            label="Daily median crest latitude")
    ax.axhline(OUCA_LAT, ls="--", color="k", lw=1.0, label="OUCA latitude")

    ax.set_ylabel("Latitude [deg]", fontsize=12, fontweight="bold")
    ax.grid(True, alpha=0.3)

    # Legend position is left to matplotlib ("best") to limit overlap
    ax.legend(loc="best", frameon=True)

    _bold_tick_labels(ax, 12)

    ax.set_title("Northern EIA crest latitude near OUCA longitude",
                 fontsize=14, fontweight="bold", loc="center")


def _draw_slope_panel(ax, slopes_df, months, labels):
    """Bottom panel: monthly median morning and evening slopes."""
    ax.plot(months, slopes_df["m_med"], "-o", label="Morning slope (median)")
    ax.plot(months, slopes_df["e_med"], "-o", label="Evening slope (median)")
    ax.axhline(0, color="k", lw=1.2, ls="--")

    ax.set_yticks([-3, -2, -1, 0, 1, 2, 3])
    ax.set_xticks(months)
    ax.set_xticklabels(labels)

    ax.set_xlim(0.5, 12.5)

    _bold_tick_labels(ax, 12)

    ax.grid(True, alpha=0.3)

    # Legend position is left to matplotlib ("best")
    ax.legend(loc="best", frameon=True)

    # Labels specific to the bottom panel
    ax.set_ylabel('dVTEC/dt [TECU/hour]', fontsize=12, fontweight='bold')

    ax.set_title("Quiet monthly diurnal slopes at OUCA (seasonal LT windows)",
                 fontsize=14, fontweight="bold", loc="center")


# --------- DATA LOADING ---------
# 1) EIA crest (daily)
crest = pd.read_csv(CSV_EIA)

if "date_lt" not in crest:
    raise ValueError("CSV_EIA doit contenir une colonne 'date_lt'.")

# Unparseable dates become NaT and are dropped with the rows lacking a crest value
crest = (
    crest.assign(date_lt=pd.to_datetime(crest["date_lt"], errors="coerce"))
         .dropna(subset=["date_lt", "crest_lat_med"])
)

# 2) Monthly slopes
monthly = pd.read_csv(CSV_SLOPE)
for col in ("month", "m_med", "e_med"):
    if col not in monthly:
        raise ValueError(f"CSV_SLOPE doit contenir la colonne '{col}'.")

monthly = monthly.sort_values("month")
x = monthly["month"].to_numpy()
month_labels = [calendar.month_abbr[m] for m in x]  # Jan, Feb, ...

# --------- TWO-PANEL FIGURE ---------
fig, (ax_top, ax_bot) = plt.subplots(nrows=2, ncols=1, figsize=(12, 10), sharex=False)

_draw_crest_panel(ax_top, crest)
_draw_slope_panel(ax_bot, monthly, x, month_labels)

# Caption placed at the bottom of the figure
fig.text(0.5, 0.03, 'Month of year', ha='center', fontsize=12, weight='bold')

#fig.tight_layout(rect=[0.06, 0.06, 0.98, 0.93])

fig.savefig(OUT_PNG_COMBO, dpi=300)
plt.close(fig)
print(f"Saved combined figure -> {OUT_PNG_COMBO}")
