# GBT-PWV01c : Explore Features for PWV quality selection with HistGradientBoosting
- apply to all targets at the same time
- first use of SHAP interpretation

- author Sylvie Dagoret-Campagne
- creation date 2026-01-30 : version run2026_v01 use HistGradientBoosting
- last update : 2026-01-30 : must keep early stopping and do the training correctly
- affiliation : IJCLab
- Kernel @usdf **w_2026_02**
- Home emac : base (conda)
- laptop : conda_py313

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
# must install the mysitcom package by doing at top level "pip install --user -e . "
from mysitcom.auxtel.pwv import scatter_datetime
from mysitcom.auxtel.pwv import strip_datetime
from mysitcom.auxtel.pwv import bar_counts_by_night
from mysitcom.auxtel.pwv import plot_dccd_chi2_vs_time
from mysitcom.auxtel.pwv import plot_dccd_chi2_vs_time_by_filter
from mysitcom.auxtel.pwv import stripplot_target_vs_time
from mysitcom.auxtel.pwv import plot_dccd_chi2_vs_time_by_target_filter
from mysitcom.auxtel.pwv import plot_dccd_chi2_histo_by_target_filter
from mysitcom.auxtel.pwv import plot_dccd_chi2_vs_time_by_target_filter_colorsedtype
from mysitcom.auxtel.pwv import plot_dccd_chi2_histo_by_target_filter_colorsedtype
from mysitcom.auxtel.pwv import summarize_dccd_chi2
from mysitcom.auxtel.pwv import plot_atmparam_vs_time, plot_atmparam_diff_vs_time
from mysitcom.auxtel.pwv import plot_atmparam_hist_per_filter, plot_atmparam_diff_hist_per_filter

from mysitcom.auxtel.pwv import GetNightMidnightsDict,GetNightBoundariesDict

In [None]:
from mysitcom.auxtel.gradientboosttree import normalize_column_data_bytarget_byfilter

In [None]:
# NOTE(review): normalize_column_data_bytarget_byfilter is also imported from
# mysitcom.auxtel.gradientboosttree in the previous cell; this import rebinds the
# name to the mysitcom.auxtel.pwv version, which is the one in effect from here on.
# Confirm which implementation is intended and drop the other import.
from mysitcom.auxtel.pwv import normalize_column_data_bytarget_byfilter,shiftaverage_column_data_byfilter

In [None]:
import os

In [None]:
# Output directory, file-name prefix and file type for the figures of this notebook
pathfigs = "figs_GBT-PWV01c"
prefix = "gbt-pwv01c"
# exist_ok=True makes the creation idempotent and removes the check-then-create
# race of os.path.exists() followed by os.makedirs()
os.makedirs(pathfigs, exist_ok=True)
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype

import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (16,8)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'
plt.rcParams["legend.fontsize"] = "xx-large"

import scipy
from scipy.optimize import curve_fit,least_squares

from pprint import pprint

# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import shap

In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from GBT_PWV00_parameters import *

In [None]:
DumpConfig()

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
pd.__version__

### Configuration

In [None]:
def convertNumToDatestr(num):
    """Convert a date packed as the integer YYYYMMDD into a pandas timestamp.

    Parameters
    ----------
    num : int
        Date encoded as ``year * 10_000 + month * 100 + day`` (e.g. 20230930).

    Returns
    -------
    pandas.Timestamp
        ``pd.to_datetime`` applied to the zero-padded ``"YYYY-MM-DD"`` string
        (despite the function name, a timestamp is returned, not a string).
    """
    # Peel the fields off from the most significant (year) to the least (day)
    year, month_and_day = divmod(num, 10_000)
    month, day = divmod(month_and_day, 100)

    # Zero-pad every field to its fixed width, then join as an ISO date
    fields = ((year, 4), (month, 2), (day, 2))
    datestr = "-".join(str(value).zfill(width) for value, width in fields)
    return pd.to_datetime(datestr)

In [None]:
PWVMIN = 0.
PWVMAX = 20.

In [None]:
FLAG_WITHCOLLIMATOR = False
DATE_WITHCOLLIMATOR = 20230930
# Timestamp built from the integer date above (parsed from "YYYY-MM-DD", so it carries no timezone) ...
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR)
# ... immediately overwritten by a hard-coded timestamp with an explicit +0000 offset.
# NOTE(review): presumably needed to compare against the UTC-aware "Time" column; the date is
# duplicated here and will not follow a change of DATE_WITHCOLLIMATOR — confirm and simplify.
datetime_WITHCOLLIMATOR = pd.to_datetime("2023-09-30 00:00:00.0+0000")
datetime_WITHCOLLIMATOR

## Initialisation

### Read the file
- `atmfilename` is defined in `GBT_PWV00_parameters.py`

In [None]:
the_suptitle = butlerusercollectiondict[version_run] 

In [None]:
# Load the input table into df_spec; the reader is selected from the file name.
inputfilename = atmfilename.split("/")[-1]

if "parquet" in inputfilename:
    df_spec = pd.read_parquet(atmfilename)
elif "npy" in inputfilename:
    # allow_pickle=True can execute arbitrary code on load: only use with trusted files
    specdata = np.load(atmfilename,allow_pickle=True)
    df_spec = pd.DataFrame(specdata)

    # Rename the columns of the npy layout to the names used in the rest of the notebook.
    # NOTE(review): the _x / _y suffixes look like pandas merge suffixes; they are mapped
    # to the un-suffixed and the "_rum" quantities respectively — confirm upstream.
    df_spec["D_CCD [mm]"] = df_spec["D2CCD"]
    df_spec["PWV [mm]"] = df_spec["PWV [mm]_x"]
    df_spec["PWV [mm]_rum"] = df_spec["PWV [mm]_y"]
    df_spec["PWV [mm]_err"] = df_spec["PWV [mm]_err_x"]
    df_spec["PWV [mm]_err_rum"] = df_spec["PWV [mm]_err_y"]

    cols = [
    "PWV [mm]",
    "PWV [mm]_rum",
    "PWV [mm]_err",
    "PWV [mm]_err_rum",
    ]

    # Keep only rows where both PWV estimates and both errors are defined
    df_spec = df_spec.dropna(subset=cols)

else:
    # Raising a bare string is a TypeError in Python 3, and the message was missing
    # its f-prefix: raise a real exception with the interpolated file name instead.
    raise ValueError(f"bad path of filename {inputfilename}")
    

In [None]:
print(" | ".join(df_spec.columns)) 

In [None]:
#df_spec.dtypes.to_frame('Type de donnée')

In [None]:
# add time for plotting
#df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"],utc=True)

In [None]:
# nightObs = id // 100_000 (the complement of seq_num = id % 100_000 computed just below).
# Vectorised integer division replaces the row-wise apply(axis=1): same values without a
# Python-level loop over rows, and it still produces a column when df_spec is empty.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
df_spec["seq_num"]  = df_spec["id"] % 100_000

## Select only empty and OG550 filters

In [None]:
df_spec["FILTER"].unique()

In [None]:
if FLAG_PWVFILTERS: 
    df_spec = df_spec[df_spec["FILTER"].isin(PWV_FILTER_LIST) ]

## Special study on Star Color (Spectral type)
- Load magnitudes from external file
- the magnitudes have been computed in another notebook in ../2025-10-29-TOOLS



In [None]:
targets_mag_files = "../2025-10-29-TOOLS/data/targets_magnitudes.csv"
df_targets_mag = pd.read_csv(targets_mag_files,index_col=0)      
df_targets_mag = df_targets_mag.sort_values(by="y")

### palette with SED type

In [None]:
# Targets sorted by the B_V column, used to order the spectral types
df_col = df_targets_mag.sort_values(by="B_V")

SpT = df_col["Sp_T"].values
# dict.fromkeys removes duplicates while keeping the order of first appearance
unique_types = list(dict.fromkeys(SpT))
N_types = len(unique_types)

# Map every spectral type to an integer index
type_to_idx = {sp_label: rank for rank, sp_label in enumerate(unique_types)}
idx = np.array([type_to_idx[sp_label] for sp_label in SpT])

# Colormap and normalisation: one colour bin centred on each integer index
cmap = mpl.cm.jet
norm = mpl.colors.Normalize(vmin=-0.5, vmax=N_types - 0.5)

# Horizontal colorbar labelled with the spectral types
fig, ax = plt.subplots(figsize=(14, 0.4), layout="constrained")
sp_mappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
cbar = fig.colorbar(
    sp_mappable,
    cax=ax,
    orientation="horizontal",
    ticks=range(N_types),
)
cbar.ax.set_xticklabels(unique_types, rotation=45, ha="right")
cbar.set_label("Spectral Type", fontsize=12,labelpad=10)

plt.show()

In [None]:
df_targets_mag.head() 

### Target ordered by colors in the input file

In [None]:
sorted_targets =  list(df_targets_mag.index) 
print(sorted_targets)

### Target ordered by colors in our data  file

In [None]:
# Targets present in our data, kept in the order of sorted_targets
# (the index of df_targets_mag, which was sorted on its "y" column).
# The set is built once: the original re-evaluated df_spec["TARGET"].unique()
# for every candidate target inside the comprehension.
targets_in_data = set(df_spec["TARGET"].unique())
order_selected_targets = [t for t in sorted_targets if t in targets_in_data]
print(order_selected_targets)

### build a palette for colors

In [None]:
# --- Custom palette, consistent with the jet colormap of the spectral-type colorbar ---
# Each target gets the colour of its spectral type (column "Sp_T" of df_targets_mag).
target_to_color = {}
for target in order_selected_targets:
    sp_type = df_targets_mag.loc[target, "Sp_T"]
    rgba = cmap(norm(type_to_idx[sp_type]))

    # Store plain Python floats rather than numpy scalars
    target_to_color[target] = tuple(map(float, rgba))

# Plot PWV vs time before applying cuts

### Calculate difference and ratio for some params

In [None]:
# Combined uncertainty of the two PWV estimates: quadrature sum of the two errors
denom = np.sqrt(df_spec["PWV [mm]_err"]**2 + df_spec["PWV [mm]_err_rum"]**2)

# Difference in units of the combined uncertainty; NaN where that uncertainty
# is zero or not finite, to avoid dividing by it.
df_spec["diff_PWV_norm"] = np.where(
    np.isfinite(denom) & (denom > 0),
    (df_spec["PWV [mm]"] - df_spec["PWV [mm]_rum"]) / denom,
    np.nan
)

df_spec["diff_PWV"] =  (df_spec["PWV [mm]"] - df_spec["PWV [mm]_rum"])
# Error on a difference of two measurements: the variances ADD (assuming uncorrelated errors).
# The original subtracted them, which gives NaN whenever err_rum > err and
# disagrees with the denominator used for diff_PWV_norm above.
df_spec["diff_PWV_err"] = denom

## Shift wrt average

In [None]:
df_spec = shiftaverage_column_data_byfilter(df_spec,"nightObs","FILTER","PWV [mm]")

In [None]:
df_spec = shiftaverage_column_data_byfilter(df_spec,"nightObs","FILTER","PWV [mm]_rum")

## Calculate midnights and night boundaries

In [None]:
# One-week margin (7*24*60 minutes) added on each side of the observed time range
DT = pd.Timedelta(days=7)
TMIN, TMAX = df_spec["Time"].min() - DT, df_spec["Time"].max() + DT

In [None]:
# get night boundaries
dn = GetNightBoundariesDict(df_spec)
# get midnights
dnidnights = GetNightMidnightsDict(df_spec)

### Plot PWV SHIFT in spectrogram vs Time before quality cuts

In [None]:
fig,ax = plot_atmparam_vs_time(
    df_spec,
    time_col= "Time",
    filter_col = "FILTER",
    param_col = "PWV [mm]_shift",
    param_err_col = "PWV [mm]_err",
    title_param = "Shift PWV rel average vs time (spectrogram)no cut qual. cut)",
    
    # seuils / bornes
    param_min_fig=-PWVMAX/5.,
    param_max_fig=PWVMAX/5.,
    param_min_cut=None,
    param_max_cut=None,
 
    # titres
    suptitle= the_suptitle,

    # axes externes
    axs=None,
    figsize=(18, 6),
)

if version_run not in ["run_v12"]:
    for key, tt in dn.items():
        ax.axvspan(tt[0],tt[1], color='blue', alpha=0.05,lw=0.5)

for key, midn in dnidnights.items():
    ax.axvline( midn ,color="purple",ls=":",alpha=0.5,lw=0.5)



In [None]:
fig,ax = plot_atmparam_hist_per_filter(
    df_spec,
    filter_col="FILTER",
    param_col = "PWV [mm]_shift",
    param_range = (-PWVMAX/5.,PWVMAX/5.),

    # histogram control
    bins=100,
    density=True,
    hist_alpha=0.4,

    # x-axis limits
    param_min_fig=-PWVMAX/5,
    param_max_fig=PWVMAX/5.,

    title_param="Shift PWV ref average vs time (spectrogram) no cut qual. cut)",
    # titres
    suptitle= the_suptitle
)
plt.show()



In [None]:
fig,ax = plot_atmparam_vs_time(
    df_spec,
    time_col= "Time",
    filter_col = "FILTER",
    param_col = "PWV [mm]_rum_shift",
    param_err_col = "PWV [mm]_err_rum",
    title_param = "Shift PWV rel average vs time (spectrum) no cut qual. cut)",
    
    # seuils / bornes
    param_min_fig=-PWVMAX/5.,
    param_max_fig=PWVMAX/5.,
    param_min_cut=None,
    param_max_cut=None,
 
    # titres
    suptitle= the_suptitle,

    # axes externes
    axs=None,
    figsize=(18, 6),
)

if version_run not in ["run_v12"]:
    for key, tt in dn.items():
        ax.axvspan(tt[0],tt[1], color='blue', alpha=0.05,lw=0.5)

for key, midn in dnidnights.items():
    ax.axvline( midn ,color="purple",ls=":",alpha=0.5,lw=0.5)


In [None]:
fig,ax = plot_atmparam_hist_per_filter(
    df_spec,
    filter_col="FILTER",
    param_col = "PWV [mm]_rum_shift",
    param_range = (-PWVMAX/5.,PWVMAX/5.),

    # histogram control
    bins=100,
    density=True,
    hist_alpha=0.4,

    # x-axis limits
    param_min_fig=-PWVMAX/5,
    param_max_fig=PWVMAX/5.,

    title_param="Shift PWV ref average vs time (spectrum) no cut qual. cut)",
    # titres
    suptitle= the_suptitle
)
plt.show()


# Regression fit 

## 1) Preprocess the data

In [None]:
# Apply the same normalisation helper to three chi2-like columns, grouping on TARGET and FILTER.
# NOTE(review): the helper is defined outside this file. The column names selected further down
# (CHI2_FIT_norm, chi2_norm, chi2_rum_norm) suggest it adds "<feature_col>_<ext>" to df_spec — confirm.
# df1, df2, df3 hold the second return value of each call; its content is not visible from here.
df_spec, df1 = normalize_column_data_bytarget_byfilter(df_spec,target_col="TARGET",filter_col="FILTER",feature_col= "CHI2_FIT",ext="norm")
df_spec, df2 = normalize_column_data_bytarget_byfilter(df_spec,target_col="TARGET",filter_col="FILTER",feature_col= "chi2",ext="norm")
df_spec, df3 = normalize_column_data_bytarget_byfilter(df_spec,target_col="TARGET",filter_col="FILTER",feature_col= "chi2_rum",ext="norm")

## 3️⃣ Define the quality label (Y)
- You already suggested the right idea.
- Recommended target variable
- Use continuous first:

In [None]:
df_spec["abs_delta_PWV"] = np.abs(df_spec["PWV [mm]"] - df_spec["PWV [mm]_rum"])

In [None]:
#q = df_spec["abs_delta_PWV"].quantile(0.95)   # or 0.95
#df_spec["bad"] = df_spec["abs_delta_PWV"] > q

## 4️⃣ Feature hygiene (critical step)
- ❌ Remove forbidden features
- ❌ Remove identifiers
- ✔️ Keep physically meaningful predictors

- Remove unnormalised chi2
- Remove category columns like target, filter

In [None]:
columns_keep = ["id","Time","TARGET","ROTANGLE","D2CCD", "DOMEAZ","AZ","EL","WINDSPD", "WINDDIR","PARANGLE","TARGETX","TARGETY","CHI2_FIT_norm","PIXSHIFT","PSF_REG","TRACE_R", 
"A2_FIT", "AM_FIT", "MEANFWHM", "AIRMASS", "OUTTEMP", "OUTPRESS", "OUTHUM","FILTER", "CAM_ROT","chi2_norm","A1", "A2", "A3", "PWV [mm]" ,"PWV [mm]_err","B",
"A_star","D_CCD [mm]","shift_x [pix]","shift_y [pix]", "angle [deg]", "P [hPa]","gamma_0_1", "gamma_1_1","gamma_2_1", "alpha_0_1","alpha_1_1","saturation_0_1",
"gamma_0_2","gamma_1_2","gamma_2_2", "alpha_0_2", "alpha_1_2", "alpha_2_2", "saturation_0_2", "chi2_rum_norm", "A1_rum", "A2_rum",
"PWV [mm]_rum","PWV [mm]_err_rum" ,"reso [nm]", "D_CCD [mm]_rum", "alpha_pix [pix]", "mount_motion_image_degradation_x",
"mount_motion_image_degradation_az_x", "mount_motion_image_degradation_el_x", "mount_jitter_rms_x","mount_jitter_rms_az_x", "mount_jitter_rms_el_x", "mount_jitter_rms_rot_x",
"dimm_seeing_x", "focus_z_x" ,"mount_motion_image_degradation_y", "mount_motion_image_degradation_az_y","diff_PWV","diff_PWV_err","abs_delta_PWV","PWV [mm]_shift","PWV [mm]_rum_shift"]

#### First keep for empty filter

In [None]:
df_spec = df_spec[columns_keep]

In [None]:
filter_sel = "empty"

In [None]:
df_spec_sel = df_spec[df_spec["FILTER"] == filter_sel]
N = len(df_spec_sel)
print(f"N = {N}")

In [None]:
# Regression target: the "PWV [mm]_shift" column of the selected filter.
# Alternative target kept for reference:
#df_Y = df_spec_sel["abs_delta_PWV"]
df_Y = df_spec_sel["PWV [mm]_shift"]
# Same data as a plain numpy array (used for the histogram below)
Y = df_Y.values

NY = len(Y)
print(f"NY = {NY}")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(6,4))
ax.hist(Y,bins=50)
ax.set_yscale("log")
ax.set_xlabel("Y")
ax.set_ylabel("N")
plt.show()

In [None]:
columns_todropforX =  ["id","Time","TARGET","FILTER",
                        "PWV [mm]","PWV [mm]_err","PWV [mm]_rum","PWV [mm]_err_rum",
                        "diff_PWV","diff_PWV_err","abs_delta_PWV",
                        "PWV [mm]_shift","PWV [mm]_rum_shift",
                        "OUTHUM",
                        "mount_motion_image_degradation_x",     
                        "mount_motion_image_degradation_az_x",
                        "mount_motion_image_degradation_el_x",    
                        "mount_jitter_rms_x",                     
                        "mount_jitter_rms_az_x",                  
                        "mount_jitter_rms_el_x",                  
                        "mount_jitter_rms_rot_x",                 
                        "dimm_seeing_x",                          
                        "focus_z_x",                                 
                        "mount_motion_image_degradation_y",       
                        "mount_motion_image_degradation_az_y"]    

In [None]:
df_X = df_spec_sel.drop(columns = columns_todropforX) 

In [None]:
# Check the number of nan
nan_per_col = df_X.isna().sum()
print(nan_per_col)

In [None]:
df_X.dtypes.value_counts()

In [None]:
# print object columns
obj_cols = df_X.select_dtypes(include="object").columns
print(obj_cols)

##### lines (samples)

In [None]:
len(df_X)

##### Columns (features)

In [None]:
len(df_X.columns)

# 2) Fit a model HistGradientBoostingRegressor with `early_stopping  = True` --> model1

In [None]:
HGB1_MAX_DEPTH = 20
HGB1_LEARNING_RATE = 0.05
HGB1_MAX_ITER = 300
HGB1_MAX_LEAF_NODES = 127
HGB1_VALIDATION_FRACTION = 0.3
HGB1_NITER_NOCHANGE = 10
HGB1_MIN_SAMPLE_LEAF = 20
HGB1_L2_REGULARISATION = 0.0
suptitle1 = f"HistGradientBoostingRegressor:: max_dep = {HGB1_MAX_DEPTH}, lr={HGB1_LEARNING_RATE},val_frac={HGB1_VALIDATION_FRACTION},early stop"

- Note : learning_rate × max_iter ≈ 5 – 20

In [None]:
HGB1_LEARNING_RATE * HGB1_MAX_ITER 

In [None]:
model1 = HistGradientBoostingRegressor(
    max_depth       = HGB1_MAX_DEPTH,
    max_leaf_nodes  = HGB1_MAX_LEAF_NODES,
    min_samples_leaf = HGB1_MIN_SAMPLE_LEAF,
    learning_rate   = HGB1_LEARNING_RATE,
    max_iter        = HGB1_MAX_ITER,
    early_stopping  = True,
    validation_fraction = HGB1_VALIDATION_FRACTION,
    n_iter_no_change    = HGB1_NITER_NOCHANGE,
    l2_regularization = HGB1_L2_REGULARISATION, 
    random_state=42
)

model1.fit(df_X, df_Y)

In [None]:
# Number of boosting iterations actually run (early stopping can end the fit before max_iter)
print(f"Optimal Nb iterations : {model1.n_iter_}")

In [None]:
# NOTE(review): with the default scoring="loss", scikit-learn appears to store these scores as
# the NEGATED loss (higher is better) — confirm in the docs before reading them as a loss.
print(f"model.train_score_      = {model1.train_score_[-1]}")     # last recorded score on the training data
print(f"model.validation_score_ = {model1.validation_score_[-1]}")  # last recorded score on the validation split

#### Iteration on `max_depth`

In [None]:
# Sweep max_depth, all other hyper-parameters held at the HGB1_* values used for model1
depths_to_test = [5, 10, 20, 50, None] # None = unlimited depth
results = []

for depth in depths_to_test:
    model = HistGradientBoostingRegressor(
        max_depth=depth,
        max_leaf_nodes  = HGB1_MAX_LEAF_NODES,
        min_samples_leaf = HGB1_MIN_SAMPLE_LEAF,
        learning_rate=HGB1_LEARNING_RATE,
        max_iter=HGB1_MAX_ITER,
        early_stopping=True,
        validation_fraction=HGB1_VALIDATION_FRACTION,
        n_iter_no_change=HGB1_NITER_NOCHANGE,
        # passed explicitly so the sweep keeps matching model1 if the constant is changed
        l2_regularization=HGB1_L2_REGULARISATION,
        random_state=42
    )

    model.fit(df_X, df_Y)

    # validation_score_ / train_score_ hold one score per iteration. [-1] is the score at the
    # iteration where training stopped; with early stopping this is NOT necessarily the best
    # score seen. The "best_*" keys are kept unchanged for the output table.
    best_val_score = model.validation_score_[-1]
    best_train_score = model.train_score_[-1]
    results.append({
        'max_depth': depth if depth is not None else "Unlimited",
        'best_train_score': best_train_score,
        'best_val_score': best_val_score,
        'iterations_needed': model.n_iter_
    })

# Display the results
df_results = pd.DataFrame(results)
print(df_results)

#### Iteration on `max_leaf_nodes`

In [None]:
# Sweep max_leaf_nodes, all other hyper-parameters held at the HGB1_* values used for model1
leaf_nodes_to_test = [15, 31, 63, 127]
results = []

for nodes in leaf_nodes_to_test:
    model = HistGradientBoostingRegressor(
        max_depth = HGB1_MAX_DEPTH, # depth fixed to the value retained from the depth sweep
        max_leaf_nodes=nodes,
        min_samples_leaf = HGB1_MIN_SAMPLE_LEAF,
        learning_rate = HGB1_LEARNING_RATE,
        max_iter = HGB1_MAX_ITER,
        early_stopping=True,
        validation_fraction = HGB1_VALIDATION_FRACTION,
        n_iter_no_change = HGB1_NITER_NOCHANGE,
        # passed explicitly so the sweep keeps matching model1 if the constant is changed
        l2_regularization = HGB1_L2_REGULARISATION,
        random_state=42
    )
    model.fit(df_X, df_Y)
    # Score at the iteration where training stopped (last entry of validation_score_)
    results.append({'nodes': nodes, 'score': model.validation_score_[-1]})

In [None]:
print(pd.DataFrame(results))

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive 5-fold cross-validated search over the two tree-size parameters.
# max_leaf_nodes matters a lot and must be tuned together with max_depth;
# 127 is left out of this grid (the full list would be [15, 31, 63, 127]).
param_grid = dict(
    max_depth=[10, 20],
    max_leaf_nodes=[15, 31, 63],
)

grid = GridSearchCV(estimator=model1, param_grid=param_grid, cv=5, n_jobs=-1)
grid.fit(df_X, df_Y)

In [None]:
print(f"GridSearchCV :: best max_depth : {grid.best_params_['max_depth']}")
print(f"GridSearchCV :: best max_leaf_nodes : {grid.best_params_['max_leaf_nodes']}")

In [None]:
# HalvingGridSearchCV is still experimental: this import is required to enable it
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import HalvingGridSearchCV
param_grid = {
    'max_depth': [10, 20],
    'max_leaf_nodes': [15, 31, 63, 127] # very important: tune together with max_depth
}
# Successive halving evaluates all candidates on small subsamples and keeps only the best ones
# as the sample size grows; intended to be much faster than a full grid search for large
# boosting models. With resource='n_samples' the subsamples are drawn at random, so
# random_state is fixed to make the search reproducible (same seed as elsewhere in the notebook).
grid = HalvingGridSearchCV(model1, param_grid, cv=5, factor=2, resource='n_samples', random_state=42)
grid.fit(df_X, df_Y)

In [None]:
print(f"HalvingGridSearchCV :: best max_depth : {grid.best_params_['max_depth']}")
print(f"HalvingGridSearchCV :: Meilleur max_leaf_nodes : {grid.best_params_['max_leaf_nodes']}")

#### Final model

In [None]:
model1 = HistGradientBoostingRegressor(
    max_depth       = HGB1_MAX_DEPTH,
    max_leaf_nodes  = HGB1_MAX_LEAF_NODES,
    min_samples_leaf = HGB1_MIN_SAMPLE_LEAF,
    learning_rate   = HGB1_LEARNING_RATE,
    max_iter        = HGB1_MAX_ITER,
    early_stopping  = True,
    validation_fraction = HGB1_VALIDATION_FRACTION,
    n_iter_no_change    = HGB1_NITER_NOCHANGE,
    l2_regularization = HGB1_L2_REGULARISATION, 
    random_state=42
)

model1.fit(df_X, df_Y)

In [None]:
# Number of boosting iterations actually run, and the last recorded scores.
# NOTE(review): with the default scoring="loss", scikit-learn appears to store these scores as
# the NEGATED loss (higher is better) — confirm in the docs before reading them as a loss.
print(f"Optimal Nb iterations   = {model1.n_iter_}")
print(f"model.train_score_      = {model1.train_score_[-1]}")     # last recorded score on the training data
print(f"model.validation_score_ = {model1.validation_score_[-1]}")  # last recorded score on the validation split

### compute mean squared error after fitting

In [None]:
# Mean squared error between df_Y and the staged predictions on df_X, one value per boosting stage.
# df_X/df_Y are the same data the model was fitted on (no separate test set is used here).
mse = []
# NOTE: y_pred stays bound to the last stage's prediction after the loop; later cells may
# read it, so keep this an explicit loop rather than a comprehension.
for y_pred in model1.staged_predict(df_X):
    mse.append(mean_squared_error(df_Y, y_pred))

In [None]:
# Predicted vs true target on the fitted sample.
# The prediction is computed here explicitly instead of relying on the y_pred loop variable
# left over from the staged-MSE cell, so this cell gives the same plot whatever the
# execution order (the full-model prediction equals the last staged prediction).
y_pred = model1.predict(df_X)

fig,ax = plt.subplots(1,1,figsize=(6,5),constrained_layout=True)
ax.scatter(df_Y,y_pred,alpha=0.1)
ax.set_xlabel("$Y_{true}$")
ax.set_ylabel("$Y_{pred}$")
# y = x reference line: perfect predictions fall on it
ax.plot([-20,20],[-20,20],lw=1,color="k")
ax.set_aspect("equal")
ax.grid()
ax.set_title(suptitle1)

### Plot fit control

In [None]:
fig,axs = plt.subplots(1,2,figsize=(14,5),constrained_layout=True)
ax1,ax2 = axs
ax1.plot(mse,color='b')
ax1.set_xlabel("Iteration")
ax1.set_ylabel("MSE")
ax1.set_title("MSE")
ax1.grid()

ax2.plot(model1.train_score_,color='b',label="model.train_score_"  )
ax2.plot(model1.validation_score_ ,color="r",label="model.validation_score_ "  )
ax2.set_xlabel("Iteration")
ax2.set_ylabel("score")
ax2.set_title("Score")
ax2.grid()
ax2.legend()
plt.suptitle(suptitle1)
plt.show()


In [None]:
# Number of boosting iterations actually run, and the last recorded scores (same printout as after the fit).
# NOTE(review): with the default scoring="loss", scikit-learn appears to store these scores as
# the NEGATED loss (higher is better) — confirm in the docs before reading them as a loss.
print(f"Optimal Nb iterations   = {model1.n_iter_}")
print(f"model.train_score_      = {model1.train_score_[-1]}")     # last recorded score on the training data
print(f"model.validation_score_ = {model1.validation_score_[-1]}")  # last recorded score on the validation split

## 3) Shap Interpretation for model 1 : HistGradientBoostingRegressor with early stop
https://shap.readthedocs.io/en/latest/

In [None]:
shap.initjs()

### Shap Special TreeExplainer

In [None]:
pred = model1.predict(df_X)

tree_explainer = shap.TreeExplainer(model1)
tree_explanation = tree_explainer(df_X)

tree_shap_values = tree_explanation.values
# make sure the SHAP values add up to marginal predictions
np.abs(tree_shap_values.sum(axis=1) + tree_explanation.base_values - pred).max()

In [None]:
shap.plots.beeswarm(tree_explanation)

#### Standard Shap Explainer

In [None]:
explainer = shap.Explainer(model1, df_X)
shap_values = explainer(df_X,check_additivity = False)

In [None]:
# visualize the first prediction's explanation
shap.plots.force(shap_values[0, ...])

In [None]:
# visualize the second prediction's explanation (row index 1)
shap.plots.force(shap_values[1, ...])

In [None]:
# visualize the third prediction's explanation (row index 2)
shap.plots.force(shap_values[2, ...])

### shap.plots.force

In [None]:
# visualize the training set predictions
shap.plots.force(shap_values)

## shap.summary_plot

In [None]:
fig,ax = plt.subplots(1,1,figsize=(8,16))
shap.summary_plot(shap_values, df_X, plot_type="bar",max_display=40)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(8,16))
shap.plots.bar(shap_values,max_display=40)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(8,16))
shap.plots.beeswarm(shap_values,max_display=30)
plt.show()

## Study Feature by Feature

In [None]:
feature_name = "alpha_0_2"
feature_name_2 = "alpha_0_1"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "alpha_0_1"
feature_name_2 = "alpha_0_2"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "alpha_pix [pix]"
feature_name_2 = "angle [deg]"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "angle [deg]"
feature_name_2 = "alpha_pix [pix]"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(-1.5,1.5)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
# create a SHAP dependence plot to show the effect of a single feature across the whole dataset
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(-1.5,1.5)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "gamma_2_1"
feature_name_2 = "gamma_1_1"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(-20.,50.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(-20,50.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "gamma_1_1"
feature_name_2 = "gamma_2_1"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(-15.,15.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(-15,15.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "D_CCD [mm]"
feature_name_2 = "D_CCD [mm]_rum"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "D_CCD [mm]_rum"
feature_name_2 = "D_CCD [mm]"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "shift_y [pix]"
feature_name_2 = "shift_x [pix]"

In [None]:
# Banner for this feature study; now also names the secondary (interaction) feature,
# like the banners of all the other feature pairs.
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "reso [nm]"
feature_name_2 = "MEANFWHM"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0,21.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,21.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "MEANFWHM"
feature_name_2 = "reso [nm]"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0,25.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,25.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "shift_x [pix]"
feature_name_2 = "shift_y [pix]"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(-3.,3)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(-3.,3.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "OUTPRESS"
feature_name_2 = "OUTTEMP"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "OUTTEMP"
feature_name_2 = "OUTPRESS"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "P [hPa]"
feature_name_2 = "OUTPRESS"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,4000.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,4000.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "chi2_norm"
feature_name_2 = "chi2_rum_norm"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,4.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,4.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "chi2_rum_norm"
feature_name_2 = "chi2_norm"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,4.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,4.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "CHI2_FIT_norm"
feature_name_2 = "chi2_norm"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,4.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
ax.set_xlim(0.,4.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "WINDSPD"
feature_name_2 = "WINDDIR"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "WINDDIR"
feature_name_2 = "WINDSPD"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "CAM_ROT"
feature_name_2 = "AZ"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "AZ"
feature_name_2 = "CAM_ROT"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "DOMEAZ"
feature_name_2 = "AZ"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "saturation_0_1"
feature_name_2 = "saturation_0_2"

In [None]:
print(f"========== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

In [None]:
feature_name = "saturation_0_2"
feature_name_2 = "saturation_0_1"

In [None]:
print(f"=============== main FEATURE {feature_name} (secondary feature {feature_name_2}) ======================")

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.plots.scatter(shap_values[:,feature_name ],ax=ax)

In [None]:
fig,ax = plt.subplots(1,1,figsize=(10,5))
ax.grid()
#ax.set_xlim(185.,190.)
shap.dependence_plot(feature_name , shap_values.values, df_X, interaction_index=feature_name_2,ax=ax)

## Apply cuts

In [None]:
y_pred = model1.predict(df_X)

In [None]:
# Définition des quantiles (ex: 10% et 90%)
Alpha_r = 0.025
q_low = np.quantile(y_pred, Alpha_r)
q_high = np.quantile(y_pred, 1-Alpha_r)

# Sélection des données (Masque booléen)
mask = (y_pred >= q_low) & (y_pred <= q_high)
y_pred_filtered = y_pred[mask]

print(f"Intervalle conservé : [{q_low:.2f}, {q_high:.2f}]")
print(f"Nombre de points exclus : {len(y_pred) - len(y_pred_filtered)}")

In [None]:
keep = mask

In [None]:
n_keep = keep.sum()
n_reject = (~keep).sum()

print(f"n_keep = {n_keep}, n_reject = {n_reject}")

In [None]:
df_spec_sel["keep"] = keep

In [None]:
n_keep = df_spec_sel["keep"].value_counts().get(True, 0)
n_reject = df_spec_sel["keep"].value_counts().get(False, 0)
print(f"n_keep = {n_keep}, n_reject = {n_reject}")

In [None]:
df_X_keep = df_X[keep]
df_Y_keep = df_Y[keep]
print(f"n_keep = {n_keep}, n_reject = {n_reject}")

In [None]:
y_pred_keep = [ yp for yp in model1.predict(df_X_keep)]

In [None]:
len(y_pred_keep)

In [None]:
# Predicted versus true target for the kept rows
fig, ax = plt.subplots(figsize=(6, 5), constrained_layout=True)
ax.grid()
ax.set_aspect("equal")
ax.scatter(df_Y_keep, y_pred_keep, alpha=0.1)
# Identity line y = x drawn between -20 and 20
ax.plot([-20, 20], [-20, 20], lw=1, color="k")
ax.set_xlabel("$Y_{true}$")
ax.set_ylabel("$Y_{pred}$")
ax.set_title(suptitle1)

In [None]:
df_spec_keep = df_spec_sel[df_spec_sel["keep"]]

In [None]:
fig,ax = plot_atmparam_vs_time(
    df_spec_keep,
    time_col= "Time",
    filter_col = "FILTER",
    param_col = "PWV [mm]",
    param_err_col = "PWV [mm]_err",
    title_param = "Selected PWV vs time (spectrogram AFTER cut qual. cut)",
    
    # seuils / bornes
    param_min_fig=PWVMIN,
    param_max_fig=PWVMAX,
    param_min_cut=None,
    param_max_cut=None,
 
    # titres
    suptitle= the_suptitle,

    # axes externes
    axs=None,
    figsize=(18, 6),
)

if version_run not in ["run_v12"]:
    for key, tt in dn.items():
        ax.axvspan(tt[0],tt[1], color='blue', alpha=0.05,lw=0.5)

for key, midn in dnidnights.items():
    ax.axvline( midn ,color="purple",ls=":",alpha=0.5,lw=0.5)

In [None]:
# PWV column "PWV [mm]_rum" versus time for the kept rows
fig, ax = plot_atmparam_vs_time(
    df_spec_keep,
    time_col="Time",
    filter_col="FILTER",
    param_col="PWV [mm]_rum",
    param_err_col="PWV [mm]_err_rum",
    title_param="Selected PWV vs time (spectrum AFTER cut qual. cut)",
    # figure range; no additional cut on the parameter
    param_min_fig=PWVMIN,
    param_max_fig=PWVMAX,
    param_min_cut=None,
    param_max_cut=None,
    # titles
    suptitle=the_suptitle,
    # no external axes supplied
    axs=None,
    figsize=(18, 6),
)

# Shade each (start, end) interval of dn, except for run_v12
# (presumably the night boundaries -- confirm where dn is built)
if version_run != "run_v12":
    for night_span in dn.values():
        ax.axvspan(night_span[0], night_span[1], color="blue", alpha=0.05, lw=0.5)

# Dotted purple vertical line at each value of dnidnights
for midnight in dnidnights.values():
    ax.axvline(midnight, color="purple", ls=":", alpha=0.5, lw=0.5)

In [None]:
# Column "diff_PWV" versus time for the kept rows
fig, ax = plot_atmparam_vs_time(
    df_spec_keep,
    time_col="Time",
    filter_col="FILTER",
    param_col="diff_PWV",
    param_err_col="diff_PWV_err",
    # raw string: "\D" is not a valid escape sequence in a normal literal
    title_param=r"$\Delta$ PWV vs time (spectrogram - spectrum AFTER cut qual. cut)",
    # figure range symmetric around zero, a tenth of the PWV range;
    # no additional cut on the parameter
    param_min_fig=-PWVMAX/10.,
    param_max_fig=PWVMAX/10.,
    param_min_cut=None,
    param_max_cut=None,
    # titles
    suptitle=the_suptitle,
    # no external axes supplied
    axs=None,
    figsize=(18, 6),
)

# Shade each (start, end) interval of dn, except for run_v12
# (presumably the night boundaries -- confirm where dn is built)
if version_run not in ["run_v12"]:
    for tt in dn.values():
        ax.axvspan(tt[0], tt[1], color='blue', alpha=0.05, lw=0.5)

# Dotted purple vertical line at each value of dnidnights
for midn in dnidnights.values():
    ax.axvline(midn, color="purple", ls=":", alpha=0.5, lw=0.5)