# QCUT08 : Explore hologram data  : Impact of all external conditions

- author Sylvie Dagoret-Campagne
- creation date 2026-02-02 : version run2026_v01
- last update : 2026-02-02
- Kernel @usdf **w_2026_02**
- Home emac : base (conda)
- laptop : conda_py313

**Goal** : Show Night variations of PWV wrt date and Time. Fit a straight line.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
# must install the mysitcom package by doing at top level "pip install --user -e . "
from mysitcom.auxtel.qualitycuts import scatter_datetime
from mysitcom.auxtel.qualitycuts import strip_datetime
from mysitcom.auxtel.qualitycuts import bar_counts_by_night
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_filter
from mysitcom.auxtel.qualitycuts import stripplot_target_vs_time
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_target_filter
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_histo_by_target_filter
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_target_filter_colorsedtype
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_histo_by_target_filter_colorsedtype
from mysitcom.auxtel.qualitycuts import summarize_dccd_chi2
from mysitcom.auxtel.qualitycuts import plot_param_histogram_grid
from mysitcom.auxtel.qualitycuts import plot_params_and_chi2_vs_time
from mysitcom.auxtel.qualitycuts import plot_param_chi2_correlation_grid
from mysitcom.auxtel.qualitycuts import plot_param2_vs_param1_colored_by_time
from mysitcom.auxtel.qualitycuts import plot_param_difference_vs_time
from mysitcom.auxtel.qualitycuts import plot_param_difference_vs_time_colored_by_chi2
from mysitcom.auxtel.qualitycuts import plot_single_param_vs_time_colored_by_chi2
from mysitcom.auxtel.qualitycuts import plot_single_param_vs_time
from mysitcom.auxtel.qualitycuts import plot_param_scatterandhistogram_grid
from mysitcom.auxtel.qualitycuts import plot_param_scatterandhistogram_pdf


In [None]:
from mysitcom.auxtel.pwv import GetNightMidnightsDict
from mysitcom.auxtel.pwv import GetNightBoundariesDict
from mysitcom.auxtel.pwv import normalize_column_data_bytarget_byfilter
from mysitcom.auxtel.pwv import shiftaverage_column_data_byfilter
from mysitcom.auxtel.pwv import pwv_deviation_from_linear_interp_datetime
from mysitcom.auxtel.pwv import plot_atmparam_hist_per_filter

In [None]:
import os

In [None]:
# Directory, name prefix and file extension for the figures of this notebook.
# NOTE(review): `prefix` and `figtype` are not referenced by the visible cells —
# presumably meant for building figure filenames; confirm.
pathfigs = "figs_QCUT08"
prefix = "qcut08"
# exist_ok=True makes the creation idempotent and removes the
# check-then-create race of the os.path.exists() test.
os.makedirs(pathfigs, exist_ok=True)
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype

import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (16,8)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'
plt.rcParams["legend.fontsize"] = "xx-large"

import scipy
from scipy.optimize import curve_fit,least_squares

from pprint import pprint

# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from QCUT00_parameters import *

In [None]:
DumpConfig()

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
pd.__version__

### Configuration

In [None]:
def convertNumToDatestr(num):
    """Convert a ``YYYYMMDD`` day number into a pandas timestamp.

    Despite its name, the function returns a timestamp, not a string.

    Parameters
    ----------
    num : int, or any value ``int()`` accepts (e.g. ``20230930.0``, ``"20230930"``)
        Date encoded as ``year * 10_000 + month * 100 + day``.

    Returns
    -------
    pandas.Timestamp
        Timezone-naive timestamp parsed from the ``YYYY-MM-DD`` string.

    Raises
    ------
    Whatever ``int()`` raises for a non-numeric value and whatever
    ``pd.to_datetime`` raises when the digits do not form a valid date.
    """
    # Coerce first: a float such as 20230930.0 would otherwise be rendered
    # as "2023.0-9.0-30.0", which is not a parseable date string.
    num = int(num)
    year, month_day = divmod(num, 10_000)
    month, day = divmod(month_day, 100)
    return pd.to_datetime(f"{year:04d}-{month:02d}-{day:02d}")

In [None]:
PWVMIN = 0.
PWVMAX = 20.

In [None]:
# Collimator reference date.
# NOTE(review): the names suggest this is the date from which data were taken
# with the collimator — confirm.
FLAG_WITHCOLLIMATOR = False
DATE_WITHCOLLIMATOR = 20230930
# Derive the timestamp from DATE_WITHCOLLIMATOR so the two cannot drift apart
# (the previous code overwrote the derived value with a hard-coded string).
# It is localised to UTC, like the hard-coded "+0000" value it replaces, so it
# can be compared with the tz-aware df_spec["Time"] column.
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR).tz_localize("UTC")
datetime_WITHCOLLIMATOR

## Initialisation

### Read the file
- `atmfilename` is defined in `QCUT00_parameters.py` 

In [None]:
the_suptitle = butlerusercollectiondict[version_run] 

In [None]:
# Load the table named by `atmfilename` into `df_spec`.
inputfilename = atmfilename.split("/")[-1]

if "parquet" in inputfilename:
    df_spec = pd.read_parquet(atmfilename)
elif "npy" in inputfilename:
    # NOTE(security): allow_pickle=True can execute arbitrary code while
    # loading; only open files from a trusted source.
    specdata = np.load(atmfilename, allow_pickle=True)
    df_spec = pd.DataFrame(specdata)

    # Expose the columns under the names used by the rest of the notebook.
    # NOTE(review): the "_x"/"_y" suffixes look like the output of a merge
    # between two fit tables (mapped here to the unsuffixed and "_rum"
    # names) — confirm.
    df_spec["D_CCD [mm]"] = df_spec["D2CCD"]
    df_spec["PWV [mm]"] = df_spec["PWV [mm]_x"]
    df_spec["PWV [mm]_rum"] = df_spec["PWV [mm]_y"]
    df_spec["PWV [mm]_err"] = df_spec["PWV [mm]_err_x"]
    df_spec["PWV [mm]_err_rum"] = df_spec["PWV [mm]_err_y"]

    # Keep only rows where both PWV estimates and their errors are defined.
    cols = [
        "PWV [mm]",
        "PWV [mm]_rum",
        "PWV [mm]_err",
        "PWV [mm]_err_rum",
    ]

    df_spec = df_spec.dropna(subset=cols)
else:
    # A bare string cannot be raised in Python 3 (it triggers a TypeError);
    # raise a real exception and interpolate the offending file name.
    raise ValueError(f"bad path of filename {inputfilename}")
    

In [None]:
the_suptitle = butlerusercollectiondict[version_run] 

In [None]:
FLAG_RENAME_SPECTROGRAM_VARIABLES = True

if FLAG_RENAME_SPECTROGRAM_VARIABLES:
    # Every column listed below receives the "_ram" suffix; the mapping is
    # built from the list instead of being spelled out pair by pair.
    df_spec.rename(
        columns={
            name: f"{name}_ram"
            for name in (
                "chi2",
                "A1",
                "A1_err",
                "A2",
                "A2_err",
                "A3",
                "A3_err",
                "VAOD",
                "VAOD_err",
                "angstrom_exp",
                "angstrom_exp_err",
                "ozone [db]",
                "ozone [db]_err",
                "PWV [mm]",
                "PWV [mm]_err",
                "B",
                "B_err",
                "A_star",
                "A_star_err",
                "D_CCD [mm]",
                "D_CCD [mm]_err",
            )
        },
        inplace=True,
    )

In [None]:
df_spec

In [None]:
print(" , ".join(df_spec.columns)) 

In [None]:
#df_spec.dtypes.to_frame('Type de donnée')

In [None]:
# add time for plotting
#df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"],utc=True)

In [None]:
# Night identifier = exposure id with its last five digits removed.
# Vectorised integer division replaces the row-wise apply(): same values,
# far less overhead, and consistent with how "seq_num" is computed from "id".
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
df_spec["seq_num"]  = df_spec["id"] % 100_000

In [None]:
df_spec[["id","FILTER"]]

### Convert DATE-OBS to pandas datetime (`pd.to_datetime`)

In [None]:
# Re-parse DATE-OBS as UTC, then strip the timezone so the column is
# tz-naive; values that cannot be parsed become NaT (errors="coerce").
df_spec["DATE-OBS"] = pd.to_datetime(
    df_spec["DATE-OBS"],
    utc=True,
    errors="coerce").dt.tz_convert(None)

In [None]:
df_spec["FILTER"].unique()

In [None]:
print(list(df_spec.columns))

## Select only empty and OG550 filters

In [None]:
#df_spec["FILTER"].unique()

In [None]:
#if FLAG_PWVFILTERS: 
#    df_spec = df_spec[df_spec["FILTER"].isin(PWV_FILTER_LIST) ]

### Correct the time units

## Processing before cut studies

### PWV difference and PWV relative ratio

In [None]:
# Uncertainty of (PWV_ram - PWV_rum): the two errors add in quadrature
# (assuming they are uncorrelated).
denom = np.sqrt(df_spec["PWV [mm]_err_ram"]**2 + df_spec["PWV [mm]_err_rum"]**2)

# Difference in units of its uncertainty; undefined (NaN) when the
# uncertainty is zero, negative or not finite.
df_spec["diff_PWV_norm"] = np.where(
    np.isfinite(denom) & (denom > 0),
    (df_spec["PWV [mm]_ram"] - df_spec["PWV [mm]_rum"]) / denom,
    np.nan
)

df_spec["diff_PWV"] = df_spec["PWV [mm]_ram"] - df_spec["PWV [mm]_rum"]
# The previous expression subtracted the variances, which gives NaN whenever
# err_rum > err_ram; the error of a difference is the quadrature SUM.
df_spec["diff_PWV_err"] = denom

### Normalised chi2

In [None]:
# Normalise the three chi2 columns per (TARGET, FILTER) group.
# NOTE(review): with ext="norm" the helper presumably adds "<feature>_norm"
# columns (these names are used in `params` further down) — confirm in
# mysitcom.auxtel.pwv. The second return values (df1, df2, df3) are not used
# in the visible cells.
df_spec, df1 = normalize_column_data_bytarget_byfilter(df_spec,target_col="TARGET",filter_col="FILTER",feature_col= "CHI2_FIT",ext="norm")
df_spec, df2 = normalize_column_data_bytarget_byfilter(df_spec,target_col="TARGET",filter_col="FILTER",feature_col= "chi2_ram",ext="norm")
df_spec, df3 = normalize_column_data_bytarget_byfilter(df_spec,target_col="TARGET",filter_col="FILTER",feature_col= "chi2_rum",ext="norm")

### Angle uniformization

In [None]:
#df["angle_180"] = ((df["angle_360"] + 180) % 360) - 180

In [None]:
flag_angles_m180_p180 = True

In [None]:
if flag_angles_m180_p180:
    # Map the three angle columns from [0, 360) onto [-180, 180) in one
    # vectorised assignment.
    df_spec[["DOMEAZ", "RA", "WINDDIR"]] = (
        (df_spec[["DOMEAZ", "RA", "WINDDIR"]] + 180) % 360
    ) - 180
    flag_angles_m180_p180 = True

In [None]:
# Wind speed components along / across the pointing azimuth.
# WINDDIR is handled in degrees elsewhere in this notebook (wrapped modulo
# 360); AZ is assumed to be in degrees as well — TODO confirm.
# np.cos / np.sin expect radians, so the angle difference is converted first
# (the previous code passed degrees directly).
wind_rel_angle_rad = np.deg2rad(df_spec["AZ"] - df_spec["WINDDIR"])
df_spec["WINDSPDPARR"] = df_spec["WINDSPD"] * np.cos(wind_rel_angle_rad)
df_spec["WINDSPDPERP"] = df_spec["WINDSPD"] * np.sin(wind_rel_angle_rad)

## What to keep

In [None]:
# List of columns of interest.
# NOTE(review): `columns_keep` is not referenced by any visible cell, and
# "abs_delta_PWV", "PWV [mm]_shift" and "PWV [mm]_rum_shift" are not created in
# the visible cells — confirm they exist before using this list to subset df_spec.
columns_keep = ["id","Time","TARGET","ROTANGLE","D2CCD", "DOMEAZ","AZ","EL","WINDSPD", "WINDDIR","PARANGLE","TARGETX","TARGETY","CHI2_FIT_norm","PIXSHIFT","PSF_REG","TRACE_R", 
"A2_FIT", "AM_FIT", "MEANFWHM", "AIRMASS", "OUTTEMP", "OUTPRESS", "OUTHUM","FILTER", "CAM_ROT","chi2_ram_norm","A1_ram", "A2_ram", "A3_ram", "PWV [mm]_ram" ,"PWV [mm]_err_ram","B_ram",
"A_star_ram","D_CCD [mm]_ram","shift_x [pix]","shift_y [pix]", "angle [deg]", "P [hPa]","gamma_0_1", "gamma_1_1","gamma_2_1", "alpha_0_1","alpha_1_1","saturation_0_1",
"gamma_0_2","gamma_1_2","gamma_2_2", "alpha_0_2", "alpha_1_2", "alpha_2_2", "saturation_0_2", "chi2_rum_norm", "A1_rum", "A2_rum",
"PWV [mm]_rum","PWV [mm]_err_rum" ,"reso [nm]", "D_CCD [mm]_rum", "alpha_pix [pix]", "mount_motion_image_degradation_x",
"mount_motion_image_degradation_az_x", "mount_motion_image_degradation_el_x", "mount_jitter_rms_x","mount_jitter_rms_az_x", "mount_jitter_rms_el_x", "mount_jitter_rms_rot_x",
"dimm_seeing_x", "focus_z_x" ,"mount_motion_image_degradation_y", "mount_motion_image_degradation_az_y","diff_PWV","diff_PWV_err","abs_delta_PWV","PWV [mm]_shift","PWV [mm]_rum_shift"]

## Histograms of parameters

In [None]:
# Columns of df_spec passed as `params` to the plotting helpers.
# Each name appears once: the previous list repeated "alpha_pix [pix]" and
# "PARANGLE", so those two panels were drawn twice.
params = [
    "alpha_0_1",
    "alpha_1_1",
    "alpha_0_2",
#    "alpha_1_2",
#    "alpha_2_2",
    "gamma_0_1",
    "gamma_1_1",
    "gamma_2_1",
    "angle [deg]",
    "alpha_pix [pix]",
    "reso [nm]",
    #"shift_x[pix]",
    #"shift_y[pix]",
    'MEANFWHM',
    'PIXSHIFT',
    'PSF_REG',
    'TRACE_R',
    'CHI2_FIT_norm',
    'chi2_ram_norm',
    'chi2_rum_norm',
    'D2CCD',
    'D_CCD [mm]_ram',
    'D_CCD [mm]_rum',
    "WINDSPD",
    "WINDDIR",
    "WINDSPDPARR",
    "WINDSPDPERP",
    "CAM_ROT",
    "ROTANGLE",
    "PARANGLE",
    "DOMEAZ",
    "AZ",
    "EL",
    "AIRMASS",
    "OUTTEMP",
    "OUTPRESS",
    "P [hPa]"
]

In [None]:
# (min, max) range per parameter, passed as `param_ranges` to
# plot_param_histogram_grid for the linear-scale histograms.
param_ranges = {
    "alpha_0_1": (0, 10),
    "alpha_1_1": (-1, 1),
    "alpha_0_2": (0, 10),
    "gamma_0_1": (-2, 10),
    "gamma_1_1": (-5, 5),
    "gamma_2_1": (-2, 5),
    "angle [deg]": (0, 0.5),
    "reso [nm]": (0, 5),
    "MEANFWHM": (0, 30),
    "PIXSHIFT": (-1, 1),
    "PSF_REG": (0, 10),
    "TRACE_R": (0, 80),
    "CHI2_FIT_norm": (0, 5),
    "chi2_ram_norm": (0, 5),
    "chi2_rum_norm": (0, 5),
    "D2CCD": (185, 190.0),
    "D_CCD [mm]_ram": (185, 190.0),
    "D_CCD [mm]_rum": (185, 190.0),
    "ROTANGLE": (0, 0.5),
    "P [hPa]": (0, 5000),
}

# Filter names passed as `filter_order` to the plotting helpers.
filter_order = ["empty", "BG40_65mm_1", "OG550_65mm_1"]

In [None]:
fig, axs = plot_param_histogram_grid(
    df=df_spec,
    params=params,
    filter_col="FILTER",
    filter_order=filter_order,
    param_ranges=param_ranges,
    bins=40,
    stacked=True,
    logy=False,
)


In [None]:
# (min, max) range per parameter used for the log-scale histograms and for
# the scatter/histogram diagnostics; wider than `param_ranges` for some of
# the alpha/gamma parameters and narrower for the D_CCD ones.
param_ranges_log = {
    "alpha_0_1": (0, 10),
    "alpha_1_1": (-2, 2),
    "alpha_0_2": (0, 10),
    "gamma_0_1": (-2, 50),
    "gamma_1_1": (-10, 10),
    "gamma_2_1": (-5, 10),
    "angle [deg]": (0, 0.5),
    "reso [nm]": (0, 5),
    "MEANFWHM": (0, 30),
    "PIXSHIFT": (-1, 1),
    "PSF_REG": (0, 10),
    "TRACE_R": (0, 80),
    "CHI2_FIT_norm": (0, 5),
    "chi2_ram_norm": (0, 5),
    "chi2_rum_norm": (0, 5),
    "D2CCD": (186, 189.0),
    "D_CCD [mm]_ram": (186, 189.0),
    "D_CCD [mm]_rum": (186, 189.0),
    "ROTANGLE": (0, 0.5),
    "P [hPa]": (0, 5000),
}


In [None]:
fig, axs = plot_param_histogram_grid(
    df=df_spec,
    params=params,
    filter_col="FILTER",
    filter_order=filter_order,
    param_ranges=param_ranges_log,
    bins=40,
    stacked=True,
    logy=True,
)


In [None]:
# Optionally restrict the sample to the filters listed in PWV_FILTER_LIST.
if FLAG_PWVFILTERS:
    # .copy() detaches the selection from the original frame, so the columns
    # assigned to df_spec later in the notebook are written to an independent
    # DataFrame rather than to a slice of the unfiltered one.
    df_spec = df_spec[df_spec["FILTER"].isin(PWV_FILTER_LIST)].copy()

In [None]:
filter_order = ["empty", "OG550_65mm_1"]

In [None]:
plot_param_scatterandhistogram_grid(
    df=df_spec,
    params=params,
    y_col = "diff_PWV",
    filter_col="FILTER",
    filter_order=filter_order,
    params_ranges=param_ranges_log,
    yminmax=(-5,5),
    bins=40,
    stacked=True,
    logy=False,
    figsize=(10,4)
)
plt.show()

In [None]:
# diff_PWV versus each parameter, sent to a PDF file (presumably several
# pages, `params_per_page` parameters each — confirm in mysitcom).
# NOTE(review): pdf_filename has no directory component, so the file is not
# placed under `pathfigs` — confirm this is intended.
plot_param_scatterandhistogram_pdf(
    pdf_filename="diff_PWV-vs-params_diagnostics.pdf",
    df=df_spec,
    params=params,               # full list
    params_ranges=param_ranges_log,
    params_per_page=5,            # 5 per page
    y_col= "diff_PWV",
    filter_col="FILTER",
    filter_order = filter_order,
    yminmax=(-5, 5),
    bins=50,
    figsize=(10,4),
)

## With PWV repeatability

### Calculate Repeatability for Spectrogram

In [None]:
df_spec = pwv_deviation_from_linear_interp_datetime(
    df_spec,
    night_col="nightObs",
    filter_col="FILTER",
    target_col="TARGET",
    time_col="Time",
    pwv_col="PWV [mm]_ram",
    suffix="repeat_ram",
    time_unit="min",
)

### Calculate Repeatability for Spectrum

In [None]:
df_spec = pwv_deviation_from_linear_interp_datetime(
    df_spec,
    night_col="nightObs",
    filter_col="FILTER",
    target_col="TARGET",
    time_col="Time",
    pwv_col="PWV [mm]_rum",
    suffix="repeat_rum",
    time_unit="min",
)

#### Convert the format of some columns

In [None]:
df_spec["dt_repeat_ram"] = pd.to_timedelta(df_spec["dt_repeat_ram"])
df_spec["dt_repeat_rum"] = pd.to_timedelta(df_spec["dt_repeat_rum"])

In [None]:
df_spec["dt_repeat_ram_min"] = df_spec["dt_repeat_ram"].dt.total_seconds() / 60
df_spec["dt_repeat_rum_min"] = df_spec["dt_repeat_rum"].dt.total_seconds() / 60

In [None]:
df_spec.columns

In [None]:
# Left: repeatability deviation of PWV; right: the dt_repeat_*_min columns.
# Red = spectrogram fit ("ram"), blue = spectrum fit ("rum").
fig, axs = plt.subplots(1, 2, figsize=(10, 3), layout="constrained")
ax1, ax2 = axs
df_spec['PWV [mm]_ram_repeat_ram'].hist(bins=50, range=(-1, 1), histtype="step", color="r", label="ram", ax=ax1)
df_spec['PWV [mm]_rum_repeat_rum'].hist(bins=50, range=(-1, 1), histtype="step", color="b", label="rum", ax=ax1)
df_spec["dt_repeat_ram_min"].hist(bins=100, range=(0, 3), histtype="step", color="r", label="ram", ax=ax2)
df_spec["dt_repeat_rum_min"].hist(bins=100, range=(0, 3), histtype="step", color="b", label="rum", ax=ax2)
# The two panels show different quantities with different units; label the
# x axes so the figure can be read on its own.
ax1.set_xlabel("PWV repeatability deviation [mm]")
ax2.set_xlabel("dt_repeat [min]")
ax1.legend()
ax2.legend()
plt.show()

In [None]:
# Per-filter histogram of the PWV repeatability deviation from the
# spectrogram fit.
fig,ax = plot_atmparam_hist_per_filter(
    df_spec,
    filter_col="FILTER",
    param_col = "PWV [mm]_ram_repeat_ram",
    param_range = (-1.,1.),

    # histogram control
    bins=100,
    density=True,
    hist_alpha=0.4,

    # x-axis limits
    param_min_fig=-1.,
    param_max_fig=1.,

    # Raw string: "\D" is an invalid escape sequence in a normal string
    # literal (SyntaxWarning on Python >= 3.12). Also fixes the typo "uin".
    title_param=r"$\Delta$ PWV (repeatability in spectrogram)",
    # titles
    suptitle= the_suptitle
)
plt.show()


In [None]:
# Per-filter histogram of the PWV repeatability deviation from the
# spectrum fit.
fig,ax = plot_atmparam_hist_per_filter(
    df_spec,
    filter_col="FILTER",
    param_col = "PWV [mm]_rum_repeat_rum",
    param_range = (-1.,1.),

    # histogram control
    bins=100,
    density=True,
    hist_alpha=0.4,

    # x-axis limits
    param_min_fig=-1.,
    param_max_fig=1.,

    # Raw string: "\D" is an invalid escape sequence in a normal string
    # literal (SyntaxWarning on Python >= 3.12).
    title_param=r"$\Delta$ PWV (repeatability in spectrum)",
    # titles
    suptitle= the_suptitle
)
plt.show()

In [None]:
# Discard every row that lacks a repeatability value for either fit
# (a single pass with both columns in `subset`).
df_spec = df_spec.dropna(
    subset=["PWV [mm]_ram_repeat_ram", "PWV [mm]_rum_repeat_rum"]
)

In [None]:
plot_param_scatterandhistogram_grid(
    df=df_spec,
    params=params,
    y_col = "PWV [mm]_ram_repeat_ram",
    filter_col="FILTER",
    filter_order=filter_order,
    params_ranges=param_ranges_log,
    yminmax=(-2,2),
    bins=40,
    stacked=True,
    logy=False,
    figsize=(10,4)
)
plt.show()

In [None]:
# Spectrogram-fit PWV repeatability versus each parameter, sent to a PDF file
# (presumably several pages, `params_per_page` parameters each — confirm).
# NOTE(review): pdf_filename has no directory component, so the file is not
# placed under `pathfigs` — confirm this is intended.
plot_param_scatterandhistogram_pdf(
    pdf_filename="DPWV_ram_repeat-vs-params_diagnostics.pdf",
    df=df_spec,
    params=params,               # full list
    params_ranges=param_ranges_log,
    params_per_page=5,            # 5 per page
    y_col= "PWV [mm]_ram_repeat_ram",
    filter_col="FILTER",
    filter_order = filter_order,
    yminmax=(-5, 5),
    bins=50,
    figsize=(10,4),
)

In [None]:
# Spectrum-fit PWV repeatability versus each parameter, sent to a PDF file
# (presumably several pages, `params_per_page` parameters each — confirm).
# NOTE(review): pdf_filename has no directory component, so the file is not
# placed under `pathfigs` — confirm this is intended.
plot_param_scatterandhistogram_pdf(
    pdf_filename="DPWV_rum_repeat-vs-params_diagnostics.pdf",
    df=df_spec,
    params=params,               # full list
    params_ranges=param_ranges_log,
    params_per_page=5,            # 5 per page
    y_col= "PWV [mm]_rum_repeat_rum",
    filter_col="FILTER",
    filter_order = filter_order,
    yminmax=(-5, 5),
    bins=50,
    figsize=(10,4),
)