# QCUT01 : Explore hologram data  quality

- author Sylvie Dagoret-Campagne
- creation date 2026-01-14 : version vrun2026_v01
- last update 2026-01-15 : really start to implement functions in src/mysitcom/auxtel/qualitycuts
- affiliation : IJCLab
- Kernel @usdf **w_2026_02**
- Home emac : base (conda)
- laptop : conda_py313

**Goal** : Show Night variations of PWV wrt date and Time. Fit a straight line.

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell execution (useful while editing the mysitcom package).
%load_ext autoreload
%autoreload 2

In [None]:
# Print the Python version of the running kernel.
from platform import python_version
print(python_version())

In [None]:
import warnings
# Drop any existing warning filters, then silence every warning.
# NOTE(review): this also hides deprecation warnings from the libraries used below.
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
# NOTE(review): this cell repeats the earlier Python-version check verbatim
# and can be removed.
from platform import python_version
print(python_version())

In [None]:
# must install the mysitcom package by doing at top level "pip install --user -e . "
from mysitcom.auxtel.qualitycuts import scatter_datetime
from mysitcom.auxtel.qualitycuts import strip_datetime
from mysitcom.auxtel.qualitycuts import bar_counts_by_night
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_filter
from mysitcom.auxtel.qualitycuts import stripplot_target_vs_time
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_target_filter
from mysitcom.auxtel.qualitycuts import summarize_dccd_chi2

In [None]:
import os

In [None]:
# Output location and naming of the figures saved by this notebook:
#   pathfigs : directory receiving the figures (created if missing)
#   prefix   : prefix of every figure file name
#   figtype  : file extension, including the leading dot
pathfigs = "figs_QCUT01"
prefix = "qcut01"
# exist_ok=True avoids the check-then-create race of a separate os.path.exists test
os.makedirs(pathfigs, exist_ok=True)
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype

import pickle
from collections import OrderedDict

# Default figure size, and one common font size for labels, titles, ticks and legend.
plt.rcParams["figure.figsize"] = (16, 8)
for _rc_key in (
    "axes.labelsize",
    "axes.titlesize",
    "xtick.labelsize",
    "ytick.labelsize",
    "legend.fontsize",
):
    plt.rcParams[_rc_key] = "xx-large"

import scipy
from scipy.optimize import curve_fit,least_squares

from pprint import pprint

# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
# NOTE(review): star import. The names used below that are not defined in this
# notebook (DumpConfig, butlerusercollectiondict, version_run, atmfilename,
# DCCDMINFIG, DCCDMAXFIG, DCCDMINCUT, DCCDMAXCUT, CHI2CUT, tag) presumably all
# come from this module; an explicit import list would make that visible.
from QCUT00_parameters import *

In [None]:
# Presumably prints the configuration provided by QCUT00_parameters (TODO confirm).
DumpConfig()

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
# Display the NumPy version as the cell output.
np.__version__

In [None]:
# Display the pandas version as the cell output.
pd.__version__

### Configuration

In [None]:
def convertNumToDatestr(num):
    """Convert a date encoded as the number YYYYMMDD into a pandas timestamp.

    Parameters
    ----------
    num : int
        Date packed as ``year * 10_000 + month * 100 + day`` (e.g. 20230930).

    Returns
    -------
    The result of ``pd.to_datetime`` applied to the zero-padded
    ``"YYYY-MM-DD"`` string built from ``num`` (timezone-naive).
    """
    # Peel off the year, then split the remainder into month and day.
    year, month_and_day = divmod(num, 10_000)
    month, day = divmod(month_and_day, 100)

    # Zero-pad each field to its fixed width and assemble the ISO date string.
    fields = ((year, 4), (month, 2), (day, 2))
    datestr = "-".join(str(value).zfill(width) for value, width in fields)
    return pd.to_datetime(datestr)

In [None]:
# Lower and upper PWV bounds.
# NOTE(review): not referenced anywhere else in this notebook; units are not
# stated here (presumably mm — confirm).
PWVMIN = 0.
PWVMAX = 20.

In [None]:
# Collimator reference date.
# NOTE(review): FLAG_WITHCOLLIMATOR is not used elsewhere in this notebook;
# it presumably toggles a collimator-based selection — confirm.
FLAG_WITHCOLLIMATOR = False
DATE_WITHCOLLIMATOR = 20230930
# Derive the timestamp once from DATE_WITHCOLLIMATOR (no second hard-coded copy
# of the date) and make it UTC-aware, i.e. 2023-09-30 00:00:00+00:00.
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR).tz_localize("UTC")
datetime_WITHCOLLIMATOR

## Initialisation

### Read the file
- `atmfilename` is defined in `QCUT00_parameters.py` 

In [None]:
# Look up the entry of butlerusercollectiondict for the current version_run.
# Neither name is defined in this notebook (presumably both come from
# QCUT00_parameters).
# NOTE(review): the_suptitle is not used by the cells below, which pass `tag`.
the_suptitle = butlerusercollectiondict[version_run] 

In [None]:
# Load the table into df_spec, choosing the reader from the file name.
# The file name is the last "/"-separated component of atmfilename.
inputfilename = atmfilename.split("/")[-1]

if "parquet" in inputfilename:
    df_spec = pd.read_parquet(atmfilename)
elif "npy" in inputfilename:
    # allow_pickle=True unpickles arbitrary objects: only load trusted files.
    specdata = np.load(atmfilename, allow_pickle=True)
    df_spec = pd.DataFrame(specdata)
else:
    # Raise a real exception (a bare string cannot be raised in Python 3) and
    # interpolate the offending file name into the message.
    raise ValueError(f"bad path of filename {inputfilename}")
    

In [None]:
# Print all column names of df_spec on one line, separated by " | ".
print(" | ".join(df_spec.columns)) 

In [None]:
#df_spec.dtypes.to_frame('Type de donnée')

In [None]:
# add time for plotting
# utc=True makes the "Time" column timezone-aware (UTC); the commented-out line
# below is the earlier variant without utc=True.
#df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"],utc=True)

In [None]:
# nightObs is the part of `id` above its last five decimal digits.
# Vectorised floor division replaces the row-wise apply: it is much faster and
# cannot be affected by the dtype upcasting that happens when a row is turned
# into a Series.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
# seq_num is the last five decimal digits of `id` (remainder modulo 100_000).
df_spec["seq_num"]  = df_spec["id"] % 100_000

In [None]:
# Display the id and FILTER columns as the cell output.
df_spec[["id","FILTER"]]

In [None]:
# Display the distinct values found in the FILTER column.
df_spec["FILTER"].unique()

### Check Filters

In [None]:
# seq_num against Time, with FILTER passed as the hue column.
fig, ax = plt.subplots(figsize=(20, 8))
scatter_datetime(
    df=df_spec, ax=ax,
    x="Time", y="seq_num", hue="FILTER",
    title="Filter Time sequence",
)
plt.show()

In [None]:
# FILTER against Time, with FILTER also passed as the hue column.
fig, ax = plt.subplots(figsize=(20, 8))
strip_datetime(
    df=df_spec, ax=ax,
    x="Time", y="FILTER", hue="FILTER",
    size=9,
)
plt.show()


In [None]:
# Number of observations per night, stacked over the FILTER column.
bar_counts_by_night(
    df=df_spec,
    title="Observations per night (stacked)",
    night_col="nightObs", filter_col="FILTER",
    stacked=True,
)

### Visualize Selection cuts

In [None]:
# Distributions of the two quality variables over the whole sample, with the
# cut values drawn as dash-dotted vertical lines.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(12, 3))
ax1, ax2 = axs

# Left panel: D_CCD, with its lower and upper cut.
df_spec.hist(
    "D_CCD [mm]", ax=ax1, bins=50,
    range=(DCCDMINFIG, DCCDMAXFIG), facecolor="b",
)
ax1.axvline(DCCDMINCUT, ls="-.", c="k")
ax1.axvline(DCCDMAXCUT, ls="-.", c="k")

# Right panel: chi2 of the fit, with its cut (linear y scale).
df_spec.hist(
    "CHI2_FIT", ax=ax2, bins=50,
    range=(0, 500), facecolor="b",
)
ax2.axvline(CHI2CUT, ls="-.", c="k")

plt.suptitle(tag)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.pyplot as plt

# One row of histograms per filter: D_CCD on the left, chi2 on the right,
# each with its cut value(s) drawn as dash-dotted vertical lines.
filters = df_spec["FILTER"].unique()

# squeeze=False always returns a 2-D array of axes, so a single filter needs
# no special handling when picking the row.
fig, axs = plt.subplots(
    len(filters), 2, figsize=(14, 3*len(filters)), squeeze=False
)

for i, f in enumerate(filters):
    # keep only the rows taken with this filter
    subdf = df_spec[df_spec["FILTER"] == f]
    ax1, ax2 = axs[i]

    # D_CCD distribution and its two cuts
    subdf.hist(
        "D_CCD [mm]", ax=ax1, bins=50,
        range=(DCCDMINFIG, DCCDMAXFIG), facecolor="b",
    )
    for dccd_cut in (DCCDMINCUT, DCCDMAXCUT):
        ax1.axvline(dccd_cut, ls="-.", c="k")
    ax1.set_title(f"{f} – D_CCD [mm]_x")

    # chi2 distribution and its cut (linear y scale)
    subdf.hist("CHI2_FIT", ax=ax2, bins=50, range=(0, 300), facecolor="b")
    ax2.axvline(CHI2CUT, ls="-.", c="k")
    ax2.set_title(f"{f} – CHI2_FIT")

plt.suptitle(f" Quality selection cut , {tag}")
plt.tight_layout()
plt.show()


In [None]:
# D_CCD and chi2 versus time for the whole sample; the figure is saved under
# pathfigs before being shown.
fig, axs = plot_dccd_chi2_vs_time(
    df=df_spec,
    suptitle=tag,
    # column names
    time_col="Time", filter_col="FILTER",
    dccd_col="D_CCD [mm]", chi2_col="CHI2_FIT",
    # D_CCD figure range
    dccd_min_fig=DCCDMINFIG, dccd_max_fig=DCCDMAXFIG,
    # quality-cut values
    dccd_min_cut=DCCDMINCUT, dccd_max_cut=DCCDMAXCUT, chi2_cut=CHI2CUT,
)

plt.tight_layout()
figname = f"{pathfigs}/{prefix}_plot_dccd_chi2_vs_time{figtype}"
plt.savefig(figname)
plt.show()


In [None]:
# Same quantities as the previous figure, produced by the *_by_filter helper;
# the figure is saved under pathfigs before being shown.
fig, axs = plot_dccd_chi2_vs_time_by_filter(
    df=df_spec,
    suptitle=tag,
    # column names
    time_col="Time", filter_col="FILTER",
    dccd_col="D_CCD [mm]", chi2_col="CHI2_FIT",
    # D_CCD figure range
    dccd_min_fig=DCCDMINFIG, dccd_max_fig=DCCDMAXFIG,
    # quality-cut values
    dccd_min_cut=DCCDMINCUT, dccd_max_cut=DCCDMAXCUT, chi2_cut=CHI2CUT,
)

plt.tight_layout()
figname = f"{pathfigs}/{prefix}_plot_dccd_chi2_vs_time_by_filter{figtype}"
plt.savefig(figname)
plt.show()


### Impact of target on Quality cut

In [None]:
# Distinct values of the TARGET column and how many there are.
List_Of_Targets = df_spec["TARGET"].unique()
NTARGETS = len(List_Of_Targets)
print(NTARGETS,List_Of_Targets)

In [None]:
# Target-versus-time strip plot from the project helper; the figure is saved
# under pathfigs before being shown.
fig, ax = stripplot_target_vs_time(df=df_spec, tag=tag)

plt.tight_layout()
figname = f"{pathfigs}/{prefix}_plot_stripplot_target_vs_time{figtype}"
plt.savefig(figname)
plt.show()


### Impact of quality for all targets for empty filter

In [None]:
# D_CCD and chi2 versus time for the "empty" filter, all targets together
# (per_target=False); the figure is saved under pathfigs before being shown.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter(
    df=df_spec,
    filter_col="FILTER",
    filter_select="empty",
    per_target=False,
    dccd_min_fig=185,   # <- lower limit of the axis
    dccd_max_fig=190,   # <- upper limit of the axis
    dccd_min_cut=DCCDMINCUT,
    dccd_max_cut=DCCDMAXCUT,
    chi2_min_fig=1.,
    chi2_max_fig=6000.,
    chi2_cut=CHI2CUT,
    suptitle="empty – all targets",
    tag = tag
)

# Apply the same layout pass as the OG550 and BG40 "all targets" cells, so the
# three saved figures are laid out consistently.
plt.tight_layout()
figname =f"{pathfigs}/{prefix}_plot_dccd_chi2_vs_time_all-targets_filter-empty"+figtype
plt.savefig(figname)
plt.show()

### Impact of quality for all targets for OG550 filter

In [None]:
# D_CCD and chi2 versus time for the OG550 filter, all targets together
# (per_target=False); the figure is saved under pathfigs before being shown.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter(
    df=df_spec,
    tag=tag,
    suptitle="OG550 – all targets",
    # data selection
    filter_col="FILTER", filter_select="OG550_65mm_1", per_target=False,
    # figure ranges (lower, upper)
    dccd_min_fig=185, dccd_max_fig=190,
    chi2_min_fig=1.0, chi2_max_fig=6000.0,
    # quality-cut values
    dccd_min_cut=DCCDMINCUT, dccd_max_cut=DCCDMAXCUT, chi2_cut=CHI2CUT,
)

plt.tight_layout()
figname = f"{pathfigs}/{prefix}_plot_dccd_chi2_vs_time_all-targets_filter-og550{figtype}"
plt.savefig(figname)
plt.show()


### Impact of quality for all targets for BG40 filter

In [None]:
# D_CCD and chi2 versus time for the BG40 filter, all targets together
# (per_target=False); the figure is saved under pathfigs before being shown.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter(
    df=df_spec,
    tag=tag,
    suptitle="BG40 – all targets",
    # data selection
    filter_col="FILTER", filter_select="BG40_65mm_1", per_target=False,
    # figure ranges (lower, upper)
    dccd_min_fig=185, dccd_max_fig=190,
    chi2_min_fig=1.0, chi2_max_fig=6000.0,
    # quality-cut values
    dccd_min_cut=DCCDMINCUT, dccd_max_cut=DCCDMAXCUT, chi2_cut=CHI2CUT,
)

plt.tight_layout()
figname = f"{pathfigs}/{prefix}_plot_dccd_chi2_vs_time_all-targets_filter-bg40{figtype}"
plt.savefig(figname)
plt.show()


### Impact of quality for PER targets for empty filter

In [None]:
# D_CCD and chi2 versus time for the "empty" filter with per_target=True
# (presumably one set of panels per target — confirm in the helper).
# The figure is only shown, not saved.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter(
    df=df_spec,
    tag=tag,
    suptitle="empty – per targets",
    # data selection
    filter_col="FILTER", filter_select="empty", per_target=True,
    # figure ranges (lower, upper)
    dccd_min_fig=185, dccd_max_fig=190,
    chi2_min_fig=1.0, chi2_max_fig=6000.0,
    # quality-cut values
    dccd_min_cut=DCCDMINCUT, dccd_max_cut=DCCDMAXCUT, chi2_cut=CHI2CUT,
)
plt.show()

### Impact of quality for PER targets for OG550 filter

In [None]:
# D_CCD and chi2 versus time for the OG550 filter with per_target=True
# (presumably one set of panels per target — confirm in the helper).
# The figure is only shown, not saved.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter(
    df=df_spec,
    tag=tag,
    suptitle="OG550 – per target",
    # data selection
    filter_col="FILTER", filter_select="OG550_65mm_1", per_target=True,
    # figure ranges (lower, upper)
    dccd_min_fig=185, dccd_max_fig=190,
    chi2_min_fig=1.0, chi2_max_fig=6000.0,
    # quality-cut values
    dccd_min_cut=DCCDMINCUT, dccd_max_cut=DCCDMAXCUT, chi2_cut=CHI2CUT,
)
plt.show()

### Impact of quality for PER targets for BG40 filter

In [None]:
# D_CCD and chi2 versus time for the BG40 filter with per_target=True
# (presumably one set of panels per target — confirm in the helper).
# The figure is only shown, not saved.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter(
    df=df_spec,
    tag=tag,
    suptitle="BG40 – per target",
    # data selection
    filter_col="FILTER", filter_select="BG40_65mm_1", per_target=True,
    # figure ranges (lower, upper)
    dccd_min_fig=185, dccd_max_fig=190,
    chi2_min_fig=1.0, chi2_max_fig=6000.0,
    # quality-cut values
    dccd_min_cut=DCCDMINCUT, dccd_max_cut=DCCDMAXCUT, chi2_cut=CHI2CUT,
)
plt.show()

## Summary

In [None]:
# Summary table returned by the project helper, plus a copy whose statistics
# are rendered as fixed-precision strings (df_summary itself stays numeric).
df_summary = summarize_dccd_chi2(df_spec)
df_formatted = df_summary.copy()
for _col, _fmt in (
    ("mean_DCCD", "{:.1f}"),
    ("sigma_DCCD", "{:.1f}"),
    ("mean_CHI2", "{:.0f}"),
    ("sigma_CHI2", "{:.0f}"),
):
    df_formatted[_col] = df_formatted[_col].map(_fmt.format)

### Summary using groupby
(no .reset_index)

In [None]:
# Mean and standard deviation of D_CCD and chi2 for every (TARGET, FILTER) pair.
# reset_index is not called, so TARGET and FILTER stay as the index levels.
# The output columns appear in the order of the keyword arguments below.
summary = (
    df_spec
    .groupby(["TARGET", "FILTER"])
    .agg(
        mean_DCCD=("D_CCD [mm]", "mean"),
        sigma_DCCD=("D_CCD [mm]", "std"),
        mean_CHI2=("CHI2_FIT", "mean"),
        sigma_CHI2=("CHI2_FIT", "std"),
    )
)

In [None]:
# Render the statistics of `summary` in place as fixed-precision strings:
# one decimal for D_CCD, none for chi2.
# The numeric-dtype guard keeps this cell re-runnable: once a column has been
# converted to strings, applying a float format again would raise.
for _col, _fmt in (
    ("mean_DCCD", "{:.1f}"),
    ("sigma_DCCD", "{:.1f}"),
    ("mean_CHI2", "{:.0f}"),
    ("sigma_CHI2", "{:.0f}"),
):
    if pd.api.types.is_numeric_dtype(summary[_col]):
        summary[_col] = summary[_col].map(_fmt.format)

In [None]:
# Display the summary table as the cell output.
summary