# QCUT02 : Check quality results in Spectractor
- REQUIRES ACCESS TO BUTLER

- author Sylvie Dagoret-Campagne
- creation date 2026-01-16 : version vrun2026_v01
- last update 2026-01-17 : loop on several samples
- last update : 2026-01-19 : Better definitions for cuts in a dictionary
- Kernel @usdf **w_2026_02**
- Home emac : base (conda)
- laptop : conda_py313

**Goal** : Show Night variations of PWV wrt date and Time. Fit a straight line.

In [None]:
#%load_ext autoreload
#%autoreload 2

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
# Start from an empty filter list, then ignore every warning category
# for the rest of the notebook session.
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
# must install the mysitcom package by doing at top level "pip install --user -e . "
from mysitcom.auxtel.qualitycuts import scatter_datetime
from mysitcom.auxtel.qualitycuts import strip_datetime
from mysitcom.auxtel.qualitycuts import bar_counts_by_night
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_filter
from mysitcom.auxtel.qualitycuts import stripplot_target_vs_time
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_target_filter
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_histo_by_target_filter
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_target_filter_colorsedtype
from mysitcom.auxtel.qualitycuts import  plot_dccd_chi2_histo_by_target_filter_colorsedtype
from mysitcom.auxtel.qualitycuts import summarize_dccd_chi2

In [None]:
import os

In [None]:
# Output location and naming of the figures written by this notebook:
#   pathfigs : directory receiving the figures
#   prefix   : common prefix of every figure file name
#   figtype  : file extension (selects the output format of savefig)
pathfigs = "figs_QCUT02"
prefix = "qcut02"
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(pathfigs, exist_ok=True)
figtype = ".png"

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype

import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (16,8)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'
plt.rcParams["legend.fontsize"] = "xx-large"

import scipy
from scipy.optimize import curve_fit,least_squares

from pprint import pprint

# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from QCUT00_parameters import *

In [None]:
DumpConfig()

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
pd.__version__

### Configuration

In [None]:
def convertNumToDatestr(num):
    """Convert a date encoded as the number YYYYMMDD into a pandas timestamp.

    Parameters
    ----------
    num : int
        Date packed as ``year * 10_000 + month * 100 + day``.

    Returns
    -------
    The result of ``pd.to_datetime`` applied to the ``"YYYY-MM-DD"`` string
    built from ``num``.
    """
    # Peel off the year first, then split the remainder into month and day.
    year, month_day = divmod(num, 10_000)
    month, day = divmod(month_day, 100)

    # Zero-pad each field to its ISO width before joining them.
    fields = (str(year).zfill(4), str(month).zfill(2), str(day).zfill(2))
    return pd.to_datetime("-".join(fields))

In [None]:
PWVMIN = 0.
PWVMAX = 20.

In [None]:
# Collimator-related reference date, stored as a YYYYMMDD integer, and the
# flag associated with it.
# NOTE(review): presumably the date from which the collimator is present - confirm.
FLAG_WITHCOLLIMATOR = False
DATE_WITHCOLLIMATOR = 20230930
# Derive the timestamp from DATE_WITHCOLLIMATOR so that the date is written
# in one place only. utc=True makes it timezone-aware, which is required to
# compare it with the UTC "Time" column built from DATE-OBS.
datetime_WITHCOLLIMATOR = pd.to_datetime(str(DATE_WITHCOLLIMATOR), format="%Y%m%d", utc=True)
datetime_WITHCOLLIMATOR

## Initialisation

### Read the file
- `atmfilename` is defined in `QCUT00_parameters.py` 

In [None]:
the_suptitle = butlerusercollectiondict[version_run] 

In [None]:
# Load the spectra table into df_spec; the reader is chosen from the
# base name of atmfilename (parquet or numpy file).
inputfilename = atmfilename.split("/")[-1]

if "parquet" in inputfilename:
    df_spec = pd.read_parquet(atmfilename)
elif "npy" in inputfilename:
    # allow_pickle=True unpickles arbitrary objects: only load trusted files.
    specdata = np.load(atmfilename,allow_pickle=True)
    df_spec = pd.DataFrame(specdata)
else:
    # Raising a plain string is a TypeError in Python 3; raise a real
    # exception and interpolate the offending file name in the message.
    raise ValueError(f"bad path of filename {inputfilename}")
    

In [None]:
print(" | ".join(df_spec.columns)) 

In [None]:
#df_spec.dtypes.to_frame('Type de donnée')

In [None]:
# add time for plotting
#df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"],utc=True)

In [None]:
# The night of observation is the leading part of the id (the last five
# digits are the sequence number). Vectorized floor division avoids a
# row-by-row apply.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
df_spec["seq_num"]  = df_spec["id"] % 100_000

In [None]:
df_spec[["id","FILTER"]]

## Targets in our data

In [None]:
List_Of_Targets = df_spec["TARGET"].unique()
NTARGETS = len(List_Of_Targets)
print(NTARGETS,List_Of_Targets)

## Special study on Star Color (Spectral type)
- Load magnitudes from external file
- the magnitudes have been computed in another notebook in ../2025-10-29-TOOLS



In [None]:
targets_mag_files = "../2025-10-29-TOOLS/data/targets_magnitudes.csv"
df_targets_mag = pd.read_csv(targets_mag_files,index_col=0)      
df_targets_mag = df_targets_mag.sort_values(by="y")

### palette with SED type

In [None]:
# Work on a copy of the magnitude table, ordered by the B_V column
df_col = df_targets_mag.copy().sort_values(by="B_V")

# Spectral types in that order; dict.fromkeys removes duplicates while
# keeping the order of first appearance
SpT = df_col["Sp_T"].values
unique_types = list(dict.fromkeys(SpT))
N_types = len(unique_types)


# Map each spectral type to an integer rank, and rank every target
type_to_idx = dict(zip(unique_types, range(N_types)))
idx = np.array([type_to_idx[sp] for sp in SpT])

# Colormap and normalisation: the half-unit margins centre each integer
# rank inside its own colour bin
cmap = mpl.cm.jet
norm = mpl.colors.Normalize(vmin=-0.5, vmax=N_types - 0.5)

# Horizontal colorbar with one tick per spectral type
fig, ax = plt.subplots(figsize=(14, 0.4), layout="constrained")
sed_mappable = mpl.cm.ScalarMappable(norm=norm, cmap=cmap)
cbar = fig.colorbar(
    sed_mappable,
    cax=ax,
    orientation="horizontal",
    ticks=range(N_types),
)
cbar.ax.set_xticklabels(unique_types, rotation=45, ha="right")
cbar.set_label("Spectral Type", fontsize=12,labelpad=10)

plt.show()

In [None]:
df_targets_mag.head() 

### Target ordered by colors in the input file

In [None]:
sorted_targets =  list(df_targets_mag.index) 
print(sorted_targets)

### Target ordered by colors in our data  file

In [None]:
# Targets present in our data, kept in the order of sorted_targets
# (the magnitude table sorted on its "y" column).
# The set is built once: calling unique() inside the comprehension would
# recompute it for every candidate target.
targets_in_data = set(df_spec["TARGET"].unique())
order_selected_targets = [t for t in sorted_targets if t in targets_in_data]
print(order_selected_targets)

### build a palette for colors

In [None]:
# --- Custom palette, consistent with the jet colormap defined above ---
# Each selected target gets the colour of its spectral type.
target_to_color = {}
for target in order_selected_targets:
    sp_type = df_targets_mag.loc[target, "Sp_T"]
    rgba = cmap(norm(type_to_idx[sp_type]))

    # Store plain Python floats rather than numpy scalars
    target_to_color[target] = tuple(map(float, rgba))

In [None]:
# Figure for filter_select="empty" with per_target=False, coloured with the
# target_to_color palette; cut values come from the configuration constants.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter_colorsedtype(
    df=df_spec,
    filter_col="FILTER",
    filter_select="empty",
    per_target=False,
    dccd_min_fig=185,   # <- lower limit of the axis
    dccd_max_fig=190,   # <- upper limit of the axis
    dccd_min_cut=DCCDMINCUT,
    dccd_max_cut=DCCDMAXCUT,
    chi2_min_fig=1.,   
    chi2_max_fig=6000.,   
    chi2_cut=CHI2CUT,
    suptitle="empty – all targets (color SED-type)",
    tag = tag,
    target_palette=target_to_color
)

# Save the figure in the notebook figure directory before displaying it
figname =f"{pathfigs}/{prefix}_plot_dccd_chi2_vs_time_all-targets_filter-empty_colorsedtype"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Histogram figure for filter_select="empty" with per_target=False, using
# the same cut constants and palette as the time-series figure.
fig, axs = plot_dccd_chi2_histo_by_target_filter_colorsedtype(
    df=df_spec,
    filter_select="empty",
    per_target=False,
    dccd_min_fig=186,
    dccd_max_fig=189,
    dccd_min_cut=DCCDMINCUT,
    dccd_max_cut=DCCDMAXCUT,
    chi2_min_fig=1,
    chi2_max_fig=6000,
    chi2_cut=CHI2CUT,
    target_palette=target_to_color,
    suptitle="empty – histograms",
    tag = tag,
)
# NOTE(review): "hosto" in the file name looks like a typo for "histo";
# it is kept so that existing output paths do not change.
figname =f"{pathfigs}/{prefix}_plot_dccd_chi2_hosto_all-targets_filter-empty_colorsedtype"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Figure for filter_select="OG550_65mm_1" with per_target=False, coloured
# with the target_to_color palette.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter_colorsedtype(
    df=df_spec,
    filter_col="FILTER",
    filter_select="OG550_65mm_1",
    per_target=False,
    dccd_min_fig=185,   # <- lower limit of the axis
    dccd_max_fig=190,   # <- upper limit of the axis
    dccd_min_cut=DCCDMINCUT,
    dccd_max_cut=DCCDMAXCUT,
    chi2_min_fig=1.,   
    chi2_max_fig=6000.,   
    chi2_cut=CHI2CUT,
    suptitle="OG550 – all targets (color SED-type)",
    tag = tag,
    target_palette=target_to_color
)

# Save the figure in the notebook figure directory before displaying it
figname =f"{pathfigs}/{prefix}_plot_dccd_chi2_vs_time_all-targets_filter-og550_colorsedtype"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Histogram figure for filter_select="OG550_65mm_1" with per_target=False.
fig, axs = plot_dccd_chi2_histo_by_target_filter_colorsedtype(
    df=df_spec,
    filter_select="OG550_65mm_1",
    per_target=False,

    dccd_min_fig=186,
    dccd_max_fig=189,
    dccd_min_cut=DCCDMINCUT,
    dccd_max_cut=DCCDMAXCUT,
    chi2_min_fig=1,
    chi2_max_fig=6000,
    chi2_cut=CHI2CUT,
    target_palette=target_to_color,
    suptitle="OG550 – histograms",
    tag = tag,
)
# NOTE(review): "hosto" in the file name looks like a typo for "histo";
# it is kept so that existing output paths do not change.
figname =f"{pathfigs}/{prefix}_plot_dccd_chi2_hosto_all-targets_filter-og550_colorsedtype"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Figure for filter_select="BG40_65mm_1" with per_target=False, coloured
# with the target_to_color palette.
fig, axs = plot_dccd_chi2_vs_time_by_target_filter_colorsedtype(
    df=df_spec,
    filter_col="FILTER",
    filter_select="BG40_65mm_1",
    per_target=False,
    dccd_min_fig=185,   # <- lower limit of the axis
    dccd_max_fig=190,   # <- upper limit of the axis
    dccd_min_cut=DCCDMINCUT,
    dccd_max_cut=DCCDMAXCUT,
    chi2_min_fig=1.,   
    chi2_max_fig=6000.,   
    chi2_cut=CHI2CUT,
    suptitle="BG40 – all targets (color SED-type)",
    tag = tag,
    target_palette=target_to_color
)

# Save the figure in the notebook figure directory before displaying it
figname =f"{pathfigs}/{prefix}_plot_dccd_chi2_vs_time_all-targets_filter-bg40_colorsedtype"+figtype
plt.savefig(figname)
plt.show()

In [None]:
# Histogram figure for filter_select="BG40_65mm_1" with per_target=False.
fig, axs = plot_dccd_chi2_histo_by_target_filter_colorsedtype(
    df=df_spec,
    filter_select="BG40_65mm_1",
    per_target=False,

    dccd_min_fig=186,
    dccd_max_fig=189,
    dccd_min_cut=DCCDMINCUT,
    dccd_max_cut=DCCDMAXCUT,
    chi2_min_fig=1,
    chi2_max_fig=6000,
    chi2_cut=CHI2CUT,
    target_palette=target_to_color,
    suptitle="BG40 – histograms",
    tag = tag,
)
# NOTE(review): "hosto" in the file name looks like a typo for "histo";
# it is kept so that existing output paths do not change.
figname =f"{pathfigs}/{prefix}_plot_dccd_chi2_hosto_all-targets_filter-bg40_colorsedtype"+figtype
plt.savefig(figname)
plt.show()

## LOOP over each target : D2CCD and CHI2_FIT vs time

In [None]:
# Disabled cell (the condition is the constant 0): same call as the
# "empty" filter figure but with per_target=True. Switch the condition to
# 1 to run it.
if 0:
    fig, axs = plot_dccd_chi2_vs_time_by_target_filter_colorsedtype(
        df=df_spec,
        filter_col="FILTER",
        filter_select="empty",
        dccd_min_fig=185,   # <- lower limit of the axis
        dccd_max_fig=190,   # <- upper limit of the axis
        dccd_min_cut=DCCDMINCUT,
        dccd_max_cut=DCCDMAXCUT,
        chi2_min_fig=1.,   
        chi2_max_fig=6000.,   
        chi2_cut=CHI2CUT,
        suptitle="empty – all targets (color SED-type)",
        per_target=True,
        tag = tag,
        target_palette=target_to_color
    )

    plt.show()

### Understand missing targets

Missing targets that are not in the file `targets_mag_files = "../2025-10-29-TOOLS/data/targets_magnitudes.csv"`

In [None]:
ListOfMissingTargets = set(List_Of_Targets) - set(sorted_targets)

In [None]:
from getCalspec import getCalspec
from getCalspec.getCalspec import getCalspecDataFrame

In [None]:
for missing_target in ListOfMissingTargets:
    result = getCalspec.is_calspec(missing_target)
    print(f"missing target {missing_target} ==> is_Calspec = {result}")

## Summary

In [None]:
# Summary table, plus a display copy whose statistics are rendered as
# fixed-precision strings (one decimal for DCCD, none for CHI2).
df_summary = summarize_dccd_chi2(df_spec)
df_formatted = df_summary.copy()
for stat_col, stat_fmt in (
    ("mean_DCCD", "{:.1f}"),
    ("sigma_DCCD", "{:.1f}"),
    ("mean_CHI2", "{:.0f}"),
    ("sigma_CHI2", "{:.0f}"),
):
    df_formatted[stat_col] = df_formatted[stat_col].map(stat_fmt.format)

In [None]:
df_summary["is_calspec"] = df_summary["TARGET"].apply(getCalspec.is_calspec)

In [None]:
df_summary.head()

### Summary using groupby
(no .reset_index)

In [None]:
summary = (
    df_spec
    .groupby(["TARGET", "FILTER"])
    .agg(
        count=("CHI2_FIT", "size"),
        mean_DCCD=("D_CCD [mm]", "mean"),
        sigma_DCCD=("D_CCD [mm]", "std"),
        mean_CHI2=("CHI2_FIT", "mean"),
        sigma_CHI2=("CHI2_FIT", "std"),
    )
)

In [None]:
# Replace the numeric statistics by fixed-precision strings for display
# (one decimal for DCCD, none for CHI2).
for stat_col, stat_fmt in (
    ("mean_DCCD", "{:.1f}"),
    ("sigma_DCCD", "{:.1f}"),
    ("mean_CHI2", "{:.0f}"),
    ("sigma_CHI2", "{:.0f}"),
):
    summary[stat_col] = summary[stat_col].map(stat_fmt.format)

In [None]:
summary["is_calspec"] = summary.index.get_level_values("TARGET").map(getCalspec.is_calspec)

In [None]:
summary

## View Spectractor results in Butler

### List of some targets

In [None]:
listOfGaia = ['HD73495', 'HD104304', 'HD89736', 'HD57167','HD202025', 'HD160760', 'HD36780', 'HD74180', 'HD77020', 'HD111235']

In [None]:
# Target under study: keep exactly one assignment uncommented.
target_sel ="HD36780"   # GAIA :: very bad Gaia
#target_sel = "HD73495"  # GAIA ::  oscillating Gaia
#target_sel = 'HD104304' # GAIA :: oscillating Gaia
#target_sel = 'HD89736'  # GAIA ::  Missing ????? although the count indicates 100
#target_sel = 'HD111235' # GAIA ::  VERY BEAUTIFUL GAIA (753)
#target_sel =  'HD57167' # GAIA ::  VERY NICE Gaia
#target_sel = "HD202025" # GAIA ::  VERY BEAUTIFUL GAIA with low chi2
#target_sel = "HD160760" # GAIA ::  VERY LOW CHI2 Gaia 
#target_sel = 'HD74180'  # GAIA :: VERY HIGH chi2 HORRIBLE GAIA
#target_sel ='HD77020'   # GAIA :: Nice Gaia, low chi2
#target_sel ="HD38666"   # CALSPEC :: mu. Col Blue bright star
#target_sel ="HD185975"  # CALSPEC :: Polar star  	G3V

### Cuts definition

In [None]:
# Boolean row masks over df_spec, stored under a human-readable label.
# NOTE(review): the labels are hard-coded to "HD36780" whereas the masks are
# built from target_sel; rename the keys when another target is selected.
cuts = {}
cuts["HD36780"] = df_spec.TARGET == target_sel
cuts["HD36780 AND HIGH CHI2"] = (df_spec.TARGET == target_sel) & (df_spec.CHI2_FIT> 1000)
cuts["HD36780 AND LOW CHI2"] = (df_spec.TARGET == target_sel) & (df_spec.CHI2_FIT< 500)

### Cut selection

In [None]:
#cut = cuts["HD36780 AND HIGH CHI2"]
cut = cuts["HD36780 AND LOW CHI2"]

In [None]:
df_sel = df_spec[cut].reset_index() 

In [None]:
len(df_sel)

In [None]:
df_sel[["id","FILTER","D2CCD","CHI2_FIT"]]

In [None]:
index_sel = 0
filter_sel = "empty"

In [None]:
df_visit_ids = df_sel[df_sel.FILTER == filter_sel]["id"]

In [None]:
df_visit_ids

In [None]:
visit_id = df_visit_ids.iloc[index_sel]

In [None]:
# Split the visit id into the observation day (leading digits) and the
# sequence number (last five digits).
day_obs, seq_num = divmod(visit_id, 100_000)

In [None]:
dataId = {"day_obs": int(day_obs), "seq_num": int(seq_num) , 'instrument':'LATISS',"detector": 0}
print(dataId)

### Butler

>  butler query-data-ids /repo/main exposure,detector --collections LATISS/* --datasets 'raw' --where "instrument='LATISS' AND exposure.day_obs>=20250101 AND (physical_filter.name='empty~holo4_003' OR physical_filter.name='SDSSr_65mm~holo4_003' OR physical_filter.name='SDSSr_65mm~holo4_003' OR physical_filter.name='OG550_65mm_1~holo4_003' OR physical_filter.name='BG40_65mm_1~holo4_003') AND exposure.observation_type='science'"


>  butler query-data-ids /repo/embargo exposure,detector --collections LATISS/* --datasets 'raw' --where "instrument='LATISS' AND exposure.day_obs>=20250101 AND (physical_filter.name='empty~holo4_003' OR physical_filter.name='SDSSr_65mm~holo4_003' OR physical_filter.name='SDSSr_65mm~holo4_003' OR physical_filter.name='OG550_65mm_1~holo4_003' OR physical_filter.name='BG40_65mm_1~holo4_003') AND exposure.observation_type='science'"



In [None]:
my_collection = butlerusercollectiondict[version_run]
print(my_collection)

In [None]:
from lsst.summit.utils.utils import checkStackSetup
checkStackSetup()

In [None]:
import lsst.daf.butler as dafButler

#repo = "/repo/main"
repo = "/sdf/group/rubin/repo/main"
butler = dafButler.Butler(repo)
registry = butler.registry

#for c in sorted(registry.queryCollections()):
#    if "u/jneveu/auxtel_atmosphere_202311_v3.2.1_fixA2fixA1_RobustFit_newThroughputs/" in c:
#        print(f"Found the requested collection {c} in butler {repo}")
#    else:
#        print(f"Requested collection {my_col} NOT FOUND in butler {repo}")

In [None]:
print(butler.registry.getDatasetType('spectrumLibradtranFitParameters'))

In [None]:
# Query the registry for the 'spectractorSpectrum' datasets of LATISS in
# my_collection.
# NOTE(review): datasetRefs is not used later in the visible cells.
datasetRefs = registry.queryDatasets(datasetType='spectractorSpectrum', collections=my_collection, where= "instrument='LATISS'")
where = "instrument='LATISS'" 
# Visit dimension records attached to those datasets
records = list(butler.registry.queryDimensionRecords('visit', datasets='spectractorSpectrum', where=where,  collections=my_collection))
# De-duplicated list of dataset references
refs = list(set(butler.registry.queryDatasets('spectractorSpectrum',  where=where,  collections=my_collection)))
# Displayed as the cell output: number of visit records found
len(records)

In [None]:
spec= butler.get('spectractorSpectrum',dataId,collections=my_collection)

In [None]:
#dir(spec)

In [None]:
# Gather a few quantities of the loaded spectrum (header keywords and the
# airmass attribute) and print them as a Series in a fixed row order.
dict_params = dict(
    targetname=spec.header["TARGET"],
    airmass=spec.airmass,
    chi2_fit=spec.header["CHI2_FIT"],
    exptime=spec.header["EXPTIME"],
    d2ccd=spec.header["D2CCD"],
)
row_order = ['targetname', 'airmass', 'chi2_fit', 'd2ccd', "exptime"]
ser = pd.Series(data=dict_params, index=row_order)
print(ser)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
spec.plot_spectrum_summary()
plt.show()

In [None]:
#spec.plot_spectrum()
#spec.plot_spectrogram()

In [None]:
plt.figure(figsize=(18, 4))  # largeur, hauteur en pouces
plt.imshow(spec.spectrogram_data, origin="lower",aspect="auto")
plt.colorbar()
plt.show()

In [None]:
dataId = {"day_obs": int(day_obs), "seq_num": int(seq_num) , 'instrument':'LATISS',"detector": 0}

## Loop on few samples

In [None]:
N = 10

In [None]:
# Show the spectrum summary and the spectrogram of the first N selected
# visits. Slicing with iloc[:N] stops at the end of df_visit_ids, so the loop
# no longer raises IndexError when fewer than N visits pass the selection.
for visit_num, visit_id in enumerate(df_visit_ids.iloc[:N]):
    # Visit id = observation day (leading digits) + 5-digit sequence number
    day_obs, seq_num = divmod(visit_id, 100_000)
    dataId = {"day_obs": int(day_obs), "seq_num": int(seq_num) , 'instrument':'LATISS',"detector": 0}
    print(f"====================================================={visit_id} =============================================================")
    spec = butler.get('spectractorSpectrum', dataId, collections=my_collection)

    # A few quantities of the spectrum, printed in a fixed row order
    dict_params = {
        "targetname": spec.header["TARGET"],
        "airmass": spec.airmass,
        "chi2_fit": spec.header["CHI2_FIT"],
        "exptime": spec.header["EXPTIME"],
        "d2ccd": spec.header["D2CCD"],
    }
    ser = pd.Series(data=dict_params, index=['targetname', 'airmass', 'chi2_fit', 'd2ccd', "exptime"])
    print(ser)

    spec.plot_spectrum_summary()
    plt.show()

    # Spectrogram image on a wide figure (width, height in inches)
    plt.figure(figsize=(18, 4))
    plt.imshow(spec.spectrogram_data, origin="lower", aspect="auto")
    plt.colorbar()
    plt.show()
    