# QCUT03 : Check quality per target, especially on Chi2

- author Sylvie Dagoret-Campagne
- creation date 2026-01-27 : version vrun2026_v01
- last update 2026-01-27 : 
- last update : 2026-01-27 : 
- Home emac : base (conda)
- laptop : conda_py313

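**Goal** : Check the fit quality per target: show the normalised Chi2 distributions per target and filter, and compare them with chi2 and log-normal laws (Kolmogorov-Smirnov test, Q-Q plots).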

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from platform import python_version
print(python_version())

In [None]:
import warnings
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
# must install the mysitcom package by doing at top level "pip install --user -e . "
from mysitcom.auxtel.qualitycuts import scatter_datetime
from mysitcom.auxtel.qualitycuts import strip_datetime
from mysitcom.auxtel.qualitycuts import bar_counts_by_night
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_filter
from mysitcom.auxtel.qualitycuts import stripplot_target_vs_time
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_target_filter
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_histo_by_target_filter
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_vs_time_by_target_filter_colorsedtype
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_histo_by_target_filter_colorsedtype
from mysitcom.auxtel.qualitycuts import summarize_dccd_chi2
from mysitcom.auxtel.qualitycuts import plot_dccd_chi2_histo_by_target_filter_colorsedtype
from mysitcom.auxtel.qualitycuts import plot_chi2_norm_histo_by_target
from mysitcom.auxtel.qualitycuts import plot_chi2_norm_histo_onetarget
from mysitcom.auxtel.qualitycuts import normalize_column_data_bytarget_byfilter

In [None]:
from mysitcom.auxtel.qualitycuts import generate_chi2_samples
from mysitcom.auxtel.qualitycuts import generate_lognormal_samples
from mysitcom.auxtel.qualitycuts import ks_test_chi2_vs_lognormal
from mysitcom.auxtel.qualitycuts import plot_normalized_histogram
from mysitcom.auxtel.qualitycuts import qq_plot_chi2_vs_lognormal

In [None]:
import os

In [None]:
# Where the figures are stored, and how they are named.
pathfigs = "figs_QCUT03"  # output directory of the figures
prefix = "qcut03"         # presumably the file-name prefix of the figures (not used in this cell)
figtype = ".png"          # presumably the file extension of the figures (not used in this cell)
# exist_ok=True makes the creation idempotent and removes the race between
# the existence check and the creation.
os.makedirs(pathfigs, exist_ok=True)

In [None]:
import numpy as np
from numpy.linalg import inv
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
import seaborn as sns
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm,SymLogNorm
from matplotlib.patches import Circle,Annulus
from astropy.visualization import ZScaleInterval
props = dict(boxstyle='round', facecolor="white", alpha=0.1)
#props = dict(boxstyle='round')

import matplotlib.colors as colors
import matplotlib.cm as cmx

import matplotlib.ticker                         # here's where the formatter is
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from matplotlib.gridspec import GridSpec

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.io import fits
from astropy.wcs import WCS
from astropy import units as u
from astropy import constants as c

from scipy import interpolate
from sklearn.neighbors import NearestNeighbors
from sklearn.neighbors import KDTree, BallTree

import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option('display.max_rows', 100)

import matplotlib.ticker                         # here's where the formatter is
import os
import re
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype

import pickle
from collections import OrderedDict

plt.rcParams["figure.figsize"] = (16,8)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'
plt.rcParams["legend.fontsize"] = "xx-large"

import scipy
from scipy.optimize import curve_fit,least_squares
from scipy import stats
from pprint import pprint

# new color correction model
import pickle
from scipy.interpolate import RegularGridInterpolator

In [None]:
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from astropy.visualization import (MinMaxInterval, SqrtStretch,ZScaleInterval,PercentileInterval,
                                   ImageNormalize,imshow_norm)
from astropy.visualization.stretch import SinhStretch, LinearStretch,AsinhStretch,LogStretch

from astropy.time import Time


In [None]:
# Remove to run faster the notebook
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
from QCUT00_parameters import *

In [None]:
DumpConfig()

In [None]:
from importlib.metadata import version

In [None]:
# wavelength bin colors
#jet = plt.get_cmap('jet')
#cNorm = mpl.colors.Normalize(vmin=0, vmax=NSED)
#scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
#all_colors = scalarMap.to_rgba(np.arange(NSED), alpha=1)

In [None]:
np.__version__

In [None]:
pd.__version__

### Configuration

In [None]:
def convertNumToDatestr(num):
    """Convert a date encoded as the number YYYYMMDD into a pandas datetime.

    Parameters
    ----------
    num : int
        Date written as ``year * 10_000 + month * 100 + day``,
        e.g. ``20230930``.

    Returns
    -------
    The result of ``pd.to_datetime`` applied to the ``"YYYY-MM-DD"`` string
    built from ``num`` (despite the function name, this is not a string).
    """
    # Peel off the year, then split the remainder into month and day.
    year, month_and_day = divmod(num, 10_000)
    month, day = divmod(month_and_day, 100)

    # Zero-pad each field to the usual YYYY-MM-DD widths.
    fields = (str(year).zfill(4), str(month).zfill(2), str(day).zfill(2))
    return pd.to_datetime("-".join(fields))

In [None]:
PWVMIN = 0.
PWVMAX = 20.

In [None]:
# Collimator settings (presumably the date from which the collimator is
# installed -- TODO confirm the exact meaning with the author).
FLAG_WITHCOLLIMATOR = False
DATE_WITHCOLLIMATOR = 20230930  # date encoded as YYYYMMDD

# Derive the timestamp from DATE_WITHCOLLIMATOR instead of hard-coding the same
# date a second time (the first assignment used to be overwritten at once), and
# make it UTC-aware so it can be compared with the UTC-aware df_spec["Time"].
datetime_WITHCOLLIMATOR = convertNumToDatestr(DATE_WITHCOLLIMATOR).tz_localize("UTC")
datetime_WITHCOLLIMATOR

## Initialisation

### Read the file
- `atmfilename` is defined in `QCUT00_parameters.py` 

In [None]:
the_suptitle = butlerusercollectiondict[version_run] 

In [None]:
# Base name of the input file: it decides which reader is used.
inputfilename = atmfilename.split("/")[-1]

if "parquet" in inputfilename:
    df_spec = pd.read_parquet(atmfilename)
elif "npy" in inputfilename:
    # NOTE(review): allow_pickle=True can execute arbitrary code while
    # unpickling; only load files from a trusted source.
    specdata = np.load(atmfilename, allow_pickle=True)
    df_spec = pd.DataFrame(specdata)
else:
    # Python 3 only accepts exception instances/classes in `raise` (a plain
    # string triggers a TypeError), and the message needs the f-prefix to
    # actually show the file name.
    raise ValueError(f"bad path of filename {inputfilename}")
    

In [None]:
print(" | ".join(df_spec.columns)) 

In [None]:
#df_spec.dtypes.to_frame('Type de donnée')

In [None]:
# add time for plotting
#df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"])
df_spec["Time"] = pd.to_datetime(df_spec["DATE-OBS"],utc=True)

In [None]:
# The leading digits of "id" give nightObs (the last five digits are extracted
# as seq_num elsewhere). Vectorised floor division replaces the row-by-row
# apply: same values, no Python-level loop, and well defined on an empty table.
df_spec["nightObs"] = df_spec["id"] // 100_000

In [None]:
df_spec["seq_num"]  = df_spec["id"] % 100_000

In [None]:
df_spec[["id","FILTER"]]

## Targets in our data

In [None]:
# Distinct values of the TARGET column, and how many there are.
List_Of_Targets = pd.unique(df_spec["TARGET"])
NTARGETS = len(List_Of_Targets)
print(NTARGETS, List_Of_Targets)

## Special study on Star Color (Spectral type)
- Load magnitudes from external file
- the magnitudes have been computed in another notebook in ../2025-10-29-TOOLS



In [None]:
# Table of target magnitudes: the first CSV column becomes the index,
# and the rows are sorted by the "y" column.
targets_mag_files = "../2025-10-29-TOOLS/data/targets_magnitudes.csv"
df_targets_mag = pd.read_csv(targets_mag_files, index_col=0).sort_values(by="y")

### palette with SED type

In [None]:
# Spectral types taken in the order of the rows sorted by the "B_V" column
# (sort_values already returns a new table, df_targets_mag is left untouched).
df_col = df_targets_mag.sort_values(by="B_V")

SpT = df_col["Sp_T"].values
unique_types = list(dict.fromkeys(SpT))  # de-duplicate, keeping the order of first appearance
N_types = len(unique_types)


# Map every spectral type to an integer
type_to_idx = {sp_name: rank for rank, sp_name in enumerate(unique_types)}
idx = np.array([type_to_idx[sp_name] for sp_name in SpT])

# Build the colormap: the normalisation range is [-0.5, N_types - 0.5],
# so each integer index sits in the middle of a unit-wide interval
cmap = mpl.cm.jet
norm = mpl.colors.Normalize(vmin=-0.5, vmax=N_types - 0.5)

# Horizontal colorbar labelled with the spectral types
fig, ax = plt.subplots(figsize=(14, 0.4), layout="constrained")
cbar = fig.colorbar(
    mpl.cm.ScalarMappable(norm=norm, cmap=cmap),
    cax=ax, orientation="horizontal", ticks=range(N_types),
)
cbar.ax.set_xticklabels(unique_types, rotation=45, ha="right")
cbar.set_label("Spectral Type", fontsize=12, labelpad=10)

plt.show()

In [None]:
df_targets_mag.head() 

### Targets of the input file, ordered by `y` magnitude

In [None]:
sorted_targets =  list(df_targets_mag.index) 
print(sorted_targets)

### Targets present in our data file, in the same `y`-magnitude order

In [None]:
# Targets present in our data, kept in the order of sorted_targets
# (i.e. the order of the magnitude table sorted by its "y" column).
# The set of observed targets is built once: unique() used to be recomputed
# for every candidate target inside the comprehension.
targets_in_data = set(df_spec["TARGET"].unique())
order_selected_targets = [t for t in sorted_targets if t in targets_in_data]
print(order_selected_targets)

### build a palette for colors

In [None]:
# --- Custom palette, consistent with the jet colormap of the spectral types ---
# Each selected target gets the colormap colour of its spectral type ("Sp_T").
target_to_color = {}
for target in order_selected_targets:
    sp_type = df_targets_mag.loc[target, "Sp_T"]
    rgba = cmap(norm(type_to_idx[sp_type]))

    # Store the components as plain Python floats
    target_to_color[target] = tuple(map(float, rgba))

In [None]:
# Normalised-chi2 histogram by target, filter selection "empty".
fig, axs = plot_chi2_norm_histo_by_target(
    df_spec,
    # columns and filter selection
    filter_col="FILTER", filter_select="empty",
    target_col="TARGET", chi2_col="CHI2_FIT",
    # figure bounds / threshold (on the normalised chi2)
    chi2_min_fig=1e-2, chi2_max_fig=1e2, chi2_cut=1,
    # histogram
    bins_chi2=50, density=False,
    # style
    lw=3,
    suptitle=None,
    # display
    per_target=False, axs=None, figsize=(12, 10), tag=None,
    # colors -- dict: TARGET -> color
    target_palette=target_to_color,
)

In [None]:
# Normalised-chi2 histogram by target, filter selection "OG550_65mm_1"
# (upper figure bound set to 2e2 here).
fig, axs = plot_chi2_norm_histo_by_target(
    df_spec,
    # columns and filter selection
    filter_col="FILTER", filter_select="OG550_65mm_1",
    target_col="TARGET", chi2_col="CHI2_FIT",
    # figure bounds / threshold (on the normalised chi2)
    chi2_min_fig=1e-2, chi2_max_fig=2e2, chi2_cut=1.,
    # histogram
    bins_chi2=50, density=False,
    # style
    lw=3,
    suptitle=None,
    # display
    per_target=False, axs=None, figsize=(12, 10), tag=None,
    # colors -- dict: TARGET -> color
    target_palette=target_to_color,
)

In [None]:
# Normalised-chi2 histogram by target, filter selection "BG40_65mm_1".
fig, axs = plot_chi2_norm_histo_by_target(
    df_spec,
    # columns and filter selection
    filter_col="FILTER", filter_select="BG40_65mm_1",
    target_col="TARGET", chi2_col="CHI2_FIT",
    # figure bounds / threshold (on the normalised chi2)
    chi2_min_fig=1e-2, chi2_max_fig=1e2, chi2_cut=1,
    # histogram
    bins_chi2=50, density=False,
    # style
    lw=3,
    suptitle=None,
    # display
    per_target=False, axs=None, figsize=(12, 10), tag=None,
    # colors -- dict: TARGET -> color
    target_palette=target_to_color,
)

In [None]:
# Normalised-chi2 histogram by target, filter selection "OG550_65mm_1",
# this time with per_target=True and a smaller figsize.
fig, axs = plot_chi2_norm_histo_by_target(
    df_spec,
    # columns and filter selection
    filter_col="FILTER", filter_select="OG550_65mm_1",
    target_col="TARGET", chi2_col="CHI2_FIT",
    # figure bounds / threshold (on the normalised chi2)
    chi2_min_fig=1e-2, chi2_max_fig=1e2, chi2_cut=1.,
    # histogram
    bins_chi2=50, density=False,
    # style
    lw=3,
    suptitle=None,
    # display
    per_target=True, axs=None, figsize=(5, 4), tag=None,
    # colors -- dict: TARGET -> color
    target_palette=target_to_color,
)

## Simulation

In [None]:
# Settings of the simulation
n_samples = 10000
degrees_of_freedom = 10
n_bins = 50
min_bound = 0.01
max_bound = 5.0

# Generate the chi2 samples
samples, mean_value = generate_chi2_samples(n_samples, degrees_of_freedom)

# Summary of the generated sample (one item per line)
print(
    f"Degrés de liberté: {degrees_of_freedom}",
    f"Nombre d'échantillons: {n_samples}",
    f"Moyenne théorique: {mean_value}",
    f"Moyenne empirique: {np.mean(samples):.3f}",
    f"Nombre de bins: {n_bins}",
    sep="\n",
)

# Display, with a vertical line at x = 1
fig, ax = plot_normalized_histogram(
    samples, mean_value, n_bins, min_bound, max_bound,
    title=f"Distribution Chi2 normalisée (df={degrees_of_freedom})",
    figsize=(6, 5),
)
ax.axvline(1)
plt.show()

In [None]:
# Q-Q plots for the simulated chi2 samples
print("\n\n*** Q-Q PLOTS POUR DONNÉES CHI2 ***")
fig1, params1 = qq_plot_chi2_vs_lognormal(samples, figsize=(10, 5), log_scale=True)

In [None]:
# Simulated log-normal samples (mu=0, sigma=0.5), histogrammed with the same
# binning and bounds as the chi2 simulation; vertical line at x = 1.
samples_ln, mean_ln = generate_lognormal_samples(n_samples, mu=0, sigma=0.5)
fig, ax = plot_normalized_histogram(
    samples_ln, mean_ln, n_bins, min_bound, max_bound,
    title="Distribution Log-Normale normalisée",
    figsize=(6, 5),
)
ax.axvline(1)
plt.show()

In [None]:
# Q-Q plots for the simulated log-normal samples
print("\n\n*** Q-Q PLOTS POUR DONNÉES LOG-NORMALES ***")
fig2, params2 = qq_plot_chi2_vs_lognormal(samples_ln, figsize=(10, 5), log_scale=True)

## Test de Kolmogorov-Smirnov : loi du chi2 ou loi log-normale

## Interprétation :

- p-value > 0.05 : les données sont cohérentes avec la distribution testée
- plus la p-value est élevée, meilleur est l'ajustement
- la fonction `ks_test_chi2_vs_lognormal` indique directement quelle distribution correspond le mieux

Les cellules ci-dessous appliquent d'abord le test à des données simulées (chi2 puis log-normales) pour illustrer son fonctionnement.

In [None]:
# Kolmogorov-Smirnov test on the simulated chi2 samples
print("\n\n*** TEST SUR DES DONNÉES CHI2 ***")
results = ks_test_chi2_vs_lognormal(samples, verbose=True)

# Same test on log-normal samples (a new set is drawn here)
print("\n\n*** TEST SUR DES DONNÉES LOG-NORMALES ***")
samples_ln, mean_ln = generate_lognormal_samples(n_samples, mu=0, sigma=0.5)
results_ln = ks_test_chi2_vs_lognormal(samples_ln, verbose=True)

In [None]:
List_Of_Targets

In [None]:
# Targets of the data that have no entry in the colour palette.
missing_targets = set(List_Of_Targets).difference(target_to_color)

In [None]:
missing_targets

In [None]:
def ExtractDataFromDataFrame(df, target_name, filter_name, target_col="TARGET", filter_col="FILTER"):
    """Return the rows of ``df`` that match both a target and a filter.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding at least the columns ``target_col`` and ``filter_col``.
    target_name
        Value that ``df[target_col]`` must be equal to.
    filter_name
        Value that ``df[filter_col]`` must be equal to.
    target_col : str
        Name of the target column (default ``"TARGET"``).
    filter_col : str
        Name of the filter column (default ``"FILTER"``).

    Returns
    -------
    pandas.DataFrame
        The selected rows, with their original index; empty when nothing matches.
    """
    is_target = df[target_col] == target_name
    is_filter = df[filter_col] == filter_name
    return df.loc[is_target & is_filter]
    

In [None]:
filter_sel ="empty"
chi2_col = "CHI2_FIT"

In [None]:
# For every target: normalised-chi2 histogram, then Kolmogorov-Smirnov test and
# Q-Q plot of its chi2 values, all for the filter selection `filter_sel`.
for target_name in List_Of_Targets:

    print(f"******************************** BEGIN {target_name} ************************************")

    fig, ax = plot_chi2_norm_histo_onetarget(
        df_spec, target_name,
        filter_col="FILTER", filter_select=filter_sel,
        target_col="TARGET", chi2_col=chi2_col,
        # figure bounds / threshold (on the normalised chi2)
        chi2_min_fig=1e-2, chi2_max_fig=1e2, chi2_cut=1.0,
        # histogram
        bins_chi2=50, density=False,
        # style
        lw=4,
        suptitle=target_name,
        # display
        axs=None,
        figsize=(6, 4),
        # colors
        target_palette=target_to_color,
    )
    plt.show()

    # chi2 values of this target for the selected filter, missing values dropped
    df_data = ExtractDataFromDataFrame(df_spec, target_name, filter_name=filter_sel,
                                       target_col="TARGET", filter_col="FILTER")
    the_chi2_data = df_data[chi2_col].dropna().values

    if the_chi2_data.size == 0:
        # A target may have no exposure with this filter: nothing to test.
        print(f"No {chi2_col} value for {target_name} with filter {filter_sel}: "
              "KS test and Q-Q plot skipped")
    else:
        # ====================================================================
        # Kolmogorov-Smirnov test
        # ====================================================================
        # A single call is enough: the "log-normal" step used to repeat exactly
        # the same call on exactly the same data.
        print(f"\n\n*** TEST SUR DES DONNÉES CHI2 pour {target_name} ***")
        results = ks_test_chi2_vs_lognormal(the_chi2_data, verbose=True)

        # ====================================================================
        # Q-Q plots
        # ====================================================================
        fig, params = qq_plot_chi2_vs_lognormal(the_chi2_data, figsize=(10, 4), log_scale=True)
        plt.show()

    print(f"******************************** END {target_name} *************************************")