# Demo to use quality cuts

- author: Sylvie Dagoret-Campagne
- creation date: 2026-02-12
- last update: 2026-02-12

In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so that modules
# edited on disk are re-imported without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from platform import python_version
# Record which Python interpreter version this notebook is running on.
print(python_version())

In [None]:
import warnings
# Clear every warning filter installed so far, then install a single filter
# that ignores all warnings.
# NOTE(review): this also hides pandas warnings (e.g. chained assignment,
# deprecations) raised by the cells below.
warnings.resetwarnings()
warnings.simplefilter('ignore')

In [None]:
from libSelectionQCUT import ParameterCutSelection,ParameterCutTools

In [None]:
import os

In [None]:
import numpy as np
import pandas as pd

# Display settings for dataframes rendered in the notebook:
# no limit on the number of columns shown, at most 100 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

In [None]:
from IPython.display import display, JSON
import json

In [None]:
def normalize_column_data_bytarget_byfilter(df,target_col,filter_col,feature_col,ext="norm",id_col="id"):
    """
    Normalize a feature column by its mean within each (filter, target) group.

    For every combination of the unique values of ``filter_col`` and
    ``target_col``, the mean of ``feature_col`` over the matching rows is
    computed and each matching row receives ``feature_col / mean`` in a new
    column named ``f"{feature_col}_{ext}"``. The input dataframe is not
    modified.

    Rows whose target or filter value is NaN never match a group (NaN is not
    equal to itself) and are therefore absent from the returned dataframe.

    :param df: Pandas dataframe
    :param target_col: name of the target column in the dataframe
    :param filter_col: name of the filter column in the dataframe
    :param feature_col: name of the feature column to normalize
    :param ext: extension appended to the feature name to build the new column name
    :param id_col: name of the column used to sort the returned dataframe
    :return: tuple ``(df_merge, df_out)``; ``df_merge`` holds the grouped rows
        with the new normalized column, sorted by ``id_col`` in ascending
        order; ``df_out`` holds one row per (target, filter) combination with
        the group mean in column ``f"{feature_col}_mean"`` (NaN for a
        combination that has no rows)
    :raises KeyError: if one of the requested columns is missing from ``df``
    """
    missing = [c for c in (target_col, filter_col, feature_col, id_col) if c not in df.columns]
    if missing:
        raise KeyError(f"columns missing from dataframe: {missing}")

    feature_col_out = f"{feature_col}_{ext}"
    feature_col_mean = f"{feature_col}_mean"

    the_filters = df[filter_col].unique()
    the_targets = df[target_col].unique()

    normalized_groups = []
    mean_records = []

    for f in the_filters:
        # The filter mask does not depend on the target: compute it once per filter.
        in_filter = df[filter_col] == f
        for t in the_targets:
            df_data = df[in_filter & (df[target_col] == t)]
            mean_data = df_data[feature_col].mean()
            mean_records.append({target_col: t, filter_col: f, feature_col_mean: mean_data})
            if df_data.empty:
                # Nothing to normalize; concatenating empty frames is deprecated in pandas.
                continue
            # assign() returns a new frame, so no column is ever set on a slice of df.
            normalized_groups.append(
                df_data.assign(**{feature_col_out: df_data[feature_col] / mean_data})
            )

    # Build the summary in one go instead of growing a dataframe row by row.
    df_out = pd.DataFrame(mean_records, columns=[target_col, filter_col, feature_col_mean])

    if normalized_groups:
        df_merge = pd.concat(normalized_groups)
    else:
        # Empty input (or no row with valid keys): keep the schema, add the new column.
        df_merge = df.iloc[0:0].assign(**{feature_col_out: pd.Series(dtype="float64")})

    df_merge = df_merge.sort_values(by=id_col, ascending=True)

    return df_merge,df_out

## Input dataframe

In [None]:
# Location of the input parquet file, given relative to the directory this notebook runs in.
input_filename = "../../2025-06-26-SpectractorExtraction-FromButler/data/spectro_merged/auxtel_atmosphere_202311_v3.2.1_fixA2fixA1_RobustFit_newThroughputs_merged.parquet.gz"

In [None]:
# Read the whole parquet file into a pandas dataframe.
df_spec = pd.read_parquet(input_filename)

In [None]:
# Append the "_ram" suffix to each of the columns listed below, modifying
# df_spec in place. Columns not listed keep their name.
df_spec.rename(
    columns={
        name: f"{name}_ram"
        for name in (
            "chi2",
            "A1",
            "A1_err",
            "A2",
            "A2_err",
            "A3",
            "A3_err",
            "VAOD",
            "VAOD_err",
            "angstrom_exp",
            "angstrom_exp_err",
            "ozone [db]",
            "ozone [db]_err",
            "PWV [mm]",
            "PWV [mm]_err",
            "B",
            "B_err",
            "A_star",
            "A_star_err",
            "D_CCD [mm]",
            "D_CCD [mm]_err",
        )
    },
    inplace=True,
)

### Normalised chi2

Compute the chi2 renormalised by its mean value for each (target, filter) pair.
This normalised quantity is used for the cuts instead of the original chi2, which depends strongly on the target.

In [None]:
# The three chi2 columns are normalised with the same grouping and the same
# output suffix; only the feature column changes from one call to the next.
# Each call returns the updated table plus a summary of the group means.
_norm_kwargs = dict(target_col="TARGET", filter_col="FILTER", ext="norm")
df_spec, df1 = normalize_column_data_bytarget_byfilter(df_spec, feature_col="CHI2_FIT", **_norm_kwargs)
df_spec, df2 = normalize_column_data_bytarget_byfilter(df_spec, feature_col="chi2_ram", **_norm_kwargs)
df_spec, df3 = normalize_column_data_bytarget_byfilter(df_spec, feature_col="chi2_rum", **_norm_kwargs)

In [None]:
# Number of rows in df_spec after the normalisation step.
len(df_spec)

## Load the Parameter class config for all users

### Use the class ParameterCutTools to load a dictionary of cut ranges

In [None]:
# JSON file holding the cut definitions, given relative to the working directory.
filename_cuts = "cuts_finaldecision.json" 

In [None]:
# Load the cut definitions from the JSON file.
# NOTE(review): the cells below call cuts.keys(), so this is presumably a
# dict keyed by parameter name -- confirm against libSelectionQCUT.
cuts = ParameterCutTools.load_cuts_json(filename_cuts)

### Dump cuts

In [None]:
# Alternative ways to inspect the raw cuts, kept commented out:
#print(json.dumps(cuts, indent=4, sort_keys=True))
#display(JSON(cuts))
# Show the cuts through the library helper; as the last expression of the
# cell, its return value is rendered by the notebook.
ParameterCutTools.cuts_to_dataframe(cuts)

In [None]:
# Names of the parameters that have a cut defined, in the order of the keys of `cuts`.
list_of_params = [*cuts.keys()]

In [None]:
# Display the parameter names taken from the keys of `cuts`.
list_of_params

## Apply the selections to the data

In [None]:
# Build the selector on the full table for the parameters that have a cut;
# rows are identified through the "id" column.
selector = ParameterCutSelection(df_spec, params=list_of_params, id_col="id")

# Apply the cuts first, then compute the two statistics tables from the same cuts.
flags = selector.apply_cuts(cuts)
df_stats = selector.selection_statistics(cuts)
df_stats_v2 = selector.selection_statistics_inoutliers_by_param(cuts)

# Join the flags onto the data through the shared "id" key (inner join, the
# pandas default), then keep only the rows where "pass_all_cuts" is true.
# NOTE(review): "pass_all_cuts" is presumably a boolean column supplied by
# `flags` -- confirm against libSelectionQCUT.
df_selected = pd.merge(df_spec, flags, on="id")
df_keep = df_selected.loc[df_selected["pass_all_cuts"]]

### output dataframe

In [None]:
# Number of rows that pass all the cuts.
len(df_keep)

In [None]:
# Count the selected rows for each (TARGET, FILTER) combination.
df_keep.groupby(["TARGET", "FILTER"]).size()

### Per target overall statistics

In [None]:
# Display the table returned by selector.selection_statistics(cuts).
df_stats

### Per target per filter per param statistics

In [None]:
# Display the table returned by selector.selection_statistics_inoutliers_by_param(cuts).
df_stats_v2 