## CHECK Spectraction Efficiency ==> Find Missing and Reconstructed

- author : Sylvie Dagoret-Campagne
- creation date : 2025-09-23
- last update : 2025-09-24

- The goal is to find the efficiency of Spectractor by comparing, for each night, the number of reconstructed spectra to the total number of exposures that exist in the butler.
- It is possible to compare the performance depending on the type of filter. 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Global matplotlib style: large default figure and large axis/tick labels.
plt.rcParams.update(
    {
        "figure.figsize": (16, 8),
        "axes.labelsize": "xx-large",
        "axes.titlesize": "xx-large",
        "xtick.labelsize": "xx-large",
        "ytick.labelsize": "xx-large",
        "legend.fontsize": "small",
    }
)
# Show up to 100 rows when a DataFrame is displayed.
pd.options.display.max_rows = 100

In [None]:
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
# CHECK THE CONFIG HERE !!!!!!
from BUTLER00_parameters import *

In [None]:
DumpConfig()

## Configuration

In [None]:
selected_run = "run_v6"

In [None]:
inputfullfilename = mergedtofindmissings[selected_run]

In [None]:
the_collection = butlerusercollectiondict[selected_run] 

In [None]:
the_tag = legendtag[selected_run] 

In [None]:
the_suptitle = the_tag + f" ({the_collection})"

In [None]:
the_suptitle

In [None]:
the_collection_name = the_collection.replace("/","_")

In [None]:
fileout_expo_okormiss  = f"checkout_ExposuresOKorMissing_{the_collection_name}.csv"
fileout_pernightcountokormiss  = f"checkout_PerNightFilterCountOKorMissing_{the_collection_name}.csv"

## Read file

In [None]:
# Load the merged table from the file selected for this run.
# NOTE(review): allow_pickle=True lets np.load unpickle Python objects, which
# can execute arbitrary code -- only load files coming from a trusted source.
data = np.load(inputfullfilename,allow_pickle=True)

In [None]:
df = pd.DataFrame(data)

## SELECT FILTER

In [None]:
df.ex_filter.unique()

In [None]:
# Values of `ex_filter` kept for the analysis; the part before "~" is used
# later as the filter name.
# NOTE: the plotting order of the bands is set later (band_order), not here.
List_Of_Selected_Filter =['empty~holo4_003','OG550_65mm_1~holo4_003'] 

In [None]:
df = df[df["ex_filter"].isin(List_Of_Selected_Filter)]

In [None]:
df

In [None]:
print(list(df.columns))

In [None]:
df = df[["id",'ex_day_obs', 'ex_seq_num','ex_filter','FILTER','D2CCD']]

In [None]:
df

In [None]:
# Work on an independent copy before adding a column to the selection.
df = df.copy()
# Keep only the part of ex_filter that precedes the first "~".
df["filter"] = df["ex_filter"].map(lambda name: name.partition("~")[0])

In [None]:
df.drop(["ex_filter","FILTER"],axis=1,inplace=True)

In [None]:
df

### Determine if the Spectractor data are missing

- If D2CCD is nan, it means no Spectractor data were found

In [None]:
df["missing"] = df["D2CCD"].isna()

In [None]:
df

In [None]:
df_out = df.drop(["D2CCD"],axis=1)
df_out.to_csv(fileout_expo_okormiss)

## Increase the number of filters by splitting each one into
    - filtername_miss : if the exposure has not been reconstructed by Spectractor
    - filtername_ok : if the exposure has been reconstructed by Spectractor

In [None]:
# Label each exposure "<filter>_miss" when it is flagged missing and
# "<filter>_ok" otherwise.
# Vectorised instead of a row-wise apply: faster, and it still yields a single
# column when df is empty (apply(axis=1) returns a DataFrame in that case,
# which cannot be assigned to one column).
df["filter_status"] = df["filter"] + df["missing"].map({True: "_miss", False: "_ok"})

In [None]:
df.drop(["D2CCD"],axis=1, inplace = True)

In [None]:
df

## Grouping for the summary and plotting

- group by filter_status

In [None]:
band_order = sorted(df.filter_status.unique())

In [None]:
# Explicit hue order for the bar plot ("ok" before "miss" for each filter);
# this replaces the alphabetical order computed just before.
band_order= ['empty_ok','empty_miss','OG550_65mm_1_ok','OG550_65mm_1_miss']

In [None]:
color_map = {
    'OG550_65mm_1_ok': "red", 
    'OG550_65mm_1_miss':"orange", 
    'empty_ok':"blue", 
    'empty_miss':"cyan",
}

In [None]:
grouped  = df.groupby(["ex_day_obs","filter_status"]).size().reset_index(name='count')

In [None]:
grouped 

In [None]:
# Grouped bar chart: one bar per (night, filter_status), counts on a log axis.
fig, ax = plt.subplots(figsize=(20, 6))
sns.barplot(
    data=grouped,
    x="ex_day_obs",
    y="count",
    hue="filter_status",
    hue_order=band_order,
    palette=color_map,
    ax=ax,
)
ax.set_xlabel("night obs")
ax.set_ylabel("Number of observation per night")
ax.set_title("Number of Obs per night found or missing", fontsize=20, fontweight="bold")
# Rotate the night labels so they stay readable.
plt.setp(ax.get_xticklabels(), rotation=90, ha="right")
ax.legend(title="Filter", loc="upper left")
fig.suptitle(the_suptitle)
fig.tight_layout()
ax.grid()
ax.set_yscale("log")
plt.show()

In [None]:
df2 = grouped.copy()
df2

In [None]:
# Split filter_status into the filter name and its ok/miss status
df2["filter"] = df2["filter_status"].str.replace(r"_(ok|miss)$", "", regex=True)
df2["status"] = df2["filter_status"].str.extract(r"(ok|miss)$")

In [None]:
df2

In [None]:

# Pivot: index = night, columns = (filter, status)
pivot = df2.pivot_table(
    index="ex_day_obs",
    columns=["filter", "status"],
    values="count",
    aggfunc="sum",
    fill_value=0,
)

filters = pivot.columns.get_level_values(0).unique()

# pivot_table only creates the (filter, status) pairs present in the data, so
# a filter with no "miss" (or no "ok") rows would raise KeyError in the loop
# below. Reindex on the full filter x status grid, filling absent pairs with 0.
pivot = pivot.reindex(
    columns=pd.MultiIndex.from_product(
        [filters, ["ok", "miss"]], names=["filter", "status"]
    ),
    fill_value=0,
)

# Plot: one bar per filter and per night, with ok and miss stacked
fig, ax = plt.subplots(figsize=(14, 6))

# Bar width adapts to the number of filters; max() avoids dividing by zero
width = 0.8 / max(len(filters), 1)
x = np.arange(len(pivot))

for i, f in enumerate(filters):
    ok = pivot[(f, "ok")]
    miss = pivot[(f, "miss")]
    # shifted position for each filter
    xpos = x + i * width
    ax.bar(xpos, ok, width, label=f"{f} ok")
    ax.bar(xpos, miss, width, bottom=ok, label=f"{f} miss", alpha=0.6)

# Centre each tick under its group of bars
ax.set_xticks(x + (len(filters) - 1) * width / 2)
ax.set_xticklabels(pivot.index, rotation=90)

ax.set_ylabel("Number of exposures")
ax.set_xlabel("Date (day_obs)")
ax.set_title("Ok/miss per filter and per night")

ax.legend()
plt.suptitle(the_suptitle)
plt.tight_layout()
plt.show()


In [None]:
df2 = grouped.copy()
df2

In [None]:
# Base colour per filter (filters not listed here are drawn in gray)
colors = {
    "OG550_65mm_1": "tab:red",
    "empty": "tab:blue",
    # add other filters here
}

# Split filter_status into the filter name and its ok/miss status
df2 = grouped.copy()
df2["filter"] = df2["filter_status"].str.replace(r"_(ok|miss)$", "", regex=True)
df2["status"] = df2["filter_status"].str.extract(r"(ok|miss)$")

# Pivot: index = night, columns = (filter, status)
pivot = df2.pivot_table(
    index="ex_day_obs",
    columns=["filter", "status"],
    values="count",
    aggfunc="sum",
    fill_value=0,
)

filters = pivot.columns.get_level_values(0).unique()

# pivot_table only creates the (filter, status) pairs present in the data, so
# a filter with no "miss" (or no "ok") rows would raise KeyError in the loop
# below. Reindex on the full filter x status grid, filling absent pairs with 0.
pivot = pivot.reindex(
    columns=pd.MultiIndex.from_product(
        [filters, ["ok", "miss"]], names=["filter", "status"]
    ),
    fill_value=0,
)

fig, ax = plt.subplots(figsize=(14, 6))

# Bar width adapts to the number of filters; max() avoids dividing by zero
width = 0.8 / max(len(filters), 1)
x = np.arange(len(pivot))

for i, f in enumerate(filters):
    ok = pivot[(f, "ok")]
    miss = pivot[(f, "miss")]
    xpos = x + i * width

    base_color = colors.get(f, "gray")  # default colour = gray

    # Same colour for both statuses; "miss" is drawn more transparent
    ax.bar(xpos, ok, width, label=f"{f} ok", color=base_color)
    ax.bar(xpos, miss, width, bottom=ok,
           label=f"{f} miss", color=base_color, alpha=0.4)

# Centre each tick under its group of bars
ax.set_xticks(x + (len(filters) - 1) * width / 2)
ax.set_xticklabels(pivot.index, rotation=90)

ax.set_ylabel("Number of exposures")
ax.set_xlabel("Date (day_obs)")
ax.set_title("Ok/miss per filter and per night")
ax.legend(ncol=2)  # more compact legend

plt.suptitle(the_suptitle)
plt.tight_layout()
plt.show()


In [None]:
df2

In [None]:
df2.to_csv(fileout_pernightcountokormiss)