# CHECK Merged Spectra Statistics

- author : Sylvie Dagoret-Campagne
- creation date : 2025-10-21
- last update : 2025-10-21
- last update : 2025-10-23 : apply this calculation to run_v10

- The goal is to find the efficiency of Spectractor by comparing, for each night, the number of reconstructed spectra to the total number of exposures that exist in the butler.
- It is possible to compare the performance depending on the type of filter.
- Similar to the **CHECK_SpectractionStatistics.ipynb** notebook, but requires the merged extracted-spectra file of the selected run (see the Configuration section below).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Global plotting style: large default figure, extra-large axis/tick text,
# small legend text.
plt.rcParams.update(
    {
        "figure.figsize": (16, 8),
        "axes.labelsize": "xx-large",
        "axes.titlesize": "xx-large",
        "xtick.labelsize": "xx-large",
        "ytick.labelsize": "xx-large",
        "legend.fontsize": "small",
    }
)
# Let pandas show up to 100 rows before truncating a displayed table.
pd.set_option("display.max_rows", 100)

In [None]:
#import ipywidgets as widgets
#%matplotlib widget

In [None]:
# CHECK THE CONFIG HERE !!!!!!
from BUTLER00_parameters import *

In [None]:
# DumpConfig is not defined in this notebook; it is expected to come from the
# `from BUTLER00_parameters import *` star import.
# NOTE(review): presumably prints the active configuration — confirm.
DumpConfig()

## Configuration

In [None]:
# Run used as the key for all per-run lookups below.
# version_run is expected to come from the BUTLER00_parameters star import.
selected_run = version_run

In [None]:
# Input file for the selected run, looked up in mergedextractedfilesdict
# (loaded with np.load further down).
inputfullfilename = mergedextractedfilesdict[selected_run]
print(inputfullfilename )

In [None]:
# Collection name associated with the selected run (used in figure super-titles).
the_collection = butlerusercollectiondict[selected_run] 

In [None]:
# Legend tag associated with the selected run (first part of the super-title).
the_tag = legendtag[selected_run] 

In [None]:
# Super-title shared by all figures: "<tag> (<collection>)".
the_suptitle = the_tag + f" ({the_collection})"

In [None]:
# Echo the super-title as the cell output for a visual check.
the_suptitle

In [None]:
# Collection name with "/" replaced by "_".
# NOTE(review): presumably meant for use in output file names — confirm.
the_collection_name = the_collection.replace("/","_")
print(the_collection_name)

## Read file

In [None]:
# IPython shell escape: list the contents of the merged-spectra data directory.
!ls ../2025-06-26-SpectractorExtraction-FromButler/data/spectro_merged/

In [None]:
# Load the merged records from the selected input file.
# NOTE(review): allow_pickle=True makes np.load unpickle object arrays, which can
# execute arbitrary code; only use it on trusted, locally produced files.
data = np.load(inputfullfilename,allow_pickle=True)

In [None]:
# Wrap the loaded records in a DataFrame for the computations and plots below.
df = pd.DataFrame(data)

## Computations

In [None]:
# Integer-divide the exposure id by 100_000 to get the nightObs value.
# Vectorized on the whole column instead of a row-by-row apply.
df["nightObs"] = df["id"] // 100_000

In [None]:
# Parse the DATE-OBS column into pandas datetimes; "date" is the time axis of all plots.
df["date"] = pd.to_datetime(df["DATE-OBS"])

In [None]:
#list(df.columns)

## Plot selected filters vs date

In [None]:
# Distinct values found in the ex_filter column.
df.ex_filter.unique()

### Plot sequence number vs date for different filters

In [None]:
plt.figure(figsize=(20, 8))

# One point per row of df: datetime on x, sequence number on y, colour = filter.
sns.scatterplot(data=df, x="date", y="ex_seq_num", hue="ex_filter", palette="tab10")

plt.suptitle(the_suptitle)
plt.title("Auxtel Holo observations wrt date and filter type")
plt.ylabel("Seq Num")
plt.xlabel("Date of observation")
plt.grid()
plt.xticks(rotation=45)  # keep the date labels readable
# Anchor the legend just outside the right edge of the axes.
plt.legend(ncols=1, bbox_to_anchor=(1.01, 1.05))
plt.tight_layout()
plt.show()

### Plot filter type vs date for different filters

In [None]:
# Label combining the filter name and the sequence number of each row.
# NOTE: this column is not used by the plot below; the vertical spread of the
# points comes only from the jitter option of stripplot.
df["ex_filter_seq"] = df["ex_filter"].astype(str) + "_" + df["ex_seq_num"].astype(str)

plt.figure(figsize=(20,8))
sns.stripplot(
    data=df,
    x="date",
    y="ex_filter",
    hue="ex_filter",
    palette="Set1",
    size=4,         # marker size
    jitter=True,    # spread the points so they do not overlap
    alpha=1.0
)

plt.title(f"Auxtel Holo observations wrt date and filter type",fontsize=15)
plt.xlabel("Time")
plt.ylabel("Filter")
plt.xticks(rotation=45)
plt.grid()
plt.suptitle(the_suptitle)
plt.tight_layout()
plt.show()


## Plot Target vs date

In [None]:
plt.figure(figsize=(20,16))
# Label combining the target name and the sequence number of each row
# (not used by the plot below).
df["ex_target_seq"] = df["ex_target"].astype(str) + "_" + df["ex_seq_num"].astype(str)
sns.stripplot(
    data=df,
    x="date",
    y="ex_target",
    hue="ex_target",
    palette="Set2",
    size=10,        # marker size
    jitter=True,    # spread the points so they do not overlap
    alpha=1.0,
    edgecolor="black",
    linewidth=0.1
)
plt.title(f"Auxtel Holo observations wrt date and target",fontsize=15)
plt.xlabel("Time")
plt.ylabel("Target")
plt.xticks(rotation=45)
plt.grid()
# The super-title must exist before tight_layout runs so that the layout
# reserves room for it (same order as the other figures of this notebook).
plt.suptitle(the_suptitle)
plt.tight_layout()
plt.show()

## Plot cumulative sums over time 

### Cumulative sum for all filters individually

In [None]:
# Total number of rows in df at this point; shown in the plot title below.
nobs = len(df)
nobs

In [None]:
# Compter 1 par exposition
df['n_exposures'] = 1

# Calculer la somme cumulative globale
df_sorted = df.sort_values('date')
df_sorted['total_cumsum'] = df_sorted['n_exposures'].cumsum()

# Calculer la somme cumulative par filtre
cumsum_by_filter = (
    df_sorted
    .groupby(['ex_filter', 'date'])['n_exposures']
    .sum()
    .groupby(level=0)
    .cumsum()
    .reset_index()
)

# --- Plot ---

plt.figure(figsize=(10, 6))

# Total cumulative exposures
plt.plot(df_sorted['date'], df_sorted['total_cumsum'],
         label='Total exposures', color='black', linewidth=2)

# Cumulative exposures per filter
for f in cumsum_by_filter['ex_filter'].unique():
    data = cumsum_by_filter[cumsum_by_filter['ex_filter'] == f]
    plt.plot(data['date'], data['n_exposures'],
             label=f'Filter: {f}', linewidth=1.8)

plt.xlabel('Observation date', fontsize=12)
plt.ylabel('Cumulative number of exposures', fontsize=12)
plt.title(f"Cumulative Exposures Over Time by Filter (tot = {nobs})", fontsize=14)
plt.legend(title='Legend', fontsize=10)
plt.grid(True, linestyle='--', alpha=0.5)
plt.xticks(rotation=45, ha='right')
plt.suptitle(the_suptitle)
plt.tight_layout()
plt.show()


### Cumulative sum for all filters regrouped by their colors

#### Define filters to remove

In [None]:
# Remove unwanted filters
filters_to_remove = ['cyl_lens~holo4_001', 
                     'empty~holo4_001', 
                     'cyl_lens~holo4_004', 
                     'empty~holo4_001', 
                     'collimator~holo4_003', 
                     'SDSSr~holo4_003', 
                     'unknown~holo4_003',
                     'FELH0600~holo4_003'
                    ]
df = df[~df['ex_filter'].isin(filters_to_remove)]

In [None]:
# Simplify filter categories
def simplify_filter(f):
    """Map a full filter name onto its family.

    Returns 'OG550', 'BG40' or 'empty' when the string ``f`` starts with that
    prefix (checked in this order), and 'other' for anything else.
    """
    for family in ('OG550', 'BG40', 'empty'):
        if f.startswith(family):
            return family
    return 'other'

#### Regroup filters according to their color

In [None]:
# Tag every row with its filter family ('OG550', 'BG40', 'empty' or 'other').
df['filter_group'] = df['ex_filter'].apply(simplify_filter)

In [None]:
# Keep only the three desired categories
df = df[df['filter_group'].isin(['empty', 'OG550', 'BG40'])]

# Add one exposure per row
df['n_exposures'] = 1

# Sort by time
df = df.sort_values('date')

In [None]:
# Number of rows left in df at this point; shown in the plot title below.
nobs = len(df)
nobs

#### Plot cumulative sum using color families

In [None]:
#Global cumulative sum
df['total_cumsum'] = df['n_exposures'].cumsum()

# Cumulative sum by filter group
cumsum_by_filter = (
    df.groupby(['filter_group', 'date'])['n_exposures']
    .sum()
    .groupby(level=0)
    .cumsum()
    .reset_index()
)

In [None]:
# Choose a colormap and sample 4 colors evenly spaced
colors = plt.cm.Dark2.colors[:4]  # or use 'Set2', 'Dark2', 'tab20', etc.
palette = plt.cm.Dark2.colors[:4]  # Soft natural colors

color_map = {
    'empty': palette[0],
    'OG550': palette[1],
    'BG40': palette[2],
    'Total exposures': palette[3],
}

color_map = {
    'empty': "green",
    'OG550': "red",
    'BG40': "blue",
    'Total exposures': "k",
}

In [None]:
# --- Plot ---
plt.figure(figsize=(10, 6))

# Total cumulative exposures (black)
#plt.plot(df['time_start'], df['total_cumsum'],
#         label='Total exposures', color='black', linewidth=2)

# Example use:
plt.plot(df['date'], df['total_cumsum'],
         label='Total exposures', color=color_map['Total exposures'], linewidth=2)


# Per filter group
#for f in cumsum_by_filter['filter_group'].unique():
#    data = cumsum_by_filter[cumsum_by_filter['filter_group'] == f]
#    plt.plot(data['time_start'], data['n_exposures'],
#             label=f'Filter: {f}', linewidth=2)

for f in cumsum_by_filter['filter_group'].unique():
    data = cumsum_by_filter[cumsum_by_filter['filter_group'] == f]
    plt.plot(data['date'], data['n_exposures'],
             label=f'Filter: {f}', color=color_map[f], linewidth=2)



plt.xlabel('Observation date', fontsize=12)
plt.ylabel('Cumulative number of exposures', fontsize=12)
plt.title(f"Cumulative Exposures Over Time by Filter Group (tot = {nobs})", fontsize=14)
plt.legend(title='Filter group', fontsize=10)
plt.grid(True, linestyle='--', alpha=0.5)
# Rotate x-axis labels
plt.xticks(rotation=45, ha='right')
plt.suptitle(the_suptitle)
plt.tight_layout()
plt.show()