# Merge Spectractor Results with Exposures list from Butler (saved in csv file)

- author : Sylvie Dagoret-Campagne
- creation date : 2025-09-20
- last update : 2025-09-21  add  imports from BUTLER00_parameters
- last update : 2025-09-22  :  Merge "run_v6":"u/dagoret/auxtel_run_20250921_w_2025_38_spectractorv32_main_gains_holoallfilt_a"
- last update : 2025-10-21 : Associate run_v9 : u/dagoret/auxtel_run_20251018_w_2025_42_spectractorv32_all_main_data_gains_holoallfilt_b with data/butlerregistry/2025-10-17_holosummary_all_filters_repo_main.csv
- last update : 2025-10-23 : run_v10
- last update : 2025-10-25 : run_v11
- last update : 2025-12-12 : run_v12
- last update : 2026-01-14 : run2026_v01  

- 1) read the list of exposures generated by `TOOL_ListofExposures.ipynb` notebook
- 2) read the Spectractor Results produced by `EXTR_viewSpectractorResults.ipynb` notebook
- 3) merge both by exposure id and save the merged table in a npy file (a parquet file for the 2026 runs)
- 4) all paths are given in `BUTLER00_parameters.py`

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Notebook-wide matplotlib defaults: wide figures and extra-large text everywhere.
plt.rcParams.update(
    {
        "figure.figsize": (16, 8),
        "axes.labelsize": "xx-large",
        "axes.titlesize": "xx-large",
        "xtick.labelsize": "xx-large",
        "ytick.labelsize": "xx-large",
        "legend.fontsize": "xx-large",
    }
)


In [None]:
# CHECK THE CONFIG HERE !!!!!!
from BUTLER00_parameters import *

In [None]:
# NOTE(review): DumpConfig is presumably provided by the star import from
# BUTLER00_parameters and presumably prints the active configuration — confirm.
DumpConfig()

## Configuration

In [None]:
#FLAG_REPO_EMBARGO = True

In [None]:
# Key of the Spectractor run to process. It is used below to look up the
# input/output files and to choose the file format ("2026" runs use parquet).
#selected_run = "run_v12"
selected_run = "run2026_v01"

In [None]:
# Look up the Spectractor results file and the legend tag for the selected run.
# NOTE(review): both dictionaries presumably come from BUTLER00_parameters — confirm.
atmfilename = extractedfilesdict[selected_run]
tag = legendtag[selected_run]

In [None]:
# Report which input file and configuration tag are in use.
print(f"Spectractor Extracted atmospheric parameters file : {atmfilename}  for config {tag}")

In [None]:
#!ls ../2025-06-26-SpectractorExtraction-FromButler/data/spectro/

In [None]:
# Derive the directory and file name under which the merged table is saved.
input_path = os.path.dirname(atmfilename)
inputfilename = os.path.basename(atmfilename)

if "2026" not in selected_run:
    # Older runs: write into "<input directory>_merged" and insert "_merged"
    # before the extension of the input file name.
    # os.path.splitext splits on the last dot only, so a base name containing
    # extra dots is kept whole, and a name without any dot no longer gets its
    # own name appended as a bogus extension.
    inputfilename_root, dotted_ext = os.path.splitext(inputfilename)
    inputfilename_ext = dotted_ext[1:]  # extension without the leading dot, as before
    output_path = input_path + '_merged'
    outputfilename = inputfilename_root + '_merged' + dotted_ext
else:
    # 2026 runs: the merged file path is registered explicitly per run.
    outputfullfilename = mergedextractedfilesdict[selected_run]
    output_path = os.path.dirname(outputfullfilename)
    outputfilename = os.path.basename(outputfullfilename)
#outputfilename = inputfilename_root + '_merged' + '.npz'

In [None]:
# Show the output directory and file name derived above.
print(output_path , outputfilename)

In [None]:
# Load the Spectractor results into a DataFrame: parquet for the 2026 runs,
# a file read through np.load for the older runs.
if "2026" in selected_run:
    df_spec = pd.read_parquet(atmfilename)
else:
    specdata = np.load(atmfilename, allow_pickle=True)
    df_spec = pd.DataFrame(specdata)

In [None]:
# Display the last rows of the Spectractor results.
df_spec.tail()

## List of Exposures

In [None]:
# Butler repository that the list of exposures was produced from.
repo = "/repo/embargo" if FLAG_REPO_EMBARGO else "/repo/main"
# The repository path is embedded in the file name with "/" turned into "_".
reponame = repo.replace("/", "_")

path_exposureslist = "data/butlerregistry"

# Settings used for earlier productions, kept for reference:
#DATE = "2025-10-17"
#DATEPROD = "2025-10-25"
#DATEPROD = "2025-12-12" ; DATEMIN = 20250107 ; DATEMAX = 20251211

# Production date and date range that appear in the exposure list file name.
DATEPROD = "2026-01-14"
DATEMIN = 20220215
DATEMAX = 20260113

listexposures_basename = f"{DATEPROD}_holosummary_all_filters{reponame}_{DATEMIN}_{DATEMAX}.csv"
listexposures_file = os.path.join(path_exposureslist, listexposures_basename)

In [None]:
# Show the path of the exposure list file that will be read.
print(listexposures_file)

In [None]:
# Read the exposure list, using the first CSV column as index on read, then
# replace that index with a fresh 0..N-1 range (the old index is dropped).
df_exp = pd.read_csv(listexposures_file, index_col=0).reset_index(drop=True)

In [None]:
# Display the exposure list.
df_exp

In [None]:
# Prefix every exposure column with "ex_", except the merge key "id".
df_exp_prefixed = df_exp.rename(
    columns=lambda col: col if col == "id" else f"ex_{col}"
)

In [None]:
# Display the exposure list with the prefixed column names.
df_exp_prefixed

## Merge

In [None]:
# Inner join on the 'id' column: only ids present in both the Spectractor
# results and the exposure list are kept.
df_spec_merged = df_spec.merge(df_exp_prefixed, how="inner", on="id")

In [None]:
# Display the merged table.
df_spec_merged

In [None]:
# List all column names of the merged table on one line.
print(" | ".join(df_spec_merged.columns))

In [None]:
# Set "DATE-OBS" (created or overwritten) from the exposure list's start time column.
df_spec_merged["DATE-OBS"] = df_spec_merged["ex_time_start"]

## Save output

In [None]:
# Create the output directory if needed; an existing directory is not an error.
os.makedirs(output_path, exist_ok=True)

In [None]:
# Show the output file name.
print(outputfilename)

In [None]:
# Full path of the merged output file (directory + file name derived earlier).
outputfullfilename = os.path.join(output_path,outputfilename)

In [None]:
# Write the merged table: gzip-compressed parquet for the 2026 runs,
# a NumPy record array saved with np.save for the older runs.
if "2026" in selected_run:
    df_spec_merged.to_parquet(outputfullfilename, compression='gzip')
else:
    # The DataFrame index is not stored in the record array.
    rec_array = df_spec_merged.to_records(index=False)
    np.save(outputfullfilename, rec_array)

## Check

In [None]:
# Read the file that was just written back into a DataFrame as a sanity check.
if "2026" in selected_run:
    df_newspec = pd.read_parquet(outputfullfilename)
else:
    newspecdata = np.load(outputfullfilename, allow_pickle=True)
    df_newspec = pd.DataFrame(newspecdata)

In [None]:
#newspecdata

In [None]:
# Display the first rows of the reloaded merged table.
df_newspec.head()

In [None]:
# Display the last rows of the reloaded merged table.
df_newspec.tail()