# Extract Spectral data from Butler

- Based on Corentin R.'s Spectractor tutorial (May 28th, 2025)
- Run after adapting the Butler path in atmospec/spectraction.py
- adaptation : Sylvie Dagoret-Campagne
- date : 2025-09-15
- last update : 2025-09-15 : redo the run : collection : 'u/dagoret/auxtel_run_20250912a'
- last update : 2025-09-17 : redo the run : collection : 'u/dagoret/auxtel_run_20250917_w_2025_25_spectractorv31_holoallfilt_a'

In [None]:
import os

import getCalspec
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from astropy.io import fits
from tqdm import tqdm

# Select the interactive widget backend for the figures of this notebook.
%matplotlib widget 

In [None]:
# Run the summit_utils stack-setup check before using the Butler.
# NOTE(review): assumed to report the active LSST stack setup — confirm.
from lsst.summit.utils.utils import checkStackSetup
checkStackSetup()

In [None]:
import lsst.daf.butler as dafButler

# Open the Butler repository; `butler` and `registry` are the handles used by
# the query and loading cells of this notebook.
repo = "/repo/main"
butler = dafButler.Butler(repo)
registry = butler.registry

In [None]:
# Print, in sorted order, every collection whose name contains both
# "dagoret" and "2025".
wanted_tokens = ("dagoret", "2025")
for collection_name in sorted(registry.queryCollections()):
    if not all(token in collection_name for token in wanted_tokens):
        continue
    print(collection_name)

In [None]:
# for dt in sorted(butler.registry.queryDatasetTypes()):
#      print(dt)

In [None]:
#!cat ~/rubin-user/holo_atmo_2025-09-12/bps_auxtel_atmosphere.yaml

In [None]:
# Collection holding the Spectractor outputs to extract. Earlier runs are kept
# commented out for reference.
# NOTE(review): this comment used to mention /repo/embargo, but `repo` is set
# to "/repo/main" in this notebook — confirm which repository is intended.
#my_collection = ['u/dagoret/auxtel_run_20250625a']
#my_collection = ['u/dagoret/auxtel_run_20250702b']
#my_collection = ['u/dagoret/auxtel_run_20250912a']
my_collection = ['u/dagoret/auxtel_run_20250917_w_2025_25_spectractorv31_holoallfilt_a']

# Output file of the extraction (written with np.save). The extraction cells
# are skipped when this file already exists.
#file_save = "auxtel_atmosphere_20250625a_v1.npy"
#file_save = "auxtel_atmosphere_20250702b_repomain_v1.npy"
#file_save = "auxtel_atmosphere_20250912a_repomain_v1.npy"
file_save = "auxtel_run_20250917_w_2025_25_spectractorv31_holoallfilt_a_repomain_v1.npy"



## Check the presence of the spectra

In [None]:
# Every query below is restricted to LATISS and to the Spectractor spectrum
# dataset type in the selected collection.
where = "instrument='LATISS'"
spectrum_type = 'spectractorSpectrum'

# Raw query result, only referenced by the (commented-out) pruning cell.
datasetRefs = registry.queryDatasets(datasetType=spectrum_type, collections=my_collection, where=where)

# Visit dimension records of the visits that have a spectrum.
records = list(
    butler.registry.queryDimensionRecords(
        'visit', datasets=spectrum_type, where=where, collections=my_collection
    )
)

# De-duplicated dataset references of the spectra.
refs = list({*butler.registry.queryDatasets(spectrum_type, where=where, collections=my_collection)})
len(records)

In [None]:
# Print the identifying fields of the first seven visit records.
summary_fields = (
    ("fullId..................:", "id"),
    ("seq_num..................:", "seq_num"),
    ("day_obs..................:", "day_obs"),
    ("target..................:", "target_name"),
    ("filt+disp..................:", "physical_filter"),
)
for i, r in enumerate(records[:7]):
    print(f"============= ({i}) ============datasetType = spectraction ============================================")
    for caption, attribute in summary_fields:
        print(caption, getattr(r, attribute))

    # spec = butler.get('spectractorSpectrum', visit=r.id, detector=0, collections=my_collection, instrument='LATISS')

In [None]:
# Delete the datasets found above (destructive — keep commented out unless intended)
# butler.pruneDatasets(datasetRefs, disassociate=True, unstore=True, purge=True)

## Load one spectrum

In [None]:
# Show the registry definition of the spectrum-fit parameters dataset type.
print(butler.registry.getDatasetType('spectrumLibradtranFitParameters'))

In [None]:
# Smoke test: try to load the spectrum-fit parameters of (at most) the first
# 20 spectra and report every failure.
# The original cell indexed `refs_noerrorsed`, a name that is never defined in
# this notebook, and swallowed the resulting NameError with a bare `except`,
# so it silently did nothing. It now walks `refs` and prints what went wrong.
for ref in refs[:20]:
    visit = ref.dataId["visit"]
    try:
        p = butler.get('spectrumLibradtranFitParameters', visit=visit, collections=my_collection, detector=0, instrument='LATISS')
        # NOTE(review): assumes the parameters object can be indexed by label — confirm.
        err = p["ozone [db]"]
    except Exception as exc:
        # Diagnostic cell: keep going, but make the failure visible.
        print(f"Skip {visit}: {type(exc).__name__}: {exc}")

In [None]:
# Smoke test: try to load the spectrogram-fit parameters of (at most) the
# first 20 spectra and report every failure.
# The original cell indexed `refs_noerrorsed`, a name that is never defined in
# this notebook, and swallowed the resulting NameError with a bare `except`,
# so it silently did nothing. It now walks `refs` and prints what went wrong.
for ref in refs[:20]:
    visit = ref.dataId["visit"]
    try:
        p = butler.get('spectrogramLibradtranFitParameters', visit=visit, collections=my_collection, detector=0, instrument='LATISS')
        # NOTE(review): assumes the parameters object can be indexed by label — confirm.
        err = p["ozone [db]"]
    except Exception as exc:
        # Diagnostic cell: keep going, but make the failure visible.
        print(f"Skip {visit}: {type(exc).__name__}: {exc}")

In [None]:
# Load one spectrum and its spectrum-fit parameters, addressed by
# (day_obs, seq_num) instead of by visit id.
dataId = {"day_obs": 20220316, "seq_num": 330, 'instrument':'LATISS',"detector": 0}

spec= butler.get('spectractorSpectrum',dataId,collections=my_collection)
p = butler.get('spectrumLibradtranFitParameters',dataId,collections=my_collection)
# Last expression of the cell: display the fit parameters.
p

In [None]:
# Re-select the interactive widget backend, then plot the spectrum held in `spec`.
%matplotlib widget 
_ = spec.plot_spectrum()

## Load all Libradtran parameters

In [None]:
# Peek at the visit id carried by the first dataset reference.
refs[0].dataId["visit"]

In [None]:
# Load, for every spectrum of the collection, its FITS header and the two sets
# of Libradtran fit parameters. Skipped entirely when the output file exists.
if not(os.path.isfile(file_save)):
    # see here an efficient way to access FITS headers: https://lsstc.slack.com/archives/CBV7K0DK6/p1700250222827499
    params_spectrum = []
    params_spectrogram = []
    headers = []

    def from_ref_to_dataId(ref):
        """Build a (day_obs, seq_num) LATISS data ID from a dataset reference.

        The sequence number is read from the visit id: the digits that follow
        its first eight characters.
        """
        dataId = {'day_obs': ref.dataId["day_obs"], 'seq_num': int(str(ref.dataId["visit"])[8:]), 'instrument': 'LATISS', 'detector': 0}
        return dataId

    # Keyword arguments shared by every butler.get call below.
    common = dict(collections=my_collection, detector=0, instrument='LATISS')

    for ref in tqdm(sorted(refs, key=lambda x: x.dataId["visit"])):
        visit = ref.dataId["visit"]
        try:
            spec = butler.get('spectractorSpectrum', visit=visit, **common)
            header = spec.header
            p_spectrum = butler.get('spectrumLibradtranFitParameters', visit=visit, **common)
            p_spectrogram = butler.get('spectrogramLibradtranFitParameters', visit=visit, **common)
        except (AttributeError,ValueError,LookupError):
            print("Skip", visit)
            continue
        # Append only once all three products are available: the three lists
        # are combined row by row afterwards, so a visit missing one product
        # must not leave an entry in the others.
        headers.append(header)
        params_spectrum.append(p_spectrum)
        params_spectrogram.append(p_spectrogram)


In [None]:
# Turn the loaded headers and fit parameters into one table indexed by
# DATE-OBS and save it as a NumPy record array. Skipped when the output file
# already exists.
if not(os.path.isfile(file_save)):
    if not (headers and params_spectrum and params_spectrogram):
        raise RuntimeError("No spectrum was loaded from the Butler: nothing to tabulate or save.")

    # Header keywords containing any of these tokens are not exported.
    excluded_tokens = ("COMMENT", "EXTNAME", "LBDAS_T", "PSF_P_T", "AMPLIS_T", "UNIT", "SIMPLE")
    columns_spectrum = ["id"] + [h for h in headers[0] if not any(token in h for token in excluded_tokens)]
    exported_keywords = set(columns_spectrum)

    def _bestfit_columns(params):
        """Return the value and error column names of each fitted parameter label."""
        names = []
        for key in params.labels:
            names.extend((key, key+"_err"))
        return names

    columns_spectrogram_bestfit = _bestfit_columns(params_spectrogram[0])
    columns_spectrum_bestfit = _bestfit_columns(params_spectrum[0])

    min_index = 0
    max_index = np.inf

    # The data were loaded in increasing visit order, so the ids are taken in
    # that same order. The registry records used before carry no ordering
    # guarantee, which could attach an id to the wrong row.
    visit_ids = sorted(ref.dataId["visit"] for ref in refs)
    if len(visit_ids) != len(headers):
        print(f"Warning: {len(visit_ids) - len(headers)} visit(s) were skipped while loading; "
              "the 'id' column may be offset for the rows after a skipped visit.")

    def _selected(n):
        """Tell whether visit id *n* lies inside [min_index, max_index]."""
        return min_index <= n <= max_index

    def _bestfit_table(params_list, columns):
        """Build one row of fitted values and errors per selected visit."""
        table = pd.DataFrame(columns=columns)
        for n, p in zip(visit_ids, params_list):
            if not _selected(n):
                continue
            row = {"id": n}
            for i, key in enumerate(p.labels):
                row[key] = p.values[i]
                row[key+"_err"] = p.err[i]
            table.loc[len(table)] = row
        return table

    #df1 is header info
    df1 = pd.DataFrame(columns=columns_spectrum)
    for n, header in zip(visit_ids, headers):
        if not _selected(n):
            continue
        row = {"id": n}
        row.update((h, header[h]) for h in header if h in exported_keywords)
        df1.loc[len(df1)] = row

    #df2 is spectrogram best fit
    df2 = _bestfit_table(params_spectrogram, columns_spectrogram_bestfit)

    # df3 is spectrum best fit
    df3 = _bestfit_table(params_spectrum, columns_spectrum_bestfit)

    # merge header with spectrogram; columns present on both sides get the
    # pandas "_x"/"_y" suffixes
    df = pd.merge(df1, df2, left_index=True, right_index=True)
    # merge (header-spectrogram with spectrum): spectrogram columns end in
    # "_x" and spectrum columns in "_y" when the parameter exists in both
    df = pd.merge(df, df3, left_index=True, right_index=True)
    df.set_index('DATE-OBS', inplace=True)
    df.index = pd.to_datetime(df.index, format="ISO8601") #['DATE-OBS'])
    df.sort_index(inplace=True)

    rec = df.to_records()
    np.save(file_save, rec)


## Plots

In [None]:
# Reload the saved record array and mirror it as a DataFrame; `rec` is used
# for plotting and `df.columns` for column look-ups in the cells below.
# NOTE(review): allow_pickle=True executes pickled content — only load files
# produced by this notebook.
rec = np.load(file_save, allow_pickle=True)
df = pd.DataFrame(rec)
pd.set_option('display.max_columns', None)
print(rec.shape)

In [None]:
# Time series of the main header quantities for every extracted spectrum
# (no quality selection applied).
for col in ["D2CCD", "PIXSHIFT", "PSF_REG", "CHI2_FIT", "OUTPRESS", "OUTTEMP", "alpha_0_2", "TARGETX", "TARGETY"]:
    if col not in df.columns:
        continue
    # Uncertainty column: "<stem>_err_<last token>" when the name contains an
    # underscore (e.g. "A1_x" -> "A1_err_x"), "<name>_err" otherwise.
    stem, sep, last = col.rpartition('_')
    col_err = f"{stem}_err_{last}" if sep else col+"_err"
    fig = plt.figure()
    # A label is given so that plt.legend() below has an entry to show.
    if col_err in df.columns:
        plt.errorbar(rec["DATE-OBS"], rec[col], yerr=rec[col_err], linestyle="none", marker="+", label=col)
    else:
        plt.plot(rec["DATE-OBS"], rec[col], linestyle="none", marker="+", label=col)
    values = rec[col]
    if len(values) > 0:
        lo, hi = np.nanmin(values), np.nanmax(values)
        # Pad outwards by 10% of the magnitude: identical to 0.9*min / 1.1*max
        # for positive data, but does not clip negative values. NaN-only
        # columns keep the automatic limits instead of raising.
        if np.isfinite(lo) and np.isfinite(hi):
            plt.ylim((lo - 0.1*abs(lo), hi + 0.1*abs(hi)))
    if "PSF_REG" in col:
        plt.yscale("log")
    plt.grid()
    plt.title(col)
    plt.legend()
    plt.gcf().autofmt_xdate()
    plt.show()

In [None]:
# Quality selection on the extraction: fit chi2, PSF regularisation, fitted
# order-0 to CCD distance and pixel shift windows.
good_chi2 = rec["CHI2_FIT"] < 30
good_regularisation = rec["PSF_REG"] > 1e-2
good_distance = (rec["D2CCD"] > 186.7) & (rec["D2CCD"] < 187.4)
good_shift = (rec["PIXSHIFT"] > 0.5) & (rec["PIXSHIFT"] < 1.5)
filtered = good_chi2 & good_regularisation & good_distance & good_shift
print(len(filtered[filtered]))

# Additionally require a PWV uncertainty strictly between 0 and 5 in both fits.
for pwv_err_column in ("PWV [mm]_err_x", "PWV [mm]_err_y"):
    filtered = filtered & (rec[pwv_err_column] > 0) & (rec[pwv_err_column] < 5)

In [None]:
# Number of spectra passing the selection.
len(filtered[filtered])

In [None]:
# Replace the selection mask by an all-True mask, i.e. keep every spectrum.
filtered = np.full(rec["CHI2_FIT"].shape,True)

In [None]:
# Number of True entries in the current mask.
np.sum(filtered)

In [None]:
# Time series of the main header quantities, restricted to the spectra
# selected by the `filtered` mask.
for col in ["D2CCD", "PIXSHIFT", "PSF_REG", "CHI2_FIT", "OUTPRESS", "OUTTEMP", "OUTHUM", "alpha_0_2", "TARGETX", "TARGETY"]:
    if col not in df.columns:
        continue
    # Uncertainty column: "<stem>_err_<last token>" when the name contains an
    # underscore (e.g. "A1_x" -> "A1_err_x"), "<name>_err" otherwise.
    stem, sep, last = col.rpartition('_')
    col_err = f"{stem}_err_{last}" if sep else col+"_err"
    fig = plt.figure()
    # A label is given so that plt.legend() below has an entry to show.
    if col_err in df.columns:
        plt.errorbar(rec["DATE-OBS"][filtered], rec[col][filtered], yerr=rec[col_err][filtered], linestyle="none", marker="+", label=col)
    else:
        plt.plot(rec["DATE-OBS"][filtered], rec[col][filtered], linestyle="none", marker="+", label=col)
    values = rec[col][filtered]
    if len(values) > 0:
        lo, hi = np.nanmin(values), np.nanmax(values)
        # Pad outwards by 10% of the magnitude: identical to 0.9*min / 1.1*max
        # for positive data, but does not clip negative values. Empty or
        # NaN-only selections keep the automatic limits instead of raising.
        if np.isfinite(lo) and np.isfinite(hi):
            plt.ylim((lo - 0.1*abs(lo), hi + 0.1*abs(hi)))
    if "PSF_REG" in col:
        plt.yscale("log")
    plt.grid()
    plt.title(col)
    plt.legend()
    plt.gcf().autofmt_xdate()
    plt.show()

### Spectrum fits

In [None]:
# Time series of the spectrum-fit parameters (columns suffixed "_y" after the
# merge) for the spectra selected by `filtered`.
for col in ["A1_y", "chi2_y", "ozone [db]_y", "PWV [mm]_y", "VAOD_y", "A2_y", "D_CCD [mm]_y", "alpha_pix [pix]", "reso [nm]", "B_y"]:
    if col not in df.columns:
        continue
    # Uncertainty column: "<stem>_err_<last token>" when the name contains an
    # underscore (e.g. "A1_y" -> "A1_err_y"), "<name>_err" otherwise.
    stem, sep, last = col.rpartition('_')
    col_err = f"{stem}_err_{last}" if sep else col+"_err"
    fig = plt.figure(figsize=(8,5))
    # A label is given so that plt.legend() below has an entry to show.
    if col_err in df.columns:
        plt.errorbar(rec["DATE-OBS"][filtered], rec[col][filtered], yerr=rec[col_err][filtered], linestyle="none", marker="+", label=col)
    else:
        plt.plot(rec["DATE-OBS"][filtered], rec[col][filtered], linestyle="none", marker="+", label=col)
    # The y range is left to matplotlib here (explicit limits were disabled).
    plt.grid()
    plt.title(col)
    plt.legend()
    plt.gcf().autofmt_xdate()
    plt.show()

In [None]:
# Distinct values of the FILTER column, used to split the plots below.
filters = np.unique(rec["FILTER"])
filters

In [None]:
# Time series of the spectrum-fit parameters, one series per FILTER value.
# ("A1_y" was listed twice and therefore plotted twice; it now appears once.)
for col in ["A1_y", "chi2_y", "ozone [db]_y", "PWV [mm]_y", "VAOD_y", "A2_y", "D_CCD [mm]_y", "alpha_pix [pix]", "reso [nm]", "B_y"]:
    if col not in df.columns:
        continue
    # Uncertainty column: "<stem>_err_<last token>" when the name contains an
    # underscore (e.g. "A1_y" -> "A1_err_y"), "<name>_err" otherwise.
    stem, sep, last = col.rpartition('_')
    col_err = f"{stem}_err_{last}" if sep else col+"_err"
    fig = plt.figure(figsize=(8,5))
    for filt in filters:
        # NOTE(review): these look like target names rather than FILTER
        # values, so this skip probably never triggers — confirm.
        if filt in ["HD60753", "HD37962"]:
            continue
        index = filtered & (rec["FILTER"] == filt)
        # Both branches carry the label so every series appears in the legend.
        if col_err in df.columns:
            plt.errorbar(rec["DATE-OBS"][index], rec[col][index], yerr=rec[col_err][index], linestyle="none", marker="+", label=filt)
        else:
            plt.plot(rec["DATE-OBS"][index], rec[col][index], linestyle="none", marker="+", label=filt)
    values = rec[col][filtered]
    if len(values) > 0:
        lo, hi = np.nanmin(values), np.nanmax(values)
        # Pad outwards by 10% of the magnitude: identical to 0.9*min / 1.1*max
        # for positive data, but does not clip negative values. Empty or
        # NaN-only selections keep the automatic limits instead of raising.
        if np.isfinite(lo) and np.isfinite(hi):
            plt.ylim((lo - 0.1*abs(lo), hi + 0.1*abs(hi)))
    plt.grid()
    plt.title(col)
    plt.legend()
    plt.gcf().autofmt_xdate()
    plt.show()

In [None]:
# Distinct values of the TARGET column, used to split the plots below.
stars = np.unique(rec["TARGET"])
stars

In [None]:
# Time series of the spectrum-fit parameters, one series per target star.
# Error bars were switched off in the original cell (`and False`); the switch
# is now an explicit flag, and the series are labelled so that the legend is
# no longer empty.
show_errorbars = False
for col in ["A1_y", "chi2_y", "ozone [db]_y", "PWV [mm]_y", "VAOD_y", "angstrom_exp_y", "A2_y", "D_CCD [mm]_y", "alpha_pix [pix]", "reso [nm]", "B_y", "alpha_0_2", "alpha_0_1", "gamma_0_2", "gamma_0_1", "y_c_0_2", "y_c_0_1"]:
    if col not in df.columns:
        continue
    # Uncertainty column: "<stem>_err_<last token>" when the name contains an
    # underscore (e.g. "A1_y" -> "A1_err_y"), "<name>_err" otherwise.
    stem, sep, last = col.rpartition('_')
    col_err = f"{stem}_err_{last}" if sep else col+"_err"
    fig = plt.figure(figsize=(10,6))
    for star in stars:
        #if star not in ["HD185975"]:
        #    continue
        index = filtered & (rec["TARGET"] == star)
        if show_errorbars and col_err in df.columns:
            plt.errorbar(rec["DATE-OBS"][index], rec[col][index], yerr=rec[col_err][index], linestyle="none", marker="+", label=star)
        else:
            plt.plot(rec["DATE-OBS"][index], rec[col][index], linestyle="none", marker="+", label=star)
    values = rec[col][filtered]
    if len(values) > 0:
        lo, hi = np.nanmin(values), np.nanmax(values)
        # Pad outwards by 10% of the magnitude: identical to 0.9*min / 1.1*max
        # for positive data, but does not clip negative values. Empty or
        # NaN-only selections keep the automatic limits instead of raising.
        if np.isfinite(lo) and np.isfinite(hi):
            plt.ylim((lo - 0.1*abs(lo), hi + 0.1*abs(hi)))
    plt.grid()
    plt.title(col)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.show()

### Spectrogram forward model fits

In [None]:
# Distinct values of the FILTER column, used to split the plots below.
filters = np.unique(rec["FILTER"])
filters

In [None]:
# Time series of the spectrogram forward-model parameters (columns suffixed
# "_x" after the merge), one series per FILTER value.
for col in ["A1_x", "ozone [db]_x", "PWV [mm]_x", "VAOD_x", "D_CCD [mm]_x"]: #, "gamma_0", "alpha_0"]:
    # Skip columns absent from the table, as the other plotting cells do,
    # instead of failing on the missing field.
    if col not in df.columns:
        continue
    # Uncertainty column: "<stem>_err_<last token>" when the name contains an
    # underscore (e.g. "A1_x" -> "A1_err_x"), "<name>_err" otherwise.
    stem, sep, last = col.rpartition('_')
    col_err = f"{stem}_err_{last}" if sep else col+"_err"
    fig = plt.figure(figsize=(8,5))
    for filt in filters:
        index = filtered & (rec["FILTER"] == filt)
        # Both branches carry the label so every series appears in the legend.
        if col_err in df.columns:
            plt.errorbar(rec["DATE-OBS"][index], rec[col][index], yerr=rec[col_err][index], linestyle="none", marker="+", label=filt)
        else:
            plt.plot(rec["DATE-OBS"][index], rec[col][index], linestyle="none", marker="+", label=filt)
    values = rec[col][filtered]
    if len(values) > 0:
        lo, hi = np.nanmin(values), np.nanmax(values)
        # Pad outwards by 10% of the magnitude: identical to 0.9*min / 1.1*max
        # for positive data, but does not clip negative values. Empty or
        # NaN-only selections keep the automatic limits instead of raising.
        if np.isfinite(lo) and np.isfinite(hi):
            plt.ylim((lo - 0.1*abs(lo), hi + 0.1*abs(hi)))
    plt.grid()
    plt.title(col)
    plt.legend()
    plt.gcf().autofmt_xdate()
    plt.show()

In [None]:
# Distinct values of the TARGET column, used to split the plots below; the
# commented line is a manual override restricting the list of stars.
stars = np.unique(rec["TARGET"])
#stars = ['HD2811', 'HD38666']  # , 'HD185975'
stars

In [None]:
# Time series of the spectrogram forward-model parameters, one series per
# target star. Stars known to getCalspec are drawn with "+", the others
# with "o".
# The marker is resolved once per star rather than once per star and column.
star_markers = {star: ("+" if getCalspec.is_calspec(star) else "o") for star in stars}
for col in ["A1_x", "ozone [db]_x", "PWV [mm]_x", "VAOD_x", "angstrom_exp_x", "D_CCD [mm]_x", "gamma_0_2", "alpha_0_2"]:
    # Skip columns absent from the table, as the other plotting cells do,
    # instead of failing on the missing field.
    if col not in df.columns:
        continue
    # Uncertainty column: "<stem>_err_<last token>" when the name contains an
    # underscore (e.g. "A1_x" -> "A1_err_x"), "<name>_err" otherwise.
    stem, sep, last = col.rpartition('_')
    col_err = f"{stem}_err_{last}" if sep else col+"_err"
    fig = plt.figure(figsize=(10,5))
    for star in stars : #['HD185975']: #stars:
        index = filtered & (rec["TARGET"] == star)
        marker = star_markers[star]
        # Both branches carry the label so every series appears in the legend.
        if col_err in df.columns:
            plt.errorbar(rec["DATE-OBS"][index], rec[col][index], yerr=rec[col_err][index], linestyle="none", marker=marker, label=star)
        else:
            plt.plot(rec["DATE-OBS"][index], rec[col][index], linestyle="none", marker=marker, label=star)
    values = rec[col][filtered]
    if len(values) > 0:
        lo, hi = np.nanmin(values), np.nanmax(values)
        # Pad outwards by 10% of the magnitude: identical to 0.9*min / 1.1*max
        # for positive data, but does not clip negative values. Empty or
        # NaN-only selections keep the automatic limits instead of raising.
        if np.isfinite(lo) and np.isfinite(hi):
            plt.ylim((lo - 0.1*abs(lo), hi + 0.1*abs(hi)))
    plt.grid()
    plt.title(col)
    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.show()