# Compare magnitudes of Gaia and Calspec spectra from an hdf5 file

- author Sylvie Dagoret-Campagne
- affiliation IJCLab
- creation date : 2024/10/06
- update : 2024/10/07 : 

Goal : Learn how to compute magnitudes in the LSST bands from spectra.
This notebook shows the histograms of the magnitudes of the Gaia spectra in the LSST bands, and it computes the magnitude differences between the Gaia and Calspec spectra.
Note that the Gaia spectra must be extended at their borders so that they cover the whole wavelength range over which each LSST band is defined.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib as mpl
import matplotlib.colors as colors
import matplotlib.cm as cmx
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm
from matplotlib.gridspec import GridSpec
import pandas as pd

import matplotlib.ticker                         # here's where the formatter is
import os,sys
import re
import pandas as pd

from astropy.io import fits
from astropy import units as u
from astropy import constants as c

# Default figure size, and extra-large fonts for axis labels, titles and tick labels.
plt.rcParams.update({
    "figure.figsize": (8, 6),
    "axes.labelsize": 'xx-large',
    "axes.titlesize": 'xx-large',
    "xtick.labelsize": 'xx-large',
    "ytick.labelsize": 'xx-large',
})

#import pickle
#from scipy.interpolate import RegularGridInterpolator
from scipy.interpolate import interp1d
import h5py

In [None]:
# Allow pandas to display up to 500 columns when showing a DataFrame.
pd.set_option('display.max_columns', 500)

In [None]:
# Make the rubinsimphot sources importable, depending on the host the notebook runs on.
machine_name = os.uname().nodename
path_rubinsimphot = "repos/repos_w_2024_38/rubinsimphot/src"

# Host-name patterns that need $HOME/<path_rubinsimphot> appended to sys.path,
# each with the message to print. The first matching pattern wins.
_hosts_needing_rubinsimphot = (
    ('sdf', "Set environment for USDF"),
    ('dagoret-nb', "Set environment for USDF Rubin Science Platform"),
)

for _pattern, _message in _hosts_needing_rubinsimphot:
    if _pattern in machine_name:
        print(_message)
        newpythonpath = os.path.join(os.getenv("HOME"),path_rubinsimphot)
        sys.path.append(newpythonpath)
        break
else:
    # No path manipulation on other hosts: only remind the user about the environment.
    if 'mac' in machine_name:
        print("Be sure to run this notebook in conda environment named conda_py310")
    else:
        print(f"Your current machine name is {machine_name}. Check your python environment")

## Config 

In [None]:
# Switches enabling the optional control plots of this notebook.
FLAG_PLOTSPECTRA = False            # calspec/gaia spectra (plotspec calls)
FLAG_PLOTATMOSPHERETRANSM = False   # standard atmosphere transmission
FLAG_PLOTDETECTORTRANSM = False     # instrument throughput per filter
FLAG_PLOTTOTALTRANSM = False        # total (standard) throughput per filter

In [None]:
# Location of the HDF5 file holding the Gaia and Calspec spectra.
input_path = "data_gaiacalspecspectra"
input_file_h5  = 'GAIACALSPECspectra.hdf5'
# Path of the input file, relative to the current working directory
input_fullfile_h5 = os.path.join(input_path,input_file_h5)

## Read file spectra

In [None]:
# Open the HDF5 file read-only. The handle is kept open because the cells
# below read groups and attributes from it.
# NOTE(review): the handle is never closed in the visible part of the notebook.
hf =  h5py.File(input_fullfile_h5, 'r') 
# Top-level keys of the file (one per group)
list_of_keys = list(hf.keys())

In [None]:
list_of_keys

In [None]:
# pick one key (the first one) as a representative entry
key_sel =  list_of_keys[0]
# pick the corresponding group; its attributes are inspected in the next cell
group = hf.get(key_sel)  

In [None]:
# Pick up all attribute names of the selected group, in the order they are stored.
all_subgroup_keys = list(group.attrs.keys())

In [None]:
all_subgroup_keys

In [None]:
def GetColumnHfData(hff,list_of_keys,nameval):
    """
    Collect one attribute from every listed group of an h5 file.

    parameters
      hff          : descriptor of the h5 file (only its ``get`` method is used)
      list_of_keys : keys of the groups to read
      nameval      : name of the attribute looked up in each group's ``attrs``

    return
      list holding the value of attribute ``nameval`` for each group,
      in the same order as ``list_of_keys``
    """
    return [hff.get(group_key).attrs[nameval] for group_key in list_of_keys]

In [None]:
# create info
df_info = pd.DataFrame()
for key in all_subgroup_keys:
    arr=GetColumnHfData(hf, list_of_keys ,key)
    df_info[key] = arr

In [None]:
df_info

In [None]:
# Number of entries (rows of df_info), used as the loop bound over spectra below
NSPEC = len(df_info)

## Extract the spectra

In [None]:
# Per-group spectra: one gaia dataframe and one calspec dataframe per key,
# stored in the order of list_of_keys.
all_dfg = []
all_dfc = []

idx = 0
for key in list_of_keys:
    group = hf.get(key)
    dfg, dfc = pd.DataFrame(), pd.DataFrame()

    # Datasets are named <prefix><COLUMN>: prefix "g" for gaia and "c" for calspec.
    for frame, prefix in ((dfg, "g"), (dfc, "c")):
        for column in ("WAVELENGTH", "FLUX", "STATERROR", "SYSERROR"):
            frame[column] = np.array(group.get(prefix + column))

    all_dfg.append(dfg)
    all_dfc.append(dfc)

    # running count of the processed groups
    idx += 1

## Check

In [None]:
# Position of the spectrum inspected in the check cells below
index = 0

In [None]:
# Attribute values (one row of df_info) of the selected spectrum
row = df_info.iloc[index]

In [None]:
row

In [None]:
# Build the label "<HD_name>_<GAIA_ED3_Name>" used to identify the star in plot titles
hdname = row["HD_name"]
gaianame = row["GAIA_ED3_Name"]
tag = f"{hdname}_{gaianame}"

In [None]:
hdname

In [None]:
# Gaia and calspec dataframes of the selected spectrum
dfg = all_dfg[index]
dfc = all_dfc[index]

In [None]:
def plotspec(tag,dfc,dfg,wlmin=300.,wlmax=1100.):
    """
    Overlay the calspec and gaia spectra of one star on a single figure.

    parameters
      tag   : name of the star, appended to the figure title
      dfc   : dataframe for calspec, with columns WAVELENGTH and FLUX
      dfg   : dataframe for gaia, with columns WAVELENGTH and FLUX
      wlmin : lower bound of the plotted wavelength range (default 300.)
      wlmax : upper bound of the plotted wavelength range (default 1100.)

    The y axis runs from 0 to 1.2 times the largest finite calspec flux found
    strictly inside (wlmin, wlmax). When no positive finite flux is found
    there, the automatic y limits are kept.
    """

    fig, ax = plt.subplots(1,1,figsize=(8,5))
    title = "calspec-gaia : " + tag
    # legend=False: the legend is drawn once, explicitly, after both curves exist
    dfc.plot(x="WAVELENGTH",y="FLUX",ax=ax,marker='.',color='b',legend=False,label="calspec")
    dfg.plot(x="WAVELENGTH",y="FLUX",ax=ax,marker='.',color='r',legend=False,label="gaia")
    ax.set_xlim(wlmin,wlmax)

    # Select with a boolean mask on plain arrays, so the selection does not
    # depend on the index labels of dfc.
    wl = dfc["WAVELENGTH"].to_numpy(dtype=float)
    fl = dfc["FLUX"].to_numpy(dtype=float)
    fl_sel = fl[np.logical_and(wl>wlmin,wl<wlmax)]
    fl_sel = fl_sel[np.isfinite(fl_sel)]
    # Guard against an empty selection or a non-positive maximum
    if fl_sel.size > 0 and fl_sel.max() > 0.:
        flmax = fl_sel.max()*1.2
        ax.set_ylim(0.,flmax)

    ax.legend()
    ax.set_xlabel("$\\lambda$ (nm)")
    ax.set_ylabel("Flux erg/cm$^2$/s/nm ")
    ax.set_title(title)
    plt.show()

In [None]:
# Optional control plot of the spectrum selected in the check cells above
if FLAG_PLOTSPECTRA: 
    plotspec(tag,dfc,dfg)

## Plot all spectra

In [None]:
# Optional control plots: one calspec/gaia overlay per star
if FLAG_PLOTSPECTRA:
    for index in range(NSPEC):
        row = df_info.iloc[index]

        # star label used in the figure title
        hdname, gaianame = row["HD_name"], row["GAIA_ED3_Name"]
        tag = f"{hdname}_{gaianame}"

        # gaia and calspec spectra stored at the same position as the info row
        dfg, dfc = all_dfg[index], all_dfc[index]

        plotspec(tag,dfc,dfg)
    

## Atmospheric emulator

In [None]:
# Report the installed version of the getObsAtmo package
from importlib.metadata import version
the_ver = version('getObsAtmo')
print(f"Version of getObsAtmo : {the_ver}")

In [None]:
from getObsAtmo import ObsAtmo
# Atmosphere emulator instance created with the "AUXTEL" argument
# NOTE(review): emul is not used in the visible part of the notebook
emul = ObsAtmo("AUXTEL")

## Process transmission

In [None]:
import sys
# Add ../lib to the module search path so that modules stored there can be imported
sys.path.append('../lib')
#import libAtmosphericFit

In [None]:
# This package encapsulate the calculation on calibration used in this nb
from libPhotometricCorrections import *

In [None]:
def set_photometric_parameters(exptime, nexp, readnoise=None):
    """
    Build a PhotometricParameters object for the given exposure setup.

    parameters
      exptime   : forwarded as the ``exptime`` keyword
      nexp      : forwarded as the ``nexp`` keyword
      readnoise : forwarded as the ``readnoise`` keyword; should be in
                  electrons/pixel. None lets PhotometricParameters use its
                  default (8.8 e/pixel according to the original note).

    return
      the PhotometricParameters instance
    """
    return PhotometricParameters(exptime=exptime, nexp=nexp, readnoise=readnoise)

In [None]:
def scale_sed(ref_mag, ref_filter, sed):
    """
    Rescale a SED using the flux normalisation computed for a reference magnitude.

    parameters
      ref_mag    : reference magnitude passed to ``sed.calc_flux_norm``
      ref_filter : key selecting the bandpass in ``lsst_std``
      sed        : object providing ``calc_flux_norm`` and ``multiply_flux_norm``

    return
      the same ``sed`` object that was passed in
      (presumably rescaled in place by ``multiply_flux_norm`` - TODO confirm)

    NOTE(review): ``lsst_std`` is not defined in the visible part of the
    notebook; it must exist at call time.
    """
    sed.multiply_flux_norm(sed.calc_flux_norm(ref_mag, lsst_std[ref_filter]))
    return sed

## library rubin_sim defining LSST parameters, namely for photometric calculations

In [None]:
from rubinsimphot.phot_utils import Bandpass, Sed
from rubinsimphot.data import get_data_dir

### Config of atmosphere

In [None]:
# Reference atmospheric conditions handed to PhotometricCorrections below
am0 =1.20    # airmass
pwv0 = 3.0  # Precipitable water vapor vertical column depth in mm
oz0 = 300.  # Ozone vertical column depth in Dobson Unit (DU)
ncomp=1     # Number of aerosol components (not passed to PhotometricCorrections here)
tau0= 0.0 # Vertical Aerosol depth (VAOD) 
beta0 = 1.2 # Aerosol Angstrom exponent
# Calibration helper; the cells below read its WL, atm_std, bandpass_inst
# and bandpass_total_std attributes.
# NOTE(review): presumably provided by the star import of libPhotometricCorrections - confirm
pc = PhotometricCorrections(am0,pwv0,oz0,tau0,beta0)

In [None]:
# Optional control plot: standard atmosphere transmission versus wavelength
if FLAG_PLOTATMOSPHERETRANSM:
    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
    axs.plot(pc.WL, pc.atm_std, 'k-')
    axs.set_title("Standard atmosphere transmission")
    axs.set_xlabel("$\\lambda$ (nm)")
    plt.show()

In [None]:
# Optional control plot: instrument throughput of each filter
if FLAG_PLOTDETECTORTRANSM:
    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
    for index, f in enumerate(filter_tagnames):
        band_color = filter_color[index]
        bandpass = pc.bandpass_inst[f]
        # throughput curve, shaded area under it, and a vertical marker
        # at the wavelength stored in FILTERWL[index,2]
        axs.plot(bandpass.wavelen, bandpass.sb, color=band_color)
        axs.fill_between(bandpass.wavelen, bandpass.sb, color=band_color, alpha=0.2)
        axs.axvline(FILTERWL[index,2], color=band_color, linestyle="-.")
    axs.set_xlabel("$\\lambda$ (nm)")
    axs.set_title("Instrument throughput (rubin-obs)")
    plt.show()

In [None]:
# Optional control plot: total (standard) throughput of each filter
if FLAG_PLOTTOTALTRANSM:
    fig, axs = plt.subplots(nrows=1, ncols=1, figsize=(6, 4))
    for index, f in enumerate(filter_tagnames):
        band_color = filter_color[index]
        bandpass = pc.bandpass_total_std[f]
        # throughput curve, shaded area under it, and a vertical marker
        # at the wavelength stored in FILTERWL[index,2]
        axs.plot(bandpass.wavelen, bandpass.sb, color=band_color)
        axs.fill_between(bandpass.wavelen, bandpass.sb, color=band_color, alpha=0.2)
        axs.axvline(FILTERWL[index,2], color=band_color, linestyle="-.")
    axs.set_xlabel("$\\lambda$ (nm)")
    axs.set_title("Total filter throughput (rubin-obs)")
    plt.show()


## Convert Gaia-Calspec sed into rubin-sim SED

In [None]:
# Wrap every gaia / calspec spectrum into a rubinsimphot Sed object.
# The three lists are filled in the same order as df_info.
all_sed_gaia = []
all_sed_calspec = []
all_sed_names = []
for index in np.arange(NSPEC):

    row = df_info.iloc[index]

    # label identifying the star: "<HD_name>_<GAIA_ED3_Name>"
    hdname, gaianame = row["HD_name"], row["GAIA_ED3_Name"]
    tag = f"{hdname}_{gaianame}"

    # the spectra
    dfg, dfc = all_dfg[index], all_dfc[index]

    # wavelen in nm and flambda in ergs/cm^2/s/nm, as noted by the original author
    the_sed_c = Sed(wavelen=dfc["WAVELENGTH"].values, flambda=dfc["FLUX"].values, name=f"calspec_{tag}")
    the_sed_g = Sed(wavelen=dfg["WAVELENGTH"].values, flambda=dfg["FLUX"].values, name=f"gaiaspec_{tag}")

    all_sed_gaia.append(the_sed_g)
    all_sed_calspec.append(the_sed_c)
    all_sed_names.append(tag)

## Compute magnitudes and magnitude differences

In [None]:
# container for all magnitudes and magnitudes differences
all_mags_std_gaiacalspec = []
all_magsdiff_std_g_c = []
all_maggaia = []

# loop on spectra
for index in np.arange(NSPEC):
    mags_std_gaia = {}
    mags_std_calspec = {}
    the_sed_c = all_sed_calspec[index] 
    the_sed_g = all_sed_gaia[index] 

    # loop on filters
    for index2,f in enumerate(filter_tagnames) :

        # extrapolate the gaia sed in filter range
        WLMIN_f = pc.bandpass_total_std[f].wavelen.min()
        WLMAX_f = pc.bandpass_total_std[f].wavelen.max()
        WL = np.arange(WLMIN_f-1.,WLMAX_f+1.,1.)

        # extrapolate the gaia
        finterp = interp1d(the_sed_g.wavelen, the_sed_g.flambda, kind = 'nearest',fill_value="extrapolate")
        the_sed_g_extrapolated = Sed(wavelen=WL, flambda= finterp(WL), name=the_sed_g.name)
        # compute magnitude
        mags_std_gaia[f] = the_sed_g_extrapolated .calc_mag(pc.bandpass_total_std[f])

        # extrapolate the calspec
        finterp = interp1d(the_sed_c.wavelen, the_sed_c.flambda, kind = 'nearest',fill_value="extrapolate")
        the_sed_c_extrapolated = Sed(wavelen=WL, flambda= finterp(WL), name=the_sed_c.name)

        mags_std_calspec[f] = the_sed_c_extrapolated.calc_mag(pc.bandpass_total_std[f])

    
    dfmag_c = pd.DataFrame(mags_std_calspec, index=[the_sed_c.name]).T
    dfmag_g = pd.DataFrame(mags_std_gaia, index=[the_sed_g.name]).T
    dfmag_cg  =pd.concat([dfmag_c,dfmag_g],axis=1)
    col1 = dfmag_cg.columns[0] # calspec
    col2 = dfmag_cg.columns[1] # gaia
    col3 = all_sed_names[index] 
    dfmag_cg[col3] = (dfmag_cg[col2] - dfmag_cg[col1])*1000.0 # in mmag 
    all_mags_std_gaiacalspec.append(dfmag_cg)
    all_magsdiff_std_g_c.append(dfmag_cg.iloc[:,2].to_frame().T) # convert into a dataframe with col u,g,r,i,z,y
    all_maggaia.append(dfmag_cg.iloc[:,1].to_frame().T)

### concatenate results for all

#### concatenate gaia magnitudes

In [None]:
# Stack the one-row gaia magnitude frames: one row per spectrum, one column per filter
df_maggaia = pd.concat(all_maggaia)
df_maggaia

In [None]:
df_maggaia.describe()

In [None]:
# One histogram of the gaia magnitudes per band, side by side with a shared y axis.
fig,axs = plt.subplots(1,6,figsize=(20,4),sharey=True)
# band name -> histogram colour, in panel order
band_colors = {"u": "b", "g": "g", "r": "r", "i": "orange", "z": "grey", "y": "k"}
for ax, (band, band_color) in zip(axs, band_colors.items()):
    df_maggaia[band].plot.hist(bins=20,ax=ax,color=band_color,xlabel=f"$m_{band}$ (mag)",title=f"magnitudes in {band}")
plt.tight_layout()
plt.show()

#### concatenate magnitude differences : gaia-calspec

In [None]:
# Stack the one-row (gaia - calspec) frames: one row per spectrum, one column per filter, values in mmag
df_magdiff = pd.concat(all_magsdiff_std_g_c)
df_magdiff

In [None]:
# Summary statistics of the magnitude differences, per filter column
df_stat = df_magdiff.describe()
df_stat

In [None]:
# Overlaid histograms of the (gaia - calspec) magnitude differences, one colour per column.
fig,ax = plt.subplots(1,1,figsize=(8,4))
df_magdiff.plot.hist(
    bins=50,
    ax=ax,
    color=["b","g","r","orange","grey","k"],
    alpha=0.5,
    xlim=[-80.,80.],
    xlabel="$\\Delta m$ (mmag)",
    title="difference in magnitudes (gaia-calspec)",
)