# Read Gaia and Calspec spectra from hdf5 file

- author Sylvie Dagoret-Campagne
- affiliation IJCLab
- creation date : 2024/10/05
- update : 2024/10/05 : 

- inspired from : https://github.com/JospehCeh/PhotoZ_PhD/blob/u/dagoret/StudyFors2SED/ReadFors2spectrafromh5.ipynb

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib as mpl
import matplotlib.colors as colors
import matplotlib.cm as cmx
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm
from matplotlib.gridspec import GridSpec
import pandas as pd

import matplotlib.ticker                         # here's where the formatter is
import os,sys
import re
import pandas as pd

from astropy.io import fits
from astropy import units as u
from astropy import constants as c

plt.rcParams["figure.figsize"] = (8,6)
plt.rcParams["axes.labelsize"] = 'xx-large'
plt.rcParams['axes.titlesize'] = 'xx-large'
plt.rcParams['xtick.labelsize']= 'xx-large'
plt.rcParams['ytick.labelsize']= 'xx-large'

import pickle
from scipy.interpolate import RegularGridInterpolator
import h5py

In [None]:
# Raise the pandas display limit so that up to 500 columns are shown
# when a DataFrame is rendered in the notebook.
pd.set_option('display.max_columns', 500)

In [None]:
# Make the rubinsimphot sources importable on the hosts where they live in a
# checkout below the user's home directory; on other hosts only print a hint.
machine_name = os.uname().nodename
path_rubinsimphot = "repos/repos_w_2024_38/rubinsimphot/src"

on_usdf = 'sdf' in machine_name
on_rsp = 'dagoret-nb' in machine_name
if on_usdf or on_rsp:
    # 'sdf' takes precedence, so a host name matching both is reported as USDF.
    if on_usdf:
        print("Set environment for USDF")
    else:
        print("Set environment for USDF Rubin Science Platform")
    # expanduser("~") still resolves when $HOME is unset, whereas
    # os.getenv("HOME") would return None and make os.path.join raise TypeError.
    newpythonpath = os.path.join(os.path.expanduser("~"), path_rubinsimphot)
    # Re-running this cell must not pile up duplicate entries in sys.path.
    if newpythonpath not in sys.path:
        sys.path.append(newpythonpath)
elif 'mac' in machine_name:
    print("Be sure to run this notebook in conda environment named conda_py310")
else:
    print(f"Your current machine name is {machine_name}. Check your python environment")

## Config 

In [None]:
# Directory (relative to the working directory) and name of the input HDF5 file,
# then the two joined into the path that is opened further down.
input_path = "data_gaiacalspecspectra"
input_file_h5  = 'GAIACALSPECspectra.hdf5'
input_fullfile_h5 = os.path.join(input_path,input_file_h5)

## Read file

In [None]:
# Open the HDF5 file read-only and collect the names of its top-level members.
# NOTE(review): hf is never closed in the visible code and the following cells
# keep reading from it — consider hf.close() once all spectra are extracted.
hf =  h5py.File(input_fullfile_h5, 'r') 
list_of_keys = list(hf.keys())

In [None]:
list_of_keys

In [None]:
# pick one key    
# Take the first key of the file as a representative example ...
key_sel =  list_of_keys[0]
# ... and fetch the member stored under it (hf.get returns None if it is absent).
group = hf.get(key_sel)  

In [None]:
# Names of all attributes attached to the example group selected above.
all_subgroup_keys = list(group.attrs.keys())

In [None]:
all_subgroup_keys

In [None]:
def GetColumnHfData(hff,list_of_keys,nameval):
    """
    Extract one attribute from every requested group of an h5 file.

    parameters
      hff          : descriptor of h5 file (any mapping from key to an object
                     exposing an ``attrs`` mapping works)
      list_of_keys : list of group keys to read, e.g. list(hff.keys())
      nameval      : name of the attribute

    return
      the list of attribute values, in the same order as list_of_keys

    raises
      KeyError if a key is missing from hff or if a group has no attribute
      called nameval
    """
    # hff[key] (rather than hff.get(key)) makes a missing group fail right away
    # with a KeyError naming it, instead of an AttributeError on None.
    return [hff[key].attrs[nameval] for key in list_of_keys]

In [None]:
# create info
# Summary table: one column per attribute name, one row per key of the file
# (rows follow the order of list_of_keys).
df_info = pd.DataFrame(
    {key: GetColumnHfData(hf, list_of_keys, key) for key in all_subgroup_keys}
)

In [None]:
df_info

In [None]:
# Number of rows of the summary table df_info.
N = len(df_info)

## Extract the spectra

In [None]:
# Columns common to both spectra. In the file, each dataset is named after the
# column with a one-letter prefix: "g" for the gaia spectrum, "c" for calspec.
spectrum_columns = ("WAVELENGTH", "FLUX", "STATERROR", "SYSERROR")

all_dfg = []   # gaia dataframes, in the order of list_of_keys
all_dfc = []   # calspec dataframes, in the order of list_of_keys

for key in list_of_keys:
    group = hf[key]
    # group[name] raises KeyError for a missing dataset, whereas group.get(name)
    # would return None and np.array(None) would silently become a bogus column.
    dfg = pd.DataFrame({col: np.array(group["g" + col]) for col in spectrum_columns})
    dfc = pd.DataFrame({col: np.array(group["c" + col]) for col in spectrum_columns})

    all_dfg.append(dfg)
    all_dfc.append(dfc)

## Check one spectrum

In [None]:
# Position of the entry to inspect; it is used to index df_info, all_dfg and all_dfc.
index = 0

In [None]:
row = df_info.iloc[index]

In [None]:
row

In [None]:
# Build the label used in the plot title from the two name attributes of the row.
hdname = row["HD_name"]
gaianame = row["GAIA_ED3_Name"]
tag = f"{hdname}_{gaianame}"

In [None]:
hdname

In [None]:
# Spectra of the selected entry: dfg is the gaia dataframe, dfc the calspec one.
dfg = all_dfg[index]
dfc = all_dfc[index]

In [None]:
def plotspec(tag,dfc,dfg,wlmin=300.,wlmax=1100.):
    """
    Overlay the calspec and gaia spectra of one star in a single figure.

    parameters
      tag   : name of the star to appear in the title
      dfc   : dataframe for calspec, with columns WAVELENGTH and FLUX
      dfg   : dataframe for gaia, with columns WAVELENGTH and FLUX
      wlmin : lower edge of the displayed wavelength range (default 300.)
      wlmax : upper edge of the displayed wavelength range (default 1100.)

    return
      None, the figure is shown with plt.show()

    The y axis goes from 0 to 1.2 times the largest finite calspec flux found
    strictly inside (wlmin, wlmax). When there is no such positive value the
    automatic y limits chosen by matplotlib are kept.
    """

    fig, ax = plt.subplots(1,1,figsize=(8,5))
    title = "calspec-gaia : " + tag

    # legend=False: the legend is drawn once, below, with ax.legend()
    dfc.plot(x="WAVELENGTH",y="FLUX",ax=ax,marker='.',color='b',legend=False,label="calspec")
    dfg.plot(x="WAVELENGTH",y="FLUX",ax=ax,marker='.',color='r',legend=False,label="gaia")
    ax.set_xlim(wlmin,wlmax)

    # Work on plain arrays with a boolean mask, so the selection does not
    # depend on the index labels of dfc.
    wl = dfc["WAVELENGTH"].to_numpy(dtype=float)
    fl = dfc["FLUX"].to_numpy(dtype=float)
    in_range = (wl > wlmin) & (wl < wlmax) & np.isfinite(fl)
    if in_range.any():
        flmax = fl[in_range].max()*1.2
        # set_ylim rejects NaN and a non-positive top would flip or collapse the axis
        if flmax > 0.:
            ax.set_ylim(0.,flmax)

    ax.legend()
    ax.set_xlabel("$\\lambda$ (nm)")
    ax.set_ylabel("Flux erg/cm$^2$/s/nm ")
    ax.set_title(title)
    plt.show()

In [None]:
plotspec(tag,dfc,dfg)

## Plot all spectra

In [None]:
# One figure per star: row `index` of df_info and entry `index` of
# all_dfc / all_dfg were all filled in the order of list_of_keys.
for index in range(N):
    row = df_info.iloc[index]

    # title label built from the two name attributes
    hdname, gaianame = row["HD_name"], row["GAIA_ED3_Name"]
    tag = f"{hdname}_{gaianame}"

    # matching calspec and gaia spectra
    dfc, dfg = all_dfc[index], all_dfg[index]

    plotspec(tag, dfc, dfg)
    