# Save Gaia spectra in hdf5 file

- author Sylvie Dagoret-Campagne
- affiliation IJCLab
- creation date : 2024/10/04
- update : 2024/10/04 : write spectra

- Inspired by : https://github.com/JospehCeh/PhotoZ_PhD/blob/u/dagoret/StudyFors2SED/convertFors2spectratohdf5.ipynb

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib as mpl
import matplotlib.colors as colors
import matplotlib.cm as cmx
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm
from matplotlib.gridspec import GridSpec
import pandas as pd

import matplotlib.ticker                         # here's where the formatter is
import os,sys
import re
import pandas as pd

from astropy.io import fits
from astropy import units as u
from astropy import constants as c

# Notebook-wide plotting defaults: figure size and extra-large label/title/tick fonts.
plt.rcParams.update({
    "figure.figsize": (8, 6),
    "axes.labelsize": 'xx-large',
    "axes.titlesize": 'xx-large',
    "xtick.labelsize": 'xx-large',
    "ytick.labelsize": 'xx-large',
})

import pickle
from scipy.interpolate import RegularGridInterpolator
import h5py

In [None]:
# Let pandas display up to 500 columns before truncating a DataFrame.
pd.options.display.max_columns = 500

In [None]:
# Identify the host so that the rubinsimphot checkout under $HOME can be put
# on the import path on the machines where it is installed that way.
machine_name = os.uname().nodename
path_rubinsimphot = "repos/repos_w_2024_38/rubinsimphot/src"

_on_usdf = 'sdf' in machine_name
_on_rsp = 'dagoret-nb' in machine_name

if _on_usdf or _on_rsp:
    # Both cases add the same sys.path entry; only the banner differs.
    # The 'sdf' match takes precedence, as it is tested first.
    if _on_usdf:
        print("Set environment for USDF")
    else:
        print("Set environment for USDF Rubin Science Platform")
    newpythonpath = os.path.join(os.getenv("HOME"), path_rubinsimphot)
    sys.path.append(newpythonpath)
elif 'mac' in machine_name:
    print("Be sure to run this notebook in conda environment named conda_py310")
else:
    print(f"Your current machine name is {machine_name}. Check your python environment")

## Config 

In [None]:
# File name of the CSV star catalog read further down with pandas.
cat_fn = "calspec_v2.csv"

In [None]:
# HD names whose catalog rows are printed as a quick check after loading.
list_of_targets = ["HD111235","HD144334","HD73495","HD165763"] 

In [None]:
# Switches for the optional steps of the notebook:
# FLAG_PLOT gates the plotting of each retrieved spectrum,
# FLAG_SAVEHDF5 gates the writing (and closing) of the HDF5 output file.
FLAG_PLOT = False
FLAG_SAVEHDF5 = True

In [None]:
# Output location: the HDF5 file is written to <output_path>/<output_file_h5>.
output_path = "data_gaiaspectra"
output_file_h5 = "GAIAspectra.hdf5"
output_fullfile_h5 = os.path.join(output_path, output_file_h5)

In [None]:
# Create the output directory, including any missing parent directories.
# exist_ok=True makes the call a no-op when the directory is already there and
# removes the race between a separate existence test and the creation.
os.makedirs(output_path, exist_ok=True)

## Read catalog

In [None]:
# Load the star catalog; the first CSV column becomes the index.
# The Gaia identifier column is parsed directly as nullable integer ("Int64").
# Without an explicit dtype, an integer column with missing entries is read as
# float64 and only then cast, which can round large integer identifiers.
# NOTE(review): assumes GAIA_ED3_Name holds plain integer identifiers - confirm
# against the CSV content.
df = pd.read_csv(cat_fn, index_col=0, dtype={"GAIA_ED3_Name": "Int64"})
df

In [None]:
# Show the catalog entry (possibly empty) found for each requested target.
for target in list_of_targets:
    row = df.loc[df["HD_name"] == target]
    print(row)

In [None]:
# Keep only the HD name and the Gaia identifier columns of the catalog.
df_s = df[["HD_name","GAIA_ED3_Name"]]

In [None]:
# Discard every row in which either of the two kept columns is missing.
df_s = df_s.dropna(axis="index", how="any")

In [None]:
# Distinct Gaia identifiers remaining after the rows with missing values were dropped.
list_of_gaia_ids = df_s["GAIA_ED3_Name"].unique()

In [None]:
# Remove any NaN entry still present among the identifiers.
_nan_mask = np.isnan(list_of_gaia_ids)
list_of_gaia_ids = list_of_gaia_ids[~_nan_mask]

In [None]:
# Convert the identifiers to the plain integer dtype.
list_of_gaia_ids = list_of_gaia_ids.astype(int)

In [None]:
# Echo the final list of identifiers as the cell output.
list_of_gaia_ids

In [None]:
# gaiaspec is treated as optional so the notebook can still be loaded without
# it. The fallback keeps ``getGaia = None``, but the situation is reported here
# instead of surfacing later as an AttributeError on ``None``.
try:
    from gaiaspec import getGaia
except ModuleNotFoundError:
    getGaia = None
    print("gaiaspec is not installed: Gaia spectra cannot be retrieved until it is available")

## Check the correspondence of names

In [None]:
# Renumber the rows from 0 and keep the Gaia identifier column as nullable integer,
# then echo the resulting table.
df_s = df_s.reset_index(drop=True)
df_s['GAIA_ED3_Name'] = df_s['GAIA_ED3_Name'].astype('Int64')
df_s

## Extract all the spectra

In [None]:
# Containers filled in lockstep: entry i of each list refers to the same star.
all_gaia = []  # gaiaspec Gaia objects
all_spec = []  # spectra returned by Gaia.get_spectrum_numpy
all_row = []   # catalog rows selected for that star

if getGaia is None:
    # Fail with an explicit message rather than an AttributeError on None.
    raise ModuleNotFoundError(
        "gaiaspec is required to extract the Gaia spectra but is not installed"
    )

for hd_name in df_s.HD_name.values:
    row = df[df["HD_name"] == hd_name]
    try:
        # The name lookup and the object construction are inside the try too,
        # so that a failure on one star does not abort the whole extraction.
        gaianum = getGaia.get_gaia_name_from_star_name(hd_name)
        print(hd_name, gaianum)
        g = getGaia.Gaia(hd_name)
        spec_dict = g.get_spectrum_numpy(hd_name)
    except Exception as inst:
        # Best effort: report the problem and continue with the next star.
        print(f"Skipping {hd_name}")
        print(type(inst))    # the exception type
        print(inst.args)     # arguments stored in .args
        print(inst)          # __str__ allows args to be printed directly,
                             # but may be overridden in exception subclasses
    else:
        # Append only complete entries so that the three lists stay aligned.
        all_spec.append(spec_dict)
        all_gaia.append(g)
        all_row.append(row)

# Number of stars for which a spectrum was retrieved.
N = len(all_gaia)

In [None]:
# Optional visual check: draw every retrieved spectrum with linear axes.
if FLAG_PLOT:
    for star_gaia in all_gaia:
        star_gaia.plot_spectrum(xscale='linear', yscale='linear')

## Save spectra in files
- example TO WRITE : https://github.com/JospehCeh/PhotoZ_PhD/blob/u/dagoret/StudyFors2SED/convertFors2spectratohdf5.ipynb
- example TO READ : https://github.com/JospehCeh/PhotoZ_PhD/blob/u/dagoret/StudyFors2SED/ReadFors2spectrafromh5.ipynb

In [None]:
if FLAG_SAVEHDF5:
    # Write one HDF5 group per star, named "<HD name>_<Gaia id>". The catalog
    # columns are stored as group attributes and the spectrum arrays as
    # gzip-compressed datasets.
    # The file is deliberately left open on success: the following cells
    # list its keys and close it.
    hf_outfile = h5py.File(output_fullfile_h5, 'w')
    try:
        for the_row, the_spec in zip(all_row, all_spec):
            hdname = the_row["HD_name"].values[0]
            gaianame = the_row["GAIA_ED3_Name"].values[0]
            tag = f"{hdname}_{gaianame}"
            print(tag)
            h5group = hf_outfile.create_group(tag)
            # save attributes (first matching catalog row)
            for colname in the_row.columns:
                h5group.attrs[colname] = the_row[colname].values[0]
            # save the arrays
            for key in ("WAVELENGTH", "FLUX", "STATERROR", "SYSERROR"):
                h5group.create_dataset(key, data=the_spec[key], compression="gzip", compression_opts=9)
    except BaseException:
        # Do not leave a half-written file open: a dangling handle prevents
        # re-opening the same path in 'w' mode when the cell is run again.
        hf_outfile.close()
        raise

        

In [None]:
if FLAG_SAVEHDF5:
    # A bare expression nested in an ``if`` is not echoed by the notebook,
    # so the group names are printed explicitly.
    print(list(hf_outfile.keys()))

In [None]:
# Close the HDF5 output file that was opened for writing.
if FLAG_SAVEHDF5:
    hf_outfile.close() 