# Access the Fors2 spectra, estimate the baseline background and generate FITS files for GELATO

Spectra in observation frame

- author : Sylvie Dagoret-Campagne
- affiliation : IJCLab/IN2P3/CNRS
- creation date : 2024/02/11
- update : 2024/02/14


The goal is to generate a FITS file from each spectrum such that its emission lines can be found by GELATO.


In [None]:
import h5py
import pandas as pd
import numpy as np
import os
import re
from astropy.io import fits
from astropy.table import Table
import matplotlib as mpl
import matplotlib.pyplot as plt
#%matplotlib inline
%matplotlib ipympl
import matplotlib.colors as colors
import matplotlib.cm as cmx
import collections
from collections import OrderedDict
import re
import matplotlib.gridspec as gridspec
from sklearn.gaussian_process import GaussianProcessRegressor, kernels
import matplotlib.gridspec as gridspec
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

from astropy.table import Table

In [None]:
# Notebook-wide matplotlib defaults: wide figures and extra-large fonts for
# axis labels, titles and tick labels.
plt.rcParams.update({
    "figure.figsize": (12, 6),
    "axes.labelsize": 'xx-large',
    'axes.titlesize': 'xx-large',
    'xtick.labelsize': 'xx-large',
    'ytick.labelsize': 'xx-large',
})

In [None]:
#from sedpy import observate
# get magnitude from a spectrum:


In [None]:
from sedpy import observate
from fors2pcigale.filters import FilterInfo

In [None]:
from fors2pcigale.fors2starlightio import Fors2DataAcess
from fors2pcigale.utils.utils_stat import weighted_mean, weighted_variance

In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor, kernels

In [None]:
# Wavelengths of the hydrogen lines, one list per series. They are used as
# rest-frame values (the redshifted plot helper multiplies them by 1+z) and are
# drawn on the wavelength axis of the spectra, i.e. in Angstrom.
Lyman_lines = [1220., 1030. ,973.,950., 938., 930.]
Balmer_lines = [6562.791,4861.351,4340.4721,4101.740,3970.072,3889.0641,3835.3971]
# NOTE(review): the leading 8750. nearly duplicates the 8750.46 entry further
# down the list; it may be a typo for another Paschen line - confirm.
Paschen_lines = [8750., 12820., 10938.0,10050., 9546.2, 9229.7,9015.3, 8862.89,8750.46,8665.02]
Brackett_lines = [40522.79, 26258.71, 21661.178, 19440., 18179.21]
Pfund_lines = [ 74599.0, 46537.8, 37405.76 , 32969.8, 30400.]
# The three lists below are parallel: same series order in each of them.
all_Hydrogen_lines = [ Lyman_lines, Balmer_lines, Paschen_lines, Brackett_lines, Pfund_lines]
Color_lines = ["purple", "blue", "green", "red","grey"]
# Series thresholds, same units as the lines
Balmer_thres = 3645.6
Lyman_thres = 911.267
Paschen_thres = 8200.
Brackett_thres = 14580.
# Dedicated name for the Pfund threshold: it used to be assigned to
# ``Pfund_lines``, which replaced the list of Pfund lines by this scalar.
Pfund_thres = 22800.
all_Hydrogen_thres = [Lyman_thres , Balmer_thres, Paschen_thres, Brackett_thres, Pfund_thres]


In [None]:
# Wavelength windows given as [lower edge, upper edge], same units as the
# hydrogen line lists.
# Blue and red bands (presumably the two sides of the D4000 break - confirm)
D4000_blue = [3750., 3950.]
D4000_red = [4050., 4250]
# Windows spanning from a series threshold to the first line listed for it
W_LYMAN = [Lyman_thres, Lyman_lines[0]]
W_BALMER = [Balmer_thres, Balmer_lines[0]]

In [None]:
def plot_hydrogen_lines(ax):
    """Overlay the Lyman and Balmer features on an axis, without redshift.

    For each of the first two series of ``all_Hydrogen_lines`` a thin vertical
    line is drawn per listed wavelength, plus a dotted vertical line at the
    series threshold. The ``W_LYMAN`` and ``W_BALMER`` windows are then shaded.

    Parameters
      ax : matplotlib axis on which the features are drawn
    """
    n_thresholds = len(all_Hydrogen_thres)

    # Only the first two series (Lyman and Balmer) are drawn
    for series_idx, series_lines in enumerate(all_Hydrogen_lines[:2]):
        series_color = Color_lines[series_idx]

        for wavelength in series_lines:
            ax.axvline(wavelength, color=series_color, lw=0.5)

        # no threshold available for this series
        if series_idx >= n_thresholds:
            continue
        ax.axvline(all_Hydrogen_thres[series_idx], color=series_color, linestyle=":")

    lyman_lo, lyman_hi = W_LYMAN[0], W_LYMAN[1]
    ax.axvspan(lyman_lo, lyman_hi, facecolor='purple', alpha=0.5)

    balmer_lo, balmer_hi = W_BALMER[0], W_BALMER[1]
    ax.axvspan(balmer_lo, balmer_hi, facecolor='green', alpha=0.2)

In [None]:
def plot_hydrogen_lines_redshift(ax,z):
    """Overlay the Lyman and Balmer features on an axis, shifted by a redshift.

    Every wavelength (lines, series thresholds and the edges of the
    ``W_LYMAN`` / ``W_BALMER`` windows) is multiplied by ``(1+z)`` before being
    drawn. Lines and thresholds are drawn as solid vertical lines of width 2.

    Parameters
      ax : matplotlib axis on which the features are drawn
      z  : redshift applied to all the wavelengths
    """
    n_thresholds = len(all_Hydrogen_thres)

    # Only the first two series (Lyman and Balmer) are drawn
    for series_idx, series_lines in enumerate(all_Hydrogen_lines[:2]):
        series_color = Color_lines[series_idx]

        for wavelength in series_lines:
            ax.axvline(wavelength*(1+z), color=series_color, lw=2)

        # no threshold available for this series
        if series_idx >= n_thresholds:
            continue
        shifted_threshold = all_Hydrogen_thres[series_idx]*(1+z)
        ax.axvline(shifted_threshold, color=series_color, linestyle="-", lw=2)

    lyman_lo, lyman_hi = W_LYMAN[0]*(1+z), W_LYMAN[1]*(1+z)
    ax.axvspan(lyman_lo, lyman_hi, facecolor='purple', alpha=0.5)

    balmer_lo, balmer_hi = W_BALMER[0]*(1+z), W_BALMER[1]*(1+z)
    ax.axvspan(balmer_lo, balmer_hi, facecolor='green', alpha=0.2)

In [None]:
def generate_spectrum_table(wl, flx, errors, version_table ="v0"):
    """
    Build the astropy table describing one spectrum for GELATO.

    Parameters
      wl : wavelength in angstrom (array or plain sequence)
      flx : Flux in FLAM (array or plain sequence)
      errors : statistical errors on spectra (array or plain sequence)
      version_table : version of the table format; "v0" selects the
                      three-column layout, any other value the five-column one

    Return:
       astropy table for GELATO
       with the following columns and column names:

       - The log10 of the wavelengths in Angstroms, column name: "loglam"
       - The spectral flux density in flam units, column name: "flux"
       - The inverse variances of the data points, column name: "ivar"

       and, when version_table is not "v0", the two additional columns:

       - The errors as given in input, column name: "sig_flux"
       - The squared signal-to-noise ratio (flux/sig_flux)**2,
         column name: "ratio_nphe"

    Note:
       ivar is computed as 1/errors**2, hence an error equal to zero yields
       an infinite inverse variance (numpy emits a RuntimeWarning).
    """

    # accept plain sequences as well as arrays; arrays are passed through as is
    wl = np.asanyarray(wl)
    flux = np.asanyarray(flx)
    sig_flux = np.asanyarray(errors)

    loglam = np.log10(wl)
    ivar = 1./sig_flux**2

    if version_table == "v0":
        return Table([loglam, flux, ivar], names=('loglam', 'flux', 'ivar'))

    # only needed by the extended layout
    ratio_nphe = (flux/sig_flux)**2  # should be related to the number of photoelectrons

    return Table([loglam, flux, ivar, sig_flux, ratio_nphe], names=('loglam', 'flux', 'ivar','sig_flux','ratio_nphe'))

## Configuration

- The spectrum SPEC183 is not valid (see png file)

In [None]:
# Names of the spectra that the processing loop skips
bad_spectra_list = ["SPEC183"]

### Version of the Table format

In [None]:
# Table-format version: passed to generate_spectrum_table and embedded in the
# names of the output files and of the output directory
version_table = "v1"

### Path to write Spectra

- version v0 : simple background with Table columns names=('loglam', 'flux', 'ivar')

In [None]:
# File names of the object list (CSV and FITS flavours); the table-format
# version is part of the name
obj_filename_csv =  f"object_filelist_{version_table}.csv" 
obj_filename_fits =  f"object_filelist_{version_table}.fits" 
# Directory receiving the per-spectrum FITS tables (one sub-directory per
# table-format version)
path_gelato_spectra_fits = f"./spec_forgelato/{version_table}"

In [None]:
# create the output directory (and its parents) if it does not exist yet;
# exist_ok avoids the race between testing for the directory and creating it
os.makedirs(path_gelato_spectra_fits, exist_ok=True)

## Init

### Gaussian process

Configure the Gaussian (RBF) kernel such that its length scale (the filtering constant) is never less than 5 AA.

In [None]:
# RBF kernel whose length scale is optimised within the bounds [5, 10000]
# (same units as the abscissa given to the fit). The starting value is set on
# the lower bound: the former start (0.5) lay outside its own bounds.
kernel = kernels.RBF(length_scale=5.0, length_scale_bounds=(5, 10000.0))
# Single regressor instance, re-fitted for every spectrum in the processing loop
gpr = GaussianProcessRegressor(kernel=kernel ,random_state=0)

### Filters

In [None]:
# Filter description object from fors2pcigale; the processing loop reads its
# index, name, transmission, normalisation and colour lists
ps = FilterInfo()
# presumably prints the content of the filter description - confirm against fors2pcigale
ps.dump()

### Init data

In [None]:
# Accessor used below to retrieve the group keys, the attributes, the spectrum
# and the calibration factor of each Fors2 spectrum
fors2 = Fors2DataAcess()

In [None]:
#fors2.plot_allspectra()

In [None]:
# Group keys of the data set; each key is used as a spectrum name by the
# processing loop
fors2_tags = fors2.get_list_of_groupkeys()
# number of keys, shown as the output of the cell
len(fors2_tags)

In [None]:
# Sub-group keys as returned by the accessor (presumably the attribute names
# available for each spectrum - confirm), printed for information only
list_of_fors2_attributes = fors2.get_list_subgroup_keys()
print(list_of_fors2_attributes)

## Loop to plot spectra in observation frame

Plot of $F_\lambda(\lambda)$ in FLAM (erg/cm2/s/AA) units

In [None]:
# 'bwr' colour map and its reversed version
bwr_map = plt.get_cmap('bwr')
reversed_map = bwr_map.reversed()
# Mapping of a value normalised over [0, 1] (the redshift in the processing
# loop) onto the 'bwr' colour map
cNorm = colors.Normalize(vmin=0., vmax=1.)
scalarMap = cmx.ScalarMappable(cmap=bwr_map, norm=cNorm)

In [None]:

# Per-spectrum processing. For every spectrum which is not in the bad list:
#   1. scale the flux by its calibration factor,
#   2. fit the flux with the Gaussian process to get a smooth baseline, then
#      fit the absolute residuals to get a smooth error estimate,
#   3. plot the spectrum, the residuals and the filter transmissions,
#   4. write the spectrum table in a FITS file.
# The output paths and the redshifts are collected for the object list.
all_paths = []
all_redshifts = []

for specname in fors2_tags:

    if specname in bad_spectra_list:
        print(f">>>> SKIP spectrum {specname} which is in list of bad spectra")
        continue

    spectrum_fits_table_out = os.path.join(path_gelato_spectra_fits,f"specgelato_{specname}.fits")

    # retrieve generic info on the spectrum
    attr = fors2.getattribdata_fromgroup(specname)
    redshift = attr['redshift']

    # save for object list
    all_paths.append(spectrum_fits_table_out)
    all_redshifts.append(redshift)

    lines = attr['lines']
    # colour of the spectrum curve, driven by the redshift
    the_color = scalarMap.to_rgba(redshift, alpha=1)
    the_dict_sed =  fors2.getspectrum_fromgroup(specname)
    wl = the_dict_sed["wl"]
    fl = the_dict_sed["fl"]

    #########################################
    # calibrate flux in FLAM (erg/s/cm2/AA)
    ###########################################

    FLAG_EXCEPTION = False
    try:
        factor_mean,factor_err  = fors2.get_calibrationfactor(specname)
    except Exception as error:
        # best effort: the spectrum is kept with a unit factor (no calibration)
        print(f">>>>>>>>>>>>>> EXCEPTION {error} for spectrum {specname}")
        FLAG_EXCEPTION = True
        factor_mean,factor_err = 1.,0.

    factor_relerr = factor_err/factor_mean

    if not FLAG_EXCEPTION:
        print(specname,f"Fors2IO : CALIBRATION FACTOR = {factor_mean:.4g} +/- {factor_err:.4g} ({factor_relerr:.4f})")

    #########################################
    # multiply flux by the calibration factor
    #########################################
    # NOTE(review): in-place scaling, it also modifies the array held by
    # the_dict_sed - confirm that the accessor hands out a copy
    fl*= factor_mean
    ymax = fl.max()
    print(f"ymax = {ymax:.4g} erg/cm2/s/AA")


    ##########################################################
    # Estimate some kind of background with gaussian process
    ##########################################################

    X = wl
    Y = fl
    # first fit: smooth baseline of the flux
    gpr.fit(X[:, None], Y)
    Yfit = gpr.predict(X[:, None], return_std=False)
    # residuals with respect to the baseline
    Z = Y - Yfit

    # second fit: smooth version of the absolute residuals, used as error
    # NOTE(review): nothing forces this prediction to be strictly positive
    gpr.fit(X[:, None],np.abs(Z))
    Yfit2 = gpr.predict(X[:, None], return_std=False)

    ################################
    # Plot the figures, fig1,fi2,fig3
    #################################
    fig = plt.figure(constrained_layout=True,figsize=(12,6))

    gs = gridspec.GridSpec(nrows=3, ncols=1, height_ratios=[3,3,1],hspace=0)

    # top panel: spectrum, baseline and error band
    ax1 = fig.add_subplot(gs[0])

    ax1.plot(wl,fl,'-',color=the_color)
    ax1.plot(wl,Yfit,'-',color="k",lw=3)
    ax1.fill_between(wl, Yfit-Yfit2, Yfit+Yfit2,facecolor="orange",edgecolor="k",alpha=0.8)
    title = f"{specname}, z={redshift:.2f}, lines = {lines}"
    ax1.set_title(title)
    ax1.axvline(wl.min(),color="k")
    ax1.axvline(wl.max(),color="k")
    ax1.set_xlim(4000.,10000.)
    ax1.set_ylim(0.,ymax)
    # raw strings: the labels contain LaTeX backslashes
    ax1.set_xlabel(r"$\lambda (\AA)$")
    ax1.set_ylabel(r"$F_\lambda(\lambda) (erg/cm^2/s/\AA)$")
    ax1.grid()
    plot_hydrogen_lines_redshift(ax1,redshift)

    # middle panel: residuals and error band
    ax3 = fig.add_subplot(gs[1], sharex=ax1)
    ax3.plot(wl,Z,'b-',lw=0.5)
    ax3.axvline(wl.min(),color="k")
    ax3.axvline(wl.max(),color="k")
    ax3.fill_between(wl, -Yfit2, +Yfit2,facecolor="grey",edgecolor="k",alpha=0.5)
    ax3.grid()

    # bottom panel: normalised filter transmissions
    ax2 = fig.add_subplot(gs[2], sharex=ax1)
    plt.setp(ax2.get_yticklabels(), visible=False)
    ##################
    # loop on filters
    ####################
    for index in ps.filters_indexlist:
        the_name = ps.filters_namelist[index]
        the_filt = ps.filters_transmissionlist[index]
        the_norm = ps.filters_transmissionnormlist[index]
        the_wlmean = the_filt.wave_mean
        # dedicated name, so that the colour of the spectrum is not overwritten
        filter_color = ps.filters_colorlist[index]
        the_transmission =the_filt.transmission/the_norm
        ax2.plot(the_filt.wavelength,the_transmission,color=filter_color)

        # alternate the height of the labels to limit their overlap
        label_height = 0.7 if index%2 ==0 else 0.85
        ax2.text(the_wlmean, label_height, the_name,horizontalalignment='center',verticalalignment='center',color=filter_color,fontweight="bold")
    ax2.axvline(wl.min(),color="k")
    ax2.axvline(wl.max(),color="k")
    ax2.grid()
    __=ax2.set_ylim(0.,1.)

    # horizontal redshift colour bar inside the top panel
    axins1 = inset_axes(ax1,
                    width="50%",  # width = 50% of parent_bbox width
                    height="5%",  # height : 5%
                    loc='upper right')

    cbar=fig.colorbar(scalarMap , cax=axins1,orientation='horizontal')
    cbar.ax.set_xlabel('redshift')

    plt.show()

    ########################
    # generate the table
    ##########################
    # the smoothed absolute residuals are used as statistical errors
    t = generate_spectrum_table(wl,fl, Yfit2, version_table = version_table)
    t.write(spectrum_fits_table_out, format="fits",overwrite=True)

    #########################################################
    # calculate  noise in histograms
    # Will calculate statistical background by another mean later
    #######################################################
    #wavelengthrange_sdss  = [ filt.wavelength[np.where(filt.transmission>0.02)[0]] for filt in ps.all_filt_sdss]
    #wavelengthminmax_sdss = [ (wl.min(),wl.max()) for wl in wavelengthrange_sdss ]
    # select g(1),r(2),i(3)
    #wavelengthminmax_sdss = wavelengthminmax_sdss[1:4]
    #print( wavelengthminmax_sdss)

    #all_data = []
    #for ifilt in range(3):      
    #    wlmin = wavelengthminmax_sdss[ifilt][0]
    #    wlmax = wavelengthminmax_sdss[ifilt][1]
    #    indexes_selected = np.where(np.logical_and(wl>wlmin,wl<wlmax))[0]
    #    data = Z[indexes_selected]
    #    all_data.append(data)

    #d[specname] = all_data 
    

## Save object list

In [None]:
# Object list: one row per processed spectrum, with the path of its FITS
# table ("Path") and its redshift ("z")
df = pd.DataFrame({"Path": all_paths, "z": all_redshifts})

In [None]:
# Write the object list as CSV
# NOTE: with the pandas defaults the row index is written as a first unnamed column
df.to_csv(obj_filename_csv)

In [None]:
# Convert the object list into an astropy table
# (re-uses the name t, which held the last spectrum table of the loop)
t = Table.from_pandas(df)

In [None]:
# Write the object list as a FITS table, replacing any existing file
t.write(obj_filename_fits,format="fits",overwrite=True)

In [None]:
# Show the object list
print(t)