Fit a series of spectra
----------------------------------

First let's import the packages we will need.


Analyse outputs from GELATO fit results on a single spectrum

- author : Sylvie Dagoret-Campagne
- creation date : 2024-03-25
- update : 2024-05-24


- Kernel at CCIN2P3 : ``conda_desc_py310_pcigale``
- Kernel on my laptop : ``pcigale``

# Create dir
  ``ResultsFitInNb/``

In [None]:
# Import packages
import gelato
import numpy as np
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['font.size'] = 25
from matplotlib import pyplot # For plotting
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
# For loading in data
from astropy.io import fits
from astropy.table import Table 
import os,re
import pandas as pd

In [None]:
from astropy.modeling import models, fitting
from astropy import modeling
# Initial 1-D Gaussian line-profile model (amplitude=1, mean=0, stddev=1)
# NOTE(review): neither g_init nor fit_g is referenced again in the visible cells — confirm they are still needed
g_init = models.Gaussian1D(amplitude=1, mean=0, stddev=1)
# Levenberg-Marquardt least-squares fitter (a non-linear fitter)
fit_g = fitting.LevMarLSQFitter()

In [None]:
from fors2pcigale.fors2starlightio import Fors2DataAcess

In [None]:
#from gelato.Plotting import  Plot, PlotFig,subplotplot
#from gelato.Plotting import subplotplot
from gelato.Plotting import logbarrier
from scipy.optimize import minimize

#import gelato.ConstructParams as CP

import gelato.Utility as U
import gelato.Plotting as P
import gelato.ConstructParams as CP

# GELATO
import gelato.Utility as U
import gelato.CustomModels as CM
import gelato.SpectrumClass as SC

from gelato.Constants import C

In [None]:
from libExampleFitInNb import *

## Fors2 Interface

In [None]:
# FORS2 data accessor; the fit loop uses it to fetch each spectrum's image (get_specimg / get_specimgfile)
fors2 = Fors2DataAcess()

## Config

In [None]:
# Version tag interpolated into the parameter-file name, the object-list CSV name and the input spectra directory
version = "v2"

## Gelato Parameters

In [None]:
# GELATO JSON parameter file matching the selected version
path_params = "./ExampleParametersFitInNb_{}.json".format(version)

# Build the GELATO parameters dictionary from that file
params_gel = gelato.ConstructParams.construct(path_params)

# Run quietly and in a single process (no multiprocessing)
params_gel['Verbose'] = False
params_gel['NProcess'] = 1

In [None]:
# Display the 'EmissionGroups' entry of the GELATO parameters
params_gel['EmissionGroups']

In [None]:
# Print one line per emission group, then one indented line per species with its number of lines
for emission_group in params_gel['EmissionGroups']:
    print("Group : " + emission_group["Name"])
    for species in emission_group['Species']:
        n_lines = str(len(species['Lines']))
        print("\t  Species : " + species['Name'] + " , Nlines = " + n_lines)

## Table with Spectra name and Redshifts

In [None]:
# Object table for the selected version; the fit loop reads its 'Path' and 'z' columns.
# The first CSV column is used as the index.
filename_object_file_list = "object_filelist_{}.csv".format(version)
df_objlist = pd.read_csv(filename_object_file_list, index_col=0)

## Input files before the fit, sorting and index

In [None]:
# Directory holding the input spectra prepared for GELATO, for the selected version
path = "./spec_forgelato/{}".format(version)

In [None]:
# Every entry of the input directory, unfiltered (os.listdir returns them in arbitrary order)
list_all_files = os.listdir(path)

In [None]:
# Keep only the files named specgelato_SPEC<number>.fits and record each
# number so that the files can be sorted numerically afterwards.
select_pattern = re.compile("^specgelato_SPEC.*[.]fits$")
number_pattern = re.compile("specgelato_SPEC(.*)[.]fits$")

idx_selected_files = []
list_selected_files = []
for fname in list_all_files:
    if select_pattern.search(fname) is None:
        continue
    list_selected_files.append(fname)
    # The text between "SPEC" and ".fits" must parse as an integer
    idx_selected_files.append(int(number_pattern.search(fname).group(1)))

In [None]:
# Order the selected file names by their numeric spectrum index
idx_selected_files = np.array(idx_selected_files)
idx_sorted_files = idx_selected_files.argsort()
list_selected_files = np.array(list_selected_files)
list_sorted_files = list_selected_files[idx_sorted_files]

In [None]:
# Number of selected input spectra
NSPEC = len(list_sorted_files)

In [None]:
# Report how many spectra were selected
print(f"Number of Spectra : {NSPEC}")

### Output path

In [None]:
# Output folder, taken from the 'OutFolder' entry of the GELATO parameters;
# the fit loop opens the per-spectrum "-results.fits" files from this folder.
output_path = params_gel['OutFolder']
print(f"output_path defined in json file : {output_path}")

In [None]:
# Create the output folder together with any missing parent directories;
# nothing happens when the folder already exists.
os.makedirs(output_path, exist_ok=True)

## Loop on files

In [None]:
# Fit each spectrum whose position in list_sorted_files lies in [NStart, NStop) with GELATO.
# For every spectrum the loop:
#   - looks up its redshift in df_objlist,
#   - shows the original image,
#   - runs the GELATO fit and reads back the results FITS file,
#   - saves the pulls table and the emission-lines table as CSV,
#   - appends one row to df_processing and checkpoints it to a "runCurrentStatus" CSV,
#     removing the checkpoint written by the previous iteration.
NStart = 190
NStop = 550

outputpathname  = os.path.basename(params_gel['OutFolder'])
filename_processing_final = f"runFinalStatus_fitgenato_{NStart}-{NStop}_{outputpathname}.csv"
filename_processing_previous = f"runCurrentStatus_fitgenato_{NStart}-{NStart}_{outputpathname}.csv"
df_processing = pd.DataFrame(columns=['index','tag','redshift','fitstatus','rchi2_m','r_chi2_std','redshift_m','redshift_std','exception'])

print(filename_processing_final)
print(filename_processing_previous)


def _checkpoint_status(df_status, current_csv, previous_csv):
    """Write the status table to ``current_csv`` and delete the superseded checkpoint.

    Returns ``current_csv`` so that the caller can keep it as the new "previous" file.
    """
    df_status.to_csv(current_csv)
    # On the first iteration both names are identical: never delete the file just written
    if previous_csv != current_csv and os.path.exists(previous_csv):
        os.remove(previous_csv)
    return current_csv


# Paths of the original spectra listed in the object table (loop invariant)
all_inputspecfilenames = df_objlist.Path.values

# Clamp the upper bound so that the loop never indexes past the end of the file list
for index in range(NStart, min(NStop, len(list_sorted_files))):

    filename_processing_current = f"runCurrentStatus_fitgenato_{NStart}-{index}_{outputpathname}.csv"
    print(filename_processing_current)

    shortfilename = list_sorted_files[index]
    fullfilename = os.path.join(path,shortfilename)
    path_spec = fullfilename
    tag_matches = re.findall(r".*_(SPEC.*)\.fits$", shortfilename)

    #define outputfilenames
    output_filename = shortfilename.split('.')[0] + "-results.fits"
    output_filename_pulls = shortfilename.split('.')[0] + "-pulls-results.csv"
    output_filename_emissionlines = shortfilename.split('.')[0] + "-emissionlines-results.csv"

    # find the row of the object table whose path contains the spectrum tag
    # NOTE(review): this is a substring test, so a tag such as "SPEC19" would also
    # match a path containing "SPEC190" — confirm the path format makes this unambiguous.
    idx_tag = None
    if tag_matches:
        tag_spec = tag_matches[0]
        for idx_candidate, filen in enumerate(all_inputspecfilenames):
            if tag_spec in filen:
                idx_tag = idx_candidate
                break
    else:
        tag_spec = ""

    # No matching row: record the failure instead of silently using another object's redshift
    if idx_tag is None:
        msg = f"No object-list entry found for tag '{tag_spec}' (file {shortfilename})"
        print(msg)
        df_processing.loc[index] = [index, tag_spec, -1, False, -1, -1, -1, -1, msg]
        filename_processing_previous = _checkpoint_status(
            df_processing, filename_processing_current, filename_processing_previous)
        continue

    df_row = df_objlist.iloc[idx_tag]
    redshift = df_row["z"]

    spec_name_sel = tag_spec
    title = f"{index}) {output_filename}, z={redshift:.3f}"

    print(index,spec_name_sel,"redshift",df_row,redshift)
    print(title)

    # access to the image array
    img = fors2.get_specimg(spec_name_sel)

    # get the image filename and path
    spec_sec_fileimg = fors2.get_specimgfile(spec_name_sel)

    # Show original image (set the condition to 0 to disable)
    if 1:
        fig = plt.figure(constrained_layout=True,figsize=(12,6))
        plt.imshow(img)
        ax = plt.gca()
        # Hide X and Y axes label marks
        ax.xaxis.set_tick_params(labelbottom=False)
        ax.yaxis.set_tick_params(labelleft=False)
        # Hide X and Y axes tick marks
        ax.set_xticks([])
        ax.set_yticks([])
        plt.show()

    # case when the input file does not exist
    if not os.path.exists(path_spec):
        msg = f"Input spectrum file {path_spec} does not exists"
        df_processing.loc[index] = [index, tag_spec, redshift,False,-1, -1,-1, -1,msg]
        filename_processing_previous = _checkpoint_status(
            df_processing, filename_processing_current, filename_processing_previous)
        continue

    #Get the spectrum
    spectrum = Table.read(path_spec)
    # Start with inverse variance
    ivar = spectrum['ivar']
    good = ivar > 0 # GELATO only looks at points with nonzero weights
    # Finally, let's load in the data
    wavl = 10**spectrum['loglam'][good]
    flux = spectrum['flux'][good]
    ivar = ivar[good]
    args = (wavl,flux,ivar) # These will be useful later
    # 1 Sigma boundary; computed unconditionally because two of the optional plots below use it
    sig = 1/np.sqrt(ivar)

    # show the spectrum and 1-sigma boundary (disabled by default)
    if 0:
        # Create figure
        fig, ax = pyplot.subplots(figsize=(15,7))

        # Plot Spectrum
        ax.fill_between(wavl,flux-sig,flux+sig,color='gray')
        ax.step(wavl,flux,where='mid',c='k',lw=0.5)

        # Axis limits
        ax.set(xlim=[wavl.min(),wavl.max()],ylim=[0,flux.max()])

        # Axis labels
        ax.set(xlabel=r'Obs. Wavelength [\AA]',ylabel=r'$F_\lambda$')
        ax.set_title(title)
        # Show figure
        pyplot.show()

    output_path_fullfilename = os.path.join(output_path,output_filename)
    output_path_fullfilename_pulls = os.path.join(output_path,output_filename_pulls)
    output_path_fullfilename_emissionlines = os.path.join(output_path,output_filename_emissionlines)

    # Run gelato fit
    model = gelato.gelato(params_gel,path_spec,redshift)

    # Load in results; the handle is closed in the `finally` clause below
    results = fits.open(output_path_fullfilename)

    #get summary
    summary = Table(results['SUMMARY'].data)
    df = summary.to_pandas()

    #emissionline plot (disabled by default)
    if 0:
        # Create figure
        fig, ax = pyplot.subplots(figsize=(15,3))

        # Plot Spectrum
        ax.step(10**summary['loglam'],summary['LINE'],where='mid',c='y',label='Emission Lines')
        ax.legend()

        # Axis limits
        ax.set(xlim=[wavl.min(),wavl.max()],ylim=[0,flux.max()])
        # Axis labels
        ax.set(xlabel=r'Obs. Wavelength [\AA]',ylabel=r'$F_\lambda$')
        ax.set_title(title)
        ax.grid()
        # Show figure
        pyplot.show()

    # data + model components plot (disabled by default)
    if 0:
        # Create figure
        fig, ax = pyplot.subplots(figsize=(15,7))

        # Plot Spectrum
        ax.fill_between(wavl,flux-sig,flux+sig,color='gray')
        ax.step(wavl,flux,where='mid',c='k',lw=0.5,label='Data')
        ax.step(10**summary['loglam'],summary['MODEL'],where='mid',c='r',label='Total Model')
        ax.step(10**summary['loglam'],summary['SSP'],where='mid',c='g',label='SSP Cont.')
        #ax.step(10**summary['loglam'],summary['PL'],where='mid',c='b',label='Power-Law Cont.')
        ax.step(10**summary['loglam'],summary['LINE'],where='mid',c='y',label='Emission Lines')
        ax.legend()

        # Axis limits
        ax.set(xlim=[wavl.min(),wavl.max()],ylim=[0,flux.max()])

        # Axis labels
        ax.set(xlabel=r'Obs. Wavelength [\AA]',ylabel=r'$F_\lambda$')
        ax.set_title(title)
        # Show figure
        pyplot.show()

    # A very simple view of fit results (disabled by default)
    if 0:
        MySimplePlotSpectrumWithFittedModel(output_path_fullfilename,redshift,title)

    try:
        # access to spectrum directly
        spectrum = SC.Spectrum(output_path_fullfilename,redshift,params_gel)

        # extract the pulls
        the_pulls = myplotfromresults(params_gel,output_path_fullfilename, redshift)
        # build the pulls table
        pulls_table = build_pulls_table(spectrum,the_pulls)
        #save the pulls
        pulls_table.to_csv(output_path_fullfilename_pulls)
        print(pulls_table)

        # Open Parameters extension of the fit results
        params_fit = Table(results['PARAMS'].data)

        # SSP_Redshift is divided by C to obtain a redshift
        fitted_redshift_mean = params_fit["SSP_Redshift"].mean()/C
        fitted_redshift_std = params_fit["SSP_Redshift"].std()/C
        fitted_rchi2_mean = params_fit["rChi2"].mean()
        # spread of the reduced chi2 (std, not mean)
        fitted_rchi2_std = params_fit["rChi2"].std()

        #get the emission-lines results
        emissionlines_table = DecodeParamsFitEmissionLines(params_fit)
        #save emission-lines
        emissionlines_table.to_csv(output_path_fullfilename_emissionlines)
        print(emissionlines_table)

        df_processing.loc[index] = [index, tag_spec, redshift,True,fitted_rchi2_mean, fitted_rchi2_std,fitted_redshift_mean, fitted_redshift_std,"No"]

    except Exception as inst:
        # Best effort: a failure on one spectrum is recorded and the loop goes on
        print(f">>>>>>>>>> Exception cautch for ==={index} )========{tag_spec}==== ")
        print(type(inst))    # the exception type
        print(inst.args)     # arguments stored in .args
        print(inst)          # __str__ allows args to be printed directly,
        df_processing.loc[index] = [index, tag_spec, redshift,False,-1, -1,-1, -1,inst]

    finally:
        # Release the FITS file handle so that handles do not pile up over the loop
        results.close()

    # handle temporary file
    filename_processing_previous = _checkpoint_status(
        df_processing, filename_processing_current, filename_processing_previous)
        
    
    

# Save file status

In [None]:
# Display the per-spectrum processing status table
df_processing

In [None]:
# Write the complete status table to the final ("runFinalStatus") CSV file
df_processing.to_csv(filename_processing_final) 