Fit One Spectrum and try to recover failure
----------------------------------

First let's import the packages we will need.


Analyse the outputs of a GELATO fit on a single spectrum

- author : Sylvie Dagoret-Campagne
- creation date : 2024-03-26
- update : 2024-05-26
- update : 2024-05-24 : version v2


- Kernel at CCIN2P3 : ``conda_desc_py310_pcigale``
- Kernel on my laptop : ``pcigale``

# Create dir
  ``ResultsFitInNb/``

In [None]:
# Import packages
import gelato
import numpy as np
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['font.size'] = 25
from matplotlib import pyplot # For plotting
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
# For loading in data
from astropy.io import fits
from astropy.table import Table 
import os,re
import pandas as pd

In [None]:
from astropy.modeling import models, fitting
from astropy import modeling
# define a model for a line
g_init = models.Gaussian1D(amplitude=1, mean=0, stddev=1)
# initialize a linear fitter
fit_g = fitting.LevMarLSQFitter()

In [None]:
from fors2pcigale.fors2starlightio import Fors2DataAcess

In [None]:
#from gelato.Plotting import  Plot, PlotFig,subplotplot
#from gelato.Plotting import subplotplot
from gelato.Plotting import logbarrier
from scipy.optimize import minimize

#import gelato.ConstructParams as CP

import gelato.Utility as U
import gelato.Plotting as P
import gelato.ConstructParams as CP

# GELATO
import gelato.Utility as U
import gelato.CustomModels as CM
import gelato.SpectrumClass as SC

from gelato.Constants import C

In [None]:
from libExampleFitInNb import *

In [None]:
import shutil
import json

## Fors2 Interface

In [None]:
fors2 = Fors2DataAcess()

## Gelato Parameters

In [None]:
version = "v3"

In [None]:
# Path to the parameters file
#path_params = './ExampleParametersFitInNb.json'
path_params = f"./ExampleParametersFitInNb_{version}.json"

# Create Parameters dictionary
params_gel = gelato.ConstructParams.construct(path_params)

# Set to not multiprocessing
params_gel['NProcess'] = 1

In [None]:
params_gel['EmissionGroups']

In [None]:
# Two-level summary of the emission-line configuration: one line per group,
# followed by one indented line per species giving its number of lines.
for emission_group in params_gel['EmissionGroups']:
    print("Group : " + emission_group["Name"])
    for species in emission_group['Species']:
        print("\t  Species : " + species['Name'] + " , Nlines = " + str(len(species['Lines'])))

## Table with Spectra name and Redshifts

In [None]:
#df = pd.read_csv("object_filelist_v0.csv",index_col=0)
filename_object_file_list = f"object_filelist_{version}.csv"
df_objectslist = pd.read_csv(filename_object_file_list,index_col=0)

## Input files before the fit, sorting and index

In [None]:
#path = "./spec_forgelato/v0"
path = f"./spec_forgelato/{version}"

In [None]:
list_all_files = os.listdir(path)

In [None]:
# Keep only the GELATO input spectra (specgelato_SPEC<id>.fits) and record the
# id embedded in each name so the files can be ordered by it afterwards.
spec_file_pattern = re.compile("specgelato_SPEC(.*)[.]fits$")
idx_selected_files = []
list_selected_files = []
for file in list_all_files:
    found = spec_file_pattern.match(file)  # match() anchors at the start of the name
    if found is None:
        continue
    list_selected_files.append(file)
    idx_selected_files.append(int(found.group(1)))

In [None]:
# Convert both lists to arrays so the file names can be reordered with the
# permutation that sorts the spectrum ids in increasing order.
idx_selected_files = np.array(idx_selected_files)
list_selected_files = np.array(list_selected_files)
idx_sorted_files = np.argsort(idx_selected_files)
# File names ordered by increasing spectrum id.
list_sorted_files = list_selected_files[idx_sorted_files]

In [None]:
NSPEC = len(list_sorted_files)

## Choose One file

In [None]:
# Position (in the id-sorted file list) of the spectrum to fit.
index = 8
shortfilename = list_sorted_files[index]
fullfilename = os.path.join(path,shortfilename) 
path_spec = fullfilename
# Extract the "SPEC..." tag from the file name.  The dot before "fits" is
# escaped so that it only matches a literal ".", and the pattern is a raw
# string.  re.findall returns a list (empty when the name does not match).
tag_spec = re.findall(r".*_(SPEC.*)[.]fits$", shortfilename)

In [None]:
shortfilename.split('.')[0]

In [None]:
tag_spec

### Define output filenames

#### Fit results

In [None]:
output_filename = shortfilename.split('.')[0] + "-results.fits"

In [None]:
output_filename

#### pulls and emission-line results

In [None]:
output_filename_pulls = shortfilename.split('.')[0] + "-pulls-results.csv"
output_filename_emissionlines = shortfilename.split('.')[0] + "-emissionlines-results.csv"

#### Find the redshift

In [None]:
# Find the catalogue row whose input path contains the spectrum tag and read
# its redshift from the "z" column.
#
# `tag_spec` arrives as the list produced by re.findall and is collapsed to a
# single string here; the isinstance test keeps the cell safe to re-run once
# that has already happened.
if not isinstance(tag_spec, str):
    if len(tag_spec) == 0:
        raise ValueError("tag_spec is empty: no SPEC tag was extracted from the selected file name")
    tag_spec = tag_spec[0]

all_inputspecfilenames = df_objectslist.Path.values
# First row whose path contains the tag, or None.  Failing loudly here avoids
# silently picking up the redshift of an unrelated object.
idx_tag = next(
    (row_number for row_number, filen in enumerate(all_inputspecfilenames) if tag_spec in filen),
    None,
)
if idx_tag is None:
    raise ValueError(f"{tag_spec!r} was not found in the 'Path' column of the object list")

df_row = df_objectslist.iloc[idx_tag]
redshift = df_row["z"]

In [None]:
spec_name_sel = tag_spec

In [None]:
spec_name_sel

In [None]:
# Access the image array associated with the selected spectrum
img = fors2.get_specimg(spec_name_sel)

# Get the filename (with path) of that image
spec_sec_fileimg = fors2.get_specimgfile(spec_name_sel)

In [None]:
# Display the image of the selected spectrum as a bare picture:
# no tick labels and no tick marks on either axis.
fig = plt.figure(constrained_layout=True,figsize=(12,6))
plt.imshow(img)
ax = plt.gca()
ax.tick_params(labelbottom=False, labelleft=False)
ax.set(xticks=[], yticks=[])
plt.show()

In [None]:
print(df_row,redshift)

In [None]:
title = f"{index}) {output_filename}, z={redshift:.3f}" 

#### Get the spectrum

In [None]:
# Read the input spectrum table (columns used below: 'loglam', 'flux', 'ivar').
spectrum = Table.read(path_spec)

# Start with inverse variance
ivar = spectrum['ivar']
good = ivar > 0 # GELATO only looks at points with nonzero weights

# Finally, let's load in the data, restricted to the points with ivar > 0.
# 'loglam' is exponentiated in base 10 to obtain the wavelength.
wavl = 10**spectrum['loglam'][good]
flux = spectrum['flux'][good]
ivar = ivar[good]
args = (wavl,flux,ivar) # These will be useful later

In [None]:
spectrum[:5]

Let's go ahead and plot our spectrum to get an idea of what we're dealing with.

In [None]:
# Create figure
fig, ax = pyplot.subplots(figsize=(15,7))

# Plot Spectrum
sig = 3/np.sqrt(ivar) # 3 Sigma boundary
ax.fill_between(wavl,flux-sig,flux+sig,color='gray')
ax.step(wavl,flux,where='mid',c='k',lw=0.5)

# Axis limits
ax.set(xlim=[wavl.min(),wavl.max()],ylim=[0,flux.max()])

# Axis labels
ax.set(xlabel=r'Obs. Wavelength [\AA]',ylabel=r'$F_\lambda$')
ax.set_title(title)
# Show figure
pyplot.show()

The main gelato function takes three inputs.
* The path to the parameters file or the parameters dictionary.
* The path to the spectrum.
* The redshift of the spectrum.

We already have the last two, and we need to take a little precaution with the first.
The main gelato function will only return the final model if the code is being run without multiprocessing (as the return statement can break Python multiprocessing). So we can either change the Parameters JSON file, or edit the parameters dictionary. 

## Output for results

In [None]:
output_path = params_gel['OutFolder']

In [None]:
# Create the output folder named in the parameter file.  makedirs also creates
# any missing parent folder and is a no-op when the folder already exists.
os.makedirs(output_path, exist_ok=True)

In [None]:
print(f"output_path defined in json file : {output_path}")

In [None]:
output_path_fullfilename = os.path.join(output_path,output_filename)
output_path_fullfilename_pulls = os.path.join(output_path,output_filename_pulls)
output_path_fullfilename_emissionlines = os.path.join(output_path,output_filename_emissionlines)

## Run Gelato Fit

We are now ready to run GELATO. Note, before you do this, ensure the results directory exists, either by running the Example from the README file or creating it. It will return the final callable model, however it won't be used in this notebook. 

In [None]:
model = gelato.gelato(params_gel,path_spec,redshift)

## Results of Gelato Fit

The results have been saved in the output folder defined by ``OutFolder`` in the parameter file. Let's go ahead and load them in. We will print all the extensions of the results FITS file.

### 1) results

In [None]:
# Load in results
results = fits.open(output_path_fullfilename)

# Print FITS extensions
results.info()

We have two FITS extensions, SUMMARY and PARAMS. They are described in more detail in the README File but let's play around with them directly. Let's go ahead and take a look inside the SUMMARY extension. As we can see, it is a binary FITS Table.

### Summary of fitted model

In [None]:
summary = Table(results['SUMMARY'].data)
summary

In this table, we have the original spectrum along with the various model components, we can go ahead and plot them.

In [None]:
df = summary.to_pandas()

In [None]:
# Create figure
fig, ax = pyplot.subplots(figsize=(15,3))

# Plot Spectrum
ax.step(10**summary['loglam'],summary['LINE'],where='mid',c='y',label='Emission Lines')
ax.legend()

# Axis limits
ax.set(xlim=[wavl.min(),wavl.max()],ylim=[0,flux.max()])
# Axis labels
ax.set(xlabel=r'Obs. Wavelength [\AA]',ylabel=r'$F_\lambda$')
ax.set_title(title)
ax.grid()
# Show figure
pyplot.show()

In [None]:
# Create figure
fig, ax = pyplot.subplots(figsize=(15,7))

# Plot Spectrum
ax.fill_between(wavl,flux-sig,flux+sig,color='gray')
ax.step(wavl,flux,where='mid',c='k',lw=0.5,label='Data')
ax.step(10**summary['loglam'],summary['MODEL'],where='mid',c='r',label='Total Model')
ax.step(10**summary['loglam'],summary['SSP'],where='mid',c='g',label='SSP Cont.')
#ax.step(10**summary['loglam'],summary['PL'],where='mid',c='b',label='Power-Law Cont.')
ax.step(10**summary['loglam'],summary['LINE'],where='mid',c='y',label='Emission Lines')
ax.legend()

# Axis limits
ax.set(xlim=[wavl.min(),wavl.max()],ylim=[0,flux.max()])

# Axis labels
ax.set(xlabel=r'Obs. Wavelength [\AA]',ylabel=r'$F_\lambda$')
ax.set_title(title)
# Show figure
pyplot.show()

Looks great! You can see an example of the GELATO generated plots in the results folder, but this will let you incorporate GELATO fits easily into your own work. Let's go ahead and take a look at the PARAMS extension. This is a much larger table! It's made up of the parameters from each bootstrap iteration. 

### A very simple view of fit results 

In [None]:
MySimplePlotSpectrumWithFittedModel(output_path_fullfilename,redshift,title)

### 2) Access to the fitted spectrum object directly

In [None]:
 spectrum = SC.Spectrum(output_path_fullfilename,redshift,params_gel)

In [None]:
spectrum.p

In [None]:
spectrum.p["EmissionGroups"]

In [None]:
len(spectrum.p["EmissionGroups"])

### Complete plot result

In [None]:
# Post-process the fit results and flag the spectrum as a failure if anything
# goes wrong along the way.
#
# Steps: reload the fitted spectrum, compute and save the pulls, summarise the
# PARAMS extension (redshift and reduced chi2), then decode and save the
# emission-line table.  Any exception sets FLAG_FAILURE to True; the cells
# further down use that flag to decide whether the recovery procedure runs.
FLAG_FAILURE = False
try:
    # access to spectrum directly
    spectrum = SC.Spectrum(output_path_fullfilename,redshift,params_gel)

    # extract the pulls
    the_pulls = myplotfromresults(params_gel,output_path_fullfilename, redshift)
    # build the pulls table
    pulls_table = build_pulls_table(spectrum,the_pulls)
    #save the pulls
    pulls_table.to_csv(output_path_fullfilename_pulls) 
    print(pulls_table)

    #go to the fit results
    # Open Parameters extension
    params_fit = Table(results['PARAMS'].data)
    #print(params_fit)

    # Mean and spread of the fitted quantities over the rows of PARAMS.
    # "SSP_Redshift" is divided by the constant C imported from
    # gelato.Constants -- presumably it is stored as a velocity; TODO confirm.
    fitted_redshift_mean = params_fit["SSP_Redshift"].mean()/C
    fitted_redshift_std = params_fit["SSP_Redshift"].std()/C
    fitted_rchi2_mean = params_fit["rChi2"].mean()
    fitted_rchi2_std = params_fit["rChi2"].std()

    # Difference between the fitted and the catalogue redshift.  This must use
    # the name assigned just above: an undefined name here raises NameError,
    # which the handler below would report as a fit failure.
    dz = fitted_redshift_mean - redshift
    print(f"z1 = {redshift:.5f} , z2 ={fitted_redshift_mean:.5f}, dz = {dz:.5f}")

    reduced_chi2 = fitted_rchi2_mean
    print(f"Reduced Chi2 = {reduced_chi2:.2f}")
    
    #get the emission-lines results
    emissionlines_table = DecodeParamsFitEmissionLines(params_fit)
    #save emission-lines
    emissionlines_table.to_csv(output_path_fullfilename_emissionlines)
    print(emissionlines_table)
   
   
except Exception as inst:
    # Deliberately broad: any problem while reading or decoding the results is
    # reported and recorded in the flag instead of stopping the notebook.
    print(f">>>>>>>>>> Exception cautch for ==={index} )========{tag_spec}==== ")
    print(type(inst))    # the exception type
    print(inst.args)     # arguments stored in .args
    print(inst)          # __str__ allows args to be printed directly,
 
    
    FLAG_FAILURE = True

In [None]:
# Stop the notebook here (AssertionError) unless the fit above was flagged as
# failed: the cells below only implement the recovery procedure.
assert FLAG_FAILURE

In [None]:
print(">>>>>>>>>>>>>>>>>>>> GELATO FIT FAILURE <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")

In [None]:
def updateparameterfile(oldver,newver,oldpath):
    """Clone a GELATO parameter file under a new version tag.

    The new file path is ``oldpath`` with every occurrence of ``oldver``
    replaced by ``newver``.  The original file is copied there, the copy is
    loaded with ``gelato.ConstructParams.construct``, the same substitution is
    applied to its ``'OutFolder'`` entry, and the resulting dictionary is
    written back over the copy as JSON.

    Parameters
    ----------
    oldver : str
        Version tag present in ``oldpath`` and in the ``'OutFolder'`` value.
    newver : str
        Version tag that replaces ``oldver``.
    oldpath : str
        Path of the existing parameter file.

    Returns
    -------
    str
        Path of the newly written parameter file.
    """
    recovered_path = oldpath.replace(oldver, newver)

    # The copy must exist on disk before it can be loaded from its own path.
    shutil.copyfile(oldpath, recovered_path)
    recovered_params = gelato.ConstructParams.construct(recovered_path)

    old_outfolder = recovered_params['OutFolder']
    recovered_params['OutFolder'] = old_outfolder.replace(oldver, newver)

    # Overwrite the copy with the updated parameters.
    with open(recovered_path, 'w') as handle:
        json.dump(recovered_params, handle)

    return recovered_path

In [None]:
recoverversion = f"recovery{version}"

In [None]:
path_params_recover= updateparameterfile(version,recoverversion,path_params)

In [None]:
params_gel_recover = gelato.ConstructParams.construct(path_params_recover)

In [None]:
# Create <OutFolder>/<spectrum tag>/ for the recovery run and make GELATO write
# its results there.
output_path = params_gel_recover['OutFolder']
topdirout = output_path
dirout = os.path.join(topdirout,spec_name_sel)
# One call creates the top folder and the per-spectrum sub-folder, and does
# nothing when they already exist.
os.makedirs(dirout, exist_ok=True)

#reindex the output under spec_name_sel folder
# NOTE: this overwrites the value read at the top of the cell, so running the
# cell a second time nests one more spec_name_sel level.
params_gel_recover['OutFolder'] = dirout

In [None]:
# Remember the path of the results file of the first fit before the
# output_* names are rebound to the recovery folder below.
input_path_fullfilename = output_path_fullfilename 
# From here on, the output paths point inside the recovery OutFolder.
output_path = params_gel_recover['OutFolder']
output_path_fullfilename = os.path.join(output_path,output_filename)
output_path_fullfilename_pulls = os.path.join(output_path,output_filename_pulls)
output_path_fullfilename_emissionlines = os.path.join(output_path,output_filename_emissionlines)

In [None]:
def splitspectrum(spec_name_sel,input_path_fullfilename,redshift,params_gel,splitfract=0.5):
    """Split a spectrum file into a blue ("left") and a red ("right") part.

    The input table is read, points with ``ivar <= 0`` are dropped, and the
    remaining points are separated at the wavelength
    ``min + splitfract * (max - min)``.  Points at or below that wavelength go
    to the left file, the others to the right file.  Both parts are written as
    FITS tables with columns ``loglam``, ``flux`` and ``ivar`` in
    ``params_gel['OutFolder']`` (created if missing), overwriting existing
    files of the same name.

    Parameters
    ----------
    spec_name_sel : str
        Tag of the spectrum; only used in the log message.
    input_path_fullfilename : str
        Path of the table to split; must provide the columns ``loglam``,
        ``flux`` and ``ivar``.
    redshift : float
        Redshift of the spectrum; only used in the log message.
    params_gel : dict
        GELATO parameters; only the ``'OutFolder'`` entry is read.
    splitfract : float, optional
        Fraction of the wavelength range assigned to the left part, strictly
        between 0 and 1.

    Returns
    -------
    tuple of str
        Paths of the left and right files, in that order.

    Raises
    ------
    ValueError
        If ``splitfract`` is not strictly between 0 and 1, which would leave
        one of the two parts without usable data.
    """
    if not 0.0 < splitfract < 1.0:
        raise ValueError(f"splitfract must be strictly between 0 and 1, got {splitfract}")

    print("splitspectrum::",spec_name_sel,input_path_fullfilename,redshift)
    spectrum = Table.read(input_path_fullfilename)

    # GELATO only looks at points with nonzero weights
    ivar = spectrum['ivar']
    good = ivar > 0
    wavl = 10**spectrum['loglam'][good]
    flux = spectrum['flux'][good]
    ivar = ivar[good]

    # Wavelength at which the spectrum is cut
    wlsplit = wavl.min() + splitfract*(wavl.max()-wavl.min())
    is_left = wavl <= wlsplit
    is_right = wavl > wlsplit

    column_names = ('loglam', 'flux', 'ivar')
    t_left = Table([np.log10(wavl[is_left]), flux[is_left], ivar[is_left]], names=column_names)
    t_right = Table([np.log10(wavl[is_right]), flux[is_right], ivar[is_right]], names=column_names)

    dirout = params_gel['OutFolder']
    os.makedirs(dirout, exist_ok=True)

    # Output names: <input root>_<percent>_left.fits / _right.fits, where the
    # root is the input file name up to its first dot.
    frac_str = f"_{splitfract*100:.0f}"
    spectrum_rootfilename = os.path.basename(input_path_fullfilename).split(".")[0]
    spectrum_fullfilename_left = os.path.join(dirout, spectrum_rootfilename + frac_str + "_left.fits")
    spectrum_fullfilename_right = os.path.join(dirout, spectrum_rootfilename + frac_str + "_right.fits")

    t_left.write(spectrum_fullfilename_left, format="fits",overwrite=True)
    t_right.write(spectrum_fullfilename_right, format="fits",overwrite=True)
    return spectrum_fullfilename_left,spectrum_fullfilename_right

In [None]:
path_spec

In [None]:
path_spec_left,path_spec_right = splitspectrum(spec_name_sel,path_spec,redshift,params_gel_recover,splitfract=0.7)

In [None]:
model_left = gelato.gelato(params_gel_recover,path_spec_left,redshift)

In [None]:
model_right = gelato.gelato(params_gel_recover,path_spec_right,redshift)