# Smooth Gaia spectra 

- author Sylvie Dagoret-Campagne
- affiliation IJCLab
- creation date : 2024/10/04
- update : 2024/10/05 : 

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib as mpl
import matplotlib.colors as colors
import matplotlib.cm as cmx
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LogNorm
from matplotlib.gridspec import GridSpec
import pandas as pd

import matplotlib.ticker                         # here's where the formatter is
import os,sys
import re
import pandas as pd

from astropy.io import fits
from astropy import units as u
from astropy import constants as c

# Notebook-wide matplotlib defaults: figure size, then one common font size
# for axis labels, titles and tick labels.
plt.rcParams["figure.figsize"] = (8,6)
for _font_key in ("axes.labelsize", 'axes.titlesize', 'xtick.labelsize', 'ytick.labelsize'):
    plt.rcParams[_font_key] = 'xx-large'

import pickle
#from scipy.interpolate import RegularGridInterpolator
import h5py

In [None]:
from scipy.interpolate import interp1d,CubicSpline

In [None]:
# Let pandas display up to 500 columns when a DataFrame is shown.
pd.set_option('display.max_columns', 500)

In [None]:
# Make the rubinsimphot sources importable on the machines where the package
# is used from a checkout located under the home directory.
machine_name = os.uname().nodename
path_rubinsimphot = "repos/repos_w_2024_38/rubinsimphot/src"
if 'sdf' in machine_name or 'dagoret-nb' in machine_name:
    if 'sdf' in machine_name:
        #machine_name_usdf = 'sdfrome001'
        print("Set environment for USDF")
    else:
        print("Set environment for USDF Rubin Science Platform")
    # expanduser("~") still works when $HOME is unset, whereas
    # os.getenv("HOME") would return None and make os.path.join raise.
    newpythonpath = os.path.join(os.path.expanduser("~"), path_rubinsimphot)
    # Re-running this cell must not pile up duplicate entries in sys.path.
    if newpythonpath not in sys.path:
        sys.path.append(newpythonpath)
elif 'mac' in machine_name:
    print("Be sure to run this notebook in conda environment named conda_py310")
else:
    print(f"Your current machine name is {machine_name}. Check your python environment")

## Smoothing
https://scipy-cookbook.readthedocs.io/items/SignalSmooth.html

In [None]:
def smooth(x,window_len=11,window='hanning'):
    """Smooth the data using a window with requested size.

    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal
    (with the window size) in both ends so that transient parts are minimized
    in the beginning and end part of the output signal.

    input:
        x: the input signal (1-dimensional numpy array)
        window_len: the dimension of the smoothing window; should be an odd integer
        window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
            flat window will produce a moving average smoothing.

    output:
        the smoothed signal; x itself is returned unchanged when window_len < 3

    raises:
        ValueError: if x is not 1-dimensional, if x is shorter than window_len,
            or if window is not one of the supported names

    example:

    t = np.linspace(-2, 2, 41)
    x = np.sin(t) + np.random.randn(len(t)) * 0.1
    y = smooth(x)

    see also:

    numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    scipy.signal.lfilter

    TODO: the window parameter could be the window itself if an array instead of a string
    NOTE: len(output) == len(input) + window_len - 1. For an odd window_len the
    samples aligned with the input are y[h:len(y) - h] with h = (window_len - 1) // 2.
    """

    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    # Explicit dispatch table instead of eval(): only these names are accepted.
    window_functions = {
        'flat': np.ones,  # moving average
        'hanning': np.hanning,
        'hamming': np.hamming,
        'bartlett': np.bartlett,
        'blackman': np.blackman,
    }
    if window not in window_functions:
        raise ValueError("Window is one of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # Pad both ends with window_len-1 mirrored samples of the signal.
    s = np.r_[x[window_len-1:0:-1], x, x[-2:-window_len-1:-1]]

    w = window_functions[window](window_len)

    # Normalise the window so that a constant signal keeps its level.
    y = np.convolve(w/w.sum(), s, mode='valid')

    return y


In [None]:
np.convolve?

In [None]:
def continuum(x,y,N):
    """Estimate a continuum with an N-point moving average.

    input:
        x: abscissa values (1-dimensional, same length as y)
        y: signal values
        N: number of consecutive samples averaged together (1 <= N <= len(y))

    output:
        x_new, contin: the averaged abscissa and the averaged signal, both of
        length len(y) - N + 1

    raises:
        ValueError: if x and y are not 1-dimensional arrays of equal length,
            or if N is outside 1..len(y)
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if x.ndim != 1 or x.shape != y.shape:
        raise ValueError("x and y must be 1 dimension arrays of the same length.")
    if not 1 <= N <= y.size:
        raise ValueError("N must be between 1 and the number of samples.")

    kernel = np.ones((N,))/N
    contin = np.convolve(y, kernel, mode='valid')
    # Each averaged value belongs to the centre of its window, so the abscissa
    # is averaged with the same kernel. Spreading the points over
    # [x[0], x[-1]] would stretch the curve along x.
    x_new = np.convolve(x, kernel, mode='valid')
    return x_new,contin

## Config 

In [None]:
# Directory and name of the HDF5 input file, then the joined path used to open it.
input_path = "data_gaiaspectra"
input_file_h5  = 'GAIAspectra.hdf5'
input_fullfile_h5 = os.path.join(input_path,input_file_h5)

## Read file

In [None]:
# Open the HDF5 file read-only. The handle is kept open because later cells
# read groups and attributes from it.
# NOTE(review): hf is never closed in the visible code.
hf =  h5py.File(input_fullfile_h5, 'r') 
# Top-level keys of the file; each one is fetched as a group further down.
list_of_keys = list(hf.keys())

In [None]:
# Take the first key as a sample entry
key_sel =  list_of_keys[0]
# Fetch the group stored under that key; its attribute names are listed next
group = hf.get(key_sel)  

In [None]:
# Names of all the attributes attached to the selected group
all_subgroup_keys = list(group.attrs.keys())

In [None]:
def GetColumnHfData(hff,list_of_keys,nameval):
    """
    Collect one attribute across several groups of an h5 file.

    parameters
      hff          : descriptor of h5 file
      list_of_keys : keys of the groups to visit
      nameval      : name of the attribute read on each group

    return
      list holding the attribute value of each group, in the order of list_of_keys
    """
    return [hff.get(group_key).attrs[nameval] for group_key in list_of_keys]

In [None]:
# Summary table: one column per attribute name, one row per key of the file
df_info = pd.DataFrame()
for key in all_subgroup_keys:
    df_info[key] = GetColumnHfData(hf, list_of_keys, key)

In [None]:
# One DataFrame per key, holding the four datasets read from its group.
all_df = []

idx = 0
for idx, key in enumerate(list_of_keys, start=1):
    group = hf.get(key)
    df = pd.DataFrame()
    for column in ("WAVELENGTH", "FLUX", "STATERROR", "SYSERROR"):
        df[column] = np.array(group.get(column))
    all_df.append(df)

## Plot

In [None]:
def plotspec(tag,df,wlmin=300.,wlmax=1100.):
    """
    Plot the flux of one gaia spectrum versus wavelength.

    tag   : name of the star to appear in the title
    df    : dataframe for gaia, with columns "WAVELENGTH" and "FLUX"
    wlmin : lower edge of the displayed wavelength range (default 300.)
    wlmax : upper edge of the displayed wavelength range (default 1100.)

    The y axis runs from 0 to 1.2 times the largest flux found strictly
    inside (wlmin, wlmax). When no finite flux is available in that range
    the automatic y limits are kept.
    """

    fig, ax = plt.subplots(1,1,figsize=(8,5))
    title = "gaia : " + tag
    # legend=False: the legend is drawn once, by ax.legend() below.
    df.plot(x="WAVELENGTH",y="FLUX",ax=ax,marker='.',color='r',legend=False,label="gaia")
    ax.set_xlim(wlmin,wlmax)

    # Boolean mask rather than positions from np.where: positions are only
    # valid as labels when df has the default 0..n-1 index.
    wl = df.WAVELENGTH
    in_range = (wl > wlmin) & (wl < wlmax)
    fl = df.FLUX[in_range]
    if len(fl) > 0:
        flmax = np.max(fl)*1.2
        if np.isfinite(flmax):
            ax.set_ylim(0.,flmax)

    ax.legend()
    ax.set_xlabel("$\\lambda$ (nm)")
    ax.set_ylabel("Flux erg/cm$^2$/s/nm ")
    ax.set_title(title)
    plt.show()

In [None]:
# Position of the spectrum to inspect (used for both df_info and all_df)
index = 0

In [None]:
# Attribute values of the selected spectrum
row = df_info.iloc[index]

In [None]:
# Build the label used in plot titles from the two name attributes
hdname = row["HD_name"]
gaianame = row["GAIA_ED3_Name"]
tag = f"{hdname}_{gaianame}"

In [None]:
# Spectrum table (WAVELENGTH, FLUX, STATERROR, SYSERROR) of the selected entry
df = all_df[index]

In [None]:
# Show the raw spectrum of the selected entry
plotspec(tag,df)

## Interpolate and smooth

In [None]:
def plotspecinterpsmooth(tag,df,wli,fli,wls=None,fls=None,tau=0,wlmin=300.,wlmax=1100.):
    """
    Plot a gaia spectrum together with its interpolation and, optionally,
    a smoothed version.

    tag   : name of the star to appear in the title
    df    : dataframe for gaia, with columns "WAVELENGTH" and "FLUX"
    wli   : wavelengths of the interpolated spectrum
    fli   : fluxes of the interpolated spectrum
    wls   : wavelengths of the smoothed spectrum (optional)
    fls   : fluxes of the smoothed spectrum (optional)
    tau   : smoothing parameter shown in the legend of the smoothed curve
    wlmin : lower edge of the displayed wavelength range (default 300.)
    wlmax : upper edge of the displayed wavelength range (default 1100.)

    The smoothed curve is drawn only when both wls and fls are given and wls
    holds more than one point. The y axis runs from 0 to 1.2 times the
    largest gaia flux found strictly inside (wlmin, wlmax). When no finite
    flux is available in that range the automatic y limits are kept.
    """

    fig, ax = plt.subplots(1,1,figsize=(8,5))
    title = "interpolation-gaia: " + tag
    # legend=False: the legend is drawn once, by ax.legend() below.
    df.plot(x="WAVELENGTH",y="FLUX",ax=ax,marker='.',color='b',legend=False,label="gaia")
    ax.plot(wli,fli,"g-.",label="interpolation",lw=2)
    # None defaults replace the former mutable list defaults; a one-element
    # sequence such as [0] still means "no smoothed curve".
    if wls is not None and fls is not None and len(wls) > 1:
        ax.plot(wls,fls,"r-",label=f"smoothed ({tau})")
    ax.set_xlim(wlmin,wlmax)

    # Boolean mask rather than positions from np.where: positions are only
    # valid as labels when df has the default 0..n-1 index.
    wl = df.WAVELENGTH
    in_range = (wl > wlmin) & (wl < wlmax)
    fl = df.FLUX[in_range]
    if len(fl) > 0:
        flmax = np.max(fl)*1.2
        if np.isfinite(flmax):
            ax.set_ylim(0.,flmax)

    ax.legend()
    ax.set_xlabel("$\\lambda$ (nm)")
    ax.set_ylabel("Flux erg/cm$^2$/s/nm ")
    ax.set_title(title)
    plt.show()

In [None]:
# Regular wavelength grid from 300 to 1049 in steps of 1, used for interpolation
WL = np.arange(300.,1050.,1.)

In [None]:
# Nearest-neighbour interpolator of the selected spectrum; values outside the
# measured wavelength range are extrapolated instead of raising an error.
finterp = interp1d(df.WAVELENGTH, df.FLUX, kind = 'nearest',fill_value="extrapolate")
# Alternative kept for reference: natural cubic spline
#finterp = CubicSpline(df.WAVELENGTH, df.FLUX, axis=0, bc_type='natural',extrapolate=True)

In [None]:
# Compare the measured spectrum with its interpolation on the regular grid
plotspecinterpsmooth(tag,df,WL,finterp(WL))

### Smooth

In [None]:
sm_const = 35  # length of the smoothing window, in samples of WL
#WL_sm,fl_sm = continuum(WL,finterp(WL),sm_const)

# The flux and the wavelength grid go through the same window, so both outputs
# have the same length (smooth returns more samples than it receives).
smooth_options = dict(window_len=sm_const, window="hanning")
fl_sm = smooth(finterp(WL), **smooth_options)
WL_sm = smooth(WL, **smooth_options)

In [None]:
# Same comparison, with the smoothed curve overlaid and the window length in the legend
plotspecinterpsmooth(tag,df,WL,finterp(WL),WL_sm,fl_sm,tau=sm_const) 