# Use selected training set to create input for fitting code

In [1]:
# Compatibility with Python 3
from __future__ import (absolute_import, division, print_function)

try:
    %matplotlib inline
    %config InlineBackend.figure_format='retina'
except:
    pass

# Basic Tools
import numpy as np
import copy
import pickle
from astropy.table import Table
from astropy.io import fits
import corner
import matplotlib.pyplot as plt
from scipy.io import readsav
from scipy.ndimage.filters import convolve
import time

# The Cannon
# import thecannon as tc

In [2]:
# Chemical element symbols, ordered by atomic number and grouped one
# range per line for readability.
galah_elements = [
    # Li to O
    "Li", "C", "O",
    # Na to Si
    "Na", "Mg", "Al", "Si",
    # K to Zn
    "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Co", "Ni", "Cu", "Zn",
    # Rb to Ru
    "Rb", "Sr", "Y", "Zr", "Mo", "Ru",
    # Ba to Eu
    "Ba", "La", "Ce", "Nd", "Sm", "Eu",
]

In [3]:
# apogee = Table.read('/Users/svenbuder/Surveys/APOGEE_DR16.fits')

In [4]:
# Directory holding the synthetic-spectrum IDL save files (file names carry a
# model index and a CCD number).
# NOTE(review): absolute, machine-specific path -- adjust for your environment.
synthesis_files = '/Users/svenbuder/GALAH_DR4/spectrum_grids/marcs2014/specout/'
# Pickle file that will receive the common wavelength grid.
wavelength_file = 'training_sets/solar_twin_5steps_training_marcs2014_wavelength.pickle'

# Table describing the grid models; its INDEX, TEFF and FEH columns are used below.
training_set = Table.read('../../spectrum_grids/marcs2014/marcs2014_scaledsolar_210720.fits')
# Look up the INDEX values of all models with TEFF == 5250 and FEH == -3.
# The result is neither assigned nor the last expression of the cell, so it is
# discarded; it only documents how the indices used below were found.
training_set['INDEX'][np.where((training_set['TEFF'] == 5250) & (training_set['FEH'] == -3))[0]]
# Model indices used for the three examples below: 2159, 2216, 3967

# First record of the CCD 1 synthesis output for each of the three models.
# NOTE(review): the suffixes 0p5 / 2p5 / 4p5 presumably denote logg = 0.5 / 2.5 / 4.5 -- confirm.
example_0p5 = readsav(synthesis_files+'marcs2014_scaledsolar_210720_2159_ccd1_smod_sint.sav').results[0]
example_2p5 = readsav(synthesis_files+'marcs2014_scaledsolar_210720_2216_ccd1_smod_sint.sav').results[0]
example_4p5 = readsav(synthesis_files+'marcs2014_scaledsolar_210720_3967_ccd1_smod_sint.sav').results[0]



In [5]:
def gaussbroad(w, s, hwhm):
    """
    Smooths a spectrum by convolution with a gaussian of specified hwhm.

    Parameters
    ----------
    w : array[n]
        wavelength scale of spectrum to be smoothed. Only its length and its
        first and last values are used, i.e. a uniform dispersion is assumed.
    s : array[n]
        spectrum to be smoothed
    hwhm : float
        half width at half maximum of smoothing gaussian, in the units of w.

    Returns
    -------
    sout : array[n]
        the gaussian-smoothed spectrum. If hwhm is not positive, the input
        ``s`` itself is returned (no copy). If hwhm is at least five times
        the wavelength range, a constant array holding the mean of ``s`` is
        returned.

    Warns
    -----
    UserWarning
        if hwhm is negative (the spectrum is then returned unsmoothed).

    History
    -------
        Dec-90 GB,GM
            Rewrote with fourier convolution algorithm.
        Jul-91 AL
            Translated from ANA to IDL.
        22-Sep-91 JAV
            Relaxed constant dispersion check# vectorized, 50% faster.
        05-Jul-92 JAV
            Converted to function, handle nonpositive hwhm.
        Oct-18 AW
            Python version
    """
    # Imported locally so the function does not depend on a module-level
    # logger object, which this notebook never defines.
    import warnings

    # Warn user if hwhm is negative.
    if hwhm < 0:
        warnings.warn("Forcing negative smoothing width to zero.", stacklevel=2)

    # Return input argument if half-width is nonpositive.
    if hwhm <= 0:
        return s  # true: no broadening

    nw = len(w)  # number of points in spectrum
    wrange = w[-1] - w[0]

    # A kernel much wider than the spectrum flattens it to its mean. This
    # check comes before the dispersion is computed so that a one-pixel
    # spectrum (wrange == 0, nw - 1 == 0) never triggers a division by zero.
    if hwhm >= 5 * wrange:
        return np.full(nw, np.sum(s) / nw)

    # Calculate (uniform) dispersion.
    dw = wrange / (nw - 1)  # wavelength change per pixel

    # Make smoothing gaussian, extended to 4 sigma.
    # 4.0 / sqrt(2.0*ln(2.0)) = 3.3972872 and sqrt(ln(2.0)) = 0.83255461
    # sqrt(ln(2.0)/pi) = 0.46971864 (*1.0000632 to correct for >4 sigma wings)
    nhalf = int(3.3972872 * hwhm / dw)  # number of points in half gaussian
    ng = 2 * nhalf + 1  # number of points in gaussian (odd!)
    wg = dw * (
        np.arange(ng, dtype=float) - (ng - 1) / 2
    )  # wavelength scale of gaussian
    xg = (0.83255461 / hwhm) * wg  # convenient abscissa
    gpro = (0.46974832 * dw / hwhm) * np.exp(-xg * xg)  # unit area gaussian w/ FWHM
    # Renormalise so the sampled kernel sums to exactly one (flux conserving).
    gpro = gpro / np.sum(gpro)

    # Direct convolution; mode="nearest" extends the spectrum beyond both ends
    # by repeating the edge values, which limits edge artefacts.
    sout = convolve(s, gpro, mode="nearest")

    return sout

In [6]:
def apply_gauss_broad(wave, smod, ipres=30000, debug=True):
    """
    Apply Gaussian instrumental broadening to a model spectrum.

    Parameters
    ----------
    wave : array[n]
        wavelength scale of the spectrum; the kernel width is derived from
        its first element only.
    smod : array[n]
        spectrum to be broadened
    ipres : float
        resolving power; the half width at half maximum of the kernel is
        ``0.5 * wave[0] / ipres``. A value of 0 disables the broadening.
    debug : bool
        if True, print the elapsed time.

    Returns
    -------
    array[n]
        the broadened spectrum, or ``smod`` unchanged when the resulting
        half width is not positive.
    """
    started = time.perf_counter()

    # Half width at half maximum of the instrumental profile.
    hwhm = 0 if ipres == 0.0 else 0.5 * wave[0] / ipres

    if hwhm > 0:
        smod = gaussbroad(wave, smod, hwhm)

    if debug:
        elapsed = time.perf_counter() - started
        print('Gaussbroad time: ', elapsed)

    return smod

In [7]:
# Read the first record of the synthesis output of model 0 for each of the
# four CCDs.
example_ccd1, example_ccd2, example_ccd3, example_ccd4 = (
    readsav(synthesis_files + sav_name).results[0]
    for sav_name in (
        'marcs2014_scaledsolar_210720_0_ccd1_smod_sint.sav',
        'marcs2014_scaledsolar_210720_0_ccd2_smod_sint.sav',
        'marcs2014_scaledsolar_210720_0_ccd3_smod_sint.sav',
        'marcs2014_scaledsolar_210720_0_ccd4_smod_sint.sav',
    )
)

In [8]:
# Define a common wavelength grid that we will use for all spectra
# This one was used for GALAH DR2
# Each CCD contributes the wavelength axis of its example spectrum with the
# first and the last pixel dropped.
wavelengths_for_each_ccd = {
    'CCD1': example_ccd1.wave[1:-1],
    'CCD2': example_ccd2.wave[1:-1],
    'CCD3': example_ccd3.wave[1:-1],
    'CCD4': example_ccd4.wave[1:-1],
}

# Concatenate the per-CCD grids, in CCD order, into one wavelength array.
wavelength_array = np.concatenate([wavelengths_for_each_ccd['CCD'+ccd] for ccd in ['1','2','3','4']])
#wavelength_array = np.concatenate(([wavelengths_for_each_ccd['CCD'+ccd] for ccd in ['3','4']]))

# Store the grid; the context manager closes the file even if pickling fails.
with open(wavelength_file, 'wb') as wavelength_file_opener:
    pickle.dump(wavelength_array, wavelength_file_opener)

In [9]:
# Allocate the flux matrix (one row per training-set entry, one column per
# wavelength pixel), initialised to one; it is filled with normalised flux
# values later. The inverse-variance matrix has the same shape and start value.
normalized_flux = np.ones((np.shape(training_set)[0], np.shape(wavelength_array)[0]))
normalized_ivar = np.ones_like(normalized_flux)

In [10]:
def load_normalised_spectra(index, wavelengths_for_each_ccd, spectrum_path = '/Users/svenbuder/galah_solar_twins/dr3_spectra/hermes'):
    """
    Load the synthetic spectrum of one grid model on the common wavelength grid.

    For each of the four CCDs the synthesis output is read from the
    module-level directory ``synthesis_files`` and its model flux is linearly
    interpolated onto that CCD's target wavelengths. No instrumental
    broadening is applied. The four CCD segments are then concatenated.

    Parameters
    ----------
    index : int
        model index, used to build the file names.
    wavelengths_for_each_ccd : dict
        target wavelength arrays under the keys 'CCD1' ... 'CCD4'.
    spectrum_path : str
        not used by this function.

    Returns
    -------
    (flux, ivar) : tuple of arrays
        concatenated interpolated flux and the matching inverse variances.
    """
    flux_segments = []
    ivar_segments = []

    for ccd in ('1', '2', '3', '4'):
        sav_file = synthesis_files+'/marcs2014_scaledsolar_210720_'+str(index)+'_ccd'+ccd+'_smod_sint.sav'
        model = readsav(sav_file).results[0]

        # Bring the model flux onto the common grid of this CCD.
        target_wave = wavelengths_for_each_ccd['CCD'+ccd]
        flux_on_grid = np.interp(target_wave, model.wave, model.smod)
        flux_segments.append(flux_on_grid)

        # Synthetic spectra carry no noise. Assume SNR = 1000, i.e.
        # std = 0.001 and inverse variance = 1 / std**2 = 1,000,000.
        ivar_segments.append(1000000.*np.ones_like(flux_on_grid))

    return (np.concatenate(flux_segments), np.concatenate(ivar_segments))

In [11]:
def populate_normalised_flux_and_ivar_matrix(training_set, matrix_index, wavelengths_for_each_ccd):
    """
    Fill one row of the module-level flux and inverse-variance matrices.

    Parameters
    ----------
    training_set : table
        must provide an 'INDEX' column; its entry at ``matrix_index`` selects
        the model spectrum to load.
    matrix_index : int
        row of ``training_set`` and of the matrices ``normalized_flux`` and
        ``normalized_ivar`` that is written.
    wavelengths_for_each_ccd : dict
        passed on unchanged to ``load_normalised_spectra``.

    Returns
    -------
    None. The module-level arrays ``normalized_flux`` and ``normalized_ivar``
    are modified in place, so they must exist with matching shape beforehand.
    """
    flux_row, ivar_row = load_normalised_spectra(
        training_set['INDEX'][matrix_index],
        wavelengths_for_each_ccd=wavelengths_for_each_ccd,
    )
    normalized_flux[matrix_index] = flux_row
    normalized_ivar[matrix_index] = ivar_row

In [15]:
# Full table of grid models from which the sub-grids are selected.
training_set_all = Table.read('../../spectrum_grids/marcs2014/marcs2014_scaledsolar_210720.fits')

# Original notes on the grid:
# 2160 models with
# teff 2500..(100/250)..8000 K including 5 at a time
# logg = -0.5..(0.5)..5.0/5.5 dex
# feh = -3.0..(0.5/0.25)..1.0 dex

# Sorted unique node values along each axis of the grid.
teff_points = np.unique(training_set_all['TEFF'])
logg_points = np.unique(training_set_all['LOGG'])
fe_h_points = np.unique(training_set_all['FEH'])

# Sub-grid numbers for which the training set and the flux/ivar matrices are
# written to disk. Previously used selections:
# 550,1000,1206,1656,1660,1664,1668,1669,1670,1671,1672,1673,1744,2000,2050
subgrids_to_export = [1813]

# Inclusive INDEX ranges of models that are left out of every sub-grid.
# NOTE(review): presumably models without a usable synthetic spectrum -- confirm.
excluded_index_ranges = [
    (107, 109),
    (119, 122),
    (131, 134),
    (143, 146),
    (203, 205),
    (212, 213),
    (3980, 3980),
]

# The exclusion mask does not depend on the grid node, so build it once.
usable_models = np.ones(len(training_set_all), dtype=bool)
for first_excluded, last_excluded in excluded_index_ranges:
    usable_models = usable_models & (
        (training_set_all['INDEX'] < first_excluded) | (training_set_all['INDEX'] > last_excluded)
    )

# One entry per accepted sub-grid: running number and central node values.
cannon_index = []
cannon_teff = []
cannon_logg = []
cannon_feh = []

# Running number of accepted sub-grids (only advanced for sub-grids with more
# than 10 models, so the numbering has no gaps).
i = 0

# Central nodes are restricted by the slices so that every neighbour index used
# below (+-2 in TEFF, +-1 in LOGG, +-2 in FEH) stays inside the node arrays.
# enumerate(..., start=k) yields each node's position in the full node array.
for teff_i, teff in enumerate(teff_points[2:-3], start=2):
    teff_window = (
        (training_set_all['TEFF'] >= teff_points[teff_i-2]) &
        (training_set_all['TEFF'] <= teff_points[teff_i+2]) &
        usable_models
    )
    for logg_i, logg in enumerate(logg_points[1:-2], start=1):
        teff_logg_window = (
            teff_window &
            (training_set_all['LOGG'] >= logg_points[logg_i-1]) &
            (training_set_all['LOGG'] <= logg_points[logg_i+1])
        )
        for fe_h_i, fe_h in enumerate(fe_h_points[4:-3], start=4):
            # Boolean row mask of all models inside the window around this node.
            grid = (
                teff_logg_window &
                (training_set_all['FEH'] >= fe_h_points[fe_h_i-2]) &
                (training_set_all['FEH'] <= fe_h_points[fe_h_i+2])
            )

            grid_size = len(training_set_all[grid])

            # Skip sparsely populated windows.
            if grid_size > 10:

                cannon_index.append(i)
                cannon_teff.append(teff)
                cannon_logg.append(logg)
                cannon_feh.append(fe_h)

                if i in subgrids_to_export:

                    print(teff,logg,fe_h,grid_size)

                    training_set = copy.deepcopy(training_set_all[grid])
                    training_set.write('training_sets/subgrid_'+str(i)+'_training_set_marcs2014.fits',overwrite=True)

                    flux_ivar_file = 'training_sets/subgrid_'+str(i)+'_training_marcs2014_flux_ivar.pickle'

                    # Fresh matrices for this sub-grid. They must be bound at module
                    # level under exactly these names because
                    # populate_normalised_flux_and_ivar_matrix writes into them.
                    normalized_flux = np.ones((np.shape(training_set)[0],np.shape(wavelength_array)[0]))
                    normalized_ivar = np.ones((np.shape(training_set)[0],np.shape(wavelength_array)[0]))

                    # Fill the matrices row by row.
                    for matrix_index in range(np.shape(training_set)[0]):
                        populate_normalised_flux_and_ivar_matrix(training_set, matrix_index=matrix_index, wavelengths_for_each_ccd=wavelengths_for_each_ccd)

                    # The context manager closes the file even if pickling fails.
                    with open(flux_ivar_file,'wb') as flux_ivar_file_opener:
                        pickle.dump((normalized_flux,normalized_ivar),flux_ivar_file_opener)

                i += 1

# Summary table of all accepted sub-grids and their central node values.
cannon_grid = Table()
cannon_grid['index'] = np.array(cannon_index)
cannon_grid['teff'] = np.array(cannon_teff)
cannon_grid['logg'] = np.array(cannon_logg)
cannon_grid['fe_h'] = np.array(cannon_feh)
cannon_grid.write('Cannon_subgrid.fits',overwrite=True)

6250.0 4.5 0.0 75
