# galah_dr4_grid_interpolation_trainingset_creation

In [1]:
try:
    %matplotlib inline
    %config InlineBackend.figure_format='retina'
except:
    pass

import numpy as np
import os
import pickle
from astropy.table import Table
from scipy.io import readsav
from scipy.ndimage.filters import convolve
import matplotlib.pyplot as plt
from pathlib import Path
import sys
import time

In [2]:
# Load the table of all available grids (it is indexed by grid_index below).
grids = Table.read(
    '../spectrum_grids/galah_dr4_model_trainingset_gridpoints.fits'
)

In [3]:
# Choose one grid_index: use the first command-line argument when it parses
# as an integer, otherwise fall back to the default below.
try:
    grid_index = int(sys.argv[1])
    print('Using Grid index ',grid_index)
except (IndexError, ValueError):
    # No argument was passed, or it is not an integer.
    grid_index = 1259 # 1931 for sun
    print('Using default grid index ',grid_index)

# Only an index outside the grid table is reported as "only N entries".
# Every other failure (for example a missing training-set file) is left to
# propagate with its own, accurate error message.
try:
    teff_logg_feh_name = str(int(grids['teff_subgrid'][grid_index]))+'_'+"{:.2f}".format(grids['logg_subgrid'][grid_index])+'_'+"{:.2f}".format(grids['fe_h_subgrid'][grid_index])
except IndexError as index_error:
    raise ValueError('There are only '+str(len(grids))+' entries within the grid') from index_error

# Directory holding the synthesis output of this grid point, and the table
# describing its training set.
synthesis_files = '../spectrum_grids/3d_bin_subgrids/'+teff_logg_feh_name
training_set_vsini0 = Table.read(synthesis_files+'/galah_dr4_trainingset_'+teff_logg_feh_name+'.fits')
print('Grid index '+str(grid_index)+' corresponds to '+teff_logg_feh_name)

Using default grid index  1259
Grid index 1259 corresponds to 4250_1.50_-0.50


### Below we define how to broaden a spectrum with a certain vsini value

In [4]:
def integrate_flux(mu, inten, deltav, vsini, vrt, osamp=1):
    """
    Produces a flux profile by integrating intensity profiles (sampled
    at various mu angles) over the visible stellar surface.
    Intensity profiles are weighted by the fraction of the projected
    stellar surface they represent, apportioning the area between
    adjacent MU points equally. Additional weights (such as those
    used in a Gauss-Legendre quadrature) can not meaningfully be
    used in this scheme.  About twice as many points are required
    with this scheme to achieve the precision of Gauss-Legendre
    quadrature.
    DELTAV, VSINI, and VRT must all be in the same units (e.g. km/s).
    If specified, OSAMP should be a positive integer.
    Parameters
    ----------
    mu : array(float) of size (nmu,)
        cosine of the angle between the outward normal and
        the line of sight for each intensity spectrum in INTEN.
    inten : array(float) of size(nmu, npts)
        intensity spectra at specified values of MU.
    deltav : float
        velocity spacing between adjacent spectrum points
        in INTEN (same units as VSINI and VRT).
    vsini : float
        maximum radial velocity, due to solid-body rotation.
        If an array is passed, only its first element is used.
        Ignored (with a logged warning) when only one mu value is given.
    vrt : float
        radial-tangential macroturbulence parameter, i.e.
        np.sqrt(2) times the standard deviation of a Gaussian distribution
        of turbulent velocities. The same distribution function describes
        the radial motions of one component and the tangential motions of
        a second component. Each component covers half the stellar surface.
        See 'The Observation and Analysis of Stellar Photospheres', Gray.
        If an array is passed, only its first element is used.
    osamp : int, optional
        internal oversampling factor for convolutions.
        By default convolutions are done using the input points (OSAMP=1),
        but when OSAMP is set to higher integer values, the input spectra
        are first oversampled by cubic spline interpolation. Values below
        1 are clipped to 1.
    Returns
    -------
    value : array(float) of size (npts,)
        Disk integrated flux profile.
    Note
    ------------
        If you use this algorithm in work that you publish, please cite
        Valenti & Anderson 1996, PASP, currently in preparation.
    """
    # History
    # -----------
    # Feb-88  GM
    #     Created ANA version.
    # 13-Oct-92 JAV
    #     Adapted from G. Marcy's ANA routine of the same name.
    # 03-Nov-93 JAV
    #     Switched to annular convolution technique.
    # 12-Nov-93 JAV
    #     Fixed bug. Intensity components not added when vsini=0.
    # 14-Jun-94 JAV
    #     Reformatted for "public" release. Heavily commented.
    #     Pass deltav instead of 2.998d5/deltav. Added osamp
    #     keyword. Added rebinning logic at end of routine.
    #     Changed default osamp from 3 to 1.
    # 20-Feb-95 JAV
    #     Added mu as an argument to handle arbitrary mu sampling
    #     and remove ambiguity in intensity profile ordering.
    #     Interpret VTURB as np.sqrt(2)*sigma instead of just sigma.
    #     Replaced call_external with call to spl_{init|interp}.
    # 03-Apr-95 JAV
    #     Multiply flux by pi to give observed flux.
    # 24-Oct-95 JAV
    #     Force "nmk" padding to be at least 3 pixels.
    # 18-Dec-95 JAV
    #     Renamed from dskint() to rtint(). No longer make local
    #     copy of intensities. Use radial-tangential instead
    #     of isotropic Gaussian macroturbulence.
    # 26-Jan-99 JAV
    #     For NMU=1 and VSINI=0, assume resolved solar surface;
    #     apply R-T macro, but suppress vsini broadening.
    # 01-Apr-99 GMH
    #     Use annuli weights, rather than assuming equal area.
    # 07-Mar-12 JAV
    #     Force vsini and vmac to be scalars.

    # These two names are not imported at file level, so bring them into scope
    # here; without them the single-mu warning and the oversampling branch
    # would raise NameError.
    import logging
    from scipy.interpolate import interp1d

    # Force vsini and especially vmac to be scalars. Otherwise mu dependence fails.
    if np.size(vsini) > 1:
        vsini = vsini[0]
    if np.size(vrt) > 1:
        vrt = vrt[0]

    # Determine oversampling factor: an integer >= 1. The explicit int() keeps
    # it usable as an array size regardless of what round() returns.
    oversample = int(round(np.clip(osamp, 1, None)))

    # Convert input MU to projected radii, R, of annuli for a star of unit radius
    #  (which is just sine, rather than cosine, of the angle between the outward
    #  normal and the line of sight).
    rmu = np.sqrt(1 - mu ** 2)  # use simple trig identity

    # Sort the projected radii and corresponding intensity spectra into ascending
    #  order (i.e. from disk center to the limb), which is equivalent to sorting
    #  MU in descending order.
    isort = np.argsort(rmu)
    rmu = rmu[isort]  # reorder projected radii
    nmu = np.size(mu)  # number of radii
    if nmu == 1:
        if vsini != 0:
            logging.getLogger(__name__).warning(
                "Vsini is non-zero, but only one projected radius (mu value) is set. No rotational broadening will be performed."
            )
            vsini = 0  # ignore vsini if only 1 mu

    # Calculate projected radii for boundaries of disk integration annuli.  The n+1
    # boundaries are selected such that r(i+1) exactly bisects the area between
    # rmu(i) and rmu(i+1). The innermost boundary, r(0) is set to 0 (disk center)
    # and the outermost boundary, r(nmu) is set to 1 (limb).
    if nmu > 1 or vsini != 0:  # really want disk integration
        r = np.sqrt(
            0.5 * (rmu[:-1] ** 2 + rmu[1:] ** 2)
        )  # area midpoints between rmu
        r = np.concatenate(([0], r, [1]))

        # Calculate integration weights for each disk integration annulus.  The weight
        # is just given by the relative area of each annulus, normalized such that
        # the sum of all weights is unity.  Weights for limb darkening are included
        # explicitly in the intensity profiles, so they aren't needed here.
        wt = r[1:] ** 2 - r[:-1] ** 2  # weights = relative areas
    else:
        wt = np.array([1.0])  # single mu value, full weight

    # Generate index vectors for input and oversampled points. Note that the
    # oversampled indices are carefully chosen such that every "oversample" finely
    # sampled points fit exactly into one input bin. This makes it simple to
    # "integrate" the finely sampled points at the end of the routine.
    npts = inten.shape[1]  # number of points
    xpix = np.arange(npts, dtype=float)  # point indices
    nfine = oversample * npts  # number of oversampled points
    xfine = (0.5 / oversample) * (
        2 * np.arange(nfine, dtype=float) - oversample + 1
    )  # oversampled points indices

    # Loop through annuli, constructing and convolving with rotation kernels.

    yfine = np.empty(nfine)  # init oversampled intensities
    flux = np.zeros(nfine)  # init flux vector
    for imu in range(nmu):  # loop thru integration annuli

        #  Make an oversampled version of the intensity profile for the
        #  current annulus by cubic spline interpolation.
        ypix = inten[isort[imu]]  # extract intensity profile
        if oversample == 1:
            # just copy (use) original profile
            yfine = ypix
        else:
            # Spline onto the fine scale. The first and last fine points lie
            # slightly outside [0, npts-1], so extrapolation must be allowed.
            yfine = interp1d(
                xpix, ypix, kind="cubic", fill_value="extrapolate"
            )(xfine)

        # Construct the convolution kernel which describes the distribution of
        # rotational velocities present in the current annulus. The distribution has
        # been derived analytically for annuli of arbitrary thickness in a rigidly
        # rotating star. The kernel is constructed in two pieces: one piece for
        # radial velocities less than the maximum velocity along the inner edge of
        # the annulus, and one piece for velocities greater than this limit.
        if vsini > 0:
            # nontrivial case
            r1 = r[imu]  # inner edge of annulus
            r2 = r[imu + 1]  # outer edge of annulus
            dv = deltav / oversample  # oversampled velocity spacing
            maxv = vsini * r2  # maximum velocity in annulus
            nrk = 2 * int(maxv / dv) + 3  # number of oversampled kernel points
            # velocity scale for kernel
            v = dv * (np.arange(nrk, dtype=float) - ((nrk - 1) / 2))
            rkern = np.zeros(nrk)  # init rotational kernel
            j1 = np.abs(v) < vsini * r1  # low velocity points
            rkern[j1] = np.sqrt((vsini * r2) ** 2 - v[j1] ** 2) - np.sqrt(
                (vsini * r1) ** 2 - v[j1] ** 2
            )  # generate distribution

            j2 = (np.abs(v) >= vsini * r1) & (np.abs(v) <= vsini * r2)
            rkern[j2] = np.sqrt(
                (vsini * r2) ** 2 - v[j2] ** 2
            )  # generate distribution

            rkern = rkern / np.sum(rkern)  # normalize kernel

            # Convolve the intensity profile with the rotational velocity kernel for
            # this annulus. mode="nearest" extends the profile beyond both ends by
            # repeating the edge values, which protects against edge artefacts.
            # A 3-point kernel is skipped.
            if nrk > 3:
                yfine = convolve(yfine, rkern, mode="nearest")

        # Calculate projected sigma for radial and tangential velocity distributions.
        muval = mu[isort[imu]]  # current value of mu
        sigma = oversample * vrt / np.sqrt(2) / deltav  # standard deviation in points
        sigr = sigma * muval  # reduce by current mu value
        sigt = sigma * np.sqrt(1.0 - muval ** 2)  # reduce by np.sqrt(1-mu**2)

        # Figure out how many points to use in macroturbulence kernel.
        nmk = int(10 * sigma)
        nmk = np.clip(nmk, 3, (nfine - 3) // 2)

        # Construct radial macroturbulence kernel with a sigma of mu*VRT/np.sqrt(2).
        if sigr > 0:
            xarg = np.linspace(-nmk, nmk, 2 * nmk + 1) / sigr
            xarg = np.clip(-0.5 * xarg ** 2, -20, None)
            mrkern = np.exp(xarg)  # compute the gaussian
            mrkern = mrkern / np.sum(mrkern)  # normalize the profile
        else:
            mrkern = np.zeros(2 * nmk + 1)  # init with zeros
            mrkern[nmk] = 1.0  # delta function

        # Construct tangential kernel with a sigma of np.sqrt(1-mu**2)*VRT/np.sqrt(2).
        if sigt > 0:
            xarg = np.linspace(-nmk, nmk, 2 * nmk + 1) / sigt
            xarg = np.clip(-0.5 * xarg ** 2, -20, None)
            mtkern = np.exp(xarg)  # compute the gaussian
            mtkern = mtkern / np.sum(mtkern)  # normalize the profile
        else:
            mtkern = np.zeros(2 * nmk + 1)  # init with zeros
            mtkern[nmk] = 1.0  # delta function

        # Sum the radial and tangential components, weighted by surface area.
        area_r = 0.5  # assume equal areas
        area_t = 0.5  # ar+at must equal 1
        mkern = area_r * mrkern + area_t * mtkern  # add both components

        # Convolve the total flux profiles, again extending the spectrum on both
        # ends with its edge values.
        yfine = convolve(yfine, mkern, mode="nearest")

        # Add contribution from current annulus to the running total.
        flux = flux + wt[imu] * yfine  # add profile to running total

    # Average every group of "oversample" fine points back into one input bin
    # and multiply by pi to give the observed flux.
    flux = np.reshape(flux, (npts, oversample))  # convert to an array
    flux = np.pi * np.sum(flux, axis=1) / oversample  # sum, normalize
    return flux

In [5]:
def broaden_spectrum(wint_seg, sint_seg, wave_seg, cmod_seg, vsini=0, vmac=0, debug=False):
    """
    Disk-integrate the intensity spectra of one segment with rotational and
    macroturbulent broadening and return the continuum-normalised flux.

    Parameters
    ----------
    wint_seg : array
        wavelength grid of the intensity spectra.
    sint_seg : array
        intensity spectra, indexed as sint_seg[imu] for the 7 mu angles
        defined inside this function.
    wave_seg : array
        wavelength grid on which cmod_seg is given.
    cmod_seg : array
        continuum on wave_seg; it is interpolated onto the output grid and
        divides the broadened flux.
    vsini : float, optional
        rotational velocity passed to integrate_flux (absolute value is used).
    vmac : float, optional
        macroturbulence passed to integrate_flux (absolute value is used).
    debug : bool, optional
        if True, print the output wavelength step and number of output points.

    Returns
    -------
    (wave_equi, flux) : tuple of arrays
        equidistant output wavelength grid and the broadened flux divided by
        the interpolated continuum.
    """

    nw = len(wint_seg)
    clight = 299792.5
    nmu = 7
    # float() replaces np.float, which no longer exists in current NumPy.
    mu = (np.sqrt(0.5*(2*np.arange(nmu)+1)/float(nmu)))[::-1]
    wmid = 0.5 * (wint_seg[nw-1] + wint_seg[0])
    wspan = wint_seg[nw-1] - wint_seg[0]
    # Smallest wavelength step of the input grid (the last interval is not
    # included in this difference). It is also the output dispersion below.
    vstep1 = np.min(wint_seg[1:nw-1] - wint_seg[0:nw-2])
    vstep2 = 0.1 * wspan / (nw-1) / wmid * clight
    vstep3 = 0.05
    # NOTE(review): vstep1 is a wavelength difference while vstep2 is scaled by
    # clight/wmid, so this maximum compares different units - confirm that is
    # intended. Left unchanged because vstep1 also sets the output grid.
    vstep = np.max([vstep1,vstep2,vstep3])

    # Generate model wavelength scale X with a constant ratio between adjacent
    # points (uniform velocity step) and an odd number of points.
    nx = int(np.floor(np.log10(wint_seg[nw-1] / wint_seg[0])/ np.log10(1.0+vstep / clight))+1)
    if nx % 2 == 0: nx += 1
    resol_out = 1.0/((wint_seg[nw-1] / wint_seg[0])**(1.0/(nx-1.0))-1.0)
    vstep = clight / resol_out
    x_seg = wint_seg[0] * (1.0 + 1.0 / resol_out)**np.arange(nx)

    # Interpolate intensity spectra onto new model wavelength scale.
    yi_seg = np.empty((nmu, nx))

    for imu in range(nmu):
        yi_seg[imu] = np.interp(x_seg, wint_seg, sint_seg[imu])

    y_seg = integrate_flux(mu, yi_seg, vstep, np.abs(vsini), np.abs(vmac))

    # Resample flux and continuum onto an equidistant wavelength grid.
    dispersion = vstep1
    wave_equi = np.arange(x_seg[0],x_seg[-1]+dispersion,dispersion)

    c_seg = np.interp(wave_equi,wave_seg,cmod_seg)
    y_seg = np.interp(wave_equi,x_seg,y_seg)

    if debug:
        print(vstep1,len(wave_equi))

    return(wave_equi,y_seg/c_seg)

In [6]:
# vsini values (in km/s) at which every training-set spectrum is broadened
vsini_values = np.asarray((1.5, 3.0, 6.0, 9.0, 12.0, 24.0))

# Gradient Spectra and Masks

In [7]:
# Broaden spectrum 0 of every CCD with the largest vsini value. The results
# are stored per CCD under 'wave_null_ccd<N>' and 'spectrum_null_ccd<N>'.
null_spectrum_broad = {}
for ccd in (1, 2, 3, 4):
    null_spectrum = readsav(
        synthesis_files+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_0_'+str(ccd)+'.out'
    ).results[0]
    null_wave_and_flux = broaden_spectrum(
        null_spectrum.wint,
        null_spectrum.sint,
        null_spectrum.wave,
        null_spectrum.cmod,
        vsini = vsini_values[-1]
    )
    null_spectrum_broad['wave_null_ccd'+str(ccd)] = null_wave_and_flux[0]
    null_spectrum_broad['spectrum_null_ccd'+str(ccd)] = null_wave_and_flux[1]
# Field names of the record read last (CCD 4)
print('The synthetic spectra come with keywords ',null_spectrum.dtype.names)

The synthetic spectra come with keywords  ('WAVE', 'SMOD', 'CMOD', 'WINT', 'SINT')


In [8]:
# Label names: all training-set columns except the first two and the last one
labels = np.asarray(training_set_vsini0.keys()[2:-1])
labels

array(['teff', 'logg', 'fe_h', 'vmic', 'li_fe', 'c_fe', 'n_fe', 'o_fe',
       'na_fe', 'mg_fe', 'al_fe', 'si_fe', 'k_fe', 'ca_fe', 'sc_fe',
       'ti_fe', 'v_fe', 'cr_fe', 'mn_fe', 'co_fe', 'ni_fe', 'cu_fe',
       'zn_fe', 'rb_fe', 'sr_fe', 'y_fe', 'zr_fe', 'mo_fe', 'ru_fe',
       'ba_fe', 'la_fe', 'ce_fe', 'nd_fe', 'sm_fe', 'eu_fe'], dtype='<U5')

In [9]:
# LaTeX versions of the labels for plots and tables. fancy_labels must stay
# index-aligned with `labels`, because later cells look entries up by
# label_index.
fancy_label_lookup = {
    'teff': r'$T_\mathrm{eff}~/~\mathrm{K}$',
    'logg': r'$\log (g~/~\mathrm{cm\,s^{-2}})$',
    'fe_h': r'$\mathrm{[Fe/H]}$',
    'vmic': r'$v_\mathrm{mic}~/~\mathrm{km\,s^{-1}}$',
    'vsini': r'$v \sin i~/~\mathrm{km\,s^{-1}}$',
}

fancy_labels = []
for label in labels:
    if label in fancy_label_lookup:
        fancy_labels.append(fancy_label_lookup[label])
    elif label[-3:] == '_fe':
        # Abundance labels such as 'mg_fe' become [Mg/Fe]. The raw string
        # avoids the invalid '\m' escape sequence.
        fancy_labels.append(r'$\mathrm{['+label[0].upper()+label[1:-3]+'/Fe]}$')
    else:
        print('No entry for '+label)
        # Fall back to the plain label so that the list keeps one entry per label.
        fancy_labels.append(str(label))
print(fancy_labels)

['$T_\\mathrm{eff}~/~\\mathrm{K}$', '$\\log (g~/~\\mathrm{cm\\,s^{-2}})$', '$\\mathrm{[Fe/H]}$', '$v_\\mathrm{mic}~/~\\mathrm{km\\,s^{-1}}$', '$\\mathrm{[Li/Fe]}$', '$\\mathrm{[C/Fe]}$', '$\\mathrm{[N/Fe]}$', '$\\mathrm{[O/Fe]}$', '$\\mathrm{[Na/Fe]}$', '$\\mathrm{[Mg/Fe]}$', '$\\mathrm{[Al/Fe]}$', '$\\mathrm{[Si/Fe]}$', '$\\mathrm{[K/Fe]}$', '$\\mathrm{[Ca/Fe]}$', '$\\mathrm{[Sc/Fe]}$', '$\\mathrm{[Ti/Fe]}$', '$\\mathrm{[V/Fe]}$', '$\\mathrm{[Cr/Fe]}$', '$\\mathrm{[Mn/Fe]}$', '$\\mathrm{[Co/Fe]}$', '$\\mathrm{[Ni/Fe]}$', '$\\mathrm{[Cu/Fe]}$', '$\\mathrm{[Zn/Fe]}$', '$\\mathrm{[Rb/Fe]}$', '$\\mathrm{[Sr/Fe]}$', '$\\mathrm{[Y/Fe]}$', '$\\mathrm{[Zr/Fe]}$', '$\\mathrm{[Mo/Fe]}$', '$\\mathrm{[Ru/Fe]}$', '$\\mathrm{[Ba/Fe]}$', '$\\mathrm{[La/Fe]}$', '$\\mathrm{[Ce/Fe]}$', '$\\mathrm{[Nd/Fe]}$', '$\\mathrm{[Sm/Fe]}$', '$\\mathrm{[Eu/Fe]}$']


In [10]:
# Start both gradient tables with the wavelength grid ('wave') and the
# broadened null spectrum ('median'), concatenated over the four CCDs.
gradient_spectra_up = Table()
gradient_spectra_down = Table()

for gradient_table in (gradient_spectra_up, gradient_spectra_down):
    for table_column, null_key in (('wave', 'wave_null_ccd'), ('median', 'spectrum_null_ccd')):
        gradient_table[table_column] = np.concatenate(
            [null_spectrum_broad[null_key+str(ccd)] for ccd in [1,2,3,4]]
        )

In [11]:
# For every label, broaden the spectra with that label increased (spectrum
# index 2+label_index) and decreased (37+label_index) at the largest vsini,
# and store their difference to the broadened null spectrum.
for label_index, label in enumerate(labels):

    gradient_up = []
    gradient_down = []

    for ccd in [1,2,3,4]:
        increased_suffix = '_'+teff_logg_feh_name+'_'+str(2+label_index)+'_'+str(ccd)+'.out'
        decreased_suffix = '_'+teff_logg_feh_name+'_'+str(37+label_index)+'_'+str(ccd)+'.out'
        try:
            increased_spectrum = readsav(synthesis_files+'/galah_dr4_trainingset'+increased_suffix).results[0]
            decreased_spectrum = readsav(synthesis_files+'/galah_dr4_trainingset'+decreased_suffix).results[0]
        except OSError:
            # The files are not available under the first prefix; try the
            # alternative prefix. Other errors are no longer swallowed.
            increased_spectrum = readsav(synthesis_files+'/galah_dr4_cannon_trainingset'+increased_suffix).results[0]
            decreased_spectrum = readsav(synthesis_files+'/galah_dr4_cannon_trainingset'+decreased_suffix).results[0]

        wave_increase, spectrum_increase = broaden_spectrum(
            increased_spectrum.wint,
            increased_spectrum.sint,
            increased_spectrum.wave,
            increased_spectrum.cmod,
            vsini = vsini_values[-1]
        )

        wave_decrease, spectrum_decrease = broaden_spectrum(
            decreased_spectrum.wint,
            decreased_spectrum.sint,
            decreased_spectrum.wave,
            decreased_spectrum.cmod,
            vsini = vsini_values[-1]
        )

        gradient_up.append(spectrum_increase - null_spectrum_broad['spectrum_null_ccd'+str(ccd)])
        gradient_down.append(spectrum_decrease - null_spectrum_broad['spectrum_null_ccd'+str(ccd)])

    gradient_spectra_up[label] = np.concatenate((gradient_up))
    gradient_spectra_down[label] = np.concatenate((gradient_down))

In [12]:
# For every label: count the pixels whose flux responds to a label change by
# at least each threshold, store the mask for the larger threshold, and save
# a four-panel plot of the gradient spectra.
grid_masks = Table()

percentage_used = []

Path('gradient_spectra/'+teff_logg_feh_name).mkdir(parents=True, exist_ok=True)

# Response thresholds in normalised flux; they do not depend on the label.
threshold1 = 0.0001
threshold2 = 0.001

total = len(gradient_spectra_up)

for label_index, label in enumerate(labels):
    # Label offsets of the increased / decreased spectrum relative to spectrum 0
    label_increase = training_set_vsini0[label][2+label_index]-training_set_vsini0[label][0]
    label_decrease = training_set_vsini0[label][37+label_index]-training_set_vsini0[label][0]
    print(label, label_increase)

    # Largest absolute flux response per pixel over both gradient directions,
    # computed once and reused for both thresholds.
    max_abs_gradient = np.max([np.abs(gradient_spectra_up[label]),np.abs(gradient_spectra_down[label])],axis=0)
    above_threshold1 = (max_abs_gradient >= threshold1)
    above_threshold2 = (max_abs_gradient >= threshold2)

    # Number and percentage of pixels at or above each threshold
    n_above_threshold1 = np.count_nonzero(above_threshold1)
    n_above_threshold2 = np.count_nonzero(above_threshold2)
    percent_above_threshold1 = "{:.1f}".format(100*n_above_threshold1/total)
    percent_above_threshold2 = "{:.1f}".format(100*n_above_threshold2/total)

    print(str(threshold1)+':   ',percent_above_threshold1+'%',n_above_threshold1)
    print(str(threshold2)+':   ',percent_above_threshold2+'%',n_above_threshold2)

    percentage_used.append([fancy_labels[label_index], r'$\pm$'+str(label_increase), percent_above_threshold1, percent_above_threshold2])

    # The mask keeps the pixels that respond at or above the larger threshold.
    grid_masks[label] = above_threshold2

    f, gs = plt.subplots(1,4,figsize=(15,2.5),sharey=True)
    for ccd in [1,2,3,4]:
        # Pixels between (3+ccd)*1000 and (4+ccd)*1000 Angstrom belong to this panel
        in_ccd = (gradient_spectra_up['wave'] > (3+ccd)*1000) & (gradient_spectra_up['wave'] < (4+ccd)*1000)
        ax=gs[ccd-1]

        # Each legend entry is attached to one panel only (ccd 2, 3 and 4).
        plot_label = '_nolegend_'
        if ccd == 2:
            plot_label = r'$\Delta f$ for $\Delta$'+fancy_labels[label_index]+' = '+str(label_increase)
        ax.plot(
            gradient_spectra_up['wave'][in_ccd],
            gradient_spectra_up[label][in_ccd],
            c='k',lw=0.5,label = plot_label
        )

        # The response to the decrease is plotted with flipped sign.
        plot_label = '_nolegend_'
        if ccd == 3:
            plot_label = r'$-\Delta f$ for $\Delta$'+fancy_labels[label_index]+' = '+str(label_decrease)
        ax.plot(
            gradient_spectra_down['wave'][in_ccd],
            -gradient_spectra_down[label][in_ccd],
            c='C0',lw=0.5,label = plot_label
        )

        ax.set_xlabel(r'Wavelength [$\AA$]')
        if ccd==1:
            ax.set_ylabel(r'$\Delta f~/~\mathrm{norm.}$')

        # Mark the pixels above each threshold along the zero line.
        marked_pixels1 = (above_threshold1 & in_ccd)
        plot_label = '_nolegend_'
        if ccd == 4:
            plot_label = r'$\vert\Delta f\vert$ above '+str(threshold1)
        ax.scatter(
            gradient_spectra_up['wave'][marked_pixels1],
            np.zeros(np.count_nonzero(marked_pixels1)),
            c='red',s=2,label=plot_label
        )

        marked_pixels2 = (above_threshold2 & in_ccd)
        plot_label = '_nolegend_'
        if ccd == 4:
            plot_label = r'$\vert\Delta f\vert$ above '+str(threshold2)
        ax.scatter(
            gradient_spectra_up['wave'][marked_pixels2],
            np.zeros(np.count_nonzero(marked_pixels2)),
            c='orange',s=2,label=plot_label
        )

        # The y-range follows the 'up' gradient but always spans at least
        # +-3*threshold1.
        ax.set_ylim(
            np.min([np.min(gradient_spectra_up[label]),-3*threshold1]),
            np.max([np.max(gradient_spectra_up[label]),3*threshold1])
        )
        if ccd in [2,3,4]:
            if label not in ['teff']:
                ax.legend(loc='lower center')
            else:
                ax.legend()
    plt.tight_layout()
    plt.savefig('gradient_spectra/'+teff_logg_feh_name+'/gradient_spectrum_'+teff_logg_feh_name+'_'+label+'.png',dpi=200,bbox_inches='tight')
    # Only grid index 1931 is additionally saved to the paper folder and shown inline.
    if grid_index in [1931]:
        plt.savefig('../../galah_dr4_paper/figures/gradient_spectrum_'+teff_logg_feh_name+'_'+label+'.png',dpi=200,bbox_inches='tight')
        plt.show()
    plt.close()


teff 250.0
0.0001:    99.4% 63840
0.001:    97.6% 62657
logg 0.5
0.0001:    99.3% 63755
0.001:    97.2% 62444
fe_h 0.25
0.0001:    99.4% 63825
0.001:    99.1% 63644
vmic 1.0
0.0001:    92.5% 59394
0.001:    64.8% 41586
li_fe 2.2499998
0.0001:    18.5% 11905
0.001:    1.2% 739
c_fe 1.0
0.0001:    99.7% 64009
0.001:    99.3% 63754
n_fe 1.0
0.0001:    98.9% 63522
0.001:    95.4% 61273
o_fe 2.0
0.0001:    99.4% 63839
0.001:    99.2% 63737
na_fe 1.0
0.0001:    96.4% 61924
0.001:    68.5% 44022
mg_fe 1.0
0.0001:    99.4% 63862
0.001:    97.4% 62551
al_fe 1.0
0.0001:    97.7% 62776
0.001:    78.6% 50458
si_fe 1.0
0.0001:    99.3% 63763
0.001:    96.0% 61637
k_fe 1.0
0.0001:    60.1% 38588
0.001:    2.7% 1703
ca_fe 1.0
0.0001:    97.4% 62573
0.001:    77.0% 49441
sc_fe 1.0
0.0001:    13.3% 8557
0.001:    7.8% 5011
ti_fe 1.0
0.0001:    98.5% 63232
0.001:    90.7% 58222
v_fe 1.0
0.0001:    46.1% 29627
0.001:    31.0% 19934
cr_fe 1.0
0.0001:    91.6% 58804
0.001:    46.8% 30066
mn_fe 1.0
0.0001: 

In [13]:
# Write the mask percentages as a LaTeX table, for grid index 1931 only.
if grid_index == 1931:
    table_header = [
        [r'\begin{table}[!ht]'],
        [r'    \centering'],
        [r'    \caption{Example of mask estimation for \textit{The Cannon}/\textit{The Payne} model creation. Listed are percentages of the spectrum that respond to an in-/decrease of each label above 0.001 and 0.0001 of the normalised flux.}'],
        [r'    \label{tab:cannon_mask_percentage}'],
        [r'    \begin{tabular}{cccc}'],
        [r'    \hline \hline'],
        [r'    Label &  Label change & $\vert \Delta f \vert > 0.001~/~\%$ & $\vert \Delta f \vert > 0.0001~/~\%$ \\'],
        [r'    \hline'],
    ]
    # One row per label: label, label change, then the percentage for the
    # 0.001 threshold (entry 3) followed by the one for 0.0001 (entry 2).
    table_rows = [
        [r'    '+entry[0]+' & '+entry[1]+' & '+entry[3]+' & '+entry[2]+r' \\']
        for entry in percentage_used
    ]
    table_footer = [
        [r'    \hline'],
        [r'    \end{tabular}'],
        [r'\end{table}'],
    ]
    table_text = table_header + table_rows + table_footer

    np.savetxt('../../galah_dr4_paper/tables/mask_percentage_1931.tex',np.array(table_text),fmt='%s')

In [14]:
# Output folders of this grid point
training_input_dir = 'training_input/'+teff_logg_feh_name
gradient_spectra_dir = 'gradient_spectra/'+teff_logg_feh_name

Path(training_input_dir).mkdir(parents=True, exist_ok=True)

# Store both gradient tables and the masks, replacing existing files.
gradient_spectra_up.write(
    gradient_spectra_dir+'/'+teff_logg_feh_name+'_gradient_spectra_up.fits',
    overwrite=True
)
gradient_spectra_down.write(
    gradient_spectra_dir+'/'+teff_logg_feh_name+'_gradient_spectra_down.fits',
    overwrite=True
)
grid_masks.write(
    training_input_dir+'/'+teff_logg_feh_name+'_masks.fits',
    overwrite=True
)

# Training-set flux and ivar at different vsini values

In [15]:
# Build the full training set: the vsini=0 table repeated once per entry of
# vsini_values, with a 'vsini' column inserted after the first six columns.

n_vsini = len(vsini_values)
n_spectra = len(training_set_vsini0['spectrum_index'])

full_trainingset = Table()
for label in training_set_vsini0.keys()[:6]:
    full_trainingset[label] = np.concatenate(np.array([training_set_vsini0[label]] * n_vsini))
# Every vsini value is repeated for one complete copy of the table.
full_trainingset['vsini'] = np.repeat(vsini_values, n_spectra)
for label in training_set_vsini0.keys()[6:]:
    full_trainingset[label] = np.concatenate(np.array([training_set_vsini0[label]] * n_vsini))

full_trainingset.write(
    'training_input/'+teff_logg_feh_name+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_incl_vsini.fits',
    overwrite=True
)

In [16]:
# Prepare the wavelength array (all four CCDs concatenated) and store it,
# if the file is not yet available

wavelength_array = np.concatenate(([null_spectrum_broad['wave_null_ccd'+str(ccd)] for ccd in [1,2,3,4]]))
wavelength_file = 'training_input/galah_dr4_3dbin_wavelength_array.pickle'
if not os.path.isfile(wavelength_file):
    # The context manager closes the file even if pickling fails.
    with open(wavelength_file,'wb') as wavelength_file_opener:
        pickle.dump((wavelength_array),wavelength_file_opener)

# Flux matrix with one row per training-set entry and one column per pixel,
# initialised with ones and filled later.
normalized_flux = np.ones((len(full_trainingset),len(wavelength_array)))

In [17]:
# Display the first row of the full training set as a quick check
full_trainingset[0]

grid_index,spectrum_index,teff,logg,fe_h,vmic,vsini,li_fe,c_fe,n_fe,o_fe,na_fe,mg_fe,al_fe,si_fe,k_fe,ca_fe,sc_fe,ti_fe,v_fe,cr_fe,mn_fe,co_fe,ni_fe,cu_fe,zn_fe,rb_fe,sr_fe,y_fe,zr_fe,mo_fe,ru_fe,ba_fe,la_fe,ce_fe,nd_fe,sm_fe,eu_fe,sme_abund [99]
int64,int64,float32,float32,float32,float32,float64,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float64
1259,0,4250.0,1.5,-0.5,1.5,1.5,2.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9207554249128719 .. -20.035855713686423


In [20]:
def prepare_normalised_spectra(spectrum_index, vsini):
    """
    Read the synthetic spectra of one training-set entry for all four CCDs,
    broaden them with the given vsini and return them as one array.

    Parameters
    ----------
    spectrum_index : int
        index of the spectrum; it is part of the file names that are read.
    vsini : float
        rotational velocity handed to broaden_spectrum.

    Returns
    -------
    array
        broadened, continuum-normalised flux of CCDs 1 to 4, concatenated
        in that order.

    Raises
    ------
    OSError
        if a file can be opened under neither of the two file-name prefixes.
    """

    normalised_flux_for_index = []

    for ccd in [1,2,3,4]:

        file_suffix = '_'+teff_logg_feh_name+'_'+str(spectrum_index)+'_'+str(ccd)+'.out'
        try:
            synthetic_spectrum = readsav(synthesis_files+'/galah_dr4_trainingset'+file_suffix).results[0]
        except OSError:
            # The file is not available under the first prefix; try the
            # alternative prefix. Other errors are no longer swallowed.
            synthetic_spectrum = readsav(synthesis_files+'/galah_dr4_cannon_trainingset'+file_suffix).results[0]

        # Only the flux is kept; the wavelength grid is not used here.
        wave_broadened,flux_broadened = broaden_spectrum(
            synthetic_spectrum.wint,
            synthetic_spectrum.sint,
            synthetic_spectrum.wave,
            synthetic_spectrum.cmod,
            vsini=vsini)

        normalised_flux_for_index.append(flux_broadened)

    normalised_flux_for_index = np.concatenate((normalised_flux_for_index))

    return(normalised_flux_for_index)

In [21]:
def populate_normalised_flux_and_ivar_matrix(index):
    """
    Return the broadened flux for row `index` of full_trainingset.

    The row's 'vsini' and 'spectrum_index' entries are looked up and handed
    to prepare_normalised_spectra. Despite the function name, only the flux
    is returned here; no inverse variance is computed.

    INPUT:
    index : row of full_trainingset to process

    OUTPUT:
    whatever prepare_normalised_spectra returns for that row
    """
    row_vsini = full_trainingset['vsini'][index]
    row_spectrum_index = full_trainingset['spectrum_index'][index]

    return prepare_normalised_spectra(row_spectrum_index, vsini=row_vsini)
    
# Two wall-clock reference points: `start` marks the beginning of the whole run,
# `now` is reset after every spectrum to time the individual iterations.
start = time.time()
now = time.time()

# Fill the pre-allocated flux matrix row by row, one training-set entry at a time.
for index in range(len(full_trainingset)):
    normalized_flux[index] = normalised_flux_for_index = populate_normalised_flux_and_ivar_matrix(index)

    # Progress line: row index, seconds spent on this row, seconds since the loop began.
    print(index,time.time()-now,time.time()-start)
    now = time.time()


0 0.19710183143615723 0.19712185859680176
1 0.21364808082580566 0.4135167598724365
2 0.19889020919799805 0.615501880645752
3 0.21570396423339844 0.8342819213867188
4 0.22067880630493164 1.0579068660736084
5 0.21118688583374023 1.2720608711242676
6 0.20677924156188965 1.4817700386047363
7 0.19291210174560547 1.6774628162384033
8 0.18750905990600586 1.8678369522094727
9 0.19150185585021973 2.0623438358306885
10 0.1882779598236084 2.253406047821045
11 0.20850729942321777 2.464853048324585
12 0.20226621627807617 2.6698739528656006
13 0.22490715980529785 2.897732973098755
14 0.2015221118927002 3.1021459102630615
15 0.1957530975341797 3.300719976425171
16 0.18846487998962402 3.4921228885650635
17 0.19586515426635742 3.6908559799194336
18 0.18933677673339844 3.882957935333252
19 0.18791413307189941 4.073704957962036
20 0.1989297866821289 4.275577783584595
21 0.19915509223937988 4.477558135986328
22 0.19239020347595215 4.672853946685791
23 0.19960713386535645 4.875297784805298
24 0.18732881546

196 0.20071816444396973 41.54567503929138
197 0.2006092071533203 41.74874401092529
198 0.20104002952575684 41.9525420665741
199 0.20864105224609375 42.163809061050415
200 0.20118403434753418 42.3677020072937
201 0.2011258602142334 42.571396827697754
202 0.2179241180419922 42.791893005371094
203 0.20138335227966309 42.99634313583374
204 0.20889592170715332 43.20772409439087
205 0.2052149772644043 43.41550278663635
206 0.20216989517211914 43.6202597618103
207 0.20163297653198242 43.824689865112305
208 0.1984548568725586 44.02575182914734
209 0.21313905715942383 44.24178886413574
210 0.2043750286102295 44.44890809059143
211 0.22202396392822266 44.67373490333557
212 0.1997840404510498 44.87614178657532
213 0.20106005668640137 45.08000588417053
214 0.20300006866455078 45.28571701049805
215 0.200700044631958 45.488872051239014
216 0.2130870819091797 45.70457696914673
217 0.2063741683959961 45.913578033447266
218 0.2112722396850586 46.127403020858765
219 0.19903898239135742 46.32937979698181


393 0.2809898853302002 93.20470786094666
394 0.29246997833251953 93.49963092803955
395 0.2852659225463867 93.78760099411011
396 0.28335118293762207 94.07350397109985
397 0.28194618225097656 94.35805892944336
398 0.2807929515838623 94.64165687561035
399 0.2833871841430664 94.92763590812683
400 0.2932407855987549 95.22364282608032
401 0.29665422439575195 95.52318692207336
402 0.28612709045410156 95.81203889846802
403 0.2842419147491455 96.09888482093811
404 0.29175305366516113 96.39363479614258
405 0.2835209369659424 96.67981791496277
406 0.2857019901275635 96.9683358669281
407 0.28367090225219727 97.25478982925415
408 0.2859618663787842 97.54348587989807
409 0.28672194480895996 97.83299088478088
410 0.28578996658325195 98.1214280128479
411 0.2836000919342041 98.40779089927673
412 0.30544185638427734 98.71598601341248
413 0.2954528331756592 99.0144510269165
414 0.2945559024810791 99.31179285049438
415 0.29128193855285645 99.60590696334839
416 0.28972697257995605 99.89840388298035
417 0.2

588 0.5043091773986816 155.68867588043213
589 0.49094605445861816 156.18235874176025
590 0.4878840446472168 156.67300295829773
591 0.4913620948791504 157.16692900657654
592 0.47958993911743164 157.64931893348694
593 0.4764590263366699 158.1284818649292
594 0.4789299964904785 158.61003184318542
595 0.49144721031188965 159.1041328907013
596 0.4858260154724121 159.5928840637207
597 0.4979419708251953 160.09372282028198
598 0.501507043838501 160.59779381752014
599 0.49776411056518555 161.0984547138214
600 0.5163052082061768 161.6177499294281
601 0.5133728981018066 162.1337549686432
602 0.5168318748474121 162.65332794189453
603 0.5188460350036621 163.17484498023987
604 0.5140469074249268 163.6916160583496
605 0.5099081993103027 164.20414090156555
606 0.5033800601959229 164.7103989124298
607 0.5067479610443115 165.21986889839172
608 0.503216028213501 165.72610092163086
609 0.5045340061187744 166.2334349155426
610 0.5052132606506348 166.74171805381775
611 0.49524784088134766 167.2398447990417

783 0.48508524894714355 256.9225709438324
784 0.48757290840148926 257.41283679008484
785 0.49131107330322266 257.9067437648773
786 0.4886007308959961 258.3980529308319
787 0.49792003631591797 258.89884972572327
788 0.4892117977142334 259.3909628391266
789 0.48934412002563477 259.88295888900757
790 0.4967939853668213 260.3824620246887
791 0.5017032623291016 260.88692474365234
792 0.5028247833251953 261.3927791118622
793 0.5032718181610107 261.89886808395386
794 0.5023999214172363 262.4042649269104
795 0.5162701606750488 262.9233920574188
796 0.49694323539733887 263.4233407974243
797 0.48924803733825684 263.9151780605316
798 0.5056478977203369 264.42373991012573
799 0.493818998336792 264.920334815979
800 0.4759480953216553 265.39901995658875
801 0.4754152297973633 265.87722396850586
802 0.4924323558807373 266.3723530769348
803 0.4776430130004883 266.8526117801666
804 0.4759678840637207 267.33115792274475
805 0.47545862197875977 267.8094549179077
806 0.477031946182251 268.2891118526459
80

980 0.7679920196533203 386.4394419193268
981 0.7645688056945801 387.2068109512329
982 0.7861170768737793 387.99617886543274
983 0.792273998260498 388.7916188240051
984 0.7882328033447266 389.5829830169678
985 0.8520419597625732 390.43811297416687
986 0.8416080474853516 391.2830228805542
987 0.8211169242858887 392.1072189807892
988 0.8077068328857422 392.9181067943573
989 0.828732967376709 393.750040769577
990 0.7865989208221436 394.5398049354553
991 0.79252028465271 395.3352017402649
992 0.8364980220794678 396.17470383644104
993 0.8073070049285889 396.9849417209625
994 0.798062801361084 397.78576374053955
995 0.7883598804473877 398.5770409107208
996 0.7957630157470703 399.3761217594147
997 0.7850837707519531 400.1645369529724
998 0.8161051273345947 400.9836869239807
999 0.8218488693237305 401.8086647987366
1000 0.851848840713501 402.6635410785675
1001 0.8336970806121826 403.5004608631134
1002 0.8147642612457275 404.31814098358154
1003 0.8657820224761963 405.18691992759705
1004 0.801553

1174 0.9711549282073975 545.2323567867279
1175 0.9679241180419922 546.2029457092285
1176 0.9546499252319336 547.1603379249573
1177 0.9321160316467285 548.0951397418976
1178 0.9323329925537109 549.0302588939667
1179 0.9409830570220947 549.9742679595947
1180 0.9526891708374023 550.9298570156097
1181 0.9442670345306396 551.8770530223846
1182 0.9385368824005127 552.8182969093323
1183 0.9368281364440918 553.7580580711365
1184 0.9483001232147217 554.7089359760284
1185 0.9416682720184326 555.653324842453
1186 0.9285104274749756 556.5845458507538
1187 0.9298031330108643 557.5167989730835
1188 0.9435226917266846 558.4631507396698
1189 0.9486989974975586 559.4145679473877
1190 0.9696981906890869 560.3871738910675
1191 0.9663000106811523 561.3563940525055
1192 0.9682750701904297 562.3274068832397
1193 0.9708070755004883 563.3011648654938
1194 0.9427580833435059 564.2466990947723
1195 0.9366710186004639 565.1862289905548
1196 0.9346470832824707 566.1236219406128
1197 0.9154810905456543 567.0417158

1370 0.9151370525360107 730.5152928829193
1371 0.9094889163970947 731.4275379180908
1372 0.9123110771179199 732.3425688743591
1373 0.9212439060211182 733.2664148807526
1374 0.9337007999420166 734.2028207778931
1375 0.9370138645172119 735.1426448822021
1376 0.9312129020690918 736.076581954956
1377 0.9418859481811523 737.021213054657
1378 0.9316549301147461 737.955570936203
1379 0.9426059722900391 738.901111125946
1380 0.9374642372131348 739.841157913208
1381 0.944753885269165 740.7886810302734
1382 0.9341881275177002 741.7254610061646
1383 0.9446547031402588 742.6727960109711
1384 0.9411900043487549 743.616968870163
1385 0.9498879909515381 744.5696659088135
1386 0.9399149417877197 745.5122671127319
1387 0.9658710956573486 746.4809811115265
1388 0.9636378288269043 747.4474499225616
1389 0.9739081859588623 748.4241309165955
1390 0.9738891124725342 749.4008219242096
1391 0.9533109664916992 750.3570067882538
1392 0.9341597557067871 751.2939457893372
1393 0.9424152374267578 752.2392230033875

1566 1.9433751106262207 1066.9227299690247
1567 1.9217488765716553 1068.8473148345947
1568 1.9310328960418701 1070.781366109848
1569 1.9618229866027832 1072.7460010051727
1570 1.9808251857757568 1074.729907989502
1571 1.9641828536987305 1076.696939945221
1572 1.9504938125610352 1078.650455713272
1573 1.934358835220337 1080.5875988006592
1574 1.9241127967834473 1082.5147278308868
1575 1.9254438877105713 1084.4432678222656
1576 1.9266092777252197 1086.372929096222
1577 1.9305880069732666 1088.306501865387
1578 1.9314181804656982 1090.2408728599548
1579 1.8729770183563232 1092.1167917251587
1580 1.86826491355896 1093.9878380298615
1581 1.8693833351135254 1095.8600330352783
1582 1.8777899742126465 1097.7406690120697
1583 1.8793931007385254 1099.622889995575
1584 1.8749890327453613 1101.501019001007
1585 1.8699290752410889 1103.3737847805023
1586 1.8827238082885742 1105.2593307495117
1587 1.8744518756866455 1107.1366698741913
1588 1.857201099395752 1108.9968309402466
1589 1.853727102279663 

In [22]:
# Destination of the pickled flux matrix for this subgrid.
flux_ivar_file = 'training_input/'+teff_logg_feh_name+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_incl_vsini_flux_ivar.pickle'

# Make sure the target directory exists, so a missing directory does not
# fail the write after the flux matrix has already been computed.
os.makedirs(os.path.dirname(flux_ivar_file), exist_ok=True)

# The context manager closes the handle even if pickling raises,
# which the previous manual open()/close() pair did not guarantee.
with open(flux_ivar_file,'wb') as flux_ivar_file_opener:
    pickle.dump((normalized_flux),flux_ivar_file_opener)