# galah_dr4_grid_interpolation_trainingset_creation

In [52]:
# Request inline, "retina"-format figures via IPython magics.
# The magics are wrapped in try/except so that any failure to apply them is
# silently ignored and the rest of the notebook still runs.
try:
    %matplotlib inline
    %config InlineBackend.figure_format='retina'
except:
    pass

import numpy as np
import os
import pickle
from astropy.table import Table
from scipy.io import readsav
from scipy.ndimage.filters import convolve
import matplotlib.pyplot as plt
from pathlib import Path
import sys
import time

In [53]:
# Read in all available grids.
# The table is indexed by grid_index below; its teff_subgrid, logg_subgrid and
# fe_h_subgrid columns are used to build the name of the sub-grid directory.
grids = Table.read('../spectrum_grids/galah_dr4_model_trainingset_gridpoints.fits')

In [54]:
# Choose one grid_index: use the first command-line argument when it is present
# and parses as an integer, otherwise fall back to a default grid point.
try:
    grid_index = int(sys.argv[1])
    print('Using Grid index ',grid_index)
except (IndexError, ValueError):
    # IndexError: no argument given; ValueError: argument is not an integer.
    #grid_index = 1931 # 5750_4.50_0.00
    #grid_index = 1259 # 4250_1.50_-0.50
    grid_index = 1919 # 5750_4.00_0.00

    print('Using default grid index ',grid_index)

# Only the row lookup can fail because of a bad grid_index, so only that step is
# translated into the "only N entries" error. Problems while reading the
# training-set file (e.g. a missing file) propagate with their own message
# instead of being misreported as an index problem.
try:
    teff_logg_feh_name = (
        str(int(grids['teff_subgrid'][grid_index]))
        +'_'+"{:.2f}".format(grids['logg_subgrid'][grid_index])
        +'_'+"{:.2f}".format(grids['fe_h_subgrid'][grid_index])
    )
except IndexError:
    raise ValueError('There are only '+str(len(grids))+' entries within the grid')

# Directory holding the synthetic spectra and the label table of this sub-grid.
synthesis_files = '../spectrum_grids/3d_bin_subgrids/'+teff_logg_feh_name
training_set_vsini0 = Table.read('../spectrum_grids/3d_bin_subgrids/'+teff_logg_feh_name+'/galah_dr4_trainingset_'+teff_logg_feh_name+'.fits')
print('Grid index '+str(grid_index)+' corresponds to '+teff_logg_feh_name)

Using default grid index  1919
Grid index 1919 corresponds to 5750_4.00_0.00


### Below we define how to broaden a spectrum with a certain vsini value

In [55]:
def integrate_flux(mu, inten, deltav, vsini, vrt, osamp=1):
    """
    Produces a flux profile by integrating intensity profiles (sampled
    at various mu angles) over the visible stellar surface.

    Intensity profiles are weighted by the fraction of the projected
    stellar surface they represent, apportioning the area between
    adjacent MU points equally. Additional weights (such as those
    used in a Gauss-Legendre quadrature) can not meaningfully be
    used in this scheme.  About twice as many points are required
    with this scheme to achieve the precision of Gauss-Legendre
    quadrature.

    DELTAV, VSINI, and VRT must all be in the same units (e.g. km/s).
    If specified, OSAMP should be a positive integer.

    Parameters
    ----------
    mu : array(float) of size (nmu,)
        cosine of the angle between the outward normal and
        the line of sight for each intensity spectrum in INTEN.
    inten : array(float) of size(nmu, npts)
        intensity spectra at specified values of MU.
    deltav : float
        velocity spacing between adjacent spectrum points
        in INTEN (same units as VSINI and VRT).
    vsini : float
        maximum radial velocity, due to solid-body rotation.
        If an array is passed, only its first element is used.
        If only one MU value is given, a non-zero VSINI is ignored
        and a warning is logged.
    vrt : float
        radial-tangential macroturbulence parameter, i.e.
        np.sqrt(2) times the standard deviation of a Gaussian distribution
        of turbulent velocities. The same distribution function describes
        the radial motions of one component and the tangential motions of
        a second component. Each component covers half the stellar surface.
        See 'The Observation and Analysis of Stellar Photospheres', Gray.
        If an array is passed, only its first element is used.
    osamp : int, optional
        internal oversampling factor for convolutions.
        By default convolutions are done using the input points (OSAMP=1),
        but when OSAMP is set to higher integer values, the input spectra
        are first oversampled by cubic spline interpolation.
        Values below 1 are treated as 1.

    Returns
    -------
    value : array(float) of size (npts,)
        Disk integrated flux profile.

    Note
    ------------
        If you use this algorithm in work that you publish, please cite
        Valenti & Anderson 1996, PASP, currently in preparation.
    """
    # History
    # -----------
    # Feb-88  GM
    #     Created ANA version.
    # 13-Oct-92 JAV
    #     Adapted from G. Marcy's ANA routine of the same name.
    # 03-Nov-93 JAV
    #     Switched to annular convolution technique.
    # 12-Nov-93 JAV
    #     Fixed bug. Intensity components not added when vsini=0.
    # 14-Jun-94 JAV
    #     Reformatted for "public" release. Heavily commented.
    #     Pass deltav instead of 2.998d5/deltav. Added osamp
    #     keyword. Added rebinning logic at end of routine.
    #     Changed default osamp from 3 to 1.
    # 20-Feb-95 JAV
    #     Added mu as an argument to handle arbitrary mu sampling
    #     and remove ambiguity in intensity profile ordering.
    #     Interpret VTURB as np.sqrt(2)*sigma instead of just sigma.
    #     Replaced call_external with call to spl_{init|interp}.
    # 03-Apr-95 JAV
    #     Multiply flux by pi to give observed flux.
    # 24-Oct-95 JAV
    #     Force "nmk" padding to be at least 3 pixels.
    # 18-Dec-95 JAV
    #     Renamed from dskint() to rtint(). No longer make local
    #     copy of intensities. Use radial-tangential instead
    #     of isotropic Gaussian macroturbulence.
    # 26-Jan-99 JAV
    #     For NMU=1 and VSINI=0, assume resolved solar surface;
    #     apply R-T macro, but suppress vsini broadening.
    # 01-Apr-99 GMH
    #     Use annuli weights, rather than assuming equal area.
    # 07-Mar-12 JAV
    #     Force vsini and vmac to be scalars.

    # Local imports: neither name is imported at the top of this notebook, and
    # without them the warning branch and the osamp > 1 branch raise NameError.
    import logging
    from scipy.interpolate import interp1d

    # Accept plain sequences as well as arrays.
    mu = np.atleast_1d(mu)
    inten = np.asarray(inten)

    # Make local copies of various input variables, which will be altered below.
    # Force vsini and especially vmac to be scalars. Otherwise mu dependence fails.
    if np.size(vsini) > 1:
        vsini = vsini[0]
    if np.size(vrt) > 1:
        vrt = vrt[0]

    # Determine oversampling factor (forced to an integer >= 1). It is not called
    # "os" so that the standard-library module of that name is not shadowed.
    oversample = int(round(np.clip(osamp, 1, None)))

    # Convert input MU to projected radii, R, of annuli for a star of unit radius
    #  (which is just sine, rather than cosine, of the angle between the outward
    #  normal and the line of sight).
    rmu = np.sqrt(1 - mu ** 2)  # use simple trig identity

    # Sort the projected radii and corresponding intensity spectra into ascending
    #  order (i.e. from disk center to the limb), which is equivalent to sorting
    #  MU in descending order.
    isort = np.argsort(rmu)
    rmu = rmu[isort]  # reorder projected radii
    nmu = np.size(mu)  # number of radii
    if nmu == 1 and vsini != 0:
        logging.getLogger(__name__).warning(
            "Vsini is non-zero, but only one projected radius (mu value) is set. No rotational broadening will be performed."
        )
        vsini = 0  # ignore vsini if only 1 mu

    # Calculate projected radii for boundaries of disk integration annuli.  The n+1
    # boundaries are selected such that r(i+1) exactly bisects the area between
    # rmu(i) and rmu(i+1). The innermost boundary, r(0) is set to 0 (disk center)
    # and the outermost boundary, r(nmu) is set to 1 (limb).
    if nmu > 1 or vsini != 0:  # really want disk integration
        r = np.sqrt(
            0.5 * (rmu[:-1] ** 2 + rmu[1:] ** 2)
        )  # area midpoints between rmu
        r = np.concatenate(([0], r, [1]))

        # Calculate integration weights for each disk integration annulus.  The weight
        # is just given by the relative area of each annulus, normalized such that
        # the sum of all weights is unity.  Weights for limb darkening are included
        # explicitly in the intensity profiles, so they aren't needed here.
        wt = r[1:] ** 2 - r[:-1] ** 2  # weights = relative areas
    else:
        wt = np.array([1.0])  # single mu value, full weight

    # Generate index vectors for input and oversampled points. Note that the
    # oversampled indices are carefully chosen such that every "oversample" finely
    # sampled points fit exactly into one input bin. This makes it simple to
    # "integrate" the finely sampled points at the end of the routine.
    npts = inten.shape[1]  # number of points
    xpix = np.arange(npts, dtype=float)  # point indices
    nfine = oversample * npts  # number of oversampled points
    xfine = (0.5 / oversample) * (
        2 * np.arange(nfine, dtype=float) - oversample + 1
    )  # oversampled points indices

    # Loop through annuli, constructing and convolving with rotation kernels.
    flux = np.zeros(nfine)  # init flux vector
    for imu in range(nmu):  # loop thru integration annuli

        #  Make an oversampled version of the intensity profile for the current
        #  annulus by cubic spline interpolation.
        ypix = inten[isort[imu]]  # extract intensity profile
        if oversample == 1:
            # just copy (use) original profile
            yfine = ypix
        else:
            # Spline onto fine wavelength scale. The fine grid reaches slightly
            # beyond the first and last input pixel, so the spline has to be
            # allowed to extrapolate; otherwise interp1d raises a ValueError.
            yfine = interp1d(
                xpix, ypix, kind="cubic", fill_value="extrapolate"
            )(xfine)

        # Construct the convolution kernel which describes the distribution of
        # rotational velocities present in the current annulus. The distribution has
        # been derived analytically for annuli of arbitrary thickness in a rigidly
        # rotating star. The kernel is constructed in two pieces: one piece for
        # radial velocities less than the maximum velocity along the inner edge of
        # the annulus, and one piece for velocities greater than this limit.
        if vsini > 0:
            # nontrivial case
            r1 = r[imu]  # inner edge of annulus
            r2 = r[imu + 1]  # outer edge of annulus
            dv = deltav / oversample  # oversampled velocity spacing
            maxv = vsini * r2  # maximum velocity in annulus
            nrk = 2 * int(maxv / dv) + 3  # number of oversampled kernel points
            # velocity scale for kernel
            v = dv * (np.arange(nrk, dtype=float) - ((nrk - 1) / 2))
            rkern = np.zeros(nrk)  # init rotational kernel
            j1 = np.abs(v) < vsini * r1  # low velocity points
            rkern[j1] = np.sqrt((vsini * r2) ** 2 - v[j1] ** 2) - np.sqrt(
                (vsini * r1) ** 2 - v[j1] ** 2
            )  # generate distribution

            j2 = (np.abs(v) >= vsini * r1) & (np.abs(v) <= vsini * r2)
            rkern[j2] = np.sqrt(
                (vsini * r2) ** 2 - v[j2] ** 2
            )  # generate distribution

            rkern = rkern / np.sum(rkern)  # normalize kernel

            # Convolve the intensity profile with the rotational velocity kernel for
            # this annulus. mode="nearest" extends the profile with its edge values,
            # which takes the place of explicit padding at both ends.
            if nrk > 3:
                yfine = convolve(yfine, rkern, mode="nearest")

        # Calculate projected sigma for radial and tangential velocity distributions.
        muval = mu[isort[imu]]  # current value of mu
        sigma = oversample * vrt / np.sqrt(2) / deltav  # standard deviation in points
        sigr = sigma * muval  # reduce by current mu value
        sigt = sigma * np.sqrt(1.0 - muval ** 2)  # reduce by np.sqrt(1-mu**2)

        # Figure out how many points to use in macroturbulence kernel.
        nmk = int(10 * sigma)
        nmk = int(np.clip(nmk, 3, (nfine - 3) // 2))

        # Construct radial macroturbulence kernel with a sigma of mu*VRT/np.sqrt(2).
        if sigr > 0:
            xarg = np.linspace(-nmk, nmk, 2 * nmk + 1) / sigr
            xarg = np.clip(-0.5 * xarg ** 2, -20, None)
            mrkern = np.exp(xarg)  # compute the gaussian
            mrkern = mrkern / np.sum(mrkern)  # normalize the profile
        else:
            mrkern = np.zeros(2 * nmk + 1)
            mrkern[nmk] = 1.0  # delta function

        # Construct tangential kernel with a sigma of np.sqrt(1-mu**2)*VRT/np.sqrt(2).
        if sigt > 0:
            xarg = np.linspace(-nmk, nmk, 2 * nmk + 1) / sigt
            xarg = np.clip(-0.5 * xarg ** 2, -20, None)
            mtkern = np.exp(xarg)  # compute the gaussian
            mtkern = mtkern / np.sum(mtkern)  # normalize the profile
        else:
            mtkern = np.zeros(2 * nmk + 1)
            mtkern[nmk] = 1.0  # delta function

        # Sum the radial and tangential components, weighted by surface area.
        area_r = 0.5  # assume equal areas
        area_t = 0.5  # ar+at must equal 1
        mkern = area_r * mrkern + area_t * mtkern  # add both components

        # Convolve with the macroturbulence kernel, again extending the profile
        # with its edge values at both ends.
        yfine = convolve(yfine, mkern, mode="nearest")

        # Add contribution from current annulus to the running total.
        flux = flux + wt[imu] * yfine  # add profile to running total

    # Average every group of "oversample" fine points back into one input bin and
    # multiply by pi to give the observed flux.
    flux = np.reshape(flux, (npts, oversample))
    flux = np.pi * np.sum(flux, axis=1) / oversample
    return flux

In [56]:
def broaden_spectrum(wint_seg, sint_seg, wave_seg, cmod_seg, vsini=0, vmac=0, debug=False):
    """
    Disk-integrate and broaden one spectrum segment, then normalise it.

    The specific intensities are interpolated onto a wavelength grid with a
    constant velocity step, integrated over the disk with integrate_flux
    (applying vsini and vmac), resampled onto an equidistant wavelength grid
    and divided by the continuum interpolated onto that same grid.

    Parameters
    ----------
    wint_seg : array
        wavelength grid of the intensity spectra.
    sint_seg : array
        intensity spectra; rows 0..6 are used, one per mu angle.
    wave_seg : array
        wavelength grid belonging to cmod_seg.
    cmod_seg : array
        values divided out of the integrated flux (presumably the continuum
        model of the synthesis -- confirm against the synthesis output).
    vsini : float, optional
        rotational broadening; the absolute value is used.
    vmac : float, optional
        macroturbulence passed to integrate_flux as vrt; the absolute value is used.
    debug : bool, optional
        if True, print the wavelength step and the number of output pixels.

    Returns
    -------
    (wave_equi, flux) : tuple of arrays
        equidistant wavelength grid and the normalised, broadened flux on it.
    """
    nw = len(wint_seg)
    clight = 299792.5
    nmu = 7
    # Builtin float instead of np.float: the alias was removed in NumPy 1.24.
    mu = (np.sqrt(0.5*(2*np.arange(nmu)+1)/float(nmu)))[::-1]
    wmid = 0.5 * (wint_seg[nw-1] + wint_seg[0])
    wspan = wint_seg[nw-1] - wint_seg[0]
    # NOTE(review): these slices stop one element early, so the last wavelength
    # interval is not part of the minimum (looks like a leftover of 1-based,
    # inclusive indexing). Left unchanged because vstep1 defines the output sampling.
    vstep1 = min(wint_seg[1:nw-1] - wint_seg[0:nw-2])
    vstep2 = 0.1 * wspan / (nw-1) / wmid * clight
    vstep3 = 0.05
    # NOTE(review): vstep1 is a wavelength step while vstep2 is scaled by clight
    # (a velocity); confirm that comparing them directly is intended.
    vstep = np.max([vstep1,vstep2,vstep3])

    # Generate model wavelength scale X, with uniform velocity step
    # (constant ratio between neighbouring wavelengths).
    nx = int(np.floor(np.log10(wint_seg[nw-1] / wint_seg[0])/ np.log10(1.0+vstep / clight))+1)
    if nx % 2 == 0: nx += 1
    resol_out = 1.0/((wint_seg[nw-1] / wint_seg[0])**(1.0/(nx-1.0))-1.0)
    vstep = clight / resol_out
    x_seg = wint_seg[0] * (1.0 + 1.0 / resol_out)**np.arange(nx)

    # Interpolate intensity spectra onto new model wavelength scale.
    yi_seg = np.empty((nmu, nx))

    for imu in range(nmu):
        yi_seg[imu] = np.interp(x_seg, wint_seg, sint_seg[imu])

    y_seg = integrate_flux(mu, yi_seg, vstep, np.abs(vsini), np.abs(vmac))

    # Resample flux and cmod_seg onto an equidistant wavelength grid whose step
    # is the smallest input wavelength step found above.
    dispersion = vstep1
    wave_equi = np.arange(x_seg[0],x_seg[-1]+dispersion,dispersion)

    c_seg = np.interp(wave_equi,wave_seg,cmod_seg)
    y_seg = np.interp(wave_equi,x_seg,y_seg)

    if debug:
        print(vstep1,len(wave_equi))

    return(wave_equi,y_seg/c_seg)

In [57]:
# vsini values at which every training-set spectrum is broadened further below;
# the last (largest) entry is also the one used for the gradient spectra.
vsini_values = np.array([1.5, 3.0, 6.0, 9.0, 12.0, 24.0]) # km/s

# Gradient Spectra and Masks

In [58]:
# Broaden spectrum number 0 (the reference "null" spectrum) of every CCD with the
# last entry of vsini_values and store wavelength and flux per CCD.
null_spectrum_broad = {}
for ccd in (1, 2, 3, 4):
    null_spectrum = readsav(
        synthesis_files+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_0_'+str(ccd)+'.out'
    ).results[0]
    broadened_null = broaden_spectrum(
        null_spectrum.wint,
        null_spectrum.sint,
        null_spectrum.wave,
        null_spectrum.cmod,
        vsini = vsini_values[-1]
    )
    null_spectrum_broad['wave_null_ccd'+str(ccd)] = broadened_null[0]
    null_spectrum_broad['spectrum_null_ccd'+str(ccd)] = broadened_null[1]
# Field names of the last synthesis record that was read (CCD 4).
print('The synthetic spectra come with keywords ',null_spectrum.dtype.names)

The synthetic spectra come with keywords  ('WAVE', 'SMOD', 'CMOD', 'WINT', 'SINT')


In [59]:
# Label names: all training-set columns except the first two and the last one.
labels = np.array(training_set_vsini0.keys()[2:-1])
labels

array(['teff', 'logg', 'fe_h', 'vmic', 'li_fe', 'c_fe', 'n_fe', 'o_fe',
       'na_fe', 'mg_fe', 'al_fe', 'si_fe', 'k_fe', 'ca_fe', 'sc_fe',
       'ti_fe', 'v_fe', 'cr_fe', 'mn_fe', 'co_fe', 'ni_fe', 'cu_fe',
       'zn_fe', 'rb_fe', 'sr_fe', 'y_fe', 'zr_fe', 'mo_fe', 'ru_fe',
       'ba_fe', 'la_fe', 'ce_fe', 'nd_fe', 'sm_fe', 'eu_fe'], dtype='<U5')

In [60]:
# Build one LaTeX-formatted axis/table label per entry of `labels`.
# fancy_labels is later indexed with the same label_index as `labels`, so every
# label must contribute exactly one entry.
special_fancy_labels = {
    'teff': r'$T_\mathrm{eff}~/~\mathrm{K}$',
    'logg': r'$\log (g~/~\mathrm{cm\,s^{-2}})$',
    'fe_h': r'$\mathrm{[Fe/H]}$',
    'vmic': r'$v_\mathrm{mic}~/~\mathrm{km\,s^{-1}}$',
    'vsini': r'$v \sin i~/~\mathrm{km\,s^{-1}}$',
}

fancy_labels = []
for label in labels:
    if label in special_fancy_labels:
        fancy_labels.append(special_fancy_labels[label])
    elif label[-3:] == '_fe':
        # Abundance label 'x_fe' -> '[X/Fe]'. Raw string: '\m' is not a valid
        # escape sequence in a normal string literal.
        fancy_labels.append(r'$\mathrm{['+label[0].upper()+label[1:-3]+'/Fe]}$')
    else:
        print('No entry for '+label)
        # Fall back to the plain label so that fancy_labels stays aligned with labels.
        fancy_labels.append(str(label))
print(fancy_labels)

['$T_\\mathrm{eff}~/~\\mathrm{K}$', '$\\log (g~/~\\mathrm{cm\\,s^{-2}})$', '$\\mathrm{[Fe/H]}$', '$v_\\mathrm{mic}~/~\\mathrm{km\\,s^{-1}}$', '$\\mathrm{[Li/Fe]}$', '$\\mathrm{[C/Fe]}$', '$\\mathrm{[N/Fe]}$', '$\\mathrm{[O/Fe]}$', '$\\mathrm{[Na/Fe]}$', '$\\mathrm{[Mg/Fe]}$', '$\\mathrm{[Al/Fe]}$', '$\\mathrm{[Si/Fe]}$', '$\\mathrm{[K/Fe]}$', '$\\mathrm{[Ca/Fe]}$', '$\\mathrm{[Sc/Fe]}$', '$\\mathrm{[Ti/Fe]}$', '$\\mathrm{[V/Fe]}$', '$\\mathrm{[Cr/Fe]}$', '$\\mathrm{[Mn/Fe]}$', '$\\mathrm{[Co/Fe]}$', '$\\mathrm{[Ni/Fe]}$', '$\\mathrm{[Cu/Fe]}$', '$\\mathrm{[Zn/Fe]}$', '$\\mathrm{[Rb/Fe]}$', '$\\mathrm{[Sr/Fe]}$', '$\\mathrm{[Y/Fe]}$', '$\\mathrm{[Zr/Fe]}$', '$\\mathrm{[Mo/Fe]}$', '$\\mathrm{[Ru/Fe]}$', '$\\mathrm{[Ba/Fe]}$', '$\\mathrm{[La/Fe]}$', '$\\mathrm{[Ce/Fe]}$', '$\\mathrm{[Nd/Fe]}$', '$\\mathrm{[Sm/Fe]}$', '$\\mathrm{[Eu/Fe]}$']


In [61]:
# Both gradient tables start with the same two columns: the wavelength and the
# broadened null spectrum, each concatenated over the four CCDs.
gradient_spectra_up = Table()
gradient_spectra_down = Table()
for gradient_table in (gradient_spectra_up, gradient_spectra_down):
    gradient_table['wave'] = np.concatenate(
        [null_spectrum_broad['wave_null_ccd'+str(ccd)] for ccd in [1,2,3,4]]
    )
    gradient_table['median'] = np.concatenate(
        [null_spectrum_broad['spectrum_null_ccd'+str(ccd)] for ccd in [1,2,3,4]]
    )

In [62]:
# For every label, broaden the spectrum synthesised with that label increased
# (file number 2+label_index) and decreased (file number 37+label_index) and store
# the difference to the broadened null spectrum, concatenated over the four CCDs.
for label_index, label in enumerate(labels):

    gradient_up = []
    gradient_down = []

    for ccd in [1,2,3,4]:
        try:
            increased_spectrum = readsav(synthesis_files+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_'+str(2+label_index)+'_'+str(ccd)+'.out').results[0]
            decreased_spectrum = readsav(synthesis_files+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_'+str(37+label_index)+'_'+str(ccd)+'.out').results[0]
        except OSError:
            # The files could not be opened under the default name: try the
            # alternative "cannon" file naming. Other errors are not swallowed.
            increased_spectrum = readsav(synthesis_files+'/galah_dr4_cannon_trainingset_'+teff_logg_feh_name+'_'+str(2+label_index)+'_'+str(ccd)+'.out').results[0]
            decreased_spectrum = readsav(synthesis_files+'/galah_dr4_cannon_trainingset_'+teff_logg_feh_name+'_'+str(37+label_index)+'_'+str(ccd)+'.out').results[0]

        # Only the flux is needed; the returned wavelength grid is not used here.
        _, spectrum_increase = broaden_spectrum(
            increased_spectrum.wint,
            increased_spectrum.sint,
            increased_spectrum.wave,
            increased_spectrum.cmod,
            vsini = vsini_values[-1]
        )

        _, spectrum_decrease = broaden_spectrum(
            decreased_spectrum.wint,
            decreased_spectrum.sint,
            decreased_spectrum.wave,
            decreased_spectrum.cmod,
            vsini = vsini_values[-1]
        )

        gradient_up.append(spectrum_increase - null_spectrum_broad['spectrum_null_ccd'+str(ccd)])
        gradient_down.append(spectrum_decrease - null_spectrum_broad['spectrum_null_ccd'+str(ccd)])

    gradient_spectra_up[label] = np.concatenate((gradient_up))
    gradient_spectra_down[label] = np.concatenate((gradient_down))

In [63]:
# For every label: report which fraction of the pixels responds to the label
# change above two flux thresholds, store the mask for the larger threshold,
# and save a four-panel figure (one panel per CCD) of the gradient spectra.
grid_masks = Table()
percentage_used = []

Path('gradient_spectra/'+teff_logg_feh_name).mkdir(parents=True, exist_ok=True)

for label_index, label in enumerate(labels):
    label_values = training_set_vsini0[label]
    step_up = label_values[2+label_index]-label_values[0]
    print(label, step_up)

    threshold1 = 0.0001
    threshold2 = 0.001

    flux_up = gradient_spectra_up[label]
    flux_down = gradient_spectra_down[label]

    # Largest absolute response per pixel over the increased and decreased spectrum.
    strongest_response = np.max([np.abs(flux_up),np.abs(flux_down)],axis=0)
    above_threshold1 = (strongest_response >= threshold1)
    above_threshold2 = (strongest_response >= threshold2)

    total = len(gradient_spectra_up)
    count_above1 = len(np.where(above_threshold1)[0])
    count_above2 = len(np.where(above_threshold2)[0])
    percent_above1 = "{:.1f}".format(100*count_above1/total)
    percent_above2 = "{:.1f}".format(100*count_above2/total)

    print(str(threshold1)+':   ',percent_above1+'%',count_above1)
    print(str(threshold2)+':   ',percent_above2+'%',count_above2)

    percentage_used.append([fancy_labels[label_index], r'$\pm$'+str(step_up), percent_above1, percent_above2])

    grid_masks[label] = above_threshold2

    wave_up = gradient_spectra_up['wave']
    wave_down = gradient_spectra_down['wave']

    fig, axes = plt.subplots(1,4,figsize=(15,2.5),sharey=True)
    for ccd, ax in zip([1,2,3,4], axes):
        # Pixels of this CCD, selected by wavelength range.
        in_ccd = (wave_up > (3+ccd)*1000) & (wave_up < (4+ccd)*1000)

        # Each legend entry is attached to exactly one panel.
        if ccd == 2:
            up_label = r'$\Delta f$ for $\Delta$'+fancy_labels[label_index]+' = '+str(step_up)
        else:
            up_label = '_nolegend_'
        ax.plot(wave_up[in_ccd], flux_up[in_ccd], c='k',lw=0.5,label = up_label)

        if ccd == 3:
            down_label = r'$-\Delta f$ for $\Delta$'+fancy_labels[label_index]+' = '+str(label_values[37+label_index]-label_values[0])
        else:
            down_label = '_nolegend_'
        ax.plot(wave_down[in_ccd], -flux_down[in_ccd], c='C0',lw=0.5,label = down_label)

        ax.set_xlabel(r'Wavelength [$\AA$]')
        if ccd==1:
            ax.set_ylabel(r'$\Delta f~/~\mathrm{norm.}$')

        # Mark the pixels above each threshold along the zero line.
        for mask, colour, threshold_text in (
            (above_threshold1, 'red', str(0.0001)),
            (above_threshold2, 'orange', str(0.001)),
        ):
            marked = mask & in_ccd
            if ccd == 4:
                marker_label = r'$\vert\Delta f\vert$ above '+threshold_text
            else:
                marker_label = '_nolegend_'
            ax.scatter(
                wave_up[marked],
                np.zeros(len(np.where(marked)[0])),
                c=colour,s=2,label=marker_label
            )

        ax.set_ylim(
            np.min([np.min(flux_up),-3*threshold1]),
            np.max([np.max(flux_up),3*threshold1])
        )
        if ccd in [2,3,4]:
            if label in ['teff']:
                ax.legend()
            else:
                ax.legend(loc='lower center')
    plt.tight_layout()
    plt.savefig('gradient_spectra/'+teff_logg_feh_name+'/gradient_spectrum_'+teff_logg_feh_name+'_'+label+'.png',dpi=200,bbox_inches='tight')
    if grid_index in [1931]:
        # Only for this grid point: also save the figure for the paper and show it.
        plt.savefig('../galah_dr4_paper/figures/gradient_spectrum_'+teff_logg_feh_name+'_'+label+'.png',dpi=200,bbox_inches='tight')
        plt.show()
    plt.close()


teff 250.0
0.0001:    99.1% 63674
0.001:    87.4% 56133
logg 0.5
0.0001:    92.4% 59339
0.001:    47.9% 30784
fe_h 0.25
0.0001:    99.4% 63859
0.001:    95.7% 61476
vmic 1.0
0.0001:    63.8% 40944
0.001:    41.9% 26925
li_fe 2.25
0.0001:    1.4% 891
0.001:    0.4% 278
c_fe 1.0
0.0001:    99.0% 63608
0.001:    95.2% 61164
n_fe 1.0
0.0001:    97.7% 62738
0.001:    83.9% 53912
o_fe 2.0
0.0001:    97.7% 62721
0.001:    75.5% 48499
na_fe 1.0
0.0001:    71.5% 45928
0.001:    28.0% 17989
mg_fe 1.0
0.0001:    98.6% 63323
0.001:    79.8% 51265
al_fe 1.0
0.0001:    75.9% 48757
0.001:    33.6% 21592
si_fe 1.0
0.0001:    97.9% 62892
0.001:    72.9% 46796
k_fe 1.0
0.0001:    26.6% 17099
0.001:    1.8% 1165
ca_fe 1.0
0.0001:    80.8% 51894
0.001:    41.7% 26794
sc_fe 1.0
0.0001:    10.4% 6703
0.001:    4.9% 3136
ti_fe 1.0
0.0001:    62.1% 39852
0.001:    32.7% 20989
v_fe 1.0
0.0001:    35.0% 22476
0.001:    19.0% 12204
cr_fe 1.0
0.0001:    71.5% 45950
0.001:    37.3% 23941
mn_fe 1.0
0.0001:    47.4%

In [64]:
if grid_index in [1931]:
    # Write the mask percentages as a LaTeX table, one single-element row per
    # output line. The 0.001 column (entry 3) is listed before the 0.0001
    # column (entry 2), matching the header.
    table_header = [
    [r'\begin{table}[!ht]'],
    [r'    \centering'],
    [r'    \caption{Example of mask estimation for \textit{The Cannon}/\textit{The Payne} model creation. Listed are percentages of the spectrum that respond to an in-/decrease of each label above 0.001 and 0.0001 of the normalised flux.}'],
    [r'    \label{tab:cannon_mask_percentage}'],
    [r'    \begin{tabular}{cccc}'],
    [r'    \hline \hline'],
    [r'    Label &  Label change & $\vert \Delta f \vert > 0.001~/~\%$ & $\vert \Delta f \vert > 0.0001~/~\%$ \\'],
    [r'    \hline']
    ]
    table_body = [
        [r'    '+each[0]+' & '+each[1]+' & '+each[3]+' & '+each[2]+r' \\']
        for each in percentage_used
    ]
    table_footer = [
        [r'    \hline'],
        [r'    \end{tabular}'],
        [r'\end{table}'],
    ]
    table_text = table_header + table_body + table_footer

    np.savetxt('../galah_dr4_paper/tables/mask_percentage_1931.tex',np.array(table_text),fmt='%s')

In [65]:
# Create the training-input directory of this sub-grid, then write the gradient
# spectra (up/down) and the masks as FITS files, overwriting existing ones.
Path('training_input/'+teff_logg_feh_name).mkdir(parents=True, exist_ok=True)

for file_suffix, gradient_table in (
    ('_gradient_spectra_up.fits', gradient_spectra_up),
    ('_gradient_spectra_down.fits', gradient_spectra_down),
):
    gradient_table.write('gradient_spectra/'+teff_logg_feh_name+'/'+teff_logg_feh_name+file_suffix,overwrite=True)

grid_masks.write('training_input/'+teff_logg_feh_name+'/'+teff_logg_feh_name+'_masks.fits',overwrite=True)

# Trainingset flux and ivar at different vsini values

In [66]:
# Prepare the full trainingset (including vsini sampled from vsini_values):
# every column is repeated once per vsini value, and a 'vsini' column is
# inserted after the first six columns.

column_names = training_set_vsini0.keys()
n_vsini = len(vsini_values)
n_spectra = len(training_set_vsini0['spectrum_index'])

full_trainingset = Table()
for label in column_names[:6]:
    full_trainingset[label] = np.concatenate(np.array([training_set_vsini0[label]] * n_vsini))
# Each vsini value is repeated for a complete copy of the original training set.
full_trainingset['vsini'] = np.repeat(vsini_values, n_spectra)
for label in column_names[6:]:
    full_trainingset[label] = np.concatenate(np.array([training_set_vsini0[label]] * n_vsini))

full_trainingset.write('training_input/'+teff_logg_feh_name+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_incl_vsini.fits',overwrite=True)

In [67]:
# Prepare the wavelength array (wavelengths of the broadened null spectrum,
# concatenated over the four CCDs) and pickle it, if not yet available.
# An existing file is left untouched.

wavelength_array = np.concatenate(([null_spectrum_broad['wave_null_ccd'+str(ccd)] for ccd in [1,2,3,4]]))
wavelength_file = 'training_input/galah_dr4_3dbin_wavelength_array.pickle'
if not os.path.isfile(wavelength_file):
    # The context manager closes the file even if pickling fails.
    with open(wavelength_file,'wb') as wavelength_file_opener:
        pickle.dump((wavelength_array),wavelength_file_opener)

In [68]:
# Display the first row of the full training set.
full_trainingset[0]

grid_index,spectrum_index,teff,logg,fe_h,vmic,vsini,li_fe,c_fe,n_fe,o_fe,na_fe,mg_fe,al_fe,si_fe,k_fe,ca_fe,sc_fe,ti_fe,v_fe,cr_fe,mn_fe,co_fe,ni_fe,cu_fe,zn_fe,rb_fe,sr_fe,y_fe,zr_fe,mo_fe,ru_fe,ba_fe,la_fe,ce_fe,nd_fe,sm_fe,eu_fe,sme_abund [99]
int64,int64,float32,float32,float32,float32,float64,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float32,float64
1919,0,5750.0,4.0,0.0,1.5,1.5,1.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9207554259437328 .. -20.035855713200196


In [69]:
def prepare_normalised_spectra(spectrum_index, vsini):
    """
    Read, broaden and normalise one synthetic spectrum for all four CCDs.

    Parameters
    ----------
    spectrum_index : int
        number of the spectrum within the current sub-grid; part of the file name.
    vsini : float
        rotational broadening passed on to broaden_spectrum.

    Returns
    -------
    array
        normalised, broadened flux of CCDs 1 to 4, concatenated in that order.
    """
    normalised_flux_for_index = []

    for ccd in [1,2,3,4]:

        try:
            synthetic_spectrum = readsav(synthesis_files+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_'+str(spectrum_index)+'_'+str(ccd)+'.out').results[0]
        except OSError:
            # The file could not be opened under the default name: try the
            # alternative "cannon" file naming. Other errors are not swallowed.
            synthetic_spectrum = readsav(synthesis_files+'/galah_dr4_cannon_trainingset_'+teff_logg_feh_name+'_'+str(spectrum_index)+'_'+str(ccd)+'.out').results[0]

        # Only the flux is kept; the wavelength grid is not needed here.
        _, flux_broadened = broaden_spectrum(
            synthetic_spectrum.wint,
            synthetic_spectrum.sint,
            synthetic_spectrum.wave,
            synthetic_spectrum.cmod,
            vsini=vsini)

        normalised_flux_for_index.append(flux_broadened)

    normalised_flux_for_index = np.concatenate((normalised_flux_for_index))

    return(normalised_flux_for_index)

In [70]:
def populate_normalised_flux_and_ivar_matrix(index):
    """
    Return the normalised flux for row `index` of full_trainingset.

    The row supplies the spectrum number and the vsini value that are handed
    to prepare_normalised_spectra; only flux is returned.
    """
    row_vsini = full_trainingset['vsini'][index]
    row_spectrum_index = full_trainingset['spectrum_index'][index]
    return prepare_normalised_spectra(row_spectrum_index, vsini=row_vsini)
    
# One row per training-set entry, one column per pixel of the wavelength array.
normalized_flux = np.ones((len(full_trainingset), len(wavelength_array)))

start = time.time()
now = start

for index in range(len(full_trainingset)):
    normalized_flux[index] = populate_normalised_flux_and_ivar_matrix(index)

    # Progress: row number, seconds for this row, seconds since start.
    print(index,time.time()-now,time.time()-start)
    now = time.time()

# Summary line. It does not depend on the loop variable, so it also works for
# an empty training set, and it reports the total time instead of a freshly
# reset per-row timer.
print('Processed '+str(len(full_trainingset))+' spectra in '+"{:.1f}".format(time.time()-start)+' s')


0 0.1932840347290039 0.19332480430603027
1 0.21757817268371582 0.41370701789855957
2 0.19890379905700684 0.6153848171234131
3 0.2031240463256836 0.8213489055633545
4 0.19527292251586914 1.0192468166351318
5 0.1860790252685547 1.2080409526824951
6 0.2050790786743164 1.4157698154449463
7 0.19893097877502441 1.6174778938293457
8 0.20038294792175293 1.8209149837493896
9 0.2006392478942871 2.0242578983306885
10 0.20417094230651855 2.231315851211548
11 0.20042896270751953 2.4344897270202637
12 0.19376683235168457 2.631115674972534
13 0.2042839527130127 2.8382327556610107
14 0.19235587120056152 3.033474922180176
15 0.18738698959350586 3.223620891571045
16 0.1888418197631836 3.4151737689971924
17 0.18753623962402344 3.6054999828338623
18 0.18517804145812988 3.7936818599700928
19 0.1850452423095703 3.9814939498901367
20 0.1972489356994629 4.181514024734497
21 0.19214105606079102 4.3764119148254395
22 0.19302701950073242 4.571900844573975
23 0.19244909286499023 4.767026901245117
24 0.19178318977

392 0.283919095993042 92.60213303565979
393 0.2873659133911133 92.89226078987122
394 0.2972989082336426 93.19313287734985
395 0.2840898036956787 93.48002481460571
396 0.2924177646636963 93.77530884742737
397 0.29021215438842773 94.06835198402405
398 0.2895548343658447 94.36081886291504
399 0.28429198265075684 94.64790081977844
400 0.3025059700012207 94.95343589782715
401 0.29244208335876465 95.24879002571106
402 0.3031620979309082 95.55493474006653
403 0.29084014892578125 95.84843492507935
404 0.29221296310424805 96.14350175857544
405 0.29317402839660645 96.43948793411255
406 0.28987884521484375 96.73224782943726
407 0.2921409606933594 97.02721095085144
408 0.29086995124816895 97.3209319114685
409 0.2940328121185303 97.61776280403137
410 0.2913529872894287 97.91198992729187
411 0.2915060520172119 98.20637083053589
412 0.2929809093475342 98.50225281715393
413 0.29433298110961914 98.79944181442261
414 0.29148125648498535 99.0937979221344
415 0.2898590564727783 99.38648295402527
416 0.289

782 0.5058212280273438 254.7340168952942
783 0.49590587615966797 255.23275709152222
784 0.49091601371765137 255.7264609336853
785 0.48959803581237793 256.2187478542328
786 0.4911799430847168 256.7126908302307
787 0.5002532005310059 257.21572375297546
788 0.49472594261169434 257.7134208679199
789 0.5000820159912109 258.21628284454346
790 0.5043749809265137 258.7233669757843
791 0.5082120895385742 259.2345218658447
792 0.5217239856719971 259.7592577934265
793 0.5314590930938721 260.2940218448639
794 0.5233631134033203 260.8207440376282
795 0.547072172164917 261.37100982666016
796 0.539168119430542 261.9132659435272
797 0.5391061305999756 262.45558881759644
798 0.5315320491790771 262.99008989334106
799 0.5401580333709717 263.53334188461304
800 0.5438086986541748 264.08008790016174
801 0.5384397506713867 264.62148690223694
802 0.6849050521850586 265.30945897102356
803 0.5465219020843506 265.8589549064636
804 0.5497901439666748 266.4117908477783
805 0.5452978610992432 266.96006774902344
806

1176 1.0749871730804443 562.4910748004913
1177 1.0722863674163818 563.5664219856262
1178 1.0792829990386963 564.6494107246399
1179 1.0943398475646973 565.7471837997437
1180 1.0923073291778564 566.842689037323
1181 1.0933711528778076 567.9393608570099
1182 1.0961647033691406 569.0387599468231
1183 1.082953929901123 570.1253678798676
1184 1.0748980045318604 571.2035129070282
1185 1.0717711448669434 572.2784609794617
1186 1.0704619884490967 573.3521029949188
1187 1.0746009349822998 574.4296729564667
1188 1.0952861309051514 575.5280327796936
1189 1.1260490417480469 576.6574399471283
1190 1.1244359016418457 577.7852108478546
1191 1.1087148189544678 578.8973169326782
1192 1.1079859733581543 580.0087208747864
1193 1.1061460971832275 581.1186287403107
1194 1.1024599075317383 582.2243337631226
1195 1.1092610359191895 583.3367249965668
1196 1.1003730297088623 584.4403488636017
1197 1.1100280284881592 585.5540738105774
1198 1.105381727218628 586.6627359390259
1199 1.1026461124420166 587.768464803

1567 2.0566020011901855 1130.863383769989
1568 2.0540528297424316 1132.9206249713898
1569 2.0429301261901855 1134.9668288230896
1570 2.0200090408325195 1136.9899578094482
1571 1.9971709251403809 1138.9901850223541
1572 1.995697021484375 1140.988963842392
1573 1.9917519092559814 1142.983652830124
1574 1.9947319030761719 1144.98131108284
1575 1.9907000064849854 1146.9749970436096
1576 1.9895200729370117 1148.9674348831177
1577 1.981806993484497 1150.9521160125732
1578 1.9923441410064697 1152.947450876236
1579 1.9841787815093994 1154.934730052948
1580 1.994555950164795 1156.9322707653046
1581 1.9858691692352295 1158.921264886856
1582 2.0022239685058594 1160.926481962204
1583 2.0023038387298584 1162.9319140911102
1584 2.0289909839630127 1164.9639058113098
1585 2.0613338947296143 1167.028310060501
1586 2.048262357711792 1169.0797667503357
1587 2.0467288494110107 1171.1294848918915
1588 2.037519931793213 1173.1701440811157
1589 2.0503780841827393 1175.2238237857819
1590 2.052841901779175 117

In [71]:
# Pickle the normalised flux matrix of the full training set.
# NOTE(review): the file name mentions "flux_ivar", but only the flux array is written.
flux_ivar_file = 'training_input/'+teff_logg_feh_name+'/galah_dr4_trainingset_'+teff_logg_feh_name+'_incl_vsini_flux_ivar.pickle'

# The context manager closes the file even if pickling fails.
with open(flux_ivar_file,'wb') as flux_ivar_file_opener:
    pickle.dump((normalized_flux),flux_ivar_file_opener)