# Creating FITS output for any given SME output

## Author(s): Sven Buder (SB, WG4)

### History:
181011 SB Created

In [1]:
# Preamble for notebook 

# Compatibility with Python 3
from __future__ import (absolute_import, division, print_function)

try:
    %matplotlib inline
    %config InlineBackend.figure_format='retina'
except:
    pass

# Basic packages
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
import os
import sys
import collections
import glob
import pickle
import pandas

# Packages to work with FITS and (IDL) SME.out files
import astropy.io.fits as pyfits
import astropy.table as table
from astropy.table import Table, hstack, vstack
from scipy.io.idl import readsav

# Matplotlib and associated packages for plotting
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from matplotlib.transforms import Bbox,TransformedBbox
from matplotlib.image import BboxImage
from matplotlib.legend_handler import HandlerBase
from matplotlib._png import read_png
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib.colors import ListedColormap
import matplotlib.colors as colors

params = {
    'font.family'        : 'sans',
    'font.size'          : 17,
    'axes.labelsize'     : 20,
    'ytick.labelsize'    : 16,
    'xtick.labelsize'    : 16,
    'legend.fontsize'    : 20,
    'text.usetex'        : True, 
    'text.latex.preamble': [r'\usepackage{upgreek}', r'\usepackage{amsmath}'],
    }   
plt.rcParams.update(params)

_parula_data = [[0.2081, 0.1663, 0.5292], 
                [0.2116238095, 0.1897809524, 0.5776761905], 
                [0.212252381, 0.2137714286, 0.6269714286], 
                [0.2081, 0.2386, 0.6770857143], 
                [0.1959047619, 0.2644571429, 0.7279], 
                [0.1707285714, 0.2919380952, 0.779247619], 
                [0.1252714286, 0.3242428571, 0.8302714286], 
                [0.0591333333, 0.3598333333, 0.8683333333], 
                [0.0116952381, 0.3875095238, 0.8819571429], 
                [0.0059571429, 0.4086142857, 0.8828428571], 
                [0.0165142857, 0.4266, 0.8786333333], 
                [0.032852381, 0.4430428571, 0.8719571429], 
                [0.0498142857, 0.4585714286, 0.8640571429], 
                [0.0629333333, 0.4736904762, 0.8554380952], 
                [0.0722666667, 0.4886666667, 0.8467], 
                [0.0779428571, 0.5039857143, 0.8383714286], 
                [0.079347619, 0.5200238095, 0.8311809524], 
                [0.0749428571, 0.5375428571, 0.8262714286], 
                [0.0640571429, 0.5569857143, 0.8239571429], 
                [0.0487714286, 0.5772238095, 0.8228285714], 
                [0.0343428571, 0.5965809524, 0.819852381], 
                [0.0265, 0.6137, 0.8135], 
                [0.0238904762, 0.6286619048, 0.8037619048], 
                [0.0230904762, 0.6417857143, 0.7912666667], 
                [0.0227714286, 0.6534857143, 0.7767571429], 
                [0.0266619048, 0.6641952381, 0.7607190476], 
                [0.0383714286, 0.6742714286, 0.743552381], 
                [0.0589714286, 0.6837571429, 0.7253857143], 
                [0.0843, 0.6928333333, 0.7061666667], 
                [0.1132952381, 0.7015, 0.6858571429], 
                [0.1452714286, 0.7097571429, 0.6646285714], 
                [0.1801333333, 0.7176571429, 0.6424333333], 
                [0.2178285714, 0.7250428571, 0.6192619048], 
                [0.2586428571, 0.7317142857, 0.5954285714], 
                [0.3021714286, 0.7376047619, 0.5711857143], 
                [0.3481666667, 0.7424333333, 0.5472666667], 
                [0.3952571429, 0.7459, 0.5244428571], 
                [0.4420095238, 0.7480809524, 0.5033142857], 
                [0.4871238095, 0.7490619048, 0.4839761905], 
                [0.5300285714, 0.7491142857, 0.4661142857], 
                [0.5708571429, 0.7485190476, 0.4493904762],
                [0.609852381, 0.7473142857, 0.4336857143], 
                [0.6473, 0.7456, 0.4188], 
                [0.6834190476, 0.7434761905, 0.4044333333], 
                [0.7184095238, 0.7411333333, 0.3904761905], 
                [0.7524857143, 0.7384, 0.3768142857], 
                [0.7858428571, 0.7355666667, 0.3632714286], 
                [0.8185047619, 0.7327333333, 0.3497904762], 
                [0.8506571429, 0.7299, 0.3360285714], 
                [0.8824333333, 0.7274333333, 0.3217], 
                [0.9139333333, 0.7257857143, 0.3062761905], 
                [0.9449571429, 0.7261142857, 0.2886428571], 
                [0.9738952381, 0.7313952381, 0.266647619], 
                [0.9937714286, 0.7454571429, 0.240347619], 
                [0.9990428571, 0.7653142857, 0.2164142857], 
                [0.9955333333, 0.7860571429, 0.196652381], 
                [0.988, 0.8066, 0.1793666667], 
                [0.9788571429, 0.8271428571, 0.1633142857], 
                [0.9697, 0.8481380952, 0.147452381], 
                [0.9625857143, 0.8705142857, 0.1309], 
                [0.9588714286, 0.8949, 0.1132428571], 
                [0.9598238095, 0.9218333333, 0.0948380952], 
                [0.9661, 0.9514428571, 0.0755333333], 
                [0.9763, 0.9831, 0.0538]]

parula = ListedColormap(_parula_data, name='parula')
parula_zero = _parula_data[0]
parula_0 = ListedColormap(_parula_data, name='parula_0')
parula_0.set_bad((1,1,1))
parula_r = ListedColormap(_parula_data[::-1], name='parula_r')

willi_blau = [0.0722666667, 0.4886666667, 0.8467]

## Function to gather and combine input data from SME, IRAF, Output structure & A(X)_sun

In [2]:
def get_product_information(produc_name): 

    if product_name == 'GBS':
        product_subsets = 'gbs'
        product_input_data_path = '../validation/gbs/data/'
        product_pipeline = 'lbol'

    if product_name == 'seis':
        product_subsets = 'seis'
        product_input_data_path = '../validation/seis/data/'
        product_pipeline = 'lbol'

    if product_name == 'OpenClusters':
        product_subsets = ['Blanco 1','Pleiades','Hyades','NGC 1817','NGC 1901','ASCC 16','ASCC 20','ASCC 21','NGC 2112','NGC 2204','Berkeley 73','NGC 2232','NGC 2243','Berkeley 33','Berkeley 32','NGC 2516','NGC 2548','NGC 2632','M 67','IC 2602','Melotte 101','Trumpler 20','NGC 5460','NGC 6253','ASCC 89','IC 4665','NGC 6469','NGC 6568','NGC 6583','Ruprecht 145','Ruprecht 147']
        product_input_data_path = '../validation/clusters/data/'
        product_pipeline = 'lbol'

    if product_name == 'GlobularClusters':
        product_subsets = ['47 Tuc','NGC 288','NGC 362','NGC 1851','Omega Cen','NGC 6362','NGC 6397','NGC 7099']
        product_input_data_path = '../validation/clusters/data/'
        product_pipeline = 'lbol'

    if product_name == 'random10000':
        product_subsets = '10000'
        product_input_data_path = '../validation/random10000/data/'
        product_pipeline = 'lbol'

    if product_name == 'ts_DR2':
        product_subsets = 'ts_DR2'
        product_input_data_path = '../validation/DR2_rerun/data/'
        product_pipeline = 'lbol'

    if product_name == 'high_vtot':
        product_subsets = 'high_vtot'
        product_input_data_path = '../science/high_vtot/data/'
        product_pipeline = 'lbol'

    if product_name == 'Li_rich_giants':
        product_subsets = 'Li_rich'
        product_input_data_path = '../science/Li_rich_giants/data/'
        product_pipeline = 'lbol'

    if product_name == 'Keller':
        product_subsets = 'Keller'
        product_input_data_path = '../science/Keller/'
        product_pipeline = 'lbol'


    if product_name == 'wide_binaries':
        product_subsets = 'wide_binaries'
        product_input_data_path = '../science/wide_binaries/data/'
        product_pipeline = 'lbol'

    if product_name[:4] == '10k_':
        product_subsets = product_name
        product_input_data_path = '../data_products/10k_subsets/'
        product_pipeline = 'lbol'

    return (product_subsets, product_input_data_path, product_pipeline)

In [3]:
def get_input_data(product_subsets, product_input_data_path, product_pipeline): 
    
    dr3_output_structure = Table.read('../output_structure/galah_dr3_output_structure.fits',1)

    if np.shape(product_subsets)!=():
        joined_sme_data = Table.read(product_input_data_path+'/GALAH_'+product_subsets[0].replace(" ", "")+'_'+product_pipeline+'.fits', format='fits')
        for each in range(1,len(product_subsets)):
            joined_sme_data = vstack([joined_sme_data, Table.read(product_input_data_path+'/GALAH_'+product_subsets[each].replace(" ", "")+'_'+product_pipeline+'.fits', format='fits')])
    else:
        joined_sme_data = Table.read(product_input_data_path+'/GALAH_'+product_subsets.replace(" ", "")+'_'+product_pipeline+'.fits', format='fits')
        
    all_iraf_data = pyfits.getdata('../input/sobject_iraf_53_2MASS_GaiaDR2_WISE_PanSTARRSDR1_BailerJones_K2seis.fits',1)
    iraf_matched = []
    for each in joined_sme_data['SOBJECT_ID']:
        match = np.where(each == all_iraf_data['sobject_id'])[0]
        if len(match)>0:
            iraf_matched.append(match[0])
        else:
            print('No match found for '+str(each))
    iraf_matched = np.array(iraf_matched)
    iraf_data = all_iraf_data[iraf_matched]
    
    abundance_zeropoints= Table.read('../abundance_zeropoints/galahdr3_abundance_zeropoints.fits',1)
    
    print('Got input data')
    
    return dr3_output_structure, joined_sme_data, iraf_data, abundance_zeropoints

In [4]:
def compute_logg_uncertainty(final_output_data, mc_sampling=10000): 
    
    print('Sampling logg uncertainty')
    
    np.random.seed(12)
    
    mc_teff = np.random.normal(
        loc = final_output_data['teff'],
        # We do not trust the SME e_teff, and hence sample the max of e_teff=100 and 2% error of teff
        scale = np.array([np.max([100,0.02*final_output_data['teff'][x]]) for x in range(len(final_output_data['teff']))]),
        size=(mc_sampling,len(final_output_data['teff']))
    )
    mc_mass = np.random.normal(
        loc = final_output_data['mass'],
        # We do not know the error on mass and hence assume it is 10%
        scale = 0.1*final_output_data['mass'],
        size=(mc_sampling,len(final_output_data['mass']))
    )
    mc_mass.clip(min=0., out=mc_mass)
    mc_kmag = np.random.normal(
        loc = final_output_data['ks_m'],
        scale = final_output_data['ks_msigcom'],
        size=(mc_sampling,len(final_output_data['ks_m']))
    )
    mc_bc = np.random.normal(
        loc = final_output_data['bc_ks'],
        scale = 0.1*np.ones(len(final_output_data['bc_ks'])),
        size=(mc_sampling,len(final_output_data['bc_ks']))
    )
    
    sigma_dist_hi = abs(final_output_data['r_hi'] - final_output_data['r_est'])
    sigma_dist_lo = abs(final_output_data['r_est'] - final_output_data['r_lo'])
    mc_dist_lo = np.abs(np.random.normal(
        loc = 0,
        scale = sigma_dist_lo,
        size=(mc_sampling,len(final_output_data['r_est'])))
    )
    mc_dist_hi = np.abs(np.random.normal(
        loc = 0,
        scale = sigma_dist_hi,
        size=(mc_sampling,len(final_output_data['r_est'])))
    )
    #fraction_hi_lo = sigma_dist_hi / (sigma_dist_lo + sigma_dist_hi)
    fraction_hi_lo = 0.5
    select_dist_lo_hi = (np.random.uniform(0, 1, size=(mc_sampling,len(final_output_data['r_est']))) < fraction_hi_lo)
    mc_dist = np.array(final_output_data['r_est']).T + select_dist_lo_hi*mc_dist_hi - (1-select_dist_lo_hi)*mc_dist_lo
    mc_dist.clip(min=0.001, out=mc_dist)
    e_a_ks = (final_output_data['e_a_ks']).clip(min=0.01)
    mc_ak = np.random.normal(
        loc = final_output_data['a_ks'],
        scale = e_a_ks,
        size=(mc_sampling,len(final_output_data['a_ks']))
    )
    mc_ak.clip(min=0., out=mc_ak)
    
    def logg_function(teff, mass, kmag, bc, dist, ak):
        return(4.438 + 4*np.log10(teff/5772.) + np.log10(mass) + 0.4*(kmag + bc - 5.*np.log10(dist) + 5 - ak - 4.7554))

    mc_logg = logg_function(mc_teff, mc_mass, mc_kmag, mc_bc, mc_dist, mc_ak)

    logg_mean = np.array([np.nanmean(mc_logg[:,x]) for x in range(np.shape(mc_logg)[1])])
    logg_std  = np.array([np.nanstd(mc_logg[:,x]) for x in range(np.shape(mc_logg)[1])])

    print('Done sampling logg uncertainty')

    return(logg_std)

In [None]:
def compute final_uncertainty(final_output_data):
    
    return(final_output_data)

In [5]:
def combine_SME_IRAF_to_FINAL(output_filename, product_pipeline, sme_data, iraf_data, dr3_output_structure, abundance_zeropoints):
    
    print('Combining information')
    
    final_output_data = collections.OrderedDict()
    final_output_abundances = collections.OrderedDict()
    
    abundances_in_mode = np.array([(sme_data['MODE'][0,it]).replace(" ","") for it in range(len(sme_data['MODE'][0]))])
    
    for each_key in dr3_output_structure.keys():
        # Keys in output not matching input keys
        if each_key=='star_id':
            final_output_data[each_key]=np.array(iraf_data['tmass_id'])
            final_output_abundances[each_key]=np.array(iraf_data['tmass_id'])
        elif each_key in ['sobject_id']:
            final_output_data[each_key]=np.array(sme_data[each_key.upper()])
            final_output_abundances[each_key]=np.array(sme_data[each_key.upper()])
        elif each_key in ['field_id','source_id','ra','ra_error','dec','dec_error','r_est','r_lo','r_hi','pmra','pmra_error','pmdec','pmdec_error','ra_dec_corr','ra_parallax_corr','ra_pmra_corr','ra_pmdec_corr','dec_parallax_corr','dec_pmra_corr','dec_pmdec_corr','parallax_pmra_corr','parallax_pmdec_corr','pmra_pmdec_corr','red_flag','ebv','snr_c2_iraf','flag_guess','rv_guess','e_rv_guess','teff_guess','logg_guess','feh_guess','j_m','j_msigcom','h_m','h_msigcom','ks_m','ks_msigcom','ph_qual_tmass','w2mpro','w2mpro_error','ph_qual_wise','parallax','parallax_error','visibility_periods_used','astrometric_chi2_al','astrometric_n_good_obs_al','ruwe','phot_g_mean_mag','bp_rp']:
            final_output_data[each_key]=np.array(iraf_data[each_key])
        elif each_key=='wg4_field':
            if product_pipeline == 'lbol':
                final_output_data[each_key]=np.array([sme_data['FIELD'][x][:-5] for x in range(len(sme_data['FIELD']))])
                final_output_data['wg4_pipeline']=np.array(['lbol' for x in range(len(sme_data['FIELD']))])
            elif product_pipeline == 'seis':
                final_output_data[each_key]=np.array([sme_data['FIELD'][x][:-5] for x in range(len(sme_data['FIELD']))])
                final_output_data['wg4_pipeline']=np.array(['seis' for x in range(len(sme_data['FIELD']))])
            else:
                final_output_data[each_key]=np.array(sme_data['FIELD'])
                final_output_data['wg4_pipeline']=np.array(['free' for x in range(len(sme_data['FIELD']))])
        elif each_key in ['flag_sp','teff', 'e_teff', 'logg', 'e_logg', 'vmic', 'e_vmic', 'mass', 'lbol', 'age', 'alpha_fe', 'e_alpha_fe']:
            final_output_data[each_key]=np.array(sme_data[each_key.upper()])
        elif each_key == 'rv_5854':
            final_output_data[each_key]=np.array(sme_data['BA_VRAD'])
        elif each_key == 'rv_6708':
            final_output_data[each_key]=np.array(sme_data['LI_VRAD'])
        elif each_key == 'rv_6722':
            final_output_data[each_key]=np.array(sme_data['SI_VRAD'])
        elif each_key == 'bc_ks':
            final_output_data[each_key]=np.array(sme_data['BC_K'])
        elif each_key == 'fe_h_atmo':
            final_output_data[each_key]=np.array(sme_data['FEH'])
            final_output_abundances[each_key]=np.array(sme_data['FEH'])
        elif each_key == 'e_fe_h_atmo':
            final_output_data[each_key]=np.array(sme_data['E_FEH'])
            final_output_abundances[each_key]=np.array(sme_data['E_FEH'])
        elif each_key == 'cov_e_fe_h_atmo':
            final_output_data[each_key]=np.array(sme_data['C_FEH'])
            final_output_abundances[each_key]=np.array(sme_data['C_FEH'])
        elif each_key[:6]=='cov_e_':
            if each_key[6:] in ['teff', 'logg']:
                final_output_data[each_key]=np.array(sme_data['C_'+each_key[6:].upper()])
        elif each_key in ['init_teff','init_logg']:
            final_output_data[each_key]=np.array(sme_data['S'+each_key[5:].upper()][:,0])
        elif each_key == 'init_fe_h_atmo':
            final_output_data[each_key]=np.array(sme_data['SFEH'][:,0])
        elif each_key == 'init_vbroad':
            final_output_data[each_key]=np.array(sme_data['SVSINI'][:,0])
        elif each_key=='fe_h':
            final_output_data['fe_h']=np.array(sme_data['A_ABUND'][:,1] - abundance_zeropoints['A_Fe'][0])
            final_output_data['e_fe_h']=np.array(sme_data['E_ABUND'][:,1])                
            final_output_data['cov_e_fe_h']=np.array(sme_data['C_ABUND'][:,1])                
            final_output_data['flag_fe_h']=np.array(sme_data['AFLAG'][:,1])                
            final_output_abundances['fe_h']=np.array(sme_data['A_ABUND'][:,1] - abundance_zeropoints['A_Fe'][0])
            final_output_abundances['e_fe_h']=np.array(sme_data['E_ABUND'][:,1])                
            final_output_abundances['cov_e_fe_h']=np.array(sme_data['C_ABUND'][:,1])                
            final_output_abundances['flag_fe_h']=np.array(sme_data['AFLAG'][:,1])                
        elif each_key=='vbroad':
            final_output_data[each_key]=np.array(sme_data['VSINI'])
            final_output_data['e_'+each_key]=np.array(sme_data['E_VSINI'])
            final_output_data['cov_'+each_key]=np.array(sme_data['C_VSINI'])
        elif each_key=='rv_galah':
            final_output_data[each_key]=np.array(sme_data['VEL'])
            final_output_data['e_'+each_key]=np.array(sme_data['E_VEL'])
            final_output_data['cov_e_'+each_key]=np.array(sme_data['C_VEL'])
        elif each_key=='rv_gaia':
            final_output_data[each_key]=np.array(iraf_data['radial_velocity'])
            final_output_data['e_'+each_key]=np.array(iraf_data['radial_velocity_error'])
        elif each_key=='chi2_sp':
            final_output_data[each_key]=np.array(sme_data['CHI'][:,0])
        elif each_key=='a_ks':

            # Apply RJCE method
            rjce_ak = np.array(0.918*(iraf_data['h_m'] - iraf_data['w2mpro'] - 0.08))
            e_rjce_ak = 0.918*np.sqrt(iraf_data['h_msigcom']**2. + iraf_data['w2mpro_error']**2.)
            
            rjce_ak.clip(min=0.0, out=rjce_ak)
            
            # check if 2MASS Hmag and WISE W2mag have good quality
            tmass_adjusted = np.array(iraf_data['ph_qual_tmass'])
            tmass_adjusted[np.where(tmass_adjusted=='   ')[0]] = 'UUU'
            tmass_adjusted[np.where(tmass_adjusted=='')[0]] = 'UUU'
            wise_adjusted = np.array(iraf_data['ph_qual_wise'])
            wise_adjusted[np.where(wise_adjusted=='    ')[0]] = 'UUUU'
            wise_adjusted[np.where(wise_adjusted=='')[0]] = 'UUUU'
            h_m_qual = np.array([tmass_adjusted[x][1] == 'A' for x in range(len(tmass_adjusted))])
            w2_qual = np.array([wise_adjusted[x][1] == 'A' for x in range(len(wise_adjusted))])

            # if photometry bad, exchange a_k by ebv approximation
            bad_rjce = np.isnan(iraf_data['h_m']) | np.isnan(iraf_data['w2mpro']) | (h_m_qual==False) | (w2_qual==False)
            ebv_ak = 0.36*iraf_data['ebv']
            rjce_ak[bad_rjce] = ebv_ak[bad_rjce]
            e_rjce_ak[bad_rjce] = ebv_ak[bad_rjce]
            
            final_output_data['a_ks']=rjce_ak
            final_output_data['e_a_ks']=e_rjce_ak
            
            # adjust too large E(B-V) if photometry good
            bad_ebv = np.where(0.36*iraf_data['ebv'] > 3*rjce_ak)[0]
            if len(bad_ebv) > 0:
                final_output_data['ebv'][bad_ebv] = 2.78*rjce_ak[bad_ebv]
            
            # nearby stars with ebv=0 and a_ks=0
            nearby = np.where(iraf_data['r_est'] <= 100.)[0]
            final_output_data['ebv'][nearby] = 0.
            final_output_data['a_ks'][nearby] = 0.
            final_output_data['e_a_ks'][nearby] = 0.

        # Abundance keys
        elif (each_key[0:2]=='A_') & (each_key != 'A_Ks'):
            element = each_key[2:]
            element_in_sme_data = np.where(element == abundances_in_mode)[0]
            if len(element_in_sme_data) != 1:
                # print('Exchange values for '+element)
                if element == 'K': 
                    element_in_sme_data = np.where('K7699' == abundances_in_mode)[0]
                if element == 'Ca': 
                    element_in_sme_data = np.where('Ca5862' == abundances_in_mode)[0]
                if element == 'Cu': 
                    element_in_sme_data = np.where('Cu5782' == abundances_in_mode)[0]
                if element == 'Ba': 
                    element_in_sme_data = np.where('Ba5854' == abundances_in_mode)[0]
            else:
                pass

            element_in_sme_data=element_in_sme_data[0]
            final_output_abundances[each_key]=np.array(sme_data['A_ABUND'][:,element_in_sme_data])
                        
            if element=='Li':
                final_output_data[each_key]=np.array(sme_data['A_ABUND'][:,element_in_sme_data])

            if element in ['Fe', 'Li']:
                final_output_data['flux_'+each_key]=np.array(sme_data['LINEFLUX'][:,element_in_sme_data])
                final_output_data['chi_'+each_key]=np.array(sme_data['CHI'][:,element_in_sme_data])

            if element!='Fe':
                try:
                    final_output_data[element+'_fe']=np.array(
                        # [X/Fe] = [X/H] - [Fe/H] = A(X) - A(X)_sun - (A(Fe) - A(Fe)_sun)
                        sme_data['A_ABUND'][:,element_in_sme_data]- abundance_zeropoints[each_key][0] 
                        - (final_output_abundances['A_Fe'] - abundance_zeropoints['A_Fe'][0])
                        )
                except:
                    print('You are using A(Fe) = 7.45 + fe_h_atmo!')
                    final_output_data[element+'_fe']=np.array(
                        # [X/Fe] = [X/H] - [Fe/H] = A(X) - A(X)_sun - (A(Fe) - A(Fe)_sun)
                        sme_data['A_ABUND'][:,element_in_sme_data]- abundance_zeropoints[each_key][0] 
                        - (7.45 + final_output_data['fe_h_atmo'] - abundance_zeropoints['A_Fe'][0])
                        )
                #final_output_data[element+'_fe']=np.array(sme_data['ABUND'][:,element_in_sme_data])
                #final_output_data['e_'+element+'_fe']=np.array(sme_data['E_ABUND'][:,element_in_sme_data])
                
                final_output_data['e_'+element]=np.array(sme_data['E_ABUND'][:,element_in_sme_data])
                final_output_data['cov_e_'+element]=np.array(sme_data['C_ABUND'][:,element_in_sme_data])
                final_output_data['flag_'+element]=np.array(sme_data['AFLAG'][:,element_in_sme_data])
            final_output_abundances['flux_'+each_key]=np.array(sme_data['LINEFLUX'][:,element_in_sme_data])
            final_output_abundances['chi_'+each_key]=np.array(sme_data['CHI'][:,element_in_sme_data])
            
        # Already filled
        elif ((each_key in ['wg4_pipeline', 'e_fe_h', 'e_rv_galah', 'c_rv_galah', 'e_rv_gaia', 'e_vbroad', 'e_a_ks']) | (each_key[:4]=='e_A_') | (each_key[:7]=='flag_A_') | (each_key[:7]=='flux_A_') | (each_key[:7]=='chi2_A_') | (each_key[-3:]=='_fe')):
            pass
        # Placeholder
        elif each_key in ['e_mass', 'e_lbol', 'e_age', 'e_bc_ks']:
            final_output_data[each_key] = np.array([np.NaN for x in range(len(sme_data['FIELD']))])
        else:
            print('No match for '+each_key)
    
    # Previously, we have reverse engineered the BC(Ks), but now we read it from the SME Log file as well
    #def bc_function(lbol, ks_m, dist, ak):
    #    return(-2.5*np.log10(lbol) - ks_m - 5.*np.log10(1./dist) - 5. + 4.7554)
    ## Reverse engineer BC for Ks based on bolometric luminosity
    #final_output_data['bc_ks'] = bc_function(
    #    final_output_data['lbol'],
    #    final_output_data['ks_m'],
    #    final_output_data['r_est'],
    #    final_output_data['a_ks']
    #)

    # MC sample the uncertainties for logg
    final_output_data['e_logg'] = compute_logg_uncertainty(final_output_data)
            
    return(final_output_data, final_output_abundances)

In [6]:
def combine_line_by_line(
    final_output_data, 
    final_output_abundances, 
    abundance_zeropoints,
    flag_limit = 0, 
    abundance_uncertainty_limit = 0.005,
    clip_outlier_sigma = 2,
    debug = False
    ):
    
    """
    We combine all line measurements 
    for a given element and species 
    as outlined in the converter dictionary, 
    e.g. OI is a combination of O7772, O7774, O7775
    
    Only measurements up to the 
    flag_limit are considered
    
    Abundance uncertainties are set be 
    at least above the abundance_uncertainty_limit
    
    Only use those measurements that are as close as 2 sigma
    to the error-weighted mean and recompute it

    
    """
    
    print('Only using A(X) with flags <= '+str(flag_limit))
    print('Setting uncertainty floor for A(X) to >= '+str(abundance_uncertainty_limit))
    
    converter = collections.OrderedDict()
    converter['LiI']  = ['Li']
    converter['CI']   = ['C6588']
    converter['OI']   = ['O7772','O7774','O7775']
    converter['NaI']  = ['Na5683','Na5688'] # leaving out Na4752
    converter['MgI']  = ['Mg4730','Mg5711','Mg7692'] # leaving out Mg7722, Mg7759, Mg7811
    converter['AlI']  = ['Al6696','Al6699','Al7835','Al7836']
    converter['SiI']  = ['Si5666','Si5684','Si5690','Si5701','Si5772','Si5793','Si6722','Si7680']
    converter['KI']   = ['K5802','K7699']
    converter['CaI']  = ['Ca5857','Ca5868','Ca6494','Ca6500'] # leaving out Ca6509
    converter['ScI']  = ['Sc4744','Sc4753','Sc5672','Sc5687','Sc5717','Sc5724']
    converter['ScII'] = ['Sc5658','Sc5667','Sc5684','Sc6605']
    converter['TiI']  = ['Ti4758','Ti4759','Ti4778','Ti4782','Ti4798','Ti4802','Ti4820','Ti5689','Ti5716','Ti5720','Ti5739','Ti5866','Ti6599','Ti6717','Ti7853']
    converter['TiII'] = ['Ti4720','Ti4765','Ti4799','Ti4849','Ti4866','Ti4874']
    converter['VI']   = ['V4747','V4784','V4797','V4832']
    converter['CrI']  = ['Cr4775','Cr4789','Cr4801','Cr5702','Cr5720','Cr5788','Cr5845','Cr6630']
    converter['CrII'] = ['Cr4848']
    converter['MnI']  = ['Mn4739','Mn4762','Mn4766','Mn4783']
    converter['FeI']  = ['Fe4789','Fe4793','Fe4794','Fe4803','Fe4808','Fe4876','Fe4890','Fe4891','Fe5651','Fe5652','Fe5661','Fe5663','Fe5679','Fe5680','Fe5696','Fe5702','Fe5705','Fe5731','Fe5732','Fe5742','Fe5775','Fe5778','Fe5807','Fe5809','Fe5812','Fe5815','Fe5850','Fe5853','Fe5855','Fe5859','Fe6482','Fe6495','Fe6499','Fe6518','Fe6546','Fe6593','Fe6594','Fe6598','Fe6609','Fe6628','Fe6648','Fe6678','Fe6699','Fe6704','Fe6714','Fe6725','Fe6733','Fe7710','Fe7723','Fe7748']
    converter['FeII'] = ['Fe4720','Fe4731','Fe4833','Fe6516','Fe7712']
    converter['CoI']  = ['Co5647','Co6632','Co7713','Co7838']
    converter['NiI']  = ['Ni5847','Ni6586']
    converter['CuI']  = ['Cu5700','Cu5782']
    converter['ZnI']  = ['Zn4722','Zn4811']
    converter['RbI']  = ['Rb7800']
    converter['SrI']  = ['Sr6550']
    converter['YII']  = ['Y4855','Y4884','Y5663']
    converter['ZrI']  = ['Zr4739','Zr4772','Zr4806','Zr4828']
    converter['MoI']  = ['Mo5751','Mo5858']
    converter['RuI']  = ['Ru4869']
    converter['BaII'] = ['Ba5854','Ba6497']
    converter['LaII'] = ['La4716','La4749','La4804','La5806']
    converter['CeII'] = ['Ce4774']
    converter['NdII'] = ['Nd4811','Nd5741','Nd5770','Nd5812','Nd5842']
    converter['SmII'] = ['Sm4837','Sm4854']
    converter['EuII'] = ['Eu5819','Eu6645']
    converter['alpha_fe'] = ['Mg4730','Mg5711','Mg7692','Si5666','Si5684','Si5690','Si5701','Si5772','Si5793','Si6722','Si7680','Ca5857','Ca5868','Ca6494','Ca6500','Ti4758','Ti4759','Ti4778','Ti4782','Ti4798','Ti4802','Ti4820','Ti5689','Ti5716','Ti5720','Ti5739','Ti5866','Ti6599','Ti6717','Ti7853']
    
    """
    ii=where(mode eq 'Mg' or mode eq 'Si' or mode eq 'Ti')
    for i=0,n_elements(object)-1 do begin
       j=where(finite(res[i].abund[ii]) and res[i].aflag[ii] eq 0,jc)
       if jc ne 0 then begin
          res[i].alpha_fe   = total(res[i].abund[ii[j]]/res[i].e_abund[ii[j]]^2)/total(1./res[i].e_abund[ii[j]]^2)
          res[i].e_alpha_fe = sqrt(1./total(1./res[i].e_abund[ii[j]]^2))
          ;print,res[i].alpha_fe,res[i].e_alpha_fe,res[i].e_teff                                                                                                                                                
       endif
    endfor
    """
    
    def combine_line_measurements_for_element(each_element, final_output_data, final_output_abundances, data_index):

        """
        We convert the measurements to [X/H] before we combine them

        """

        useful_line_bitmask = []
        useful_line_measurements = []
        useful_line_uncertainties = []

        for each_index, each_line in enumerate(converter[each_element]):

            # if line measurement flag is <= flag_limit
            if (
                (final_output_data['flag_'+each_line][data_index] <= flag_limit) &
                np.isfinite(final_output_abundances['A_'+each_line][data_index])
            ):

                useful_line_bitmask.append(2**each_index)
                useful_line_measurements.append(final_output_abundances['A_'+each_line][data_index] - abundance_zeropoints['A_'+each_line][0] - final_output_abundances['fe_h'][data_index])
                useful_line_uncertainties.append(final_output_data['cov_e_'+each_line][data_index])

        useful_line_bitmask=np.array(useful_line_bitmask)
        useful_line_measurements=np.array(useful_line_measurements)
        useful_line_uncertainties=np.array(useful_line_uncertainties)

        if debug==True:
            print(useful_line_bitmask, useful_line_measurements, useful_line_uncertainties)

        if len(useful_line_measurements) == 1:
            return(useful_line_measurements[0], useful_line_uncertainties[0],1)
        elif len(useful_line_measurements) != 0:
            
            all_lines = (
                np.sum(useful_line_measurements/useful_line_uncertainties**2)/np.sum(1./useful_line_uncertainties**2),
                np.sqrt(1./np.sum(1./useful_line_uncertainties**2) + np.var(useful_line_measurements)),
                np.sum(useful_line_bitmask)
                )
                
            sigma_outliers = np.abs(all_lines[0] - useful_line_measurements) > clip_outlier_sigma*all_lines[1]

            if debug==True:
                print(useful_line_measurements,all_lines,sigma_outliers)
                print(useful_line_measurements[~sigma_outliers])
            
            if len(useful_line_measurements[~sigma_outliers]) < 2:
                return(all_lines)
            else:
                return(
                    np.sum(useful_line_measurements[~sigma_outliers]/useful_line_uncertainties[~sigma_outliers]**2)/np.sum(1./useful_line_uncertainties[~sigma_outliers]**2),
                    np.sqrt(1./np.sum(1./useful_line_uncertainties[~sigma_outliers]**2) + np.var(useful_line_measurements)),
                    np.sum(useful_line_bitmask[~sigma_outliers])
                )
        else:
            return(np.nan,np.nan,np.nan)

    for each_element in converter.keys():
        (final_output_data[each_element+'_fe'], final_output_data['cov_e_'+each_element], final_output_data['nr_'+each_element]) = np.array([combine_line_measurements_for_element(each_element, final_output_data, final_output_abundances, data_index) for data_index in range(len(final_output_abundances['sobject_id']))]).T

    return(final_output_data, final_output_abundances)

In [7]:
def binarity_flag(final_output_data):
    
    class read_iso():

        def __init__(self):
            self.num_cols=4
            self.columns = ['M_Mo', 'logTeff', 'logG', 'logL_Lo']
            self.num_ages = len(age)
            self.ages = age

        def fill_chemistry(self, m_h, fe_h, alpha_fe):
            self.FeH = fe_h
            self.Z = 10**m_h*0.0152
            self.aFe = alpha_fe

        def fill_iso(self, iso_input):
            self.data = iso_input

    parsec = np.load('../input/Parsec_isochrones.npy')

    binarity_flag = np.zeros(len(final_output_data['teff']),dtype=np.int)
    return(binarity_flag)

    parsec_feh = np.array([parsec[x].FeH for x in range(len(parsec))])

    for p_it in np.arange(len(parsec)):
        if p_it == 0:
            in_bin = final_output_data['fe_h'] <= parsec[p_it].FeH
        elif p_it == len(parsec):
            in_bin = final_output_data['fe_h'] > parsec[p_it].FeH
        else:
            in_bin = (final_output_data['fe_h'] > parsec[p_it-1].FeH) & (final_output_data['fe_h'] <= parsec[p_it].FeH)

        #print(in_bin)
        plt.figure()
        plt.scatter(
            final_output_data['teff'][in_bin],
            final_output_data['logg'][in_bin]
            )
        plt.plot(
            10**parsec[p_it].data[-1]['logTeff'],
            parsec[p_it].data[-1]['logG']
        )
        plt.plot(
            10**parsec[p_it].data[-1]['logTeff'] - 200,
            parsec[p_it].data[-1]['logG']
        )
        plt.plot(
            10**parsec[p_it].data[-1]['logTeff'],
            parsec[p_it].data[-1]['logG'] - np.log10(2) # 2xLuminosity
        )
        plt.plot(
            10**parsec[p_it].data[-1]['logTeff'] - 200,
            parsec[p_it].data[-1]['logG'] - np.log10(2) # 2xLuminosity
        )

        plt.xlim(8000,3000)
        plt.ylim(6,-1)

In [8]:
def apply_sp_flags(final_output_data, final_output_abundances):
    
    get_bin = lambda x, n: format(int(x), 'b').zfill(n)
    
    sme_idl_bitmask = np.array([get_bin(x, 10) for x in final_output_data['flag_sp']])
    #  0 : alright  
    #+ 1 : convergence == non-finite SPs  
    #+ 2 : grid limit reached  
    #+ 4 : Gaussian RV fit failed  
    #+ 8 : ELLI mass estimated failed  
    #+16 : Timeout on ISAAC
        
    red_bitmask = np.array([get_bin(x, 10) for x in final_output_data['red_flag']])
    #  0 : for no flags,
    #+ 1 : for bad wavelength solution in ccd_1,
    #+ 2 : for bad wavelength solution in ccd_2, 
    #+ 4 : for bad wavelength solution in ccd_3,
    #+ 8 : for bad wavelength solution in ccd_4,
    #+16 : for molecfit fail in ccd_3,
    #+32 : for molecfit fail in ccd_4,
    #+64 : if the object is actually a twilight flat.
    
    tsne_binary    = np.loadtxt('../input/tsne_binaries.txt',dtype=np.int)
    
    tsne_emission  = np.loadtxt('../input/tsne_emission.txt',dtype=np.int)
    
    tsne_reduction = np.loadtxt('../input/tsne_reduction_issues.txt',dtype=np.int)
    
    bitmask_sp = np.zeros(len(final_output_data['sobject_id']),dtype=np.int)
    
    print('Applying the following flags:')
    
    def raise_bitmask(bit, position):
        #print(position)
        bitmask_sp[position] += bit
    
    # Raise bitmask 1
    print('   1: RUWE > 1.4 (bad astrometric solution)')
    raise_bitmask(1, final_output_data['ruwe'] > 1.4)
    
    # Raise bitmask 2
    print('   2: unreliable broadening')
    raise_bitmask(2, (
        (final_output_data['vbroad'] <= 3.) | 
        (final_output_data['vbroad'] >= 100.))
        )
    
    # Raise bitmask 4
    print('   4: Low S/N (below 5 or 10? below this value for all CCDs or just CCD2 as SNR-tracer?)')
    raise_bitmask(4, final_output_data['snr_c2_iraf'] <= 10)

    # Raise bitmask 32
    print('   8: reduction issue:')
    print('      a) Wavelength solution (propagating of red_flag)')
    print('      b) t-SNE projected reduction issues: Negative/positive fluxes, spikes, etc.')
    raise_bitmask(8, np.array([(x[-1]=='1') | (x[-2]=='1') | (x[-3]=='1') | (x[-4]=='1') | (y in tsne_reduction)  for (x, y) in zip(red_bitmask, final_output_data['sobject_id'])]))
    
    # Raise bitmask 16
    print('  16: t-SNE projected emission features')
    raise_bitmask(16, np.array([each in tsne_emission for each in final_output_data['sobject_id']]))

    # Raise bitmask 32
    print('  32: t-SNE projected binaries')
    raise_bitmask(32, np.array([each in tsne_binary for each in final_output_data['sobject_id']]))

    # Raise bitmask 64
    print('  64: Astrometric binarity flag')
    final_output_data['flag_sp'] += 64 * np.zeros(len(final_output_data['sobject_id']))#binarity_flag(final_output_data)
    
    # Raise bitmask 128
    print(' 128: SNR-dependent high SME chi2 (bad fit) / FYI: median chi2_sp is 0.748')
    raise_bitmask(128, final_output_data['chi2_sp'] > np.exp(0.08*final_output_data['snr_c2_iraf'])+0.1*final_output_data['snr_c2_iraf'])
    
    """
    Karin's suggestion:
    if not using chi2_limit(snr, teff), go only for selecting the definite outliers
    raise_bitmask(16, final_output_data['chi2_sp'] > np.exp(0.08*final_output_data['snr_c2_iraf'])+0.1*final_output_data['snr_c2_iraf'])

    My suggestion:
    raise_bitmask(16, final_output_data['chi2_sp'] / 0.35 > 3. * np.exp(1/90.*final_output_data['snr_c2_iraf']))
    which cuts away most cool giants...

    Morgan's suggestion:
    (chi2_sp - (0.75 - 1) / 10 * ln(snr_c2_iraf)) / (((.005)/60.) * snr_c2_iraf * snr_c2_iraf + 0.3) > 3. * chi2_sp
    """
    
    """
    t-SNE flags as taken from "tsne_classification_dr52_2018_04_09.csv"
    TO BE UPDATED WITH TRAVEN'S NEW DR5.3 FLAGS
    """
    
    # Raise bitmask 256
    print(' 256: Problems with Fe lines, where lineflux is not between 0.03 and 1.00')
    raise_bitmask(256,
        (
        (final_output_abundances['flux_A_Fe'] < 0.03) | 
        (final_output_abundances['flux_A_Fe'] > 1.00)
        ) |
        np.isnan(final_output_data['fe_h'])
        )
    
    # Raise bitmask 512')
    print(' 512: sme did not finish')
    print('      a) no convergence == non-finite SPs')
    print('      b) Gaussian RV fit failed')
    print('      c) Timeout on ISAAC')
    raise_bitmask(512, np.array([((x[-1]=='1') | (x[-3]=='1') | (x[-4]=='1') |(x[-5]=='1') ) for x in sme_idl_bitmask]))
    
    # Raise bitmask 1024
    print('1024: MARCS grid limit reached or outside of reasonable parameter range')
    raise_bitmask(1024, (
        np.array([x[-2]=='1' for x in sme_idl_bitmask]) | 
        (final_output_data['fe_h_atmo'] > 1.0) | 
        (final_output_data['fe_h'] > 1.0))
        ) 

    final_output_data['flag_sp'] = bitmask_sp
    
    return(final_output_data, final_output_abundances)

In [9]:
def apply_ab_flags(final_output_data, final_output_abundances):    

    get_bin = lambda x, n: format(int(x), 'b').zfill(n)   

    for each_key in final_output_abundances.keys():

        if each_key[0:2]=='A_':
            
            each_element == each_key[2:]

            if each_element == 'Fe':
                element = 'fe_h'

                print('Applying the following flags:')
                print(' 1: Upper limit')
                print(' 2: Bad chi2 fit')
                print(' 4: Bad wavelength solution / rv for Li6708')
                print(' 8: Bad stellar parameter flag')
                print('16: No abundance value at all')

            else:
                element = each_element

            old_bitmask = np.array([get_bin(x, 10) for x in final_output_data['flag_'+element]])

            bitmask_element = np.zeros(len(final_output_data['sobject_id']),dtype=np.int)

            def raise_bitmask(bit, position):
                #print(position)
                bitmask_element[position] += bit

            # Raise bitmask 1
            # - upper limit
            raise_bitmask(1, np.array([x[-1]=='1' for x in old_bitmask]))

            # Raise bitmask 2
            # - bad chi2 fit
            raise_bitmask(2, np.array([x[-2]=='1' for x in old_bitmask]))

            if element == 'Li':
                # Raise bitmask 4
                # - bad wavelength solution / rv for Li6708
                raise_bitmask(4, (np.abs(final_output_data['rv_6708']) > 10))

            if element != 'fe_h':
                # Raise bitmask 8
                # - no abundance value at all
                raise_bitmask(8, np.isnan(final_output_abundances['A_'+element]))

            final_output_data['flag_'+element] = bitmask_element

        return(final_output_data, final_output_abundances)

In [10]:
def write_to_fits(final_output_data, final_output_abundances,output_filename):
    output_pandas = pandas.DataFrame(final_output_data,columns=final_output_data.keys())
    output_astropy = Table.from_pandas(output_pandas)

    output_astropy.write('../data_products/GALAH_iDR3_'+output_filename+'.fits',overwrite=True)

    output_a_pandas = pandas.DataFrame(final_output_abundances,columns=final_output_abundances.keys())
    output_a_astropy = Table.from_pandas(output_a_pandas)

    output_a_astropy.write('../data_products/GALAH_iDR3_'+output_filename+'_abund.fits',overwrite=True)

    return output_astropy, output_a_astropy

## Execute

In [None]:
#product_list = ['10k_0','10k_1','10k_2','10k_3','10k_5','10k_7','10k_31','10k_41','10k_51','10k_53','10k_55','10k_57','10k_59','10k_61','10k_62','10k_63','10k_64','10k_65']
product_list = []
for each_subset in np.arange(50,66):
    if np.shape(glob.glob('10k_subsets/GALAH_10k_'+str(each_subset)+'_lbol.fits'))[0] > 0:
        product_list.append('10k_'+str(each_subset))
product_list = np.array(product_list)

#product_list = ['10k_0','10k_10','10k_20','10k_30']

#product_list = ['GBS','seis','OpenClusters','GlobularClusters','random10000','ts_DR2','high_vtot','Li_rich_giants']

#product_list = ['GBS']

print(product_list)

for product_name in product_list:

    print(product_name)
    
    product_subsets, product_input_data_path, product_pipeline = get_product_information(product_name)

    dr3_output_structure, sme_data, iraf_data, abundance_zeropoints = get_input_data(
        product_subsets,
        product_input_data_path,
        product_pipeline)

    (final_output_data, final_output_abundances) = combine_SME_IRAF_to_FINAL(
        output_filename = product_name, 
        product_pipeline=product_pipeline,
        sme_data=sme_data,
        iraf_data=iraf_data,
        dr3_output_structure=dr3_output_structure,
        abundance_zeropoints=abundance_zeropoints);

    (final_output_data, final_output_abundances) = apply_sp_flags(final_output_data, final_output_abundances)

    (final_output_data, final_output_abundances) = apply_ab_flags(final_output_data, final_output_abundances)
    
    (final_output_data,final_output_abundances) =  combine_line_by_line(final_output_data, final_output_abundances, abundance_zeropoints)
    
    fits_data = write_to_fits(
        final_output_data,
        final_output_abundances,
        output_filename = product_name)

['10k_50' '10k_51' '10k_52' '10k_53' '10k_54' '10k_55' '10k_56' '10k_57'
 '10k_58' '10k_59' '10k_60' '10k_61' '10k_62' '10k_63' '10k_64' '10k_65']
10k_50
Got input data
Combining information
Sampling logg uncertainty




Done sampling logg uncertainty
Applying the following flags:
   1: RUWE > 1.4 (bad astrometric solution)
   2: unreliable broadening
   4: Low S/N (below 5 or 10? below this value for all CCDs or just CCD2 as SNR-tracer?)
   8: reduction issue:
      a) Wavelength solution (propagating of red_flag)
      b) t-SNE projected reduction issues: Negative/positive fluxes, spikes, etc.
  16: t-SNE projected emission features
  32: t-SNE projected binaries
  64: Astrometric binarity flag
 128: SNR-dependent high SME chi2 (bad fit) / FYI: median chi2_sp is 0.748
 256: Problems with Fe lines
 512: sme did not finish
      a) no convergence == non-finite SPs
      b) Gaussian RV fit failed
      c) Timeout on ISAAC
1024: MARCS grid limit reached or outside of reasonable parameter range
Only using A(X) with flags <= 0
Setting uncertainty floor for A(X) to >= 0.005
10k_51
Got input data
Combining information
Sampling logg uncertainty
Done sampling logg uncertainty
Applying the following flags:
   1

In [None]:
fits_data[0]

In [None]:
fits_data[1]