### Definitions, imports, and settings

In [5]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from astropy.table import Table

# Global matplotlib defaults: ticks point inward and are mirrored on the
# top and right axes; text is rendered with the STIX general font.
plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'
plt.rcParams['xtick.top'] = True
plt.rcParams['ytick.right'] = True
plt.rcParams['font.family'] = 'STIXgeneral'

# Location of the catalog files (directory plus one file name per catalog)
data_dir = '/users/PAS2055/lodubay/Data/APOGEE'
data_path = Path(data_dir)
apokasc_file = 'APOKASC_cat_v6.7.2.fits'
starhorse_file = 'APOGEE_DR17_EDR3_STARHORSE_v2.fits'
astroNN_file = 'apogee_astroNN-DR17.fits'

def decode(df):
    """
    Decode DataFrame with byte strings into ordinary strings.

    Every object-dtype column is scanned and each ``bytes`` value is decoded
    as UTF-8. Values that are not ``bytes`` (ordinary strings, NaN, other
    objects) are left untouched, so mixed columns do not lose data. A frame
    without object columns is returned unchanged.

    The DataFrame is modified in place and also returned for convenience.

    """
    def _to_str(value):
        # Only bytes need decoding; anything else passes through as-is.
        return value.decode('utf-8') if isinstance(value, bytes) else value

    for col in df.select_dtypes([object]).columns:
        df[col] = df[col].map(_to_str)
    return df

def rms(array, arrmin=None, arrmax=None):
    """
    Return the root-mean-square of a given array

    If ``arrmin`` is given, only elements strictly greater than it are kept;
    if ``arrmax`` is given, only elements strictly less than it are kept.
    Both filters are applied before the root-mean-square is computed.

    ``array`` must support boolean-mask indexing and element-wise ``**``
    (e.g. a numpy array).

    """
    # Compare against None explicitly so that a bound of 0 is honored
    # rather than being treated as "no bound".
    if arrmin is not None:
        array = array[array > arrmin]
    if arrmax is not None:
        array = array[array < arrmax]
    return np.sqrt(np.mean(array**2))

### Import and combine data

In [6]:
# --- APOKASC catalog -------------------------------------------------------
print('Importing APOKASC catalog...')
data = Table.read(data_path / apokasc_file, format='fits')
apokasc_df = decode(data.to_pandas())
# Cast the location ID to int before it is used as a join key below
apokasc_df['LOC_ID'] = apokasc_df['LOC_ID'].astype(int)
# Columns carried over from APOKASC
apokasc_cols = [
    '2MASS_ID', 'LOC_ID',
    'APOKASC2_AGE', 'APOKASC2_AGE_MERR', 'APOKASC2_AGE_PERR',
    'DR16_ALP_M_COR', 'DR16_ALP_M_COR_ERR',
    'DR16_FE_H', 'DR16_FE_H_ERR',
    'DR16_M_H_COR', 'DR16_M_H_COR_ERR',
]

# --- astroNN DR17 catalog --------------------------------------------------
print('Importing astroNN DR17 catalog...')
data = Table.read(data_path / astroNN_file, format='fits')
astroNN_df = decode(data.to_pandas())
# The first four entries are the join keys and the age columns; everything
# from index 4 onward later receives an 'ASTRONN_' prefix.
astroNN_cols = [
    'APOGEE_ID', 'LOCATION_ID', 'age_lowess_correct', 'age_total_error',
    'TEFF', 'TEFF_ERR', 'LOGG', 'LOGG_ERR',
    'C_H', 'C_H_ERR', 'N_H', 'N_H_ERR', 'O_H', 'O_H_ERR',
    'TI_H', 'TI_H_ERR', 'FE_H', 'FE_H_ERR',
]

# --- StarHorse DR17 catalog ------------------------------------------------
print('Importing StarHorse DR17 catalog...')
data = Table.read(data_path / starhorse_file, format='fits')
starhorse_df = decode(data.to_pandas())
starhorse_cols = ['APOGEE_ID', 'met16', 'met50', 'met84']

# --- Merge the three catalogs into one DataFrame ---------------------------
print('Combining datasets...')
# Give the APOKASC key columns the names used by the other catalogs
apokasc_part = apokasc_df[apokasc_cols].rename(
    columns={'2MASS_ID': 'APOGEE_ID', 'LOC_ID': 'LOCATION_ID'}
)
astroNN_part = astroNN_df[astroNN_cols].set_index(['APOGEE_ID', 'LOCATION_ID'])
starhorse_part = starhorse_df[starhorse_cols].set_index('APOGEE_ID')
# Outer joins keep stars that appear in any one of the catalogs
stellar_params = apokasc_part.join(
    astroNN_part, on=['APOGEE_ID', 'LOCATION_ID'], how='outer',
    rsuffix='_astroNN'
)
stellar_params = stellar_params.join(
    starhorse_part, on='APOGEE_ID', how='outer', rsuffix='_StarHorse'
)

# Turn infinities and the listed sentinel values into NaN, drop rows that
# are entirely NaN, and index the table by APOGEE ID
bad_values = [np.inf, -np.inf, -9999., -9999.99, -999., -999.99]
stellar_params = stellar_params.replace(bad_values, np.nan)
stellar_params = stellar_params.dropna(how='all')
stellar_params = stellar_params.set_index('APOGEE_ID')

# Prefix the astroNN parameter columns and give the age / StarHorse
# metallicity columns their final names
mapper = {col: 'ASTRONN_' + col for col in astroNN_cols[4:]}
stellar_params = stellar_params.rename(columns=mapper)
stellar_params = stellar_params.rename(columns={
    'age_lowess_correct': 'ASTRONN_AGE',
    'age_total_error': 'ASTRONN_AGE_ERR',
    'met50': 'STARHORSE_M_H',
})

# Lower / upper StarHorse errors are the offsets of met16 / met84 from the
# met50 value; the raw met16 / met84 columns are then discarded
starhorse_m_h = stellar_params['STARHORSE_M_H']
stellar_params['STARHORSE_M_H_MERR'] = starhorse_m_h - stellar_params['met16']
stellar_params['STARHORSE_M_H_PERR'] = stellar_params['met84'] - starhorse_m_h
stellar_params = stellar_params.drop(columns=['met16', 'met84'])
print('Done!')
print(stellar_params)

Importing APOKASC catalog...
Importing astroNN DR17 catalog...
Importing StarHorse DR17 catalog...
Combining datasets...
Done!
                     LOCATION_ID  APOKASC2_AGE  APOKASC2_AGE_MERR  \
APOGEE_ID                                                           
2M19051857+4654038        5006.0           NaN                NaN   
2M19052050+4655041        5006.0           NaN                NaN   
2M19052775+4658526        5006.0           NaN                NaN   
2M19052985+4654372        4405.0      8.413952           1.085706   
2M19061343+4657264        5006.0           NaN                NaN   
...                          ...           ...                ...   
2M23595886+5726058        4424.0           NaN                NaN   
2M23595921+5609479           NaN           NaN                NaN   
2M23595949-7342592        7218.0           NaN                NaN   
2M23595980+1528407        4548.0           NaN                NaN   
2M19285121+3813282B          NaN           Na