In [3]:
# Takes LAMOST spectra and compares the [Fe/H] retrievals from rrlfe with those in the catalog in
# Table 6 of Liu+ 2020 ApJSS 247:68 'Probing the Galactic Halo with RR Lyrae Stars. I. The Catalog'.

# Created 2023 Jan. 5 by E.S.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import glob
from astropy.io import fits
import os

%matplotlib qt

In [2]:
# catalog_liu_et_al.txt: Liu+ retrievals from LAMOST spectra
# lamost_li_file_info.csv: names of LAMOST spectra as downloaded; [file_name,ra,dec,emp_snr]

In [3]:
# Root directory of the rrlfe repository; every path below is built as `stem + "<relative path>"`.
# The default is the original author's local checkout. Set the RRLFE_STEM environment variable
# to run this notebook on another machine without editing the cell.
# os.path.join(..., "") guarantees a trailing separator, which the string concatenations rely on.
stem = os.path.join(os.environ.get("RRLFE_STEM", "/Users/bandari/Documents/git.repos/rrlfe/"), "")

In [4]:
# Read in the Liu+ catalog (whitespace-delimited text table).
# - skiprows=65 skips the leading lines of the file (presumably the catalog's header/byte-by-byte
#   description -- TODO confirm against the file).
# - Only the 8 columns listed in `usecols` are kept, and `names` labels them in that order.
# - sep=r"\s+" replaces the deprecated `delim_whitespace=True`; both split on runs of whitespace.

df_liu_stars = pd.read_csv(stem + "notebooks_for_development/spec_sets_check/lamost/catalog_liu_et_al.txt",
                           skiprows=65, sep=r"\s+", usecols=[0,1,2,11,12,13,14,31],
                           names=["ID","RAdeg","DEdeg","VType","SNR","FeH","e_FeH","Num"])

In [5]:
# read in RA, DEC info for the LAMOST spectra I downloaded, from the query I submitted
# (there are ~34 duplicate rows)
#
# NOTE: the statement below is wrapped in a triple-quoted string, so it is NOT executed;
# the cell only evaluates (and echoes) that string. df_downloaded_lamost_spectra is therefore
# never defined by this cell.

'''
df_downloaded_lamost_spectra = pd.read_csv(stem + "notebooks_for_development/spec_sets_check/lamost/lamost_target_query.txt")
'''

'\ndf_downloaded_lamost_spectra = pd.read_csv(stem + "notebooks_for_development/spec_sets_check/lamost/lamost_target_query.txt")\n'

In [6]:
# read in our retrieved Fe/H for LAMOST spectra and splice stuff to enable matching
# NOTE(review): df_our_data is not referenced again in the visible cells; the same CSV is read
# again further down into df_lamost_rrlfe_retrievals, which is the frame actually used.

df_our_data = pd.read_csv(stem + "bin/20230120_all_spectra_retrieved_vals.csv")

In [7]:
# standalone: make table of spectrum names and their RA, DEC from FITS headers
#
# NOTE: everything below is wrapped in a triple-quoted string, so it is NOT executed; the cell
# only evaluates (and echoes) that string.
# NOTE(review), if this is ever re-enabled:
#   * the `df[col].loc[i] = value` writes are chained assignments, which pandas warns about and
#     which may not update the frame; `df.loc[i, col] = value` is the supported form;
#   * the handles returned by fits.open() are never closed;
#   * the table is written to junk.csv, whereas the following cell reads spectra_ra_dec.csv
#     -- presumably the file was renamed by hand; confirm.

'''
fits_dir = stem + "notebooks_for_development/spec_sets_check/lamost/spectra/"
fits_file_names = glob.glob(fits_dir + "*fits")
init_array = np.zeros((len(fits_file_names),1), dtype=float)

# initialize DataFrame
df_lamost_specs_ra_dec = pd.DataFrame(fits_file_names, columns=["file_name"])
df_lamost_specs_ra_dec["ra"] = init_array
df_lamost_specs_ra_dec["dec"] = init_array

for file_num in range(0, len(fits_file_names)):
    
    print(file_num)
    
    hdul = fits.open(fits_file_names[file_num])
    
    df_lamost_specs_ra_dec["file_name"].loc[file_num] = os.path.basename(fits_file_names[file_num])
    df_lamost_specs_ra_dec["ra"].loc[file_num] = hdul[0].header["RA"]
    df_lamost_specs_ra_dec["dec"].loc[file_num] = hdul[0].header["DEC"]
    
df_lamost_specs_ra_dec.to_csv(stem + "notebooks_for_development/spec_sets_check/lamost/junk.csv", index=False)
'''

'\nfits_dir = stem + "notebooks_for_development/spec_sets_check/lamost/spectra/"\nfits_file_names = glob.glob(fits_dir + "*fits")\ninit_array = np.zeros((len(fits_file_names),1), dtype=float)\n\n# initialize DataFrame\ndf_lamost_specs_ra_dec = pd.DataFrame(fits_file_names, columns=["file_name"])\ndf_lamost_specs_ra_dec["ra"] = init_array\ndf_lamost_specs_ra_dec["dec"] = init_array\n\nfor file_num in range(0, len(fits_file_names)):\n    \n    print(file_num)\n    \n    hdul = fits.open(fits_file_names[file_num])\n    \n    df_lamost_specs_ra_dec["file_name"].loc[file_num] = os.path.basename(fits_file_names[file_num])\n    df_lamost_specs_ra_dec["ra"].loc[file_num] = hdul[0].header["RA"]\n    df_lamost_specs_ra_dec["dec"].loc[file_num] = hdul[0].header["DEC"]\n    \ndf_lamost_specs_ra_dec.to_csv(stem + "notebooks_for_development/spec_sets_check/lamost/junk.csv", index=False)\n'

In [8]:
# retrieve LAMOST RA, DEC
# The "ra" and "dec" columns of this table are rounded and used as merge keys below, and its
# "file_name" column is later used to match spectra to the rrlfe retrievals.

df_lamost_specs_ra_dec = pd.read_csv(stem + "notebooks_for_development/spec_sets_check/lamost/spectra_ra_dec.csv")

In [9]:
# Expose the Liu+ coordinates under the same column names ("ra", "dec") that the LAMOST
# table uses, so that both frames can be handled uniformly.
df_liu_stars["ra"] = df_liu_stars["RAdeg"]
df_liu_stars["dec"] = df_liu_stars["DEdeg"]

# Add rounded copies of the coordinates (3 decimals) to both frames; these rounded columns
# are the keys for the positional match.
# NOTE(review): two positions that differ only slightly but fall on opposite sides of a
# rounding boundary get different keys and will not match.
for frame in (df_liu_stars, df_lamost_specs_ra_dec):
    for coord in ("ra", "dec"):
        frame[coord + "_round"] = frame[coord].round(3)

In [10]:
# Positional cross-match: inner-join the Liu+ catalog with the LAMOST spectrum table on the
# rounded RA and DEC. The result carries the Liu+ FeH column; the rrlfe FeH is added later.

merged_df_liu_feh = df_liu_stars.merge(
    df_lamost_specs_ra_dec,
    how="inner",
    on=["ra_round", "dec_round"],
)

In [11]:
# for clarity: copy the Liu+ "FeH" column under an explicit name, so it can be told apart
# from the rrlfe value ("feh_rrlfe") once the two sets of retrievals share a table

merged_df_liu_feh["feh_liu"] = merged_df_liu_feh["FeH"]

In [12]:
# Count the distinct rows of the Liu+ table (duplicates are judged on all columns).
n_liu_unique = df_liu_stars.drop_duplicates().shape[0]
print("Number of stars in Liu:", n_liu_unique)

Number of stars in Liu: 6268


In [13]:
# Count the distinct rows of the LAMOST spectrum table (duplicates are judged on all columns).
n_lamost_unique = df_lamost_specs_ra_dec.drop_duplicates().shape[0]
print("Number of LAMOST spectra:", n_lamost_unique)

Number of LAMOST spectra: 6018


In [14]:
# Count the distinct rows of the positional cross-match.
n_matches_unique = merged_df_liu_feh.drop_duplicates().shape[0]
print("Number of Liu-LAMOST matches:", n_matches_unique)

Number of Liu-LAMOST matches: 5839


In [15]:
# Read in our own (rrlfe) retrievals; they are merged with the Liu/LAMOST table by file name
# further down.
# `stem` already ends with a path separator, so the relative part must not start with "/"
# (the original produced ".../rrlfe//bin/...", unlike the other paths built from `stem`).

df_lamost_rrlfe_retrievals = pd.read_csv(stem + "bin/20230120_all_spectra_retrieved_vals.csv")

In [16]:
# for clarity: copy the rrlfe "feh_retrieved" column under an explicit name, so it can be
# told apart from the Liu+ value ("feh_liu") after the merge

df_lamost_rrlfe_retrievals["feh_rrlfe"] = df_lamost_rrlfe_retrievals["feh_retrieved"]

In [17]:
# Build a common key for matching: the part of each file name before its first ".".
# `n` must be passed by keyword: pandas 2.x made every argument of Series.str.split after
# `pat` keyword-only, so the positional form `.str.split(".", 1)` raises a TypeError there.
merged_df_liu_feh["match_name"] = merged_df_liu_feh["file_name"].str.split(".", n=1).str[0]
df_lamost_rrlfe_retrievals["match_name"] = df_lamost_rrlfe_retrievals["orig_spec_file_name"].str.split(".", n=1).str[0]

In [18]:
# merge our retrievals onto Liu etc.

merged_df_liu_rrlfe = pd.merge(merged_df_liu_feh, df_lamost_rrlfe_retrievals, how="inner", on=["match_name"])

In [19]:
# Clean the merged table before fitting. Each mask is computed once and used both for the
# printed count and for the filtering, so the two cannot drift apart.

# drop rows with '-999' retrievals in Liu (anything below -900 is treated as that sentinel)
liu_bad = merged_df_liu_rrlfe["feh_liu"] < -900
print("Unphysical Liu values:",np.sum(liu_bad))
merged_df_liu_rrlfe_good_liu = merged_df_liu_rrlfe.loc[~liu_bad].copy()

# drop rows with '> 50' retrievals from rrlfe (there was only 1 when I did this)
rrlfe_bad = merged_df_liu_rrlfe_good_liu["feh_rrlfe"] > 50
print("Unphysical rrlfe values:",np.sum(rrlfe_bad))
merged_df_liu_rrlfe_good_all = merged_df_liu_rrlfe_good_liu.loc[~rrlfe_bad].copy()

# drop any remaining NaN values; both columns feed np.polyfit, which cannot handle NaNs,
# so both are checked (the original only checked feh_rrlfe)
merged_df_liu_rrlfe_good_all = merged_df_liu_rrlfe_good_all.dropna(subset=["feh_liu", "feh_rrlfe"])

Unphysical Liu values: 435
Unphysical rrlfe values: 1


In [21]:
# find best fit, using non-NaN values
# Degree-1 (straight-line) fit of the rrlfe [Fe/H] (y) against the Liu+ [Fe/H] (x).
# np.polyfit returns the coefficients highest power first, so coeffs_poly[0] is the slope
# and coeffs_poly[1] the intercept (this is how the plotting cell uses them).

coeffs_poly = np.polyfit(merged_df_liu_rrlfe_good_all["feh_liu"], merged_df_liu_rrlfe_good_all["feh_rrlfe"], deg=1)

In [22]:
# Printed in the order returned by the fit: [slope, intercept].
print("Coeffs of best fit:",coeffs_poly)

Coeffs of best fit: [ 0.50041744 -0.63913339]


In [26]:
# Scatter plot of rrlfe vs. Liu+ [Fe/H], with the linear best fit and the 1-to-1 line.
# An explicit figure/axes pair is created here so that re-running the cell always draws on a
# fresh figure; the implicit pyplot calls would draw onto whatever figure is current (e.g. a
# still-open window from a previous run under the interactive Qt backend).
fig, ax = plt.subplots()

# x-range over which the two reference lines are drawn
feh_line_x = np.array([-4.0, 4.0])

ax.scatter(merged_df_liu_rrlfe_good_all["feh_liu"], merged_df_liu_rrlfe_good_all["feh_rrlfe"], alpha=0.2)
# best-fit line: slope * x + intercept
ax.plot(feh_line_x, coeffs_poly[0] * feh_line_x + coeffs_poly[1], color="k", label="best fit")
ax.plot(feh_line_x, feh_line_x, linestyle="--", color="grey", label="1-to-1")
ax.set_ylabel("[Fe/H], rrlfe")
ax.set_xlabel("[Fe/H], Liu+")
ax.legend()
plt.show()

In [28]:
# NOTE: the check below is wrapped in a triple-quoted string, so it is NOT executed as written.
# NOTE(review): the output saved with this cell is a bare number rather than the string,
# so it presumably dates from an earlier run with the check enabled -- confirm.
'''
# check to see if data points along vertical lines represent multiple single-epoch spectra 
# answer: no! they're different stars
np.sum(merged_df_liu_rrlfe_good_all["feh_liu"] == -2.2)
df_liu_stars.where(df_liu_stars["FeH"] == -2.2).dropna()
'''

44