### This notebook contains the steps needed to merge various literature tables for Kepler exoplanet host stars and compute abundance ratios from the Brewer & Fischer 2018 catalog

In [1]:
import os
import requests
from io import BytesIO
from io import StringIO

import pickle
import numpy as np
import pandas as pd

from astropy.table import Table
from astropy.table import join
from astropy.io import ascii

import astropy.units as u
import astropy.constants as c

# Function to get NASA Exoplanet Archive catalogs 
# from Dan Foreman-Mackey (https://github.com/dfm/exopop)
def get_catalog(name, basepath="../data"):
    """Return a NASA Exoplanet Archive table as a DataFrame, caching it on disk.

    The table is looked up in ``<basepath>/<name>.h5`` first. If that file does
    not exist, the table is requested from the archive's nstedAPI endpoint,
    parsed as CSV, written to that HDF5 file (table format, key ``name``) and
    returned.

    Parameters
    ----------
    name : str
        Archive table name. Used in the request URL, in the cache file name
        and as the HDF5 key.
    basepath : str
        Directory holding the cache files; created if it does not exist.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    requests.HTTPError
        If the archive answers with a 4xx/5xx status.
    """
    fn = os.path.join(basepath, "{0}.h5".format(name))
    if os.path.exists(fn):
        return pd.read_hdf(fn, key=name)

    # exist_ok=True replaces the separate exists() check and its
    # check-then-create race.
    os.makedirs(basepath, exist_ok=True)
    print("Downloading {0}...".format(name))
    url = ("http://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/"
           "nph-nstedAPI?table={0}&select=*").format(name)
    r = requests.get(url)
    # raise_for_status() does nothing on success, so no status pre-check is needed.
    r.raise_for_status()

    fh = BytesIO(r.content)
    df = pd.read_csv(fh)
    # key/format are passed by keyword: positional arguments after the path are
    # deprecated in pandas 2.x and keyword-only from pandas 3.0.
    df.to_hdf(fn, key=name, format="t")
    return df

#To track the provenance of different columns, we want a function to prepend an informative string to each column name.
#We will do this before merging tables to make tracking the information easier.
def prepend_dataframe_columns(df, prefix):
    """Rename every column of ``df`` in place to ``prefix + <old name>``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are modified in place.
    prefix : str
        String put in front of each existing column name.

    Returns
    -------
    None
        The frame is mutated; nothing is returned.
    """
    prefixed_names = []
    for column_name in df.columns:
        prefixed_names.append(prefix + column_name)
    df.columns = prefixed_names
    return None

#Suppressing some warnings
# These settings are global: they stay in effect for every cell run afterwards
# in the same kernel.
import warnings
from astropy.utils.exceptions import AstropyWarning

# append=True adds this filter at the END of the filter list, i.e. with lower
# precedence than the simplefilter() entries below, which are inserted at the front.
warnings.filterwarnings('ignore', category=UserWarning, append=True)
warnings.simplefilter('ignore', category=AstropyWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

# Disable pandas' chained-assignment (SettingWithCopy) warning entirely.
# NOTE(review): this also hides the warning for any chained assignment that
# silently fails to write back to the parent frame.
pd.options.mode.chained_assignment = None  # default='warn'

### Cumulative KOI table from NASA Exoplanet Archive

In [2]:
# Load the cumulative KOI table (downloaded on first use, then read from the local cache).
koi = get_catalog('cumulative')
#koi = get_catalog('q1_q17_dr25_sup_koi')

# Integer KOI star number parsed from the KOI name, e.g. 'K00752.01' -> 752.
# Used below as the merge key for tables indexed by KOI star number.
koi['kepoi_int'] = koi['kepoi_name'].map(lambda name: int(float(name.strip("K"))))

# Number of distinct kepid values, then number of distinct KOI names (one per line).
print(len(np.unique(koi['kepid'])), len(np.unique(koi['kepoi_name'])), sep="\n")

koi.head()

8214
9564


Unnamed: 0,kepid,kepoi_name,kepler_name,ra,ra_err,ra_str,dec,dec_err,dec_str,koi_gmag,...,koi_fpflag_ec,koi_insol,koi_insol_err1,koi_insol_err2,koi_srho,koi_srho_err1,koi_srho_err2,koi_fittype,koi_score,kepoi_int
0,10797460,K00752.01,Kepler-227 b,291.93423,0.0,19h27m44.22s,48.141651,0.0,+48d08m29.9s,15.89,...,0,93.59,29.45,-16.65,3.20796,0.33173,-1.09986,LS+MCMC,1.0,752
1,10797460,K00752.02,Kepler-227 c,291.93423,0.0,19h27m44.22s,48.141651,0.0,+48d08m29.9s,15.89,...,0,9.11,2.87,-1.62,3.02368,2.20489,-2.49638,LS+MCMC,0.969,752
2,10811496,K00753.01,,297.00482,0.0,19h48m01.16s,48.134129,0.0,+48d08m02.9s,15.943,...,0,39.3,31.04,-10.49,7.29555,35.03293,-2.75453,LS+MCMC,0.0,753
3,10848459,K00754.01,,285.53461,0.0,19h02m08.31s,48.28521,0.0,+48d17m06.8s,16.1,...,0,891.96,668.95,-230.35,0.2208,0.00917,-0.01837,LS+MCMC,0.0,754
4,10854555,K00755.01,Kepler-664 b,288.75488,0.0,19h15m01.17s,48.2262,0.0,+48d13m34.3s,16.015,...,0,926.16,874.33,-314.24,1.98635,2.71141,-1.74541,LS+MCMC,1.0,755


### Gaia-Kepler Stellar Properties Catalog I (Berger et al. 2020a)
Stellar properties

In [3]:
# Read the locally stored stellar-properties table.
# NOTE(review): the displayed columns already start with "b20_", which is presumably
# why prepend_dataframe_columns is not called here — confirm against the parquet file.
ber_st = pd.read_parquet('./tables/GKSPCPapTable2.parquet')
#print(len(ber_st), 'stars')
#ber_st.columns = ber_st.columns.str.lower()
ber_st.head()

Unnamed: 0,b20_KIC,b20_iso_mass,b20_iso_mass_err1,b20_iso_mass_err2,b20_iso_teff,b20_iso_teff_err1,b20_iso_teff_err2,b20_iso_logg,b20_iso_logg_err1,b20_iso_logg_err2,...,b20_iso_age_err2,b20_iso_dis,b20_iso_dis_err1,b20_iso_dis_err2,b20_iso_avs,b20_iso_gof,b20_TAMS,b20_log10_iso_age,b20_E_log10_iso_age,b20_e_log10_iso_age
0,757076,1.396,0.179,-0.218,5052.2,103.9,-86.6,3.373,0.067,-0.081,...,-0.73,651.5,22.6,-21.6,0.371,1.0,2.69,9.392697,0.23976,0.152148
1,757099,0.873,0.054,-0.039,5364.9,102.7,-84.9,4.318,0.037,-0.029,...,-3.98,367.8,7.1,-6.9,0.34,1.0,17.18,10.181844,0.078228,0.131851
2,757137,1.669,0.309,-0.3,4628.7,84.8,-76.2,2.388,0.083,-0.086,...,-0.58,568.4,12.2,-11.6,0.344,1.0,1.66,9.178977,0.230956,0.210494
3,757280,1.715,0.086,-0.089,6856.8,144.4,-139.9,3.834,0.033,-0.032,...,-0.16,822.0,19.4,-21.0,0.496,1.0,1.62,9.093422,0.064941,0.059998
4,757450,0.914,0.062,-0.057,5301.2,111.0,-103.1,4.432,0.045,-0.044,...,-5.15,829.6,24.0,-23.4,0.465,1.0,16.1,9.975891,0.194957,0.341414


### GKSPC II (Berger et al. 2020b)
Planet properties

In [4]:
# Read the planet table from CDS and convert it straight to a pandas DataFrame.
ber_pl = Table.read(
    "http://cdsarc.unistra.fr/ftp/J/AJ/160/108/table1.dat",
    readme="http://cdsarc.unistra.fr/ftp/J/AJ/160/108/ReadMe",
    format="ascii.cds",
).to_pandas()

# Build the archive-style KOI name used as the planet merge key: 1.01 -> 'K00001.01'.
ber_pl['kepoi_name'] = ['K' + str(koi_number).zfill(8) for koi_number in ber_pl['KOI']]
prepend_dataframe_columns(ber_pl, 'b20b_')
#print(len(ber_pl), 'planets')
ber_pl.head()

Unnamed: 0,b20b_KIC,b20b_KOI,b20b_PD,b20b_Radius,b20b_E_Radius,b20b_e_Radius,b20b_a,b20b_E_a,b20b_e_a,b20b_Flux,b20b_E_Flux,b20b_e_Flux,b20b_ZAMSFlux,b20b_Flag,b20b_kepoi_name
0,11446443,1.01,CONFIRMED,14.21,0.29,0.29,0.0355,0.0008,0.0008,854.78,69.34,64.81,524.75,AO,K00001.01
1,10666592,2.01,CONFIRMED,16.45,0.35,0.34,0.0381,0.0005,0.0006,4285.95,331.23,336.99,2394.68,AO,K00002.01
2,10748390,3.01,CONFIRMED,4.88,0.08,0.07,0.0516,0.0006,0.0004,86.58,5.24,4.73,58.22,,K00003.01
3,3861595,4.01,CONFIRMED,13.18,0.42,0.82,0.0583,0.0007,0.0015,5244.42,478.9,520.46,3647.63,AO,K00004.01
4,11853905,7.01,CONFIRMED,4.01,0.1,0.1,0.0455,0.0007,0.001,1247.17,99.09,99.02,719.8,,K00007.01


### Lithium abundances for the CKS sample ([Berger et al. 2018](https://ui.adsabs.harvard.edu/abs/2018ApJ...855..115B/abstract))

In [5]:
# Table 1 of the paper, read from CDS and tagged with the "b18_" prefix.
b18 = Table.read(
    "https://cdsarc.unistra.fr/ftp/J/ApJ/855/115/table1.dat",
    readme="https://cdsarc.unistra.fr/ftp/J/ApJ/855/115/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(b18, "b18_")

# Table 2 of the same paper, tagged with the "b18_tb2_" prefix.
b18_tb2 = Table.read(
    "https://cdsarc.unistra.fr/ftp/J/ApJ/855/115/table2.dat",
    readme="https://cdsarc.unistra.fr/ftp/J/ApJ/855/115/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(b18_tb2, "b18_tb2_")

# Attach table 2 to table 1 by KOI number; table-1 rows without a match keep NaN
# in the b18_tb2_* columns.
b18 = b18.merge(b18_tb2, how='left', left_on='b18_KOI', right_on='b18_tb2_KOI')
#print(len(b18))
b18.head()

Unnamed: 0,b18_Obs,b18_Date,b18_KOI,b18_Kpmag,b18_S/N,b18_Teff,b18_logg,b18_[Fe/H],b18_Vt,b18_EW(Li),...,b18_tb2_S/N,b18_tb2_Teff,b18_tb2_logg,b18_tb2_[Fe/H],b18_tb2_Vt,b18_tb2_EW(Li),b18_tb2_e_EW(Li),b18_tb2_A(Li),b18_tb2_e_A(Li),b18_tb2_dA(Li)Hy
0,j122.742,2011-06-16,1,11.34,39,5819,4.4,0.01,1.04,85.5,...,39.0,5819.0,4.4,0.01,1.04,85.5,7.9,2.62,0.08,0.08
1,j122.92,2011-06-13,2,10.46,39,6449,4.13,0.2,1.77,82.9,...,39.0,6449.0,4.13,0.2,1.77,82.9,7.3,3.11,0.07,0.59
2,j122.81,2011-06-13,3,9.17,41,4864,4.5,0.33,0.54,4.2,...,,,,,,,,,,
3,j70.1247,2009-06-05,6,12.16,119,6348,4.36,0.04,1.58,15.4,...,,,,,,,,,,
4,j74.509,2009-07-31,7,12.21,126,5827,4.09,0.18,1.17,54.0,...,,,,,,,,,,


### CKS I (Petigura et al. 2017)
For consistency with CKS VII, we will use the false positive designations adopted in Table 4 of CKS I, which themselves are based on a compilation of planet candidate dispositions from Morton et al. (2016), Mullally et al. (2015), and the NASA Exoplanet Archive (as it was on Feb. 1, 2017).

In [6]:
# Read the disposition table from CDS and convert it to a pandas DataFrame.
p17 = Table.read(
    'ftp://cdsarc.unistra.fr/ftp/J/AJ/154/107/table4.dat',
    readme='ftp://cdsarc.unistra.fr/ftp/J/AJ/154/107/ReadMe',
    format='ascii.cds',
).to_pandas()

#Munging: drop the 'OI-' substring from each name so it matches the archive-style
#KOI name (e.g. 'K00001.01'), then call the column KOI.
p17['Name'] = p17['Name'].map(lambda name: name.replace('OI-', ''))
p17 = p17.rename(columns={'Name': 'KOI'})
prepend_dataframe_columns(p17, 'p17_')
#print(len(p17), 'planets')
p17.head()

Unnamed: 0,p17_KOI,p17_Disp,p17_M16,p17_M15,p17_NEA
0,K00001.01,CP,CP,CP,CP
1,K00002.01,CP,CP,CP,CP
2,K00003.01,CP,CP,CP,CP
3,K00006.01,FP,FP,FP,FP
4,K00007.01,CP,CP,CP,CP


### CKS VII (Fulton & Petigura 2018)

In [7]:
# Table 2: its KOI column is renamed to KOI_star so it can serve as the join key.
cks2 = (
    Table.read("http://cdsarc.unistra.fr/ftp/J/AJ/156/264/table2.dat",
               readme="http://cdsarc.unistra.fr/ftp/J/AJ/156/264/ReadMe",
               format="ascii.cds")
    .to_pandas()
    .rename(columns={"KOI": "KOI_star"})
)
prepend_dataframe_columns(cks2, 'cks_')

# Table 3: derive the star-level key from the KOI name by dropping everything
# after the first '.', e.g. 'K00001.01' -> 'K00001'.
cks3 = Table.read("http://cdsarc.unistra.fr/ftp/J/AJ/156/264/table3.dat",
                  readme="http://cdsarc.unistra.fr/ftp/J/AJ/156/264/ReadMe",
                  format="ascii.cds").to_pandas()
cks3["KOI_star"] = [koi_name.split(".")[0] for koi_name in cks3["KOI"]]
prepend_dataframe_columns(cks3, 'cks_')

# One row per table-3 entry, with the matching table-2 columns attached.
cks = cks3.merge(cks2, how='left', on='cks_KOI_star')
#print(len(cks2), 'stars')
#print(len(cks), 'planets')
cks.head()

Unnamed: 0,cks_KOI,cks_Per,cks_E_Per,cks_e_Per,cks_Rp/R*,cks_E_Rp/R*,cks_e_Rp/R*,cks_Rp,cks_E_Rp,cks_e_Rp,...,cks_E_rhoiso,cks_e_rhoiso,cks_logAiso,cks_E_logAiso,cks_e_logAiso,cks_plxspec,cks_E_plxspec,cks_e_plxspec,cks_r8,cks_RCF
0,K00001.01,2.470613,1.9e-08,1.9e-08,0.123851,2.5e-05,7.6e-05,14.141,0.331,0.331,...,0.04,0.04,9.74,0.13,0.15,4.756,0.32,0.474,1.0,1.0102
1,K00002.01,2.204735,3.8e-08,3.8e-08,0.075408,8e-06,7e-06,16.25,0.349,0.349,...,0.01,0.0,9.25,0.04,0.05,3.661,0.411,0.586,1.001,1.0034
2,K00006.01,1.334104,7.07e-07,7.07e-07,0.294016,0.103683,0.209459,41.936,22.239,22.239,...,0.03,0.03,9.32,0.09,0.24,2.201,0.127,0.212,1.007,1.0006
3,K00007.01,3.213669,1.122e-06,1.122e-06,0.024735,0.000141,7.6e-05,4.076,0.107,0.107,...,0.02,0.02,9.78,0.09,0.08,2.112,0.228,0.274,1.0,
4,K00008.01,1.160153,4.27e-07,4.27e-07,0.018559,0.000246,0.001678,1.896,0.11,0.11,...,0.03,0.05,9.18,0.26,0.52,2.895,0.127,0.192,1.001,


### Martinez et al. 2019 independent analysis of CKS spectra

In [8]:
# Table 1: its Name column is renamed to KOI_star so it can serve as the join key.
m19_t1 = (
    Table.read("./tables/martinez2019/table1.dat",
               readme="./tables/martinez2019/ReadMe",
               format="ascii.cds")
    .to_pandas()
    .rename(columns={'Name': 'KOI_star'})
)
#m19_t1.head()

# Table 2: derive the star-level key from the KOI name ('K00001.01' -> 'K00001').
m19_t2 = Table.read("./tables/martinez2019/table2.dat",
                    readme="./tables/martinez2019/ReadMe",
                    format="ascii.cds").to_pandas()
m19_t2['KOI_star'] = [koi_name.split(".")[0] for koi_name in m19_t2['KOI']]
#m19_t2.head()

# Both tables have a Seq column, so the merge labels them Seq_x (table 2) and
# Seq_y (table 1).
m19 = m19_t2.merge(m19_t1, how='left', on='KOI_star')
print(len(m19_t1), len(m19_t2), len(m19))

prepend_dataframe_columns(m19, 'm19_')
m19.head()

1285 1633 1633


Unnamed: 0,m19_Seq_x,m19_KOI,m19_Rp,m19_e_Rp,m19_KOI_star,m19_Seq_y,m19_Teff,m19_e_Teff,m19_log(g),m19_e_log(g),m19_Vturb,m19_e_Vturb,m19_R*,m19_e_R*
0,1,K00001.01,13.62,0.3,K00001,35,5856,33,4.44,0.09,1.15,0.06,1.05,0.02
1,2,K00002.01,17.93,1.07,K00002,154,6384,77,3.9,0.13,1.98,0.1,2.01,0.12
2,3,K00007.01,4.49,0.13,K00007,337,5852,20,4.19,0.06,1.2,0.03,1.52,0.04
3,4,K00010.01,16.62,1.14,K00010,947,6118,120,4.09,0.14,1.89,0.2,1.59,0.11
4,5,K00017.01,14.31,0.35,K00017,467,5699,22,4.34,0.07,1.02,0.04,1.28,0.03


### Petigura 2020

In [9]:
# Download the planet table as CSV from the keprat GitHub repository.
url = "https://raw.githubusercontent.com/petigura/keprat/master/data/cksgaia-planets.csv"
r = requests.get(url)
# Stop on an HTTP error instead of handing an error page to the CSV parser
# (the same check get_catalog performs on its download).
r.raise_for_status()
s = r.content
p20 = pd.read_csv(StringIO(s.decode('utf-8')))
# Ratio of the tau column to the tau0 column.
p20['Rtau'] = p20.tau/p20.tau0
prepend_dataframe_columns(p20, 'p20_')
print(len(p20), 'planets')
p20.head()

1870 planets


Unnamed: 0,p20_id_kic,p20_id_starname,p20_id_kic.1,p20_id_koi,p20_id_koicand,p20_id_kepler_name,p20_koi_disposition,p20_koi_period,p20_koi_period_err1,p20_koi_period_err2,...,p20_fgraz,p20_tau0,p20_tau,p20_dr25_ror_v18_srad,p20_dr25_ror_v18_srad_err1,p20_dr25_ror_gdir_srad,p20_dr25_ror_gdir_srad_err1,p20_v18_ror_gdir_srad,p20_v18_ror_gdir_srad_err1,p20_Rtau
0,757450,K00889,757450,889.0,K00889.01,Kepler-75 b,CONFIRMED,8.884923,1e-06,-1e-06,...,0.0,4.019727,1.856715,,,11.802615,0.412225,,,0.461901
1,1026957,K00958,1026957,958.0,K00958.01,,NOT DISPOSITIONED,21.761294,1.1e-05,-1.1e-05,...,0.0,4.427408,1.204967,,,2.478217,0.114768,,,0.272161
2,1718189,K00993,1718189,993.0,K00993.01,Kepler-262 c,CONFIRMED,21.853679,9.7e-05,-9.7e-05,...,0.0,4.914936,3.306574,,,1.551245,0.081248,,,0.67276
3,1718189,K00993,1718189,993.0,K00993.02,Kepler-262 b,CONFIRMED,13.060767,9.2e-05,-9.2e-05,...,0.0,4.139989,3.093553,,,1.267332,0.068661,,,0.747237
4,1718189,K00993,1718189,993.0,K00993.03,,CANDIDATE,86.723152,0.001814,-0.001814,...,0.0,7.781345,4.761139,,,1.257706,0.094337,,,0.611866


### Lu et al. 2021 gyrokinematic ages

In [10]:
# Read the locally stored table of kinematic ages.
# NOTE(review): the displayed columns already start with "l20_", so no prefix is
# added here — confirm against the parquet file.
l20 = pd.read_parquet('./tables/KinageAllKepler.parquet')
#l20.columns = l20.columns.str.lower()
print(len(l20), 'stars')
l20.head()

155263 stars


Unnamed: 0,l20_KID,l20_Rvar,l20_v_t,l20_v_b,l20_ebv,l20_Av,l20_Av_errp,l20_Av_errm,l20_Av_std,l20_bp_dered,l20_rp_dered,l20_bprp_dered,l20_G_dered,l20_teff_calc,l20_abs_G,l20_Prot_predict,l20_Prot_log,l20_Ro,l20_vel_dis,l20_kin_age
0,757076,876.277685,46.183244,-5.383212,0.105,0.28791,0.009323,0.009323,0.009323,11.777006,10.774865,1.002141,11.345285,5258.348073,2.216916,20.483619,1.311407,2.282666,11.194525,2.732534
1,757099,57346.507907,6.947554,-6.672655,0.059046,0.161903,0.022417,0.022417,0.022417,13.361525,12.369631,0.991894,12.947337,5285.025302,5.086547,4.24975,0.628363,0.328795,1.9029,0.167638
2,757280,707.322359,11.457449,-2.60831,0.15,0.4113,0.0,0.0,0.0,11.553965,11.0676,0.486364,11.367806,6917.117893,1.733926,2.86635,0.457329,0.255402,7.812125,1.550512
3,757450,21632.027626,33.88967,-33.411547,0.14,0.38388,0.018646,0.018646,0.018646,15.240799,14.264825,0.975974,14.829724,5327.028622,5.168635,15.227621,1.182632,0.792438,13.113275,3.50578
4,891901,1109.89809,74.201645,6.434752,0.095812,0.262716,0.007809,0.007809,0.007809,13.201027,12.588211,0.612816,12.975954,6456.866533,2.564473,7.6178,0.88183,0.64323,17.558927,5.552499


In [11]:
# Read the local CDS-format table, convert it to pandas and tag its columns with "l21_".
l21 = Table.read(
    "./tables/lu2021/table1.dat",
    readme="./tables/lu2021/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(l21, "l21_")
l21.head()

Unnamed: 0,l21_KIC,l21_Prot,l21_e_Prot,l21_Gaia,l21_RAdeg,l21_e_RAdeg,l21_DEdeg,l21_e_DEdeg,l21_Wvel,l21_e_Wvel,l21_sigma,l21_GyroAge,l21_e_GyroAge,l21_Ro,l21_Teff,l21_GMAG,l21_e_sigma
0,757099,0.367,0.006,2050233601176543232,291.043,0.02,36.594,0.028,-1.967,3.403,13.291,3.581,1.771,0.028,5285.025,5.11,0.006
1,892713,5.753,0.777,2050232256840088832,291.14,0.017,36.623,0.022,-12.292,0.79,14.556,4.132,1.776,0.522,6308.396,1.487,0.006
2,892834,13.765,0.023,2050232089348169728,291.163,0.019,36.628,0.024,2.592,3.274,6.762,1.235,1.738,0.673,4881.242,6.251,0.011
3,893033,26.999,0.214,2050232639104044032,291.208,0.02,36.668,0.024,-41.087,3.357,18.404,5.979,1.79,1.388,4689.995,6.653,0.004
4,893165,54.874,3.786,2050231505232662528,291.237,0.011,36.615,0.015,5.218,3.285,13.73,3.769,1.772,2.517,5933.169,3.695,0.001


### Angus et al. 2018 rotation periods

In [12]:
# Read the locally stored KOI rotation-period table and tag its columns with "a18_".
a18 = pd.read_csv("./tables/Angus2018_koi_periods_nohead.csv")
prepend_dataframe_columns(a18, "a18_")
a18.head()

Unnamed: 0,a18_Index,a18_KOI,a18_feh,a18_feh_errm,a18_feh_errp,a18_logg,a18_logg_errm,a18_logg_errp,a18_period,a18_period_errm,a18_period_errp,a18_teff,a18_teff_errm,a18_teff_errp
0,0,1,-0.12,-0.28,0.33,4.609,-0.106,0.027,24.848729,5.996906,23.240003,5126.0,-138.0,160.0
1,1,2,-0.15,-0.1,0.1,4.455,-0.025,0.025,19.595792,3.779434,3.95145,5850.0,-50.0,50.0
2,2,3,0.26,-0.08,0.08,4.021,-0.011,0.011,28.159582,0.193655,2.656332,6350.0,-80.0,80.0
3,3,6,-0.04,-0.16,0.12,4.169,-0.048,0.055,22.765454,15.400891,3.123556,6225.0,-158.0,114.0
4,4,7,0.44,-0.101,0.101,4.081,-0.014,0.014,25.310087,1.685785,1.96845,5543.0,-79.0,79.0


### Mazeh et al. 2015 rotation periods

In [13]:
# Read the table from CDS, convert it to pandas and tag its columns with "m15_".
m15 = Table.read(
    "http://cdsarc.unistra.fr/ftp/J/ApJ/801/3/table1.dat",
    readme="http://cdsarc.unistra.fr/ftp/J/ApJ/801/3/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(m15, "m15_")
m15.head()

Unnamed: 0,m15_KOI,m15_KIC,m15_Teff,m15_log(g),m15_Prot,m15_e_Prot,m15_Rvar,m15_LPH,m15_w,m15_D,m15_N,m15_C,m15_G,m15_T,m15_F,m15_R,m15_M1,m15_M2
0,1,11446443,5713,4.143,70.55,16.47,445,0.362359,0.0,0,0,0,0,0,0,0,1,0
1,2,10666592,6264,3.79,70.69,6.8,919,0.542156,0.0,0,1,0,0,0,0,0,1,0
2,3,10748390,4766,4.59,29.31,0.49,12246,0.607229,0.431926,0,1,0,0,0,0,0,0,0
3,5,8554498,5861,4.17,37.23,4.18,237,0.082306,0.0,0,0,0,0,0,0,0,1,0
4,10,6922244,6213,4.17,82.12,16.05,1245,0.350827,0.0,0,0,0,0,0,0,0,1,0


### McQuillan et al. 2013 rotation periods

In [14]:
# Read the table from CDS, convert it to pandas and tag its columns with "m13_".
m13 = Table.read(
    "http://cdsarc.unistra.fr/ftp/J/ApJ/775/L11/table1.dat",
    readme="http://cdsarc.unistra.fr/ftp/J/ApJ/775/L11/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(m13, "m13_")
print(len(m13), 'stars')
m13.head()

2010 stars


Unnamed: 0,m13_KOI,m13_KIC,m13_Teff,m13_log(g),m13_Rp,m13_---,m13_Porb,m13_Prot,m13_e_Prot,m13_Rvar,m13_Flag
0,3,10748390,4766,4.59,4.68,,4.888,29.472,0.134,11.75,N
1,12,5812701,6419,4.26,13.4,,17.855,1.245,0.124,0.78,
2,41,6521045,5909,4.28,1.24,,6.887,24.988,2.192,0.39,
3,42,8866102,6170,4.1,2.71,,17.834,20.85,0.007,1.12,
4,44,8845026,6250,3.5,9.61,,66.468,3.792,0.907,1.16,


In [15]:
#Reinhold et al. 2013 rotation periods
# r13 = Table.read("http://cdsarc.unistra.fr/ftp/J/A+A/560/A4/table.dat",
#                  readme="http://cdsarc.unistra.fr/ftp/J/A+A/560/A4/ReadMe",
#                  format="ascii.cds", masked=True)

# r13.info()

#astropy throws following error:
#ValueError: Column Rvar failed to convert: could not convert string to float: '---'

In [16]:
# apo = Table.read("http://cdsarc.unistra.fr/ftp/J/AJ/155/68/table3.dat",
#                 readme="http://cdsarc.unistra.fr/ftp/J/AJ/155/68/ReadMe",
#                 format="ascii.cds")

# apo.info()

### Silva-Aguirre et al. 2015  asteroseismic sample

In [17]:
# Read the table from CDS, convert it to pandas and tag its columns with "s15_".
s15 = Table.read(
    "ftp://cdsarc.unistra.fr/ftp/J/MNRAS/452/2127/table3.dat",
    readme="ftp://cdsarc.unistra.fr/ftp/J/MNRAS/452/2127/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(s15, 's15_')
s15.head()

Unnamed: 0,s15_KOI,s15_KIC,s15_Teff,s15_e_Teff,s15_[Fe/H],s15_e_[Fe/H],s15_Mass,s15_E_Mass,s15_e_Mass,s15_Radius,...,s15_e_L,s15_Age,s15_E_Age,s15_e_Age,s15_Dist,s15_E_Dist,s15_e_Dist,s15_Notes,s15_n_Notes,s15_Ref
0,2,10666592,6350,80,0.26,0.08,1.497,0.042,0.04,1.986,...,0.322,2.11,0.29,0.24,386.44,12.11,11.95,HAT-P7,,Pal et al. (2008ApJ...680.1450P)
1,5,8554498,5945,60,0.17,0.05,1.197,0.021,0.029,1.794,...,0.182,5.6,0.45,0.42,439.34,13.68,13.68,,,
2,7,11853905,5781,76,0.09,0.1,1.117,0.021,0.029,1.555,...,0.124,6.71,0.77,0.67,499.1,15.46,15.46,Kepler-4,,Borucki et al. (2010Sci...327..977B)
3,41,6521045,5825,75,0.02,0.1,1.108,0.021,0.019,1.513,...,0.112,6.5,0.46,0.56,310.34,9.49,9.63,Kepler-100,,Marcy et al. (2014ApJS..210...20M)
4,42,8866102,6325,75,0.01,0.1,1.228,0.042,0.04,1.357,...,0.13,2.6,0.56,0.53,140.83,4.4,4.5,Kepler-410 A,,Van Eylen et al. (2015ApJ...808..126V)


### Brewer & Fischer 2018 CKS abundances

In [18]:
# Brewer & Fischer abundances: tables 3-6 of the paper, read from CDS with a shared ReadMe.
bf3, bf4, bf5, bf6 = [
    Table.read("http://cdsarc.unistra.fr/ftp/J/ApJS/237/38/table{0}.dat".format(table_number),
               readme="http://cdsarc.unistra.fr/ftp/J/ApJS/237/38/ReadMe", format="ascii.cds")
    for table_number in (3, 4, 5, 6)
]

# Chain of astropy joins (default join settings), in the order 3, 4, 5, 6.
bf = join(join(join(bf3, bf4), bf5), bf6)

# Integer KOI number for each star, used later as a merge key; 0 means "no usable KOI number".
bf['KOI_int'] = np.zeros(len(bf), dtype=int)

for i, star_name in enumerate(bf['Name']):
    # Only names that contain "KOI" and none of the letters E, W or B get a KOI number.
    is_plain_koi = ("KOI" in star_name) and not any(tag in star_name for tag in ("E", "W", "B"))
    # NOTE: str.strip("KOI-") removes any of the characters K, O, I and '-' from BOTH
    # ends of the name; it is not a prefix removal. The remaining text is cast to int
    # when it is stored in the integer column.
    bf['KOI_int'][i] = star_name.strip("KOI-") if is_plain_koi else 0

bf = bf.to_pandas()
prepend_dataframe_columns(bf, "bf18_")
bf.head()

Unnamed: 0,bf18_SPOCS,bf18_Name,bf18_RAh,bf18_RAm,bf18_RAs,bf18_DE-,bf18_DEd,bf18_DEm,bf18_DEs,bf18_Teff,...,bf18_e_[Si/H],bf18_e_[Ca/H],bf18_e_[Ti/H],bf18_e_[V/H],bf18_e_[Cr/H],bf18_e_[Mn/H],bf18_e_[Fe/H],bf18_e_[Ni/H],bf18_e_[Y/H],bf18_KOI_int
0,2281,KOI-3248,19,21,51.6,+,48,19,56,5742,...,0.02,0.02,0.02,0.04,0.02,0.03,0.02,0.02,0.04,3248
1,2361,KOI-4273,19,36,50.4,+,46,28,48,6123,...,0.02,0.03,0.02,0.04,0.02,0.03,0.02,0.02,0.04,4273
2,2393,KOI-3605,19,43,21.2,+,41,25,38,5252,...,0.02,0.02,0.02,0.04,0.02,0.03,0.02,0.02,0.04,3605
3,2405,KOI-3197,19,45,9.7,+,44,25,24,6218,...,0.02,0.03,0.02,0.04,0.02,0.03,0.02,0.02,0.04,3197
4,2430,KOI-1353,19,49,51.7,+,42,52,58,5951,...,0.02,0.03,0.02,0.04,0.02,0.03,0.02,0.02,0.04,1353


### Frasca et al. 2016 LAMOST activity parameters

In [19]:
# Read the table from CDS, convert it to pandas and tag its columns with "f16_".
f16 = Table.read(
    "https://cdsarc.unistra.fr/ftp/J/A+A/594/A39/tablea4.dat",
    readme="https://cdsarc.unistra.fr/ftp/J/A+A/594/A39/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(f16, 'f16_')
f16.head()

Unnamed: 0,f16_SpName,f16_HJD,f16_KIC,f16_RAdeg,f16_DEdeg,f16_EWHa,f16_e_EWHa,f16_EW8498,f16_e_EW8498,f16_EW8542,f16_e_EW8542,f16_EW8662,f16_e_EW8662,f16_n_EWHa,f16_Prot,f16_r_Prot
0,spec-55712-IF10M_sp02-195.fits,55712.29462,3725427,283.916107,38.864498,1.76,0.59,,,0.3,0.3,0.83,0.3,E,,
1,spec-55712-IF10M_sp03-059.fits,55712.29449,5079590,284.472107,40.2136,2.54,0.5,,,1.2,0.59,0.57,0.59,E,2.016,R13
2,spec-55712-IF10M_sp03-123.fits,55712.29447,5342618,284.399689,40.573399,3.26,0.38,0.87,0.35,1.77,0.36,1.09,0.36,E,,
3,spec-56094-kepler05F56094_sp14-049.fits,56094.30678,4353364,288.165924,39.431591,1.82,1.16,0.39,0.7,1.63,0.73,1.36,0.73,E,,
4,spec-56432-KP192102N424113V01_sp11-140.fits,56432.2611,8749284,290.197968,44.924999,13.04,0.37,1.15,0.3,2.5,0.31,2.22,0.31,E,3.218,D11


### Gaia DR2-Kepler 1-arcsecond positional cross-match (M. Bedell) and RUWE from Gaia source ID crossmatch (David et al. 2021) 

In [20]:
# RUWE table first, then the cross-match table with the RUWE columns attached by
# Gaia source ID (every cross-match row is kept).
ruwe = pd.read_parquet('./tables/kepler_dr2_1arcsec_ruwe.parquet')
gaia = pd.read_parquet('./tables/kepler_dr2_1arcsec.parquet').merge(
    ruwe, how='left', left_on='source_id', right_on='d20_source_id'
)

# Every column, including the d20_* ones from the RUWE table, gets the "gaia_" prefix.
prepend_dataframe_columns(gaia, "gaia_")
print(len(gaia), 'stars')
gaia.head()

201350 stars


Unnamed: 0,gaia_solution_id,gaia_designation,gaia_source_id,gaia_random_index,gaia_gaia_ref_epoch,gaia_ra,gaia_ra_error,gaia_dec,gaia_dec_error,gaia_parallax,...,gaia_ntce,gaia_jmag,gaia_hmag,gaia_kmag,gaia_planet?,gaia_kepler_gaia_ang_dist,gaia_d20_kepid,gaia_d20_source_id,gaia_d20_ruwe,gaia_d20_koi
0,1635721458409799680,b'Gaia DR2 2050233807328471424',2050233807328471424,689405501,2015.5,291.038681,0.035878,36.598031,0.047832,1.493916,...,0,10.126,9.667,9.559,b'none',0.15616,757076,2050233807328471424,0.946665,
1,1635721458409799680,b'Gaia DR2 2050233601176543104',2050233601176543104,447382505,2015.5,291.043057,0.019919,36.593767,0.027553,2.678194,...,1,11.656,11.193,11.094,b'none',0.128262,757099,2050233601176543104,2.172801,
2,1635721458409799680,b'Gaia DR2 2050230543159814656',2050230543159814656,1004823384,2015.5,291.055892,0.019394,36.559886,0.026621,1.723413,...,0,7.467,6.853,6.722,b'none',0.146787,757137,2050230543159814656,0.912556,
3,1635721458409799680,b'Gaia DR2 2050230611879323904',2050230611879323904,1253666653,2015.5,291.095373,0.016886,36.564903,0.022993,1.183652,...,0,10.852,10.662,10.627,b'none',0.11642,757280,2050230611879323904,0.870375,
4,1635721458409799680,b'Gaia DR2 2050231848829944320',2050231848829944320,298032508,2015.5,291.13762,0.01956,36.577344,0.025196,1.168913,...,1,13.665,13.262,13.118,b'conf',0.114502,757450,2050231848829944320,1.030048,889.0


### Kepler Stellar Data
We will also want the [Kepler Stellar data table](https://exoplanetarchive.ipac.caltech.edu/docs/Kepler_completeness_reliability.html) for the noise properties of each planet host. Since this is a large table, we have extracted only the columns we need ahead of time and stored them in the `tables` directory.

In [21]:
# Read the pre-extracted two-column table (kepid, rrmscdpp03p0) and tag its
# columns with "stlr_".
stlr = pd.read_csv('./tables/Kepler-Q1-Q17-DR25-CDPP3.csv')
prepend_dataframe_columns(stlr, 'stlr_')
stlr.head()

Unnamed: 0,stlr_kepid,stlr_rrmscdpp03p0
0,10000785,445.41
1,10000797,80.767
2,10000800,226.348
3,10000823,181.468
4,10000827,124.834


### C. Hedges amplitudes

In [22]:
ch = pd.read_csv('./tables/hedges_df_w_cdpp_candidates.csv')

# Per-quarter variability amplitude for quarters 1-17:
# (perc90 - perc10) / perc50, scaled by 1e6.
for q in range(1,18):
    ch['q'+str(q)+'_rvar'] = 1e6*((ch['q'+str(q)+'_perc90']-ch['q'+str(q)+'_perc10'])/ch['q'+str(q)+'_perc50'])

# Row-wise median of the 17 quarterly amplitudes, ignoring NaN quarters.
# Computed in one vectorised call and assigned as a whole column: the earlier
# per-row `ch['rvar'].iloc[i] = ...` form is chained assignment, which does not
# write back to `ch` when pandas Copy-on-Write is active.
rvar_columns = ['q'+str(q)+'_rvar' for q in range(1,18)]
ch['rvar'] = np.nanmedian(ch[rvar_columns].to_numpy(dtype=float), axis=1)

prepend_dataframe_columns(ch, 'ch_')
ch.head()

Unnamed: 0,ch_kepid,ch_kepoi_name,ch_kepler_name,ch_ra,ch_ra_err,ch_ra_str,ch_dec,ch_dec_err,ch_dec_str,ch_koi_gmag,...,ch_q9_rvar,ch_q10_rvar,ch_q11_rvar,ch_q12_rvar,ch_q13_rvar,ch_q14_rvar,ch_q15_rvar,ch_q16_rvar,ch_q17_rvar,ch_rvar
0,10811496,K00753.01,,297.00482,0.0,19h48m01.16s,48.134129,0.0,+48d08m02.9s,15.943,...,9658.583689,20917.404831,,10706.954274,15466.03652,26506.930011,,8247.953395,12525.836854,13076.233378
1,11138155,K00760.01,,292.16705,0.0,19h28m40.09s,48.727589,0.0,+48d43m39.3s,15.715,...,947.866965,929.594987,961.746416,926.02716,944.915174,955.794202,955.798705,987.866182,1038.416014,955.794202
2,11818800,K00777.01,,294.31686,0.0,19h37m16.05s,50.080231,0.0,+50d04m48.8s,16.1,...,56355.724813,26138.543446,27495.668946,44824.07061,30788.777817,33638.311218,50327.25152,27070.947143,45649.783409,38450.436047
3,11918099,K00780.02,,293.83331,0.0,19h35m19.99s,50.23035,0.0,+50d13m49.3s,16.11,...,1204.903465,1358.803539,1287.971033,1453.058955,1461.610244,1324.312893,1244.791681,1433.994033,1589.536179,1364.742964
4,9579641,K00115.03,,287.88733,0.0,19h11m32.96s,46.276241,0.0,+46d16m34.5s,13.111,...,537.856795,498.740592,415.166335,484.984661,1350.397802,530.915544,1738.465122,635.256795,1526.028138,498.740592


### David et al. 2021 rotation period vetting results

In [23]:
# Read the vetting table, keep only the four columns needed below, and tag them with "d21_".
d21 = pd.read_csv('./tables/david2021.csv')[['kepid', 'prot', 'prot_ref', 'prot_numflag']]
prepend_dataframe_columns(d21, "d21_")
d21.head()

Unnamed: 0,d21_kepid,d21_prot,d21_prot_ref,d21_prot_numflag
0,11446443.0,,,1.0
1,10666592.0,,,1.0
2,3248033.0,,,0.0
3,11853905.0,,,1.0
4,5903312.0,,,0.0


### Merging the dataframes
For stellar tables we want to merge on kepid (KIC) or kepoi_int (the integer KOI star number)

For planetary tables we want to merge on kepoi_name

In [24]:
# Left-join every literature table onto the KOI table, one after another, so each
# KOI row is kept whether or not a given table has a match.
# NOTE(review): none of these merges is validated as many-to-one; a right-hand table
# with repeated keys would duplicate KOI rows — confirm len(df) against len(koi).
df = (
    koi
    .merge(stlr, how='left', left_on='kepid', right_on='stlr_kepid')
    .merge(ber_st, how='left', left_on='kepid', right_on='b20_KIC')
    .merge(ber_pl, how='left', left_on='kepoi_name', right_on='b20b_kepoi_name')
    .merge(b18, how='left', left_on='kepoi_int', right_on='b18_KOI')
    .merge(cks, how='left', left_on='kepoi_name', right_on='cks_KOI')
    .merge(p17, how='left', left_on='kepoi_name', right_on='p17_KOI')
    .merge(p20, how='left', left_on='kepoi_name', right_on='p20_id_koicand')
    .merge(bf, how='left', left_on='kepoi_int', right_on='bf18_KOI_int')
    .merge(l20, how='left', left_on='kepid', right_on='l20_KID')
    .merge(l21, how='left', left_on='kepid', right_on='l21_KIC')
    .merge(a18, how='left', left_on='kepoi_int', right_on='a18_KOI')
    .merge(m13, how='left', left_on='kepid', right_on='m13_KIC')
    .merge(m15, how='left', left_on='kepid', right_on='m15_KIC')
    .merge(s15, how='left', left_on='kepid', right_on='s15_KIC')
    .merge(f16, how='left', left_on='kepid', right_on='f16_KIC')
    .merge(gaia, how='left', left_on='kepid', right_on='gaia_kepid')
    .merge(d21, how='left', left_on='kepid', right_on='d21_kepid')
    .merge(ch, how='left', left_on='kepoi_name', right_on='ch_kepoi_name')
    .merge(m19, how='left', left_on='kepoi_name', right_on='m19_KOI')
)

### Computing additional columns

In [25]:
#Computing additional columns
# (R_earth / R_star)^2 divided by the CDPP value; b20_iso_rad is treated as solar radii
# (it is multiplied by R_sun) and the CDPP is scaled by 1e-6.
df['single_transit_snr'] = (
    (c.R_earth.cgs.value / (df['b20_iso_rad'] * c.R_sun.cgs.value))**2.
    / (df['stlr_rrmscdpp03p0'] * 1e-6)
)
#Bring McQuillan Rvar into ppm units (like the others)
# NOTE: this rescales the column in place, so re-running the cell multiplies it again.
df['m13_Rvar'] *= 1e3

# CKS isochrone age: log10(age) -> age divided by 1e9, with lower (e_) and upper (E_)
# errors obtained by shifting the log age by its own errors.
df["cks_age"] = 10.**df["cks_logAiso"] / 1.0e9
df["cks_e_age"] = df["cks_age"] - 10.**(df["cks_logAiso"] - df["cks_e_logAiso"]) / 1.0e9
df["cks_E_age"] = 10.**(df["cks_logAiso"] + df["cks_E_logAiso"]) / 1.0e9 - df["cks_age"]
df["cks_age_maxerr"] = np.maximum(df["cks_e_age"].to_numpy(), df["cks_E_age"].to_numpy())

# Brewer & Fischer age errors as distances from the age to the b_Age / B_Age columns.
df["bf18_e_Age"] = df["bf18_Age"] - df["bf18_b_Age"]
df["bf18_E_Age"] = df["bf18_B_Age"] - df["bf18_Age"]
df["bf18_Age_maxerr"] = np.maximum(df["bf18_e_Age"].to_numpy(), df["bf18_E_Age"].to_numpy())

# err2 is negated before the comparison with err1.
df["b20_iso_age_maxerr"] = np.maximum((-df["b20_iso_age_err2"]).to_numpy(),
                                      df["b20_iso_age_err1"].to_numpy())

In [26]:
#Abundances normalized to Fe
# [X/Fe] = [X/H] - [Fe/H]; the error is the quadrature sum of the two [./H] errors.
bf18_elements = ['C', 'N', 'O', 'Na', 'Mg', 'Al', 'Si', 'Ca', 'Ti', 'V', 'Cr', 'Mn', 'Ni', 'Y']
for el in bf18_elements:
    df["bf18_[{0}/Fe]".format(el)] = df["bf18_[{0}/H]".format(el)] - df["bf18_[Fe/H]"]
    df["bf18_e_[{0}/Fe]".format(el)] = np.sqrt(
        df["bf18_e_[{0}/H]".format(el)]**2 + df["bf18_e_[Fe/H]"]**2
    )

In [27]:
from itertools import combinations

elems = ["Fe","C","N","O","Na","Mg","Al","Si","Ca","Ti","V","Cr","Mn","Ni","Y"]
# Reverse the list so that Fe comes last and therefore always ends up as the
# denominator (el2) in pairs that include it.
elems = elems[::-1]
print(elems)

#combos = ["/".join(map(str, comb)) for comb in combinations(elems, 2)]

# For every unordered pair of elements, store the abundance ratio
# [el1/el2] = [el1/H] - [el2/H] and its uncertainty. Columns of the same name
# already present in df (e.g. bf18_[X/Fe], bf18_e_[X/Fe]) are overwritten.
for comb in combinations(elems,2):

    el1 = comb[0]
    el2 = comb[1]

    #print(comb[0], comb[1])

    x1, e_x1 = df["bf18_["+el1+"/H]"], df["bf18_e_["+el1+"/H]"]
    x2, e_x2 = df["bf18_["+el2+"/H]"], df["bf18_e_["+el2+"/H]"]

    df["bf18_["+el1+"/"+el2+"]"] = x1-x2
    # Error of a difference: quadrature sum of BOTH elements' errors.
    # (Using e_x2 twice dropped the el1 error and made every bf18_e_[X/Fe] equal.)
    df["bf18_e_["+el1+"/"+el2+"]"] = np.sqrt(e_x1**2+e_x2**2)
    

['Y', 'Ni', 'Mn', 'Cr', 'V', 'Ti', 'Ca', 'Si', 'Al', 'Mg', 'Na', 'O', 'N', 'C', 'Fe']


In [28]:
#Alpha-element abundance (average of Mg/Fe, Si/Fe, and Ti/Fe)
# Inverse-variance weighted mean: each ratio is weighted by 1 / error^2, row by row.
df["bf18_[alpha/Fe]"] = np.average(
    [df['bf18_[{0}/Fe]'.format(alpha_el)] for alpha_el in ('Mg', 'Si', 'Ti')],
    axis=0,
    weights=[1/df['bf18_e_[{0}/Fe]'.format(alpha_el)]**2 for alpha_el in ('Mg', 'Si', 'Ti')],
)

### Write the merged dataframe to file. This dataframe will form the basis for the rest of the paper analysis.

In [31]:
# Write the merged frame, including all derived columns, to ./data.parquet.
df.to_parquet('./data.parquet')