In [59]:
%matplotlib inline
%config InlineBackend.figure_format = "retina"

# Global matplotlib defaults for every figure in this notebook.
import matplotlib.pyplot as plt
from matplotlib import rcParams
rcParams["font.size"] = 20
rcParams['figure.dpi'] = 100
# Saved figures use a higher resolution than on-screen ones and a tight bounding box.
rcParams["savefig.dpi"] = 300
rcParams["savefig.bbox"] = 'tight'

# Seaborn theme: "paper" scaling context with the "ticks" axes style.
# NOTE(review): set_context also adjusts font-size rcParams, so it presumably
# overrides the font.size set above -- confirm which value is intended.
import seaborn as sns
sns.set_context("paper")
sns.set_style("ticks")

import numpy as np
import pandas as pd
import pickle

import os
import requests
from io import BytesIO
from io import StringIO

from astropy.table import Table
from astropy.table import join
from astropy.io import ascii
import astropy.constants as c

from scipy import interpolate
from sklearn import svm
from arviz import hpd

# Function to get NASA Exoplanet Archive catalogs 
# from Dan Foreman-Mackey (https://github.com/dfm/exopop)
def get_catalog(name, basepath="data"):
    """Return a NASA Exoplanet Archive table as a DataFrame, cached on disk.

    The table is stored as ``<basepath>/<name>.h5`` under the HDF5 key
    ``name``. If that file already exists it is read and returned without
    any network access; otherwise the table is downloaded as CSV from the
    archive API, written to the cache and returned.

    Parameters
    ----------
    name : str
        Archive table name (e.g. ``"q1_q17_dr25_koi"``); also used as the
        cache file stem and the HDF5 key.
    basepath : str, optional
        Directory holding the cache files. Created if missing.

    Returns
    -------
    pandas.DataFrame
        The requested table.

    Raises
    ------
    requests.HTTPError
        If the archive answers with an HTTP error status.
    """
    fn = os.path.join(basepath, "{0}.h5".format(name))
    if os.path.exists(fn):
        return pd.read_hdf(fn, key=name)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(basepath, exist_ok=True)
    print("Downloading {0}...".format(name))
    url = ("http://exoplanetarchive.ipac.caltech.edu/cgi-bin/nstedAPI/"
           "nph-nstedAPI?table={0}&select=*").format(name)
    r = requests.get(url)
    # raise_for_status() is a no-op for successful responses, so no guard is needed.
    r.raise_for_status()

    df = pd.read_csv(BytesIO(r.content))
    # Pass `key` by keyword: positional use is deprecated in recent pandas.
    df.to_hdf(fn, key=name, format="t")
    return df

# Provenance helper: tag every column of a table with a short source prefix.
# Applied to each catalog before merging so a column's origin stays visible.
def prepend_dataframe_columns(df, prefix):
    """Rename the columns of ``df`` in place to ``prefix + <old name>``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labels are rewritten (modified in place).
    prefix : str
        Text put in front of every existing column name.

    Returns
    -------
    None
    """
    prefixed_names = []
    for column_name in df.columns:
        prefixed_names.append(prefix + column_name)
    df.columns = prefixed_names
    return None


#Suppressing astropy units warnings
import warnings
from astropy.utils.exceptions import AstropyWarning

# NOTE: the first filter silences *every* UserWarning, not only astropy's.
# append=True places it at the end of the filter list, so filters registered
# elsewhere take precedence over it.
warnings.filterwarnings('ignore', category=UserWarning, append=True)
# simplefilter inserts at the front of the filter list (highest precedence).
warnings.simplefilter('ignore', category=AstropyWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

#Suppressing Pandas chained assignment warnings
# NOTE(review): this hides SettingWithCopyWarning for the whole notebook, so
# chained-assignment writes that do not reach the original frame go unnoticed.
pd.options.mode.chained_assignment = None  # default='warn'

### CKS VII Sample (Fulton & Petigura 2018)
As a starting point we will begin with the CKS VII sample. This includes some planets which were not contained in Kepler Q1-Q17 DR25, so it allows us to study a larger sample.

In [60]:
# To access the CKS tables via FTP from CDS uncomment below
# cks_st = Table.read("ftp://cdsarc.unistra.fr/ftp/J/AJ/156/264/table2.dat",
#                     readme="ftp://cdsarc.unistra.fr/ftp/J/AJ/156/264/ReadMe", 
#                     format='ascii.cds')

# cks_pl = Table.read("ftp://cdsarc.unistra.fr/ftp/J/AJ/156/264/table3.dat",
#                     readme="ftp://cdsarc.unistra.fr/ftp/J/AJ/156/264/ReadMe", 
#                     format='ascii.cds')

# Local copies of the planet-radius and stellar-radius tables.
cks_pl = Table.read("../data/Fulton2018_planetradii_ajaae828t3_mrt.txt", format='ascii')
cks_st = Table.read("../data/Fulton2018_stellarradii_ajaae828t2_mrt.txt", format='ascii')

# Build a shared star-level key: the stellar table's KOI column is renamed, and
# the planet table gets the part of its KOI identifier before the "." suffix.
cks_st.rename_column('KOI', 'KOI_star')
cks_pl['KOI_star'] = np.array([planet_koi.split(".")[0] for planet_koi in cks_pl['KOI']])

# Left join keeps every planet row, then the result is converted to pandas.
cks = join(cks_pl, cks_st, join_type='left', keys='KOI_star').to_pandas()
# Tag all columns with the "f18_" provenance prefix.
prepend_dataframe_columns(cks, 'f18_')

# Integer KOI number (text after the leading "K"), used as a merge key later.
cks['f18_KOI_int'] = cks['f18_KOI_star'].map(lambda star_koi: int(star_koi.split("K")[1]))

cks.head(5)

Unnamed: 0,f18_KOI,f18_P,f18_E_P,f18_e_P,f18_Rp/R*,f18_E_Rp/R*,f18_e_Rp/R*,f18_Rp,f18_E_Rp,f18_e_Rp,...,f18_e_rhoiso,f18_logAiso,f18_E_logAiso,f18_e_logAiso,f18_plxspec,f18_E_plxspec,f18_e_plxspec,f18_r8,f18_RCF,f18_KOI_int
0,K00001.01,2.470613,1.9e-08,1.9e-08,0.123851,2.5e-05,7.6e-05,14.141,0.331,0.331,...,0.04,9.74,0.13,0.15,4.756,0.32,0.474,1.0,1.0102,1
1,K00002.01,2.204735,3.8e-08,3.8e-08,0.075408,8e-06,7e-06,16.25,0.349,0.349,...,0.0,9.25,0.04,0.05,3.661,0.411,0.586,1.001,1.0034,2
2,K00006.01,1.334104,7.07e-07,7.07e-07,0.294016,0.103683,0.209459,41.936,22.239,22.239,...,0.03,9.32,0.09,0.24,2.201,0.127,0.212,1.007,1.0006,6
3,K00007.01,3.213669,1.122e-06,1.122e-06,0.024735,0.000141,7.6e-05,4.076,0.107,0.107,...,0.02,9.78,0.09,0.08,2.112,0.228,0.274,1.0,,7
4,K00008.01,1.160153,4.27e-07,4.27e-07,0.018559,0.000246,0.001678,1.896,0.11,0.11,...,0.05,9.18,0.26,0.52,2.895,0.127,0.192,1.001,,8


### CKS I (Petigura et al. 2017)
For consistency with CKS VII, we will use the false positive designations adopted in Table 4 of CKS I, which themselves are based on a compilation of planet candidate dispositions from Morton et al. (2016), Mullally et al. (2015), and the NASA Exoplanet Archive (as it was on Feb. 1, 2017).

In [61]:
# CKS I Table 4 (dispositions), parsed from CDS and converted straight to pandas.
p17 = Table.read('ftp://cdsarc.unistra.fr/ftp/J/AJ/154/107/table4.dat',
                 readme='ftp://cdsarc.unistra.fr/ftp/J/AJ/154/107/ReadMe',
                 format='ascii.cds').to_pandas()

#Munging
# Strip "OI-" from each name, then expose the column as "KOI".
p17 = (
    p17
    .assign(Name=p17['Name'].map(lambda name: name.replace('OI-', '')))
    .rename(columns={'Name':'KOI'})
)
prepend_dataframe_columns(p17, 'p17_')

p17.head()

Unnamed: 0,p17_KOI,p17_Disp,p17_M16,p17_M15,p17_NEA
0,K00001.01,CP,CP,CP,CP
1,K00002.01,CP,CP,CP,CP
2,K00003.01,CP,CP,CP,CP
3,K00006.01,FP,FP,FP,FP
4,K00007.01,CP,CP,CP,CP


### Kepler Q1-Q17 DR25 table
Next, we load the [Kepler Q1-Q17 DR25 KOI Table](https://exoplanetarchive.ipac.caltech.edu/docs/PurposeOfKOITable.html#q1-q17_dr25), which is described in [Thompson et al. 2018](https://arxiv.org/abs/1710.06758).

In [62]:
# Fetch the DR25 KOI table: read from the local HDF5 cache when it exists,
# otherwise downloaded from the NASA Exoplanet Archive and cached.
# NOTE(review): get_catalog defaults to basepath="data", whereas the other
# inputs in this notebook live under "../data" -- confirm this is intended.
dr25 = get_catalog('q1_q17_dr25_koi')
# Tag all columns with the "dr25_" provenance prefix before merging.
prepend_dataframe_columns(dr25, 'dr25_')
dr25.head()

Unnamed: 0,dr25_kepid,dr25_kepoi_name,dr25_kepler_name,dr25_ra,dr25_ra_err,dr25_ra_str,dr25_dec,dr25_dec_err,dr25_dec_str,dr25_koi_gmag,...,dr25_koi_fpflag_co,dr25_koi_fpflag_ec,dr25_koi_insol,dr25_koi_insol_err1,dr25_koi_insol_err2,dr25_koi_srho,dr25_koi_srho_err1,dr25_koi_srho_err2,dr25_koi_fittype,dr25_koi_score
0,10811496,K00753.01,,297.00482,0.0,19h48m01.16s,48.134129,0.0,+48d08m02.9s,15.943,...,0,0,39.3,31.04,-10.49,7.29555,35.03293,-2.75453,LS+MCMC,0.0
1,10848459,K00754.01,,285.53461,0.0,19h02m08.31s,48.28521,0.0,+48d17m06.8s,16.1,...,0,0,891.96,668.95,-230.35,0.2208,0.00917,-0.01837,LS+MCMC,0.0
2,10854555,K00755.01,Kepler-664 b,288.75488,0.0,19h15m01.17s,48.2262,0.0,+48d13m34.3s,16.015,...,0,0,926.16,874.33,-314.24,1.98635,2.71141,-1.74541,LS+MCMC,1.0
3,10872983,K00756.01,Kepler-228 d,296.28613,0.0,19h45m08.67s,48.22467,0.0,+48d13m28.8s,16.234,...,0,0,114.81,112.85,-36.7,0.67324,0.33286,-0.38858,LS+MCMC,1.0
4,10872983,K00756.02,Kepler-228 c,296.28613,0.0,19h45m08.67s,48.22467,0.0,+48d13m28.8s,16.234,...,0,0,427.65,420.33,-136.7,0.37377,0.74768,-0.26357,LS+MCMC,1.0


### Kepler Stellar Data
We will also want the [Kepler Stellar data table](https://exoplanetarchive.ipac.caltech.edu/docs/Kepler_completeness_reliability.html) for the noise properties of each planet host. Since this is a large table, we have extracted only the columns we need ahead of time and stored them in the data directory.

In [63]:
# Pre-extracted subset of the Kepler stellar table (see the markdown above).
stlr = pd.read_csv('../data/Kepler-Q1-Q17-DR25-CDPP3.csv')
# Tag all columns with the "stlr_" provenance prefix before merging.
prepend_dataframe_columns(stlr, 'stlr_')
stlr.head()

Unnamed: 0,stlr_kepid,stlr_rrmscdpp03p0
0,10000785,445.41
1,10000797,80.767
2,10000800,226.348
3,10000823,181.468
4,10000827,124.834


### Gaia Kepler Stellar Properties Catalog (Berger et al. 2020)

In [64]:
# The columns in this parquet file already carry the "b20_" prefix (see the
# output below), so prepend_dataframe_columns is not called here.
b20 = pd.read_parquet('../data/GKSPCPapTable2.parquet')
b20.head()

Unnamed: 0,b20_KIC,b20_iso_mass,b20_iso_mass_err1,b20_iso_mass_err2,b20_iso_teff,b20_iso_teff_err1,b20_iso_teff_err2,b20_iso_logg,b20_iso_logg_err1,b20_iso_logg_err2,...,b20_iso_age_err2,b20_iso_dis,b20_iso_dis_err1,b20_iso_dis_err2,b20_iso_avs,b20_iso_gof,b20_TAMS,b20_log10_iso_age,b20_E_log10_iso_age,b20_e_log10_iso_age
0,757076,1.396,0.179,-0.218,5052.2,103.9,-86.6,3.373,0.067,-0.081,...,-0.73,651.5,22.6,-21.6,0.371,1.0,2.69,9.392697,0.23976,0.152148
1,757099,0.873,0.054,-0.039,5364.9,102.7,-84.9,4.318,0.037,-0.029,...,-3.98,367.8,7.1,-6.9,0.34,1.0,17.18,10.181844,0.078228,0.131851
2,757137,1.669,0.309,-0.3,4628.7,84.8,-76.2,2.388,0.083,-0.086,...,-0.58,568.4,12.2,-11.6,0.344,1.0,1.66,9.178977,0.230956,0.210494
3,757280,1.715,0.086,-0.089,6856.8,144.4,-139.9,3.834,0.033,-0.032,...,-0.16,822.0,19.4,-21.0,0.496,1.0,1.62,9.093422,0.064941,0.059998
4,757450,0.914,0.062,-0.057,5301.2,111.0,-103.1,4.432,0.045,-0.044,...,-5.15,829.6,24.0,-23.4,0.465,1.0,16.1,9.975891,0.194957,0.341414


### Petigura 2020
The radius valley is partially contaminated by planets with poorly determined radius ratios due to high impact parameters (grazing or near-grazing transits)...

In [65]:
# Download the Petigura (2020) planet table from the keprat GitHub repository.
url = "https://raw.githubusercontent.com/petigura/keprat/master/data/cksgaia-planets.csv"
_response = requests.get(url)
# Fail loudly on an HTTP error instead of parsing an error page as CSV
# (mirrors the status check in get_catalog).
_response.raise_for_status()
s = _response.content
p20 = pd.read_csv(StringIO(s.decode('utf-8')))
# Ratio of the two transit-duration columns, tau / tau0.
p20['Rtau'] = p20.tau/p20.tau0
# Tag all columns with the "p20_" provenance prefix before merging.
prepend_dataframe_columns(p20, 'p20_')

p20.head()

Unnamed: 0,p20_id_kic,p20_id_starname,p20_id_kic.1,p20_id_koi,p20_id_koicand,p20_id_kepler_name,p20_koi_disposition,p20_koi_period,p20_koi_period_err1,p20_koi_period_err2,...,p20_fgraz,p20_tau0,p20_tau,p20_dr25_ror_v18_srad,p20_dr25_ror_v18_srad_err1,p20_dr25_ror_gdir_srad,p20_dr25_ror_gdir_srad_err1,p20_v18_ror_gdir_srad,p20_v18_ror_gdir_srad_err1,p20_Rtau
0,757450,K00889,757450,889.0,K00889.01,Kepler-75 b,CONFIRMED,8.884923,1e-06,-1e-06,...,0.0,4.019727,1.856715,,,11.802615,0.412225,,,0.461901
1,1026957,K00958,1026957,958.0,K00958.01,,NOT DISPOSITIONED,21.761294,1.1e-05,-1.1e-05,...,0.0,4.427408,1.204967,,,2.478217,0.114768,,,0.272161
2,1718189,K00993,1718189,993.0,K00993.01,Kepler-262 c,CONFIRMED,21.853679,9.7e-05,-9.7e-05,...,0.0,4.914936,3.306574,,,1.551245,0.081248,,,0.67276
3,1718189,K00993,1718189,993.0,K00993.02,Kepler-262 b,CONFIRMED,13.060767,9.2e-05,-9.2e-05,...,0.0,4.139989,3.093553,,,1.267332,0.068661,,,0.747237
4,1718189,K00993,1718189,993.0,K00993.03,,CANDIDATE,86.723152,0.001814,-0.001814,...,0.0,7.781345,4.761139,,,1.257706,0.094337,,,0.611866


### Gaia-Kepler cross-match catalog
Megan Bedell performed a 1 arcsecond positional cross-match of the Kepler and Gaia DR2 catalogs. The results are hosted [here](https://gaia-kepler.fun/).

In [66]:
# Local copy of the 1-arcsecond Kepler x Gaia DR2 cross-match.
gaia = pd.read_parquet('../data/kepler_dr2_1arcsec.parquet')
# Tag all columns with the "gaia_" provenance prefix before merging.
prepend_dataframe_columns(gaia, "gaia_")
gaia.head()

Unnamed: 0,gaia_solution_id,gaia_designation,gaia_source_id,gaia_random_index,gaia_gaia_ref_epoch,gaia_ra,gaia_ra_error,gaia_dec,gaia_dec_error,gaia_parallax,...,gaia_mass_err2,gaia_prov_sec,gaia_nconfp,gaia_nkoi,gaia_ntce,gaia_jmag,gaia_hmag,gaia_kmag,gaia_planet?,gaia_kepler_gaia_ang_dist
0,1635721458409799680,b'Gaia DR2 2050233807328471424',2050233807328471424,689405501,2015.5,291.038681,0.035878,36.598031,0.047832,1.493916,...,-0.475,b'DSEP',0,0,0,10.126,9.667,9.559,b'none',0.15616
1,1635721458409799680,b'Gaia DR2 2050233601176543104',2050233601176543104,447382505,2015.5,291.043057,0.019919,36.593767,0.027553,2.678194,...,-0.233,b'DSEP',0,0,1,11.656,11.193,11.094,b'none',0.128262
2,1635721458409799680,b'Gaia DR2 2050230543159814656',2050230543159814656,1004823384,2015.5,291.055892,0.019394,36.559886,0.026621,1.723413,...,-0.947,b'DSEP',0,0,0,7.467,6.853,6.722,b'none',0.146787
3,1635721458409799680,b'Gaia DR2 2050230611879323904',2050230611879323904,1253666653,2015.5,291.095373,0.016886,36.564903,0.022993,1.183652,...,-0.157,b'DSEP',0,0,0,10.852,10.662,10.627,b'none',0.11642
4,1635721458409799680,b'Gaia DR2 2050231848829944320',2050231848829944320,298032508,2015.5,291.13762,0.01956,36.577344,0.025196,1.168913,...,-0.04,b'DSEP',1,1,1,13.665,13.262,13.118,b'conf',0.114502


### Gaia DR2 RUWE values
I queried the Gaia archive for RUWE values for the Gaia DR2-Kepler cross-match catalog above.

In [67]:
# The columns in this parquet file already carry the "d20_" prefix (see the
# output below), so prepend_dataframe_columns is not called here.
ruwe = pd.read_parquet('../data/kepler_dr2_1arcsec_ruwe.parquet')
ruwe.head()

Unnamed: 0,d20_kepid,d20_source_id,d20_ruwe,d20_koi
0,757076,2050233807328471424,0.946665,
1,757099,2050233601176543104,2.172801,
2,757137,2050230543159814656,0.912556,
3,757280,2050230611879323904,0.870375,
4,757450,2050231848829944320,1.030048,889.0


### Kinematic ages from [Lu et al. 2021](https://arxiv.org/abs/2102.01772)

In [68]:
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open("../data/KinageAllKepler.pkl", "rb") as _kin_file:
    kin = pickle.load(_kin_file)
# Keep a handle on the full table before narrowing `kin` to a column subset.
kin_base = pd.DataFrame(kin)
kin = kin[kin.columns[-20:]] #Select only the columns of interest
# Tag the retained columns with the "l20_" provenance prefix before merging.
prepend_dataframe_columns(kin, 'l20_')

### Adopted rotation periods (manually vetted compilation)

In [69]:
##############################################################
# Rotation periods
rot = pd.read_csv('../data/Young_planets_CKOI_20200324.csv')
# Keep only the rotation-related columns; KIC is renamed to Prot_KIC so it
# can serve as an unambiguous merge key later.
rot = pd.DataFrame({'Prot_KIC': rot.KIC,
                    'Prot_flag': rot.Prot_flag,
                    'Prot_adopted': rot.Prot_adopted,
                    'Prot_ANG18': rot.Prot_ANG18,
                    'Prot_MAZ15': rot.Prot_MAZ15,
                    'Prot_D20': rot.Prot_D20,
                    'Prot_MCQ13': rot.Prot_MCQ13,
                    'Prot_WAL13': rot.Prot_WAL13})

# Adopted rotation period; stays NaN where no reference below was adopted.
rot['Prot'] = np.full(len(rot), np.nan)

# Maps each Prot_adopted code to the column holding that reference's period.
# The codes are lowercase: the earlier version compared against 'M13'/'M15',
# which never matched the loop values, so those periods were never copied.
_adopted_period_column = {'a18': 'Prot_ANG18',
                          'd20': 'Prot_D20',
                          'm13': 'Prot_MCQ13',
                          'm15': 'Prot_MAZ15'}

for ref, _period_column in _adopted_period_column.items():
    _mask = rot['Prot_adopted'] == ref
    # Single .loc write instead of chained indexing, which can assign into a
    # temporary copy and silently leave `rot` unchanged.
    rot.loc[_mask, 'Prot'] = rot.loc[_mask, _period_column]

### Rotation periods
Several authors have measured rotation periods for the Kepler sample, and specifically for KOIs. We compile some of the largest studies here.

### [Angus et al. 2018](https://ui.adsabs.harvard.edu/abs/2018MNRAS.474.2094A/abstract) rotation period catalog

In [70]:
# Local CSV copy of the Angus et al. (2018) KOI rotation periods.
a18 = pd.read_csv("../data/Angus2018_koi_periods_nohead.csv")
# Tag all columns with the "a18_" provenance prefix before merging.
prepend_dataframe_columns(a18, "a18_")
a18.head()

Unnamed: 0,a18_Index,a18_KOI,a18_feh,a18_feh_errm,a18_feh_errp,a18_logg,a18_logg_errm,a18_logg_errp,a18_period,a18_period_errm,a18_period_errp,a18_teff,a18_teff_errm,a18_teff_errp
0,0,1,-0.12,-0.28,0.33,4.609,-0.106,0.027,24.848729,5.996906,23.240003,5126.0,-138.0,160.0
1,1,2,-0.15,-0.1,0.1,4.455,-0.025,0.025,19.595792,3.779434,3.95145,5850.0,-50.0,50.0
2,2,3,0.26,-0.08,0.08,4.021,-0.011,0.011,28.159582,0.193655,2.656332,6350.0,-80.0,80.0
3,3,6,-0.04,-0.16,0.12,4.169,-0.048,0.055,22.765454,15.400891,3.123556,6225.0,-158.0,114.0
4,4,7,0.44,-0.101,0.101,4.081,-0.014,0.014,25.310087,1.685785,1.96845,5543.0,-79.0,79.0


### [Mazeh et al. 2015](https://ui.adsabs.harvard.edu/abs/2015ApJ...801....3M/abstract) rotation period catalog

In [71]:
# Mazeh et al. (2015) Table 1, parsed from CDS and converted straight to pandas.
m15 = Table.read(
    "ftp://cdsarc.unistra.fr/ftp/J/ApJ/801/3/table1.dat",
    readme="ftp://cdsarc.unistra.fr/ftp/J/ApJ/801/3/ReadMe",
    format="ascii.cds",
).to_pandas()
# Tag all columns with the "m15_" provenance prefix before merging.
prepend_dataframe_columns(m15, "m15_")
m15.head()

Unnamed: 0,m15_KOI,m15_KIC,m15_Teff,m15_log(g),m15_Prot,m15_e_Prot,m15_Rvar,m15_LPH,m15_w,m15_D,m15_N,m15_C,m15_G,m15_T,m15_F,m15_R,m15_M1,m15_M2
0,1,11446443,5713,4.143,70.55,16.47,445.0,0.362359,0.0,0,0,0,0,0,0,0,1,0
1,2,10666592,6264,3.79,70.69,6.8,919.0,0.542156,0.0,0,1,0,0,0,0,0,1,0
2,3,10748390,4766,4.59,29.31,0.49,12246.0,0.607229,0.431926,0,1,0,0,0,0,0,0,0
3,5,8554498,5861,4.17,37.23,4.18,237.0,0.082306,0.0,0,0,0,0,0,0,0,1,0
4,10,6922244,6213,4.17,82.12,16.05,1245.0,0.350827,0.0,0,0,0,0,0,0,0,1,0


### [McQuillan et al. 2013](https://ui.adsabs.harvard.edu/abs/2013ApJ...775L..11M/abstract) rotation period catalog

In [72]:
# McQuillan et al. (2013) Table 1, parsed from CDS and converted straight to pandas.
m13 = Table.read(
    "ftp://cdsarc.unistra.fr/ftp/J/ApJ/775/L11/table1.dat",
    readme="ftp://cdsarc.unistra.fr/ftp/J/ApJ/775/L11/ReadMe",
    format="ascii.cds",
).to_pandas()
# Tag all columns with the "m13_" provenance prefix before merging.
prepend_dataframe_columns(m13, "m13_")
m13.head()

Unnamed: 0,m13_KOI,m13_KIC,m13_Teff,m13_log(g),m13_Rp,m13_---,m13_Porb,m13_Prot,m13_e_Prot,m13_Rvar,m13_Flag
0,3,10748390,4766,4.59,4.68,,4.888,29.472,0.134,11.75,N
1,12,5812701,6419,4.26,13.4,,17.855,1.245,0.124,0.78,
2,41,6521045,5909,4.28,1.24,,6.887,24.988,2.192,0.39,
3,42,8866102,6170,4.1,2.71,,17.834,20.85,0.007,1.12,
4,44,8845026,6250,3.5,9.61,,66.468,3.792,0.907,1.16,


### [Walkowicz & Basri 2013](https://ui.adsabs.harvard.edu/abs/2013MNRAS.436.1883W/abstract) rotation period catalog

In [73]:
# Walkowicz & Basri (2013) Table 2, parsed from CDS and converted straight to pandas.
w13 = Table.read(
    "ftp://cdsarc.unistra.fr/ftp/J/MNRAS/436/1883/table2.dat",
    readme="ftp://cdsarc.unistra.fr/ftp/J/MNRAS/436/1883/ReadMe",
    format="ascii.cds",
).to_pandas()
# Tag all columns with the "w13_" provenance prefix before merging.
prepend_dataframe_columns(w13, "w13_")
w13.head()

Unnamed: 0,w13_KIC,w13_KOI,w13_Teff,w13_logg,w13_logVar,w13_Per,w13_e_Per,w13_Ro,w13_B-V,w13_Age
0,5903312,8,5783.0,4.29,0.43,13.88,3.28,1.12,0.66,1.46
1,7684873,14,7906.0,3.93,-0.07,5.83,7.32,-99.0,0.24,-99.0
2,7255336,19,,-99.0,0.15,2.43,2.4,0.13,0.78,0.05
3,10125352,21,6122.0,4.22,0.29,4.59,1.03,0.67,0.56,0.35
4,9071386,23,6324.0,4.27,0.32,4.69,9.07,0.93,0.52,0.62


### Lithium abundances for the CKS sample ([Berger et al. 2018](https://ui.adsabs.harvard.edu/abs/2018ApJ...855..115B/abstract))

In [74]:
# Berger et al. (2018) Tables 1 and 2, each parsed from CDS, converted to
# pandas, and tagged with its own provenance prefix.
b18 = Table.read(
    "https://cdsarc.unistra.fr/ftp/J/ApJ/855/115/table1.dat",
    readme="https://cdsarc.unistra.fr/ftp/J/ApJ/855/115/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(b18, "b18_")

b18_tb2 = Table.read(
    "https://cdsarc.unistra.fr/ftp/J/ApJ/855/115/table2.dat",
    readme="https://cdsarc.unistra.fr/ftp/J/ApJ/855/115/ReadMe",
    format="ascii.cds",
).to_pandas()
prepend_dataframe_columns(b18_tb2, "b18_tb2_")

b18.head()

Unnamed: 0,b18_Obs,b18_Date,b18_KOI,b18_Kpmag,b18_S/N,b18_Teff,b18_logg,b18_[Fe/H],b18_Vt,b18_EW(Li),b18_e_EW(Li),b18_A(Li),b18_e_A(Li)
0,j122.742,2011-06-16,1,11.34,39,5819,4.4,0.01,1.04,85.5,7.9,2.62,0.08
1,j122.92,2011-06-13,2,10.46,39,6449,4.13,0.2,1.77,82.9,7.3,3.11,0.07
2,j122.81,2011-06-13,3,9.17,41,4864,4.5,0.33,0.54,4.2,5.1,-0.4,
3,j70.1247,2009-06-05,6,12.16,119,6348,4.36,0.04,1.58,15.4,2.2,2.16,0.08
4,j74.509,2009-07-31,7,12.21,126,5827,4.09,0.18,1.17,54.0,2.1,2.36,0.06


### Activity indicators from LAMOST ([Frasca et al. 2016](https://ui.adsabs.harvard.edu/abs/2016A%26A...594A..39F/abstract))

Note these are not used in the publication

In [75]:
# f16 = Table.read("https://cdsarc.unistra.fr/ftp/J/A+A/594/A39/tablea4.dat",
#                  readme="https://cdsarc.unistra.fr/ftp/J/A+A/594/A39/ReadMe",
#                  format="ascii.cds")
# f16 = f16.to_pandas()
# prepend_dataframe_columns(f16, 'f16_')
# f16.head()

### GALEX-Kepler cross-match (Olmedo et al. 2015)

In [76]:
# Olmedo et al. 2015 GALEX Kepler 
# o15 = Table.read("../data/olmedo2015/table4.dat",
#                  readme="../data/olmedo2015/ReadMe",
#                  format="ascii.cds")

# o15 = o15.to_pandas()
# prepend_dataframe_columns(o15, 'o15_')

### Silva-Aguirre et al. 2015

In [77]:
# Silva-Aguirre et al. 2015 Asteroseismic Sample
s15 = Table.read("ftp://cdsarc.unistra.fr/ftp/J/MNRAS/452/2127/table3.dat",
                 readme="ftp://cdsarc.unistra.fr/ftp/J/MNRAS/452/2127/ReadMe",
                 format="ascii.cds")

s15 = s15.to_pandas()
prepend_dataframe_columns(s15, 's15_')

### Merging tables
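Note: this cell should only be run once per kernel session. Each merge overwrites `cks` in place, so re-running the cell would merge every catalog into the table a second time.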

In [78]:
# Every merge is a left join onto the CKS planet table, so no CKS row is ever
# dropped; catalogs with no match contribute NaN. The order matters: later
# merges use key columns (dr25_kepid, p20_id_koi) brought in by earlier ones.

# Planet-level catalogs, keyed on the full KOI identifier.
cks = cks.merge(p17, how='left', left_on='f18_KOI', right_on='p17_KOI')
cks = cks.merge(dr25, how='left', left_on='f18_KOI', right_on='dr25_kepoi_name')
# Star-level catalogs, keyed on the KIC identifier taken from DR25.
cks = cks.merge(stlr, how='left', left_on='dr25_kepid', right_on='stlr_kepid')
cks = cks.merge(gaia, how='left', left_on='dr25_kepid', right_on='gaia_kepid')
cks = cks.merge(p20, how='left', left_on='dr25_kepoi_name', right_on='p20_id_koicand')
cks = cks.merge(b20, how='left', left_on='dr25_kepid', right_on='b20_KIC')
cks = cks.merge(ruwe, how='left', left_on='dr25_kepid', right_on='d20_kepid')
cks = cks.merge(kin, how='left', left_on='dr25_kepid', right_on='l20_KID')
cks = cks.merge(rot, how='left', left_on='dr25_kepid', right_on='Prot_KIC')

# NOTE(review): this key comes from the p20 table, so rows without a p20 match
# cannot pick up an a18 entry; f18_KOI_int (used for b18 below) would not have
# that limitation -- confirm the choice is intentional.
cks = cks.merge(a18, how='left', left_on='p20_id_koi', right_on='a18_KOI')
cks = cks.merge(m15, how='left', left_on='dr25_kepid', right_on='m15_KIC')
cks = cks.merge(m13, how='left', left_on='dr25_kepid', right_on='m13_KIC')
cks = cks.merge(w13, how='left', left_on='dr25_kepid', right_on='w13_KIC')
cks = cks.merge(b18, how='left', left_on='f18_KOI_int', right_on='b18_KOI')
cks = cks.merge(b18_tb2, how='left', left_on='f18_KOI_int', right_on='b18_tb2_KOI')
cks = cks.merge(s15, how='left', left_on='dr25_kepid', right_on='s15_KIC')

#cks = cks.merge(f16, how='left', left_on='dr25_kepid', right_on='f16_KIC')
#Note, merging with the Olmedo et al. 2015 catalog takes a long time
#cks = cks.merge(o15, how='left', left_on='dr25_kepid', right_on='o15_KIC')

# Independent snapshot of the merged table. pd.DataFrame(cks) does not copy
# the data by default, so later in-place edits of `cks` could leak into it.
cks_base = cks.copy()

### Assigning rotation periods based on manual vetting results

In [79]:
# Adopted rotation period with its upper (err1) and lower (err2) uncertainty.
# All three stay NaN for rows whose Prot_adopted matches none of the codes below.
cks['prot'] = np.full(len(cks), np.nan)
cks['prot_err1'] = np.full(len(cks), np.nan)
cks['prot_err2'] = np.full(len(cks), np.nan)

# For each Prot_adopted code:
#   (period column,
#    upper-error column, factor applied to it,
#    lower-error column, factor applied to it)
# For 'd20' the uncertainty is taken as +/-10% of the period itself.
_prot_sources = {
    'a18': ('a18_period', 'a18_period_errp', 1.0, 'a18_period_errm', -1.0),
    'd20': ('Prot_D20', 'Prot_D20', 0.1, 'Prot_D20', -0.1),
    'm13': ('m13_Prot', 'm13_e_Prot', 1.0, 'm13_e_Prot', -1.0),
    'm15': ('m15_Prot', 'm15_e_Prot', 1.0, 'm15_e_Prot', -1.0),
}

for ref, (_period_col, _up_col, _up_factor, _lo_col, _lo_factor) in _prot_sources.items():
    _mask = cks['Prot_adopted'] == ref

    # Single .loc writes instead of chained indexing (cks[col][mask] = ...),
    # which can assign into a temporary copy and silently leave `cks` unchanged.
    cks.loc[_mask, 'prot'] = cks.loc[_mask, _period_col]
    cks.loc[_mask, 'prot_err1'] = _up_factor*cks.loc[_mask, _up_col]
    cks.loc[_mask, 'prot_err2'] = _lo_factor*cks.loc[_mask, _lo_col]


#Rotation period flags
# Numeric version of the letter flag: 'a' -> 3, 'b' -> 2, 'm' -> 1, and any
# other value (including missing) -> 0. Computed in one vectorised step
# rather than a per-row Python loop; stored as float.
_flag_to_num = {'a': 3, 'b': 2, 'm': 1}
cks['prot_numflag'] = (
    cks['Prot_flag']
    .astype(str)
    .map(_flag_to_num)
    .fillna(0)
    .astype(float)
)

In [80]:
#Response to referee
cks_copy = cks.copy()
cks_copy = cks_copy.drop_duplicates(subset='f18_KOI_star')

prot_flag = np.array(cks_copy['prot_numflag'])

for i in range(4):
    print('% of sample with rotation period flag of',i,'=',100.*len(prot_flag[prot_flag==i])/len(prot_flag))

% of sample with rotation period flag of 0 = 20.77375946173255
% of sample with rotation period flag of 1 = 33.809924306139614
% of sample with rotation period flag of 2 = 22.960470984020183
% of sample with rotation period flag of 3 = 22.455845248107654


### Rotation period table (Table 1)

In [81]:
# Build Table 1 in one chain:
#   1. select the identifier and rotation-period columns,
#   2. keep only rows that have an adopted rotation period (`prot`),
#   3. keep the first row for each KOI number,
#   4. cast the KIC identifier to int and round three period columns to 2 decimals.
prot_table = (
    cks[['f18_KOI_int',
         'dr25_kepid',
         'prot', #'prot_err1', #'prot_err2',
         'Prot_adopted',
         'prot_numflag',
         'a18_period', #'a18_period_errm', #'a18_period_errp',
         'm13_Prot',
         'm15_Prot',
         'w13_Per']]
    .copy()
    .dropna(subset=['prot'])
    .drop_duplicates(subset='f18_KOI_int', keep='first')
    .assign(dr25_kepid=lambda table: table['dr25_kepid'].astype(int),
            prot=lambda table: table['prot'].round(2),
            a18_period=lambda table: table['a18_period'].round(2),
            m13_Prot=lambda table: table['m13_Prot'].round(2))
)

# LaTeX source for the table, without the DataFrame index column.
tex_table = prot_table.to_latex(index=False)
print(tex_table)

\begin{tabular}{rrrlrrrrr}
\toprule
 f18\_KOI\_int &  dr25\_kepid &   prot & Prot\_adopted &  prot\_numflag &  a18\_period &  m13\_Prot &  m15\_Prot &  w13\_Per \\
\midrule
          10 &     6922244 &   7.46 &          a18 &           2.0 &        7.46 &       NaN &     82.12 &      NaN \\
          49 &     9527334 &   8.74 &          a18 &           3.0 &        8.74 &      8.55 &      8.59 &     8.60 \\
          63 &    11554435 &   5.49 &          a18 &           3.0 &        5.49 &      5.41 &       NaN &     5.39 \\
          64 &     7051180 &   2.23 &          a18 &           3.0 &        2.23 &       NaN &       NaN &     2.22 \\
          70 &     6850504 &  27.92 &          a18 &           2.0 &       27.92 &       NaN &     27.72 &    28.25 \\
          82 &    10187017 &  26.55 &          a18 &           3.0 &       26.55 &     26.57 &     26.40 &    26.74 \\
          84 &     2571238 &  20.23 &          a18 &           2.0 &       20.23 &       NaN &     20.27 &    32.

#### Save the rotation period table

In [82]:
# NOTE(review): to_csv writes the DataFrame index by default, so this file has
# an extra leading index column that the LaTeX table (index=False) omits --
# confirm that is intended.
prot_table.to_csv("../data/table1.csv")

#### Save the merged CKS table 

In [83]:
# Persist the fully merged table, including the prot, prot_err1, prot_err2,
# and prot_numflag columns added above.
cks.to_parquet('../data/cks-merged.parquet')