In [1]:
import numpy as np
import pandas as pd

# Load the full table of FPP results. The first CSV column becomes the row
# index; later cells look rows up by KOI name (e.g. 'K00368.01').
fpp_all = pd.read_csv('fpp_final_table.csv', index_col=0)
# Non-null count per column, as a quick completeness check.
fpp_all.count()

koi.1                    7470
rprs                     7202
secthresh                7202
maxrad                   7202
pr_heb                   7048
pr_heb_Px2               7048
pr_eb                    7048
pr_eb_Px2                7048
pr_beb                   7048
pr_beb_Px2               7048
L_tot                    7052
fp                       7052
FPP                      7048
kepid                    7470
period                   7470
rp                       7128
disposition              7470
prob_ontarget            6786
pos_prob_score           7470
not_transitlike          7470
significant_secondary    7470
centroid_offset          7470
ephem_match              7470
MES                      7470
exception                 418
has_ttv                  7268
dtype: int64

In [2]:
# Column names assigned to the fields of the target table, in file order.
columns = ['kepid', 'koi', 'kepler_pl', 'period', 'depth', 'aR', 'Kp',
           'Teff', 'logg', 'feh', 'nature', 'reference']

from keputils.koiutils import koiname

# The table is a LaTeX fragment whose fields are separated by '&' with
# optional surrounding whitespace. A regex separator is only supported by the
# Python parsing engine, so request it explicitly instead of relying on the
# silent fallback, and use a raw string so '\s' is never read as a string
# escape.
santerne = pd.read_table('TargetSelection.tex', delimiter=r'\s*&\s*',
                         names=columns, engine='python')

# Normalize the KOI identifiers with koiname so they can be matched against
# the index of fpp_all (presumably the 'K00001.01' form -- confirm in keputils).
santerne['koi'] = santerne.koi.apply(koiname)

# Look up each target's FPP by KOI name. reindex() replaces the removed `.ix`
# indexer and leaves NaN for KOIs that are absent from fpp_all, rather than
# raising KeyError as `.loc` does in current pandas.
santerne['FPP'] = fpp_all['FPP'].reindex(santerne.koi).values



In [4]:
# Non-null entries in every column, split by the `nature` classification.
santerne.groupby(by='nature').count()

Unnamed: 0_level_0,kepid,koi,kepler_pl,period,depth,aR,Kp,Teff,logg,feh,reference,FPP
nature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
?,18,18,18,18,18,18,18,18,18,18,18,18
BD,3,3,3,3,3,3,3,3,3,3,3,3
CEB,15,15,15,15,15,15,15,15,15,15,15,13
EB,48,48,48,48,48,48,48,48,48,48,48,43
planet,45,45,45,45,45,45,45,45,45,45,45,43


In [3]:
# Peek at the first rows: KOI name, classification and matched FPP.
santerne.loc[:, ['koi', 'nature', 'FPP']].head()

Unnamed: 0,koi,nature,FPP
0,K00001.01,planet,0.015
1,K00002.01,planet,1.4e-05
2,K00003.01,planet,4e-06
3,K00010.01,planet,6.5e-05
4,K00012.01,planet,0.0526


Here are the summarized results of the `vespa` FPP calculations on the KOI giant planet sample presented in Santerne et al. (2015; arXiv:1511.00643v1), who find a 54.6 +/- 6.5% false positive rate in this sample.

In [6]:
# Mean and median FPP within each `nature` class. The grouping is built once
# and print is called in function form (single argument), which produces the
# same output under Python 2 and also runs under Python 3.
fpp_by_nature = santerne.groupby('nature')['FPP']
print(fpp_by_nature.mean())
print(fpp_by_nature.median())

nature
?         0.239273
BD        0.003463
CEB       0.832615
EB        0.776397
planet    0.089198
Name: FPP, dtype: float64
nature
?         0.003235
BD        0.003840
CEB       0.995000
EB        0.945000
planet    0.000195
Name: FPP, dtype: float64


In [10]:
# Emit LaTeX table rows of the form:  nature & mean & median
vmean = santerne.groupby('nature')['FPP'].mean()
vmedian = santerne.groupby('nature')['FPP'].median()

# Row order of the table. Values are looked up by label, so the '?' class
# needs no alias entry added to the summary Series.
row_order = ['planet', 'BD', 'EB', 'CEB', '?']
rows = ['{0} & {1:.4f} & {2:.4f}'.format(label, vmean[label], vmedian[label])
        for label in row_order]

# Every row except the last is terminated with the LaTeX row break ' \\'.
print(' \\\\\n'.join(rows))

planet & 0.0892 & 0.0002 \\
BD & 0.0035 & 0.0038 \\
EB & 0.7764 & 0.9450 \\
CEB & 0.8326 & 0.9950 \\
? & 0.2393 & 0.0032


In [5]:
# Mean FPP over the whole sample, regardless of classification.
santerne.FPP.mean()

0.4363491218336184

In [6]:
# Save the sample, including the matched FPP column, for use outside this
# notebook.
santerne.to_csv('santerne_sample_with_fpp.csv')

In [7]:
# Index the sample by KOI name so individual targets can be looked up by label.
santerne.index = santerne['koi']

In [8]:
# Inspect one target by KOI name. `.loc` is the label-based indexer; the
# `.ix` indexer used previously is deprecated and removed in pandas 1.0.
santerne.loc['K00614.01']

kepid                              7368664
koi                              K00614.01
kepler_pl                             434b
period                              12.875
depth                                 0.43
aR           22.90$^{_{+1.20}}_{^{-1.20}}$
Kp                                    14.5
Teff            5926$^{_{+154}}_{^{-189}}$
logg          4.51$^{_{+0.04}}_{^{-0.24}}$
feh          -0.04$^{_{+0.20}}_{^{-0.30}}$
nature                              planet
reference                             Al15
FPP                                  0.468
Name: K00614.01, dtype: object

In [15]:
# Targets classified as planets whose computed FPP nevertheless exceeds 0.3.
santerne[(santerne['nature'] == 'planet') & (santerne['FPP'] > 0.3)]

Unnamed: 0,kepid,koi,kepler_pl,period,depth,aR,Kp,Teff,logg,feh,nature,reference,FPP
42,3323887,K00377.01,9b,19.271,0.7,36.84$^{_{+4.30}}_{^{-4.30}}$,13.8,5779$^{_{+110}}_{^{-128}}$,4.49$^{_{+0.03}}_{^{-0.16}}$,0.12$^{_{+0.10}}_{^{-0.14}}$,planet,Ho10,0.99
55,7368664,K00614.01,434b,12.875,0.43,22.90$^{_{+1.20}}_{^{-1.20}}$,14.5,5926$^{_{+154}}_{^{-189}}$,4.51$^{_{+0.04}}_{^{-0.24}}$,-0.04$^{_{+0.20}}_{^{-0.30}}$,planet,Al15,0.468
75,11122894,K01426.02,297c,74.928,0.42,82.40$^{_{+14.90}}_{^{-14.90}}$,14.2,6150$^{_{+151}}_{^{-193}}$,4.42$^{_{+0.06}}_{^{-0.30}}$,-0.12$^{_{+0.22}}_{^{-0.30}}$,planet,Ro14,0.596
80,12365184,K01474.01,419b,69.727,0.46,45.17$^{_{+7.80}}_{^{-7.80}}$,13.0,6287$^{_{+106}}_{^{-153}}$,4.24$^{_{+0.10}}_{^{-0.18}}$,0.22$^{_{+0.14}}_{^{-0.20}}$,planet,Da14,0.992


In [55]:
# Full FPP record for one KOI, looked up by label. `.loc` replaces the
# deprecated `.ix` indexer, which was removed in pandas 1.0.
fpp_all.loc['K00368.01']

koi.1                         K00368.01
rprs                              0.084
secthresh                            40
maxrad                              0.6
pr_heb                         0.000192
pr_heb_Px2                     2.14e-22
pr_eb                            0.0221
pr_eb_Px2                      8.11e-25
pr_beb                         3.07e-06
pr_beb_Px2                     3.21e-31
L_tot                             3.672
fp                                0.055
FPP                              0.0223
kepid                           6603043
period                         110.3217
rp                             16.14879
disposition              FALSE POSITIVE
prob_ontarget                         1
pos_prob_score                        1
not_transitlike                   False
significant_secondary              True
centroid_offset                   False
ephem_match                       False
MES                            1688.579
exception                           NaN


Now, let's look for a minute at the singles, i.e. KOIs with `n_cands == 1` that are still dispositioned as `CANDIDATE`:

In [10]:
import koiutils as ku
# Collect ku.get_ncands(k) for every KOI in the FPP table (presumably the
# number of candidates in that KOI's system -- confirm against koiutils).
n_cands = []
for k in fpp_all.index:
    try:
        n_cands.append(ku.get_ncands(k))
    except Exception:
        # Best effort: a failed lookup is recorded as NaN so the list stays
        # aligned with fpp_all.index. Catching Exception rather than using a
        # bare except lets KeyboardInterrupt / SystemExit still stop the loop.
        n_cands.append(np.nan)

fpp_all['n_cands'] = n_cands

In [12]:
# Summary statistics of FPP for rows with n_cands == 1 whose disposition is
# still CANDIDATE.
fpp_all.loc[(fpp_all['n_cands'] == 1) & (fpp_all['disposition'] == 'CANDIDATE'),
            'FPP'].describe()

count    2142.000000
mean        0.195576
std         0.354793
min         0.000000
25%         0.000192
50%         0.003970
75%         0.139750
max         1.000000
Name: FPP, dtype: float64