In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.special as spec
import pandas as pd
from astropy.io import ascii
import scipy.stats as stats
from ipywidgets import FloatProgress
from IPython.display import display

import sys
sys.path.insert(0, '..')
import dr25Models as funcModels

In [41]:
def drawHeatMap(dataArray, imageSize, x, y, nData=None, colorBarLabel="", textOn=True, forceInt=True):
    """Draw a 2D array as a heat map on a new figure, optionally annotating each cell.

    Parameters
    ----------
    dataArray : 2D array
        Values to display, indexed the same way as ``x`` and ``y``
        (first index along the horizontal axis, second along the vertical).
    imageSize : tuple
        Figure size passed to ``plt.figure(figsize=...)``.
    x, y : 2D arrays
        Coordinates of the lower-left corner of each cell.  ``x`` varies along
        its first index and ``y`` along its second; the cell size is taken from
        the first step of each grid, so uniform spacing is assumed.
    nData : 2D array, optional
        Per-cell counts, indexed like ``dataArray``.  Cells whose count is not
        positive are annotated with "-" instead of their value.  When omitted
        (``None`` or empty) every cell is treated as populated.
    colorBarLabel : str
        Label drawn on the colorbar.
    textOn : bool
        If True, write a text annotation in every cell.
    forceInt : bool
        If True, annotations show the cell value cast to int.

    Returns
    -------
    None.  The heat map is drawn on the current (newly created) figure.
    """
    # Cell width/height, from the first step of each coordinate grid.
    dx = x[(1, 0)] - x[(0, 0)]
    dy = y[(0, 1)] - y[(0, 0)]
    # The grids hold lower-left corners, so extend the far edges by one cell.
    extent = [x[(0, 0)], x[(-1, 0)] + dx, y[(0, 0)], y[(0, -1)] + dy]

    plt.figure(figsize=imageSize)
    ax = plt.gca()

    # imshow expects (row, column) = (vertical, horizontal), hence the transpose.
    da = np.transpose(dataArray)
    im = ax.imshow(da, extent=extent, origin='lower')
    ax.set_aspect(10)

    # No counts supplied: treat every cell as populated.
    if nData is None or len(nData) == 0:
        nData = np.ones(np.shape(dataArray))

    if textOn:
        minda = np.min(da)
        maxda = np.max(da)
        daRange = maxda - minda
        nRows, nCols = da.shape
        for i in range(nRows):
            for j in range(nCols):
                value = da[i, j]
                # Black text on the upper half of the value range, white on the lower half.
                textColor = "k" if value > minda + daRange*0.5 else "w"
                # Shrink the font for values with three or more digits.
                textSize = 9 if np.abs(value) < 100 else 6

                # da is transposed, so da[i, j] pairs with index (j, i) of the inputs.
                if nData[(j, i)] > 0:
                    label = value.astype("int") if forceInt else value
                else:
                    label = "-"
                ax.text(x[(j, i)] + dx/2, y[(j, i)] + dy/2, label,
                        ha="center", va="center", color=textColor, fontsize=textSize)

    # Scale the colorbar so its height tracks the image height.
    im_ratio = float(da.shape[0])/da.shape[1]
    cbh = plt.colorbar(im, fraction=0.0477*im_ratio, pad=0.02)
    # The Text property is lowercase 'fontsize'; mixed-case 'fontSize' is rejected
    # by current Matplotlib releases.
    cbh.ax.set_ylabel(colorBarLabel, fontsize=24)
    


In this notebook we measure DR25 vetting completeness, defined as the fraction of detections (TCEs) that are correctly vetted as planet candidates.  We use the set of injected on-target planets that were detected at the correct ephemeris as the base set of TCEs.  All of these TCEs are "true planets" by definition.  Then vetting completeness is the fraction of these TCEs that are vetted as PC by the robovetter.  We study how vetting completeness depends on period and MES.

We think of TCEs as consisting of two sets: those that are dispositioned as FP and those that are dispositioned as PC.  Then we can think of the vetting process as drawing from the set of TCEs, with a probability $r$ of selecting PCs. We identify $r$ with vetting completeness.  Then the probability distribution of selecting $c$ PCs from $n$ TCEs is given by the binomial distribution

$$P\{c\} = \left( \begin{array}{c} n \\ c \end{array} \right) r^c (1-r)^{n-c}.$$

In this spirit, we define the vetting completeness $r$ as the probability of drawing PCs from injected TCEs, found via the Bayesian inference $p(r|n, c) \propto p(c|r, n) p(r)$, where $c$ is the number of TCEs vetted as PCs, $n$ is the total number of TCEs,
$$p(c|r, n) = \left( \begin{array}{c} n \\ c \end{array} \right) r^c (1-r)^{n-c}$$ and
$p(r)$ is a prior distribution of the probability $r$.  By putting the data on a grid indexed by $i,j$, we can fit completeness as a function parameterized by a vector $\theta$, $r(\theta,\mathrm{period},\mathrm{MES})$, as $p(\theta|n_{i,j}, c_{i,j}, \mathrm{period}_{i,j},\mathrm{MES}_{i,j}) \propto p(c_{i,j}|\theta, n_{i,j}, \mathrm{period}_{i,j},\mathrm{MES}_{i,j}) p(\theta)$, where $p(\theta)$ is some prior distribution of the parameters.


First we define our stellar and planet populations.

In [3]:
# Stellar population label and robovetter score threshold.
stellarType = "GK"
scoreCut = 0.0

# Planet population: the scenario name picks the period / radius / MES box.
scenario = "large"
if scenario not in ("small", "large"):
    raise ValueError('Bad scenario name')

if scenario == "small":
    # small planet long period scenario
    periodMin, periodMax = 200, 400
    rpMin, rpMax = 0.75, 2.5
    mesMin, mesMax = 0, 30
else:
    # all planet scenario
    periodMin, periodMax = 0, 600
    rpMin, rpMax = 0.5, 15
    mesMin, mesMax = 0, 30


Then we define our binomial probability distribution function.

In [4]:
def binPdf(n, r, c):
    """Binomial probability of exactly c successes in n trials.

    Parameters
    ----------
    n : int or array
        Number of trials.
    r : float or array
        Per-trial success probability.
    c : int or array
        Number of successes.

    Returns
    -------
    comb(n, c) * r**c * (1 - r)**(n - c)
    """
    # scipy.special is imported as `spec` in this notebook; the name `sp`
    # is never defined, so the previous `sp.comb` raised NameError.
    return spec.comb(n, c)*(r**c)*((1 - r)**(n - c))

Then we define a bunch of models of the rate as functions of period ($x$) and MES ($y$) which are identified by the variable 'model'.

Read in our data.

In [5]:
# Input file locations, relative to the notebook directory.
dataLoc = "../data/"
injTceList = "".join([dataLoc, "kplr_dr25_inj1_tces.txt"])
tcelist = "".join([dataLoc, "DR25-Injected-Recovered-OnTarget-Planet-TCEs-1-1-Prat.txt"])
# Alternative stellar catalog, kept for reference:
# starlist = dataLoc + "dr25_stellar_updated_feh_" + stellarType + ".txt"
starlist = "".join(["../stellarCatalogs/dr25_stellar_supp_gaia_clean_", stellarType, ".txt"])

In [6]:
# Stellar catalog for the chosen stellar population
kic = pd.read_csv(starlist)
# Injected-TCE table (referred to as the "D2 table" in the original note)
injTces = ascii.read(injTceList)
# Identifiers of the injected/recovered on-target TCEs, read as strings
tces = np.genfromtxt(tcelist, dtype='str')

# Each identifier starts with the integer ID followed by '-'; keep that leading
# integer, stored in a float array as before.
tceKepids = np.array([int(tceName.split('-')[0]) for tceName in tces], dtype=float)
print(tceKepids)

print("num injected/recovered TCEs: " + str(tceKepids.size))
print("num injected TCEs: " + str(np.size(injTces)))



[  757280.   891916.   892195. ... 12984183. 12984200. 12984214.]
num injected/recovered TCEs: 46463
num injected TCEs: 45377


In [7]:
# Select only those TCEs that are in this stellar population.
# np.isin is NumPy's recommended replacement for np.in1d (deprecated in NumPy 2.0);
# for 1-D inputs it returns the same boolean membership mask.
injTces = injTces[np.isin(injTces['KIC'], kic.kepid)]
print("after: " + str(np.size(injTces)))


after: 17234


In [8]:
# Do some basic stats on the whole (stellar-population-filtered) injected TCE table
print(injTceList)
print("# of injected TCEs: " + str(len(injTces)))
print("# of injected PCs: " + str(len(injTces[injTces['Disp']=='PC'])))
print("# of injected FPs: " + str(len(injTces[injTces['Disp']=='FP'])))
print(' ')

# Build the radius/period box mask once and reuse it for the three counts below.
inBoxMask = np.all([injTces['Rp'] > rpMin, injTces['Rp'] < rpMax,
                    injTces['period'] > periodMin, injTces['period'] < periodMax], axis=0)
pcMask = injTces['Disp'] == 'PC'
fpMask = injTces['Disp'] == 'FP'

# The lower radius bound is rpMin (the header previously printed rpMax twice).
print("for " + str(rpMin) + " < Rp < " + str(rpMax) + ", " + str(periodMin) + " < period < " + str(periodMax) + ":")
print("# of injected TCEs: " + str(len(injTces[inBoxMask])))
print("# of injected PCs: " + str(len(injTces[np.logical_and(pcMask, inBoxMask)])))
print("# of injected FPs: " + str(len(injTces[np.logical_and(fpMask, inBoxMask)])))


../data/kplr_dr25_inj1_tces.txt
# of injected TCEs: 17234
# of injected PCs: 15121
# of injected FPs: 2113
 
for 15 < Rp < 15, 0 < period < 600:
# of injected injected TCEs: 17205
# of injected PCs: 15104
# of injected FPs: 2101


Separate out the PCs and FPs

In [9]:
# np.where must be given the boolean condition itself.  Passing the already
# filtered table (as before) yields indices into that subset, which do not
# identify the PC/FP rows of injTces.
pcIndex = np.where(injTces['Disp'] == 'PC')
fpIndex = np.where(injTces['Disp'] == 'FP')
pcs = injTces[pcIndex]
fps = injTces[fpIndex]

Select the TCEs that are in our desired population and plot them.

In [None]:
# Score masks shared by the selections below.  Each is an explicit comparison
# rather than the negation of the other.
scoreOk = injTces['Score'] >= scoreCut
scoreLow = injTces['Score'] < scoreCut

# Per-flag masks: flag set (== 1) and flag clear (== 0).
flagCols = ('NTL', 'SS', 'CO', 'EM')
flagOn = {col: injTces[col] == 1 for col in flagCols}
flagOff = {col: injTces[col] == 0 for col in flagCols}


def onlyFlagged(col):
    """Return the rows where flag `col` is 1 and the other three flags are 0."""
    mask = flagOn[col]
    for other in flagCols:
        if other != col:
            mask = mask & flagOff[other]
    return injTces[mask]


# Selections that combine one flag (or the disposition) with the score cut.
injectionNotNtl = injTces[flagOff['NTL'] & scoreOk]
injectionNtl = injTces[flagOn['NTL'] | scoreLow]
injectionSS = injTces[flagOn['SS'] | scoreLow]
injectionCO = injTces[flagOn['CO'] | scoreLow]
injectionEM = injTces[flagOn['EM'] | scoreLow]
injectionPcs = injTces[(injTces['Disp'] == 'PC') & scoreOk]
injectionFps = injTces[(injTces['Disp'] == 'FP') | scoreLow]

# Selections with exactly one of the four flags set (no score cut applied).
injectionNtlOnly = onlyFlagged('NTL')
injectionSsOnly = onlyFlagged('SS')
injectionCoOnly = onlyFlagged('CO')
injectionEmOnly = onlyFlagged('EM')

# Report the size of every selection, in the same order as they were built.
for prefix, selection in (
        ("length of injectionNotNtl = ", injectionNotNtl),
        ("length of injectionNtl = ", injectionNtl),
        ("length of injectionSS = ", injectionSS),
        ("length of injectionCO = ", injectionCO),
        ("length of injectionEM = ", injectionEM),
        ("length of injectionPcs = ", injectionPcs),
        ("length of injectionFps = ", injectionFps),
        ("length of injectionNtlOnly = ", injectionNtlOnly),
        ("length of injectionSsOnly = ", injectionSsOnly),
        ("length of injectionCoOnly = ", injectionCoOnly),
        ("length of injectionEmOnly = ", injectionEmOnly)):
    print(prefix + str(len(selection)))

