In [1]:
# Setup cell: imports, then load and sanitize the example target-star table.
import sys

import pandas as pd

# The action is here. Depends on vespa & isochrones.
from exosyspop.populations import KeplerBinaryPopulation

## This is an alternative way to get the peerless target star DataFrame for
## example purposes; it is left disabled in favour of the HDF5 file read below.
#sys.path.append('/u/tdm/repositories/peerless/prediction')
#sys.path.append('/u/tdm/repositories/peerless')
#from targets import targets

targets = pd.read_hdf('targets.h5')

# Sanitize dtypes of targets DataFrame: every object-dtype column is converted
# to strings.  The converted Series is assigned back by column name rather than
# through `targets.loc[:, c] = ...`, because newer pandas treats the
# `.loc[:, c]` form as an in-place write into the existing column, whereas
# `targets[c] = ...` replaces the column outright.
for c in targets.columns:
    if targets[c].dtype == object:
        targets[c] = targets[c].astype(str)



In [2]:
# Build the binary-star population on top of the target table loaded above.
# NOTE(review): `fB` is presumably the binary fraction -- confirm against exosyspop.
pop = KeplerBinaryPopulation(targets, fB=0.4)

In [3]:
# Accessing secondary properties will initialize a secondary simulation,
# calling pop._generate_binaries().  The first time this is called, the
# secondary property regressors get trained, which is what produces the
# "regressor trained" messages in this cell's output.
# NOTE(review): the NaN entries are presumably targets without a companion
# in this draw -- confirm.
pop.radius_B

EB: dmag regressor trained, R2=0.999478994004
EB: qR regressor trained, R2=0.999225548626


array([        nan,         nan,         nan, ...,  0.45033301,
        0.42450442,         nan])

In [4]:
# Subsequent calls are much faster (no regressor training messages appear this
# time).  Calling pop._generate_binaries() explicitly draws a fresh set of
# secondaries, so the radii printed here differ from the previous cell's output.
pop._generate_binaries()
print(pop.radius_B)
%timeit pop._generate_binaries()

[ 0.60335989         nan         nan ...,  0.39600354         nan
  0.67549434]
10 loops, best of 3: 97.5 ms per loop


In [5]:
# If physical accuracy is important, you can also choose to generate binary properties
# directly from the isochrone by passing `use_ic=True`, but it's a factor of a
# few slower (compare the two %timeit results):
pop._generate_binaries(use_ic=True)
print(pop.radius_B)
%timeit pop._generate_binaries(use_ic=True)

[ 0.74957311         nan  0.77404674 ...,         nan         nan
         nan]
1 loops, best of 3: 399 ms per loop


In [6]:
# Similarly, accessing orbital properties (here, the period) will generate them.
pop.period

array([  6.40912013e+04,   2.14228943e+02,   5.44690078e+08, ...,
         1.54264955e+09,   4.07893941e+06,   1.40823485e+02])

# Synthetic observations

In [7]:
# Now, we can observe and see what we see.  This takes into account
# duty cycle & data span, as well as geometry.
# The result is a DataFrame: its length is the number of observed systems and
# its columns hold the orbital and eclipse quantities listed in the output.
# NOTE(review): `host` presumably identifies the row of `targets` -- confirm.
obs = pop.observe()
print(len(obs))
print(obs.columns)
obs.head()

145
Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec'],
      dtype='object')


Unnamed: 0,host,period,ecc,w,inc,a,aR,b_pri,b_sec,k,...,T14_sec,T23_pri,T23_sec,dataspan,dutycycle,b_target,flux_ratio,n_pri,n_sec,phase_sec
0,127,70.072951,0.873722,3.907564,1.548575,5918824000000.0,88.130039,1.175052,0.288552,0.665052,...,0.125768,0.0,0.013043,1459.789,0.6989,10.380342,0.109753,13,14,0.055367
1,159,1.32385,0.194674,2.027846,1.524113,422394900000.0,5.902799,0.225611,0.321122,0.73815,...,0.147203,0.007932,0.0,1459.789,0.8749,19.312456,0.332275,947,983,0.444524
2,2835,3.562075,0.629051,3.374387,1.418,687947900000.0,11.947647,1.285432,0.959628,0.171404,...,0.043813,0.0,0.0,1459.789,0.8751,11.792313,0.001357,0,370,0.132974
3,7069,68.908433,0.508062,2.488099,1.560835,6184474000000.0,86.120773,0.48625,0.920893,0.970415,...,0.552996,0.0,0.0,1459.789,0.8751,16.371788,0.907508,18,20,0.238324
4,7270,372.421965,0.545586,3.963694,1.569175,15763670000000.0,283.097518,0.536953,0.230296,0.409973,...,0.348765,0.142969,0.136199,1459.789,0.8749,18.960348,0.015485,3,3,0.249322


In [8]:
# This is pretty fast, even when generating a new population each time
# (`new=True`):
%timeit pop.observe(new=True)

1 loops, best of 3: 212 ms per loop


In [9]:
# Even faster if we only generate new orbits (`new_orbits=True`) instead of a
# whole new population.
%timeit pop.observe(new_orbits=True)

10 loops, best of 3: 79.2 ms per loop


In [10]:
# Because an orbit-only re-observation is cheap, the expected number of
# observed systems (and its scatter) can be estimated by simple repetition.
import numpy as np
N = 100
detections_per_trial = [len(pop.observe(new_orbits=True)) for _ in range(N)]
n_obs = np.array(detections_per_trial)
n_obs.mean(), n_obs.std()

(150.56, 12.621663915665003)

In [11]:
# Notice that the above does not yet have trapezoidal parameters.  There are two options to generate these.
# Either we can set the fit_trap parameter, as follows.  This appends the
# `trap_dur_*`, `trap_depth_*` and `trap_slope_*` columns (one set each for the
# primary and the secondary eclipse) to the observation table:
obs = pop.observe(fit_trap=True)
print(len(obs))
obs.columns

147


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri', u'trap_depth_pri', u'trap_slope_pri', u'trap_dur_sec',
       u'trap_depth_sec', u'trap_slope_sec'],
      dtype='object')

In [12]:
# All things considered, fitting the trapezoids directly is still pretty fast
# if we just need to do it a few times:
%timeit pop.observe(fit_trap=True)

1 loops, best of 3: 841 ms per loop


In [13]:
# However, this is pretty slow if we want to do inference.  To help with this, we can
# tell it to train & use a regression (`regr_trap=True`).  Training only happens
# once; by default with 10,000 synthetic observations.  This takes a minute or so.
# The regressed trapezoid parameters come back in columns with a `_regr` suffix.
obs = pop.observe(regr_trap=True)
print(len(obs))
obs.columns

EB: Depth trained: R2=0.998124257679
EB: Duration trained: R2=0.997735090565
EB: Slope trained: R2=0.994708310901
140


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri_regr', u'trap_depth_pri_regr', u'trap_slope_pri_regr',
       u'trap_dur_sec_regr', u'trap_depth_sec_regr', u'trap_slope_sec_regr'],
      dtype='object')

In [14]:
# Subsequent calls are much faster, since the regression has already been trained.
%timeit pop.observe(regr_trap=True)

10 loops, best of 3: 25.9 ms per loop


In [15]:
# Even generating a new stellar population (`new=True`) & observing it with the
# regressed trapezoid parameters is pretty quick.
%timeit pop.observe(regr_trap=True, new=True)

1 loops, best of 3: 224 ms per loop


In [16]:
# Or again, you can just generate new orbits (rather than new binaries & new orbits)
# while still using the regressed trapezoid parameters.
%timeit pop.observe(regr_trap=True, new_orbits=True)

10 loops, best of 3: 94.1 ms per loop


In [17]:
# Generating the training data used for the trapezoid shape regression above used
# this function, which can be otherwise useful to sample >N random observations
# from the existing population (the returned table can hold slightly more than
# N rows, as the printed length shows).
# The regressed trapezoid parameters are on by default here: the `*_regr`
# columns are present although no trapezoid keyword is passed.
# NOTE(review): this note originally named the keyword `trap_regr`, whereas
# `observe` is called with `regr_trap` elsewhere in this notebook -- confirm
# which spelling `get_N_observed` accepts.
# This function also takes `new` or `new_orbits` keywords.
obs_pop = pop.get_N_observed(N=10000, new_orbits=True)
print(len(obs_pop))
obs_pop.columns

10030


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri_regr', u'trap_depth_pri_regr', u'trap_slope_pri_regr',
       u'trap_dur_sec_regr', u'trap_depth_sec_regr', u'trap_slope_sec_regr'],
      dtype='object')

In [18]:
# We can now look, e.g. at the expected number of single/double eclipsing systems:
# fewer than 3 primary and fewer than 3 secondary eclipses, with at least one of
# the two counts equal to zero.
# The comparisons on either side of `|` are parenthesized explicitly.  pandas'
# default query parser gives `|` the precedence of `or`, so the unparenthesized
# form `n_pri==0 | n_sec==0` selects the same rows, but spelling the grouping
# out keeps the expression unambiguous for readers used to Python's own
# precedence (where `|` binds tighter than `==`).
query = '(n_pri < 3) & (n_sec < 3) & ((n_pri == 0) | (n_sec == 0))'
N = 100
n_obs = np.array([len(pop.observe(new_orbits=True).query(query)) for _ in range(N)])
n_obs.mean(), n_obs.std()

(4.3399999999999999, 1.8396738841436002)

In [19]:
# Repeat the same count, now drawing orbits from the empirical eccentricity
# distribution (as opposed to the beta distribution with default params)
# ---eccentricity matters!
pop.ecc_empirical = True
matches_per_trial = [len(pop.observe(new_orbits=True).query(query)) for _ in range(N)]
n_obs = np.array(matches_per_trial)
n_obs.mean(), n_obs.std()

(6.2800000000000002, 2.1821090715177367)

In [20]:
# You can also save a trained model so that you can load it back and hit the ground running.
# `pop` is rebound to the reloaded object, so the cells that follow exercise
# the saved copy rather than the original instance.
# NOTE(review): `overwrite=True` presumably replaces an existing 'ebpop' -- confirm.
pop.save('ebpop', overwrite=True)
pop = KeplerBinaryPopulation.load('ebpop')

In [21]:
# No training necessary!  The reloaded population produces the regressed
# trapezoid columns straight away, without printing any "trained" messages.
pop.observe(regr_trap=True).head()

Unnamed: 0,host,period,ecc,w,inc,a,aR,b_pri,b_sec,k,...,flux_ratio,n_pri,n_sec,phase_sec,trap_dur_pri_regr,trap_depth_pri_regr,trap_slope_pri_regr,trap_dur_sec_regr,trap_depth_sec_regr,trap_slope_sec_regr
0,1303,1.508235,0.79469,0.908785,0.571017,439256000000.0,6.76058,1.288318,5.616169,0.49581,...,0.021415,849,0,0.87117,0.044697,0.021101,2.288434,,,
1,1481,1.119021,0.095647,1.2203,1.557535,394654300000.0,4.311374,0.051982,0.062243,0.645139,...,0.267657,1139,1144,0.520989,0.111869,0.36857,2.405604,0.140385,0.207068,2.538372
2,1704,54.984939,0.687238,1.753266,1.523692,4608242000000.0,60.380019,0.895275,4.628196,0.279898,...,0.003981,22,0,0.392804,0.110049,0.047288,2.554566,,,
3,2426,5.777147,0.086197,4.066556,1.550388,1162926000000.0,18.001832,0.39158,0.341142,0.932021,...,0.769349,175,184,0.466909,0.179252,0.405019,2.298384,0.161438,0.359476,2.288638
4,5496,44.842503,0.80826,4.761843,1.518526,3995373000000.0,87.611478,8.234749,0.878159,0.872604,...,0.376656,0,19,0.543067,,,,0.082805,0.130946,2.359772
