In [1]:
import sys

import pandas as pd

# The action is here. Depends on vespa & isochrones.
from exosyspop.populations import KeplerBinaryPopulation

# Example input: the "peerless" target-star DataFrame, read here from a local
# HDF5 file.  It can alternatively be imported from a peerless checkout by
# appending the repository root and its `prediction` subdirectory to sys.path
# and running `from targets import targets`.
targets = pd.read_hdf('targets.h5')

# Sanitize dtypes of the targets DataFrame: every object-dtype column is
# re-cast to str; all other columns are left alone.
for c in targets.columns:
    if targets[c].dtype != object:
        continue
    targets.loc[:, c] = targets.loc[:, c].astype(str)



In [2]:
# Build the binary-star population on top of the targets table.
# NOTE(review): fB is presumably the binary fraction -- confirm against exosyspop.
pop = KeplerBinaryPopulation(targets, fB=0.4)

In [3]:
# Accessing secondary properties will initialize a secondary simulation,
# calling pop._generate_binaries().  The first time this is called, the
# secondary property regressors get trained (the "dmag" and "qR" training
# messages are printed on that first access).
# NOTE(review): NaN entries presumably mark targets without a simulated
# companion in this draw -- confirm.
pop.radius_B

EB: dmag regressor trained, R2=0.999571783813
EB: qR regressor trained, R2=0.999491522142


array([        nan,         nan,         nan, ...,         nan,
        0.79875507,         nan])

In [4]:
# Subsequent calls are much faster, because regressor training only happens
# on the first call; e.g. re-draw the binaries, print the new secondary radii,
# and time the re-draw.
pop._generate_binaries()
print(pop.radius_B)
%timeit pop._generate_binaries()

[        nan  0.48217627  0.17883418 ...,         nan         nan
  0.84244681]
10 loops, best of 3: 119 ms per loop


In [5]:
# If physical accuracy is important, you can also choose to generate binary properties
# directly from the isochrone by passing use_ic=True, but it's a factor of a few slower:
pop._generate_binaries(use_ic=True)
print(pop.radius_B)
%timeit pop._generate_binaries(use_ic=True)

[ 0.59548858  0.87027945  0.21264382 ...,         nan         nan
         nan]
1 loops, best of 3: 540 ms per loop


In [6]:
# Similarly, accessing orbital properties (here, the period) will generate them.
# NOTE(review): units are not shown in this notebook; presumably days -- confirm.
pop.period

array([  2.37831975e+03,   4.58478418e+05,   1.29475810e+01, ...,
         2.53311429e+04,   2.12623475e+00,   6.64783757e+00])

# Synthetic observations

In [7]:
# Now, we can observe and see what we see.  This takes into account
# duty cycle & data span, as well as geometry.
# The result is a DataFrame: print its length and its column names,
# then preview the first rows.
obs = pop.observe()
print(len(obs))
print(obs.columns)
obs.head()

534
Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec'],
      dtype='object')


Unnamed: 0,host,period,ecc,w,inc,a,aR,b_pri,b_sec,k,...,T14_sec,T23_pri,T23_sec,dataspan,dutycycle,b_target,flux_ratio,n_pri,n_sec,phase_sec
0,162,1.546554,0.259108,4.888875,1.299541,475226100000.0,6.653142,2.232421,1.324986,0.694411,...,0.062706,0.0,0.0,1459.789,0.8751,19.302264,0.193729,0,824,0.529941
1,583,2.107884,0.336064,0.538883,1.397619,594980200000.0,8.717726,1.136517,1.610221,0.864561,...,0.083766,0.0,0.0,1459.789,0.6988,9.314785,0.564509,467,486,0.683719
2,793,28.011116,0.560312,1.798498,1.556732,2756256000000.0,43.806803,0.273422,0.93068,0.141959,...,0.245678,0.08871,0.0,1459.789,0.6988,10.281481,0.000729,39,36,0.404264
3,1411,4.632179,0.733379,5.737479,1.567717,937584400000.0,13.034092,0.029954,0.013437,0.482192,...,0.082737,0.064204,0.028841,1459.789,0.8754,12.995275,0.020901,278,276,0.89574
4,1509,2.43272,0.59529,4.428534,1.299418,592573500000.0,12.003357,4.847686,1.321941,0.860955,...,0.044943,0.0,0.0,1459.789,0.8755,13.927386,0.343714,0,516,0.371561


In [8]:
# This is pretty fast, even when generating a new population each time (new=True):
%timeit pop.observe(new=True)

1 loops, best of 3: 264 ms per loop


In [9]:
# Even faster if we only generate new orbits (new_orbits=True) instead of a
# whole new population.
%timeit pop.observe(new_orbits=True)

10 loops, best of 3: 103 ms per loop


In [10]:
# With observations this cheap, the expected number of observed systems can be
# estimated by brute force: redraw the orbits N times, count the rows returned
# by each observation, and report the mean and standard deviation of the counts.
import numpy as np
N = 100
n_obs = np.fromiter(
    (len(pop.observe(new_orbits=True)) for _ in range(N)),
    dtype=int, count=N)
n_obs.mean(), n_obs.std()

(530.79999999999995, 21.420550879937707)

In [11]:
# Notice that the above does not yet have trapezoidal parameters.  There are two options to generate these.
# Either we can set the fit_trap parameter, as follows; the result then carries
# additional trap_dur_*, trap_depth_* and trap_slope_* columns for the primary
# and secondary eclipses:
obs = pop.observe(fit_trap=True)
print(len(obs))
obs.columns

568


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri', u'trap_depth_pri', u'trap_slope_pri', u'trap_dur_sec',
       u'trap_depth_sec', u'trap_slope_sec'],
      dtype='object')

In [12]:
# All things considered, this is still pretty fast (a few seconds per call)
# if we just need to do it a few times:
%timeit pop.observe(fit_trap=True)

1 loops, best of 3: 2.61 s per loop


In [13]:
# However, this is pretty slow if we want to do inference.  To help with this, we can
# tell it to train & use a regression.  Training only happens once; by default with 10,000
# synthetic observations.  This takes a minute or so.
# The regressed trapezoid parameters come back in columns with a `_regr` suffix.
obs = pop.observe(regr_trap=True)
print(len(obs))
obs.columns

EB: Depth trained: R2=0.996803131152
EB: Duration trained: R2=0.999143741679
EB: Slope trained: R2=0.989257644072
502


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri_regr', u'trap_depth_pri_regr', u'trap_slope_pri_regr',
       u'trap_dur_sec_regr', u'trap_depth_sec_regr', u'trap_slope_sec_regr'],
      dtype='object')

In [14]:
# Subsequent calls are much faster, since the trapezoid regression is already trained.
%timeit pop.observe(regr_trap=True)

10 loops, best of 3: 33.4 ms per loop


In [15]:
# Even generating a new stellar population (new=True) & observing it with the
# regressed trapezoid parameters is pretty quick
%timeit pop.observe(regr_trap=True, new=True)

1 loops, best of 3: 259 ms per loop


In [16]:
# Or again, you can just generate new orbits (rather than new binaries & new orbits)
# by passing new_orbits=True instead of new=True.
%timeit pop.observe(regr_trap=True, new_orbits=True)

10 loops, best of 3: 121 ms per loop


In [17]:
# Generating the training data used for the trapezoid shape regression above used
# this function, which can be otherwise useful to sample >N random observations
# from the existing population.  `trap_regr` defaults to `True` here.
# NOTE(review): `observe` spells the corresponding keyword `regr_trap`; confirm
# which spelling `get_N_observed` actually accepts.
# This function also takes `new` or `new_orbits` keywords.
obs_pop = pop.get_N_observed(N=10000, new_orbits=True)
print(len(obs_pop))
obs_pop.columns

10169


Index([u'host', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'b_target', u'flux_ratio', u'n_pri', u'n_sec', u'phase_sec',
       u'trap_dur_pri_regr', u'trap_depth_pri_regr', u'trap_slope_pri_regr',
       u'trap_dur_sec_regr', u'trap_depth_sec_regr', u'trap_slope_sec_regr'],
      dtype='object')

In [18]:
# We can now look, e.g., at the expected number of single/double eclipsing systems:
# fewer than 3 primary and fewer than 3 secondary events, with one of the two
# counts equal to zero.
# Each comparison around `|` is parenthesized explicitly, so the selection does
# not depend on the pandas query parser giving `&`/`|` the precedence of
# `and`/`or`; under plain Python precedence the unparenthesized form would bind
# `0 | n_sec` first.
query = '(n_pri < 3) & (n_sec < 3) & ((n_pri == 0) | (n_sec == 0))'
N = 100  # number of independent orbit realizations to average over
# Redraw the orbits N times and count the observed systems matching the query.
n_obs = np.fromiter(
    (len(pop.observe(new_orbits=True).query(query)) for _ in range(N)),
    dtype=int, count=N)
n_obs.mean(), n_obs.std()

(6.2699999999999996, 2.2443484577934858)

In [19]:
# Repeat the same experiment (same `query` and `N` as defined above), this time
# with the empirical eccentricity distribution switched on instead of the beta
# distribution with default params --- eccentricity matters!
# Note that this flag is set on `pop` itself and stays set afterwards.
pop.ecc_empirical = True
n_obs = np.fromiter(
    (len(pop.observe(new_orbits=True).query(query)) for _ in range(N)),
    dtype=int, count=N)
n_obs.mean(), n_obs.std()

(10.23, 2.8874036780471135)

In [20]:
# You can also save a trained model so that you can load it back and hit the ground running.
# `pop` is rebound to the reloaded copy, so later cells use the loaded population.
# NOTE(review): overwrite=True presumably replaces an existing 'ebpop' save -- confirm.
pop.save('ebpop', overwrite=True)
pop = KeplerBinaryPopulation.load('ebpop')

In [21]:
# No training necessary!  No "trained" messages are printed here, so the
# trapezoid regression presumably came back with the loaded population.
pop.observe(regr_trap=True).head()

Unnamed: 0,host,period,ecc,w,inc,a,aR,b_pri,b_sec,k,...,flux_ratio,n_pri,n_sec,phase_sec,trap_dur_pri_regr,trap_depth_pri_regr,trap_slope_pri_regr,trap_dur_sec_regr,trap_depth_sec_regr,trap_slope_sec_regr
0,546,19.935476,0.0,0.344833,1.556793,2398414000000.0,44.941875,0.629302,0.629302,0.667462,...,0.074515,55,48,0.5,0.194304,0.331414,2.362083,0.19746,0.055789,2.54261
1,612,1.069235,0.030276,4.568418,1.424856,315077600000.0,6.442061,0.964877,0.908738,0.434965,...,0.020585,1184,1199,0.497233,0.058478,0.080409,2.368696,0.057238,0.011923,2.41193
2,1652,10.888951,0.0,0.494733,1.527856,1697257000000.0,30.300001,1.300691,1.300691,0.88053,...,0.539813,89,93,0.5,0.147233,0.100797,2.430906,0.148203,0.072054,2.396568
3,2016,14.983803,0.135394,3.861119,1.525659,1970475000000.0,35.954779,1.74863,1.462137,0.647213,...,0.070845,0,81,0.435025,,,,0.088492,0.003549,2.548455
4,2377,12.53684,0.082197,1.143465,1.538545,1899138000000.0,30.408404,0.90614,1.05267,0.953555,...,0.821778,107,103,0.521744,0.188651,0.226335,2.423779,0.209617,0.160692,2.531009
