In [1]:
import sys

# This is to get the peerless target star DataFrame for example purposes.
# NOTE(review): these are hard-coded absolute paths; point them at your own
# checkout of the peerless repository before running.
sys.path.append('/u/tdm/repositories/peerless/prediction')
sys.path.append('/u/tdm/repositories/peerless')
from targets import targets

# The action is here. Depends on vespa & isochrones.
from exosyspop.populations import KeplerBinaryPopulation

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [2]:
# Build the example population from the target star DataFrame.
# NOTE(review): fB is presumably the binary fraction - confirm against the
# KeplerBinaryPopulation documentation.
pop = KeplerBinaryPopulation(targets, fB=0.4)

In [3]:
# Accessing a secondary property initializes a secondary simulation by
# calling pop._generate_binaries().  The first time this is called, the
# secondary property regressors get trained.
pop.radius_B

dmag regressor trained, R2=0.999232045787
q regressor trained, R2=0.999646571181


array([        nan,         nan,         nan, ...,  0.45223187,
        0.57815922,  0.30554178])

In [4]:
# Subsequent calls are much faster.  Here we regenerate the binaries
# explicitly, show the resulting secondary radii, and then time the call.
pop._generate_binaries()
print(pop.radius_B)
%timeit pop._generate_binaries()

[        nan         nan  0.53384353 ...,  0.43024235         nan
  0.84651946]
10 loops, best of 3: 88 ms per loop


In [5]:
# Similarly, accessing an orbital property will generate the orbits.
pop.period

array([  1.11245753e+05,   7.92807104e+02,   1.43041258e+05, ...,
         3.66699423e+04,   5.70222238e+03,   1.30271621e+02])

# Synthetic observations

In [6]:
# Now we can "observe" the population and see what turns up.  The
# observation takes into account duty cycle & data span, as well as geometry.
obs = pop.observe()
print(obs.shape[0])
print(obs.columns)
obs.head(5)

536
Index([u'index', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'flux_ratio', u'n_pri', u'n_sec'],
      dtype='object')


Unnamed: 0,index,period,ecc,w,inc,a,aR,b_pri,b_sec,k,...,d_sec,T14_pri,T14_sec,T23_pri,T23_sec,dataspan,dutycycle,flux_ratio,n_pri,n_sec
0,531,3.585071,0.0,3.606254,1.516895,775748900000.0,12.699439,0.684183,0.684183,0.654988,...,0.062051,0.135931,0.135931,0,0,1459.789,0.875,0.089177,362,357
1,557,117.809806,0.939598,5.075881,1.525295,7944181000000.0,129.467282,5.664627,0.36733,0.653539,...,0.069027,0.0,0.085192,0,0,1459.789,0.6988,0.074392,0,8
2,707,3.414607,0.773348,1.007553,1.287839,734656900000.0,13.283652,0.901315,4.306891,0.576628,...,0.0,0.042327,0.0,0,0,1459.789,0.8749,0.038368,358,0
3,1155,21.369804,0.272991,3.697487,1.52874,2742820000000.0,41.833898,1.90173,1.422804,0.77605,...,0.026571,0.0,0.145487,0,0,1459.789,0.8752,0.286366,0,60
4,2300,1.897511,0.727851,6.194288,1.026348,539444600000.0,6.924148,1.802916,1.584054,0.732371,...,0.00811,0.0,0.046175,0,0,1459.789,0.8751,0.352192,0,681


In [7]:
# This is pretty fast, even when a new population is generated on every call:
%timeit pop.observe(new=True)

10 loops, best of 3: 170 ms per loop


In [8]:
# It is even faster if we only generate new orbits.
%timeit pop.observe(new_orbits=True)

10 loops, best of 3: 81.9 ms per loop


In [9]:
# Repeating the synthetic observation many times gives an easy estimate of
# the expected number of observations and its scatter.
import numpy as np
N = 100
n_obs = np.fromiter(
    (len(pop.observe(new_orbits=True)) for i in range(N)),
    dtype=int,
    count=N,
)
n_obs.mean(), n_obs.std()

(535.49000000000001, 25.06212082007427)

In [10]:
# Notice that the observations above do not yet include trapezoidal parameters.
# There are two options for generating these.  The first is to set the
# fit_trap parameter, as follows:
obs = pop.observe(fit_trap=True)
print(obs.shape[0])
obs.columns

555


Index([u'index', u'period', u'ecc', u'w', u'inc', u'a', u'aR', u'b_pri',
       u'b_sec', u'k', u'tra', u'occ', u'd_pri', u'd_sec', u'T14_pri',
       u'T14_sec', u'T23_pri', u'T23_sec', u'dataspan', u'dutycycle',
       u'flux_ratio', u'n_pri', u'n_sec', u'trap_dur_pri', u'trap_depth_pri',
       u'trap_slope_pri', u'trap_dur_sec', u'trap_depth_sec',
       u'trap_slope_sec'],
      dtype='object')

In [None]:
# All things considered, this is still pretty fast if we only need to do it a few times:
%timeit pop.observe(fit_trap=True)

1 loops, best of 3: 2.43 s per loop


In [None]:
# Fitting every time is too slow for inference, though.  To help with that,
# we can tell observe() to train & use a regression instead.  Training only
# happens once (by default with 10,000 synthetic observations) and takes a
# minute or so.
obs = pop.observe(regr_trap=True)
print(obs.shape[0])
obs.columns

In [None]:
# Subsequent calls, once the regression has been trained, are much faster.
%timeit pop.observe(regr_trap=True)

In [None]:
# Even generating a new stellar population & observing it is pretty quick.
%timeit pop.observe(regr_trap=True, new=True)

In [None]:
# Or, again, you can generate just new orbits (rather than new binaries & new orbits).
%timeit pop.observe(regr_trap=True, new_orbits=True)

In [None]:
# The training data for the trapezoid shape regression above was generated
# with this function, which is also useful on its own for sampling >N random
# observations from the existing population.  `trap_regr` defaults to `True` here.
# NOTE(review): the other cells spell this keyword `regr_trap`; confirm which
# name get_N_observed actually accepts.
# This function also takes `new` or `new_orbits` keywords.
obs_pop = pop.get_N_observed(N=10000, new_orbits=True)
print(len(obs_pop))
obs_pop.columns

In [None]:
# We can now look, e.g., at the expected number of single/double eclipsing
# systems: fewer than three primary and fewer than three secondary eclipses,
# with at least one of the two counts equal to zero.
# Every comparison is parenthesized so the selection does not rely on
# DataFrame.query's default parser giving `|` the precedence of `or`; under
# plain Python precedence `n_pri==0 | n_sec==0` would mean something else.
query = '(n_pri < 3) & (n_sec < 3) & ((n_pri == 0) | (n_sec == 0))'
N = 100
n_obs = np.array([len(pop.observe(new_orbits=True).query(query)) for i in range(N)])
n_obs.mean(), n_obs.std()