Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix mass and parallax 2 #69

Merged
merged 12 commits into from
Oct 18, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 98 additions & 84 deletions orbitize/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,10 @@ def prepare_samples(self, num_samples):
# generate sample orbits
samples = np.empty([len(self.priors), num_samples])
for i in range(len(self.priors)):
samples[i, :] = self.priors[i].draw_samples(num_samples)
if hasattr(self.priors[i], "draw_samples"):
samples[i, :] = self.priors[i].draw_samples(num_samples)
else: # param is fixed & has no prior
samples[i, :] = self.priors[i] * np.ones(num_samples)

sma, ecc, inc, argp, lan, tau, plx, mtot = [s for s in samples]

Expand Down Expand Up @@ -243,21 +246,23 @@ def run_sampler(self, total_orbits, num_samples=10000):
return np.array(output_orbits)


class PTMCMC(Sampler):
class MCMC(Sampler):
"""
MCMC sampler. Supports either parallel tempering or just regular MCMC. Parallel temperuing will be run if num_temps > 1
Parallel-Tempered MCMC Sampler using ptemcee, a fork of the emcee Affine-infariant sampler
Affine-Invariant Ensemble MCMC Sampler using emcee. Warning: may not work well for multi-modal distributions

Args:
lnlike (string): name of likelihood function in ``lnlike.py``
system (system.System): system.System object
num_temps (int): number of temperatures to run the sampler at
num_temps (int): number of temperatures to run the sampler at. Parallel tempering will be used if num_temps > 1
num_walkers (int): number of walkers at each temperature
num_threads (int): number of threads to use for parallelization (default=1)

(written): Jason Wang, Henry Ngo, 2018
"""
def __init__(self, lnlike, system, num_temps, num_walkers, num_threads=1):
super(PTMCMC, self).__init__(system, like=lnlike)
super(MCMC, self).__init__(system, like=lnlike)
self.num_temps = num_temps
self.num_walkers = num_walkers
self.num_threads = num_threads
Expand All @@ -268,112 +273,108 @@ def __init__(self, lnlike, system, num_temps, num_walkers, num_threads=1):
lnlike = None
)

# get priors from the system class
self.priors = system.sys_priors
if self.num_temps > 1:
self.use_pt = True
else:
self.use_pt = False
self.num_temps = 1

# get priors from the system class. need to remove and record fixed priors
self.priors = []
self.fixed_params = []
for i, prior in enumerate(system.sys_priors):
# check for pixed parameters
if not hasattr(prior, "draw_samples"):
self.fixed_params.append((i, prior))
else:
self.priors.append(prior)

# initialize walkers initial postions
self.num_params = len(self.priors)
init_positions = []
for prior in self.priors:
# draw them uniformly becase we don't know any better right now
# TODO: be smarter in the future
random_init = prior.draw_samples(num_walkers*num_temps).reshape([num_temps, num_walkers])
random_init = prior.draw_samples(num_walkers*self.num_temps)
if self.num_temps > 1:
random_init = random_init.reshape([self.num_temps, num_walkers])

init_positions.append(random_init)

# make this an numpy array, but combine the parameters into a shape of (ntemps, nwalkers, nparams)
# we currently have a list of [ntemps, nwalkers] with nparam arrays. We need to make nparams the third dimension
# save this as the current position
self.curr_pos = np.dstack(init_positions)
# save this as the current position for the walkers
if self.use_pt:
# make this an numpy array, but combine the parameters into a shape of (ntemps, nwalkers, nparams)
# we currently have a list of [ntemps, nwalkers] with nparam arrays. We need to make nparams the third dimension
self.curr_pos = np.dstack(init_positions)
else:
# make this an numpy array, but combine the parameters into a shape of (nwalkers, nparams)
# we currently have a list of arrays where each entry is num_walkers prior draws for each parameter
# We need to make nparams the second dimension, so we have to transpose the stacked array
self.curr_pos = np.stack(init_positions).T

def run_sampler(self, total_orbits, burn_steps=0, thin=1):
"""
Runs PT MCMC sampler. Results are stored in self.chain, and self.lnlikes
Results also added to orbitize.results.Results object (self.results)

Can be run multiple times if you want to pause and inspect things.
Each call will continue from the end state of the last execution

def _fill_in_fixed_params(self, sampled_params):
"""
Fills in the missing parameters from the chain that aren't being sampeld

Args:
total_orbits (int): total number of accepted possible
orbits that are desired. This equals
``num_steps_per_walker``x``num_walkers``
burn_steps (int): optional paramter to tell sampler
to discard certain number of steps at the beginning
thin (int): factor to thin the steps of each walker
by to remove correlations in the walker steps
sampled_params (np.array): either 1-D array of size = number of sampled params, or 2-D array of shape (num_models, num_params)

Returns:
emcee.sampler object
full_params (np.array): same number of dimensions as sampled_params, but with num_params including the fixed parameters
"""
sampler = ptemcee.Sampler(self.num_walkers, self.num_params, self._logl, orbitize.priors.all_lnpriors, ntemps=self.num_temps, threads=self.num_threads, logpargs=[self.priors,] )
if len(self.fixed_params) == 0:
# nothing to add
return sampled_params

# check if 1-D or 2-D
twodim = np.ndim(sampled_params) == 2

for pos, lnprob, lnlike in sampler.sample(self.curr_pos, iterations=burn_steps, thin=thin):
pass

sampler.reset()
self.curr_pos = pos
print('Burn in complete')

nsteps = int(np.ceil(total_orbits / self.num_walkers))

for pos, lnprob, lnlike in sampler.sample(p0=pos, iterations=nsteps, thin=thin):
pass

self.curr_pos = pos
# TODO: Need something here to pick out temperatures, just using lowest one for now
self.chain = sampler.chain
self.post = sampler.flatchain[0,:]
self.lnlikes = sampler.logprobability
self.results.add_samples(self.post,self.lnlikes)
# insert in params
for index, value in self.fixed_params:
if twodim:
sampled_params = np.insert(sampled_params, index, value, axis=1)
else:
sampled_params = np.insert(sampled_params, index, value)

return sampler
return sampled_params

class EnsembleMCMC(Sampler):
"""
Affine-Invariant Ensemble MCMC Sampler using emcee. Warning: may not work well for multi-modal distributions
def _logl(self, params, include_logp=False):
"""
log likelihood function that interfaces with the orbitize objects
Comptues the sum of the log likelihoods of the data given the input model

Args:
lnlike (string): name of likelihood function in ``lnlike.py``
system (system.System): system.System object
num_walkers (int): number of walkers at each temperature
num_threads (int): number of threads to use for parallelization (default=1)
Args:
params (np.array of float): MxR array
of fitting parameters, where R is the number of
parameters being fit, and M is the number of orbits
we need model predictions for. Must be in the same order
documented in System() above. If M=1, this can be a 1d array.

(written): Jason Wang, Henry Ngo, 2018
"""
def __init__(self, lnlike, system, num_walkers, num_threads=1):
super(EnsembleMCMC, self).__init__(system, like=lnlike)
self.num_walkers = num_walkers
self.num_threads = num_threads
# Create an empty results object
self.results = orbitize.results.Results(
sampler_name = self.__class__.__name__,
post = None,
lnlike = None
)
include_logp (bool): if True, also includ elog prior in this function

# get priors from the system class
self.priors = system.sys_priors
Returns:
lnlikes (float): sum of all log likelihoods of the data given input model

# initialize walkers initial postions
self.num_params = len(self.priors)
init_positions = []
for prior in self.priors:
# draw them uniformly becase we don't know any better right now
# todo: be smarter in the future
random_init = prior.draw_samples(num_walkers)
"""
if include_logp:
if np.ndim(params) == 1:
logp = orbitize.priors.all_lnpriors(params, self.priors)
else:
logp = np.array([orbitize.priors.all_lnpriors(pset, self.priors) for pset in params])
else:
logp = 0 # don't include prior

init_positions.append(random_init)
full_params = self._fill_in_fixed_params(params)
if np.ndim(full_params) == 2:
full_params = full_params.T

# make this an numpy array, but combine the parameters into a shape of (nwalkers, nparams)
# we currently have a list of arrays where each entry is num_walkers prior draws for each parameter
# We need to make nparams the second dimension, so we have to transpose the stacked array
self.curr_pos = np.stack(init_positions).T
return super(MCMC, self)._logl(full_params) + logp

def run_sampler(self, total_orbits, burn_steps=0, thin=1):
"""
Runs the Affine-Invariant MCMC sampler. Results are stored in self.chain, and self.lnlikes
Runs PT MCMC sampler. Results are stored in self.chain, and self.lnlikes
Results also added to orbitize.results.Results object (self.results)

Can be run multiple times if you want to pause and inspect things.
Expand All @@ -391,8 +392,11 @@ def run_sampler(self, total_orbits, burn_steps=0, thin=1):
Returns:
emcee.sampler object
"""
# sampler = emcee.EnsembleSampler(num_walkers, self.num_params, self._logl, orbitize.priors.all_lnpriors, threads=num_threads, logpargs=[self.priors,] )
sampler = emcee.EnsembleSampler(self.num_walkers, self.num_params, self._logl, threads=self.num_threads)
if self.use_pt:
sampler = ptemcee.Sampler(self.num_walkers, self.num_params, self._logl, orbitize.priors.all_lnpriors, ntemps=self.num_temps, threads=self.num_threads, logpargs=[self.priors,] )
else:
sampler = emcee.EnsembleSampler(self.num_walkers, self.num_params, self._logl, threads=self.num_threads, kwargs={'include_logp' : True})


for pos, lnprob, lnlike in sampler.sample(self.curr_pos, iterations=burn_steps, thin=thin):
pass
Expand All @@ -407,9 +411,19 @@ def run_sampler(self, total_orbits, burn_steps=0, thin=1):
pass

self.curr_pos = pos
# TODO: Need something here to pick out temperatures, just using lowest one for now
self.chain = sampler.chain
self.post = sampler.flatchain
self.lnlikes = sampler.lnprobability
if self.use_pt:
self.post = sampler.flatchain[0,:]
self.lnlikes = sampler.logprobability[0:,] # shoudl also be picking out the lowest temperature logps
self.lnlikes_alltemps = sampler.logprobability
else:
self.post = sampler.flatchain
self.lnlikes = sampler.lnprobability

# include fixed parameters in posterior
self.post = self._fill_in_fixed_params(self.post)

self.results.add_samples(self.post,self.lnlikes)

return sampler
26 changes: 8 additions & 18 deletions orbitize/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,22 +119,18 @@ def __init__(self, num_secondary_bodies, data_table, system_mass,
self.labels.append('epp{}'.format(body+1))

#
# Set priors on system mass and parallax
# Set priors on total mass and parallax
#
self.labels.append('plx')
self.labels.append('mtot')
if plx_err > 0:
self.sys_priors.append(priors.GaussianPrior(plx, plx_err))
self.abs_plx = np.nan
else:
self.abs_plx = plx
self.labels.append('plx')
self.sys_priors.append(plx)
if mass_err > 0:
self.sys_priors.append(priors.GaussianPrior(
system_mass, mass_err)
)
self.abs_system_mass = np.nan
self.sys_priors.append(priors.GaussianPrior(system_mass, mass_err))
else:
self.abs_system_mass = system_mass
self.labels.append('mtot')
self.sys_priors.append(system_mass)

#add labels dictionary for parameter indexing
self.param_idx = dict(zip(self.labels, np.arange(len(self.labels))))
Expand All @@ -161,14 +157,6 @@ def compute_model(self, params_arr):
else:
model = np.zeros((len(self.data_table), 2, params_arr.shape[1]))

if not np.isnan(self.abs_plx):
plx = self.abs_plx
else:
plx = params_arr[6*self.num_secondary_bodies]
if not np.isnan(self.abs_system_mass):
mtot = self.abs_system_mass
else:
mtot = params_arr[-1]

for body_num in np.arange(self.num_secondary_bodies)+1:

Expand All @@ -179,6 +167,8 @@ def compute_model(self, params_arr):
argp = params_arr[body_num+2]
lan = params_arr[body_num+3]
tau = params_arr[body_num+4]
plx = params_arr[6*self.num_secondary_bodies]
mtot = params_arr[-1]

raoff, decoff, vz = kepler.calc_orbit(
epochs, sma, ecc, inc, argp, lan, tau, plx, mtot
Expand Down
11 changes: 11 additions & 0 deletions tests/test_OFTI.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@ def test_run_sampler():
print()


def test_fixed_sys_params_sampling():
# test in case of fixed mass and parallax
myDriver = orbitize.driver.Driver(input_file, 'OFTI',
1, 1.22, 56.95)

s = myDriver.sampler
samples = s.prepare_samples(100)
assert np.all(samples[-1] == s.priors[-1])
assert isinstance(samples[-3], np.ndarray)


if __name__ == "__main__":
test_scale_and_rotate()
test_run_sampler()
Expand Down
12 changes: 6 additions & 6 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ def test_compute_model():
1, data_table, 10., 10.
)

params_arr = np.array([[1.,0.5],[0.,0.],[0.,0.],[0.,0.],[0.,0.],[245000., 245000.]])
params_arr = np.array([[1.,0.5],[0.,0.],[0.,0.],[0.,0.],[0.,0.],[245000., 245000.], [10, 10], [10, 10]])
model = testSystem_parsing.compute_model(params_arr)
assert model.shape == (4,2,2)

params_arr = np.array([1.,0.,0.,0.,0.,245000.])
params_arr = np.array([1., 0., 0., 0., 0., 245000., 10, 10])
model = testSystem_parsing.compute_model(params_arr)
assert model.shape == (4,2)

Expand All @@ -48,10 +48,10 @@ def test_systeminit():
data_table['object'][1] = 2

plx_mass_errs2lens = {
(0.,0.): 12,
(0.,0.): 14,
(1.,1.): 14,
(0.,1.): 13,
(1.,0.): 13
(0.,1.): 14,
(1.,0.): 14
}

for plx_e, mass_e in plx_mass_errs2lens.keys():
Expand All @@ -75,7 +75,7 @@ def test_systeminit():

assert testSystem_parsing.labels == [
'sma1', 'ecc1', 'inc1', 'aop1', 'pan1', 'epp1', 'sma2',
'ecc2', 'inc2', 'aop2', 'pan2', 'epp2'
'ecc2', 'inc2', 'aop2', 'pan2', 'epp2','plx','mtot'
]


Expand Down
12 changes: 6 additions & 6 deletions tests/test_mcmc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ def test_pt_mcmc_runs(num_threads=1):
# construct sampler
n_temps=2
n_walkers=100
mcmc = sampler.PTMCMC(chi2_lnlike, orbit, n_temps, n_walkers, num_threads=num_threads)
mcmc = sampler.MCMC(chi2_lnlike, orbit, n_temps, n_walkers, num_threads=num_threads)

# run it a little
mcmc.run_sampler(10, 1)
mcmc.run_sampler(1000, 1)
# run it a little more (tests adding to results object
mcmc.run_sampler(10, 1)
mcmc.run_sampler(1000, 1)

def test_ensemble_mcmc_runs(num_threads=1):
"""
Expand All @@ -44,12 +44,12 @@ def test_ensemble_mcmc_runs(num_threads=1):

# construct sampler
n_walkers=100
mcmc = sampler.EnsembleMCMC(chi2_lnlike, orbit, n_walkers, num_threads=num_threads)
mcmc = sampler.MCMC(chi2_lnlike, orbit, 0, n_walkers, num_threads=num_threads)

# run it a little
mcmc.run_sampler(10, burn_steps=1)
mcmc.run_sampler(1000, burn_steps=1)
# run it a little more (tests adding to results object)
mcmc.run_sampler(10, burn_steps=1)
mcmc.run_sampler(1000, burn_steps=1)

if __name__ == "__main__":
test_pt_mcmc_runs(num_threads=1)
Expand Down