From c8a3e6fcc5372be5cdaa6dfcc75af95f10ac7ade Mon Sep 17 00:00:00 2001 From: "Sergey E. Koposov" Date: Mon, 3 Oct 2022 20:49:14 +0100 Subject: [PATCH 1/3] fix broken MCMC volume calculations in RadFriends/SupFriends also add testing of those --- py/dynesty/bounding.py | 25 +++++----- tests/test_ellipsoid.py | 105 +++++++++++++++++++++++++++++----------- 2 files changed, 90 insertions(+), 40 deletions(-) diff --git a/py/dynesty/bounding.py b/py/dynesty/bounding.py index 1d19a865..4055ab1a 100644 --- a/py/dynesty/bounding.py +++ b/py/dynesty/bounding.py @@ -645,7 +645,7 @@ def __init__(self, ndim, cov=None): detsign, detln = linalg.slogdet(self.am) assert detsign > 0 - self.logvol_ball = (logvol_prefactor(self.n) - 0.5 * detln) + self.logvol_ball = logvol_prefactor(self.n) - 0.5 * detln self.expand = 1. self.funit = 1 @@ -744,18 +744,19 @@ def monte_carlo_logvol(self, estimated fractional overlap with the unit cube.""" # Estimate volume using Monte Carlo integration. - samples = [ + samples = ([ self.sample(ctrs, rstate=rstate, return_q=True) for i in range(ndraws) - ] - qsum = sum((q for (x, q) in samples)) - logvol = np.log(1. * ndraws / qsum * len(ctrs)) + self.logvol_ball + ]) + qs = np.array([_[1] for _ in samples]) + qsum = np.sum(1. / qs) + logvol = np.log(1. / ndraws * qsum * len(ctrs)) + self.logvol_ball if return_overlap: # Estimate the fractional amount of overlap with the # unit cube using the same set of samples. - qin = sum((q * unitcheck(x) for (x, q) in samples)) - overlap = 1. * qin / qsum + qin = sum((1. / q * unitcheck(x) for (x, q) in samples)) + overlap = qin / qsum return logvol, overlap else: return logvol @@ -1005,7 +1006,7 @@ def monte_carlo_logvol(self, ctrs, ndraws=10000, rstate=None, - return_overlap=False): + return_overlap=True): """Using `ndraws` Monte Carlo draws, estimate the log volume of the *union* of cubes. If `return_overlap=True`, also returns the estimated fractional overlap with the unit cube.""" @@ -1015,14 +1016,14 @@ def monte_carlo_logvol(self, self.sample(ctrs, rstate=rstate, return_q=True) for i in range(ndraws) ] - qsum = sum((q for (x, q) in samples)) - logvol = np.log(1. * ndraws / qsum * len(ctrs)) + self.logvol_cube + qsum = sum((1. / q for (x, q) in samples)) + logvol = np.log(1. * qsum / ndraws * len(ctrs)) + self.logvol_cube if return_overlap: # Estimate the fractional overlap with the unit cube using # the same set of samples. - qin = sum((q * unitcheck(x) for (x, q) in samples)) - overlap = 1. * qin / qsum + qin = sum((1. / q * unitcheck(x) for (x, q) in samples)) + overlap = qin / qsum return logvol, overlap else: return logvol diff --git a/tests/test_ellipsoid.py b/tests/test_ellipsoid.py index 4acbb1cc..f48b6e62 100644 --- a/tests/test_ellipsoid.py +++ b/tests/test_ellipsoid.py @@ -130,40 +130,45 @@ def test_overlap(): assert np.all(within == within2) -def test_mc_logvol(): - - def capvol(n, r, h): - # see https://en.wikipedia.org/wiki/Spherical_cap - Cn = np.pi**(n / 2.) / scipy.special.gamma(1 + n / 2.) - return ( - Cn * r**n * +def capvol(n, r, h): + # this is a volume of the n-dimensional spherical cap + # see https://en.wikipedia.org/wiki/Spherical_cap + Cn = np.pi**(n / 2.) / scipy.special.gamma(1 + n / 2.) + return (Cn * r**n * (1 / 2 - (r - h) / r * scipy.special.gamma(1 + n / 2) / np.sqrt(np.pi) / scipy.special.gamma( (n + 1.) / 2) * scipy.special.hyp2f1(1. / 2, (1 - n) / 2, 3. / 2, ((r - h) / r)**2))) - def sphere_vol(n, r): - Cn = np.pi**(n / 2.) / scipy.special.gamma(1 + n / 2.) * r**n - return Cn - - def two_sphere_vol(c1, c2, r1, r2): - D = np.sqrt(np.sum((c1 - c2)**2)) - n = len(c1) - if D >= r1 + r2: - return sphere_vol(n, r1) + sphere_vol(n, r2) - # now either one is fully inside or the is overlap - if D + r1 <= r2 or D + r2 <= r1: - #fully inside - return max(sphere_vol(n, r1), sphere_vol(n, r2)) - else: - x = 1. / 2 / D * np.sqrt(2 * r1**2 * r2**2 + 2 * D**2 * r1**2 + - 2 * D**2 * r2**2 - r1**4 - r2**4 - D**4) - capsize1 = r1 - np.sqrt(r1**2 - x**2) - capsize2 = r2 - np.sqrt(r2**2 - x**2) - V = (sphere_vol(n, r1) + sphere_vol(n, r2) - - capvol(n, r1, capsize1) - capvol(n, r2, capsize2)) - return V + +def sphere_vol(n, r): + # n-d sphere volume + Cn = np.pi**(n / 2.) / scipy.special.gamma(1 + n / 2.) * r**n + return Cn + + +def two_sphere_vol(c1, c2, r1, r2): + # Return the volume of the unions of two n-d spheres + D = np.sqrt(np.sum((c1 - c2)**2)) + n = len(c1) + if D >= r1 + r2: + return sphere_vol(n, r1) + sphere_vol(n, r2) + # now either one is fully inside or the is overlap + if D + r1 <= r2 or D + r2 <= r1: + # fully inside + return max(sphere_vol(n, r1), sphere_vol(n, r2)) + else: + x = 1. / 2 / D * np.sqrt(2 * r1**2 * r2**2 + 2 * D**2 * r1**2 + + 2 * D**2 * r2**2 - r1**4 - r2**4 - D**4) + capsize1 = r1 - np.sqrt(r1**2 - x**2) + capsize2 = r2 - np.sqrt(r2**2 - x**2) + V = (sphere_vol(n, r1) + sphere_vol(n, r2) - capvol(n, r1, capsize1) - + capvol(n, r2, capsize2)) + return V + + +def test_mc_logvol(): rstate = get_rstate() ndim = 10 @@ -185,6 +190,50 @@ def two_sphere_vol(c1, c2, r1, r2): assert (np.abs(np.log(vtrue) - lv) < 1e-2) +def test_mc_logvolRad(): + + rstate = get_rstate() + ndim = 10 + cen1 = np.zeros(ndim) + cen2 = np.zeros(ndim) + r1 = 0.7 + sig1 = np.eye(ndim) * r1**2 + Ds = np.linspace(0, 2, 30) + nsamp = 10000 + for D in Ds: + cen2[0] = D + rf = db.RadFriends(ndim, sig1) + lv = rf.monte_carlo_logvol(np.array([cen1, cen2]), + nsamp, + rstate=rstate)[0] + vtrue = two_sphere_vol(cen1, cen2, r1, r1) + assert (np.abs(np.log(vtrue) - lv) < 1e-2) + + +def test_mc_logvolCube(): + + rstate = get_rstate() + ndim = 10 + cen1 = np.zeros(ndim) + cen2 = np.zeros(ndim) + r1 = 0.7 + sig1 = np.eye(ndim) * r1**2 + Ds = np.linspace(0, 2, 30) + nsamp = 10000 + for D in Ds: + cen2[0] = D + rf = db.SupFriends(ndim, sig1) + lv = rf.monte_carlo_logvol(np.array([cen1, cen2]), + nsamp, + rstate=rstate)[0] + if D > 2 * r1: + lvtrue = np.log(2 * r1) * ndim + np.log(2) + else: + lvtrue = np.log(2 * r1) * (ndim - 1) + np.log(D + 2 * r1) + + assert (np.abs(lvtrue - lv) < 1e-2) + + def test_bounds(): rstate = get_rstate() Ndim = 20 From ba7edf82787e9dbad194bb4e9d557aaa182dbd92 Mon Sep 17 00:00:00 2001 From: "Sergey E. Koposov" Date: Tue, 4 Oct 2022 00:19:46 +0100 Subject: [PATCH 2/3] fix the MC volume calculation for multiellipsoid --- CHANGELOG.md | 1 + py/dynesty/bounding.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 505a11a8..57be8296 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ This is a major release with as several significant improvements. One is the imp - Sampler.n_effective is no longer unnecessarily computed when sampling with an infinite limit on n_effective. ( #379 ; by @edbennett ) - In rare occasions, dynamic nested samplng fits with maxiter set could have failed with 'list index out of range' errors. That was addressed ( #392 ; by @segasai ) +- The Monte-Carlo volume calculations by RadFriends/SupFriends/MultiEllipsoid were inaccurate ### Changed - Setting n_effective for Sampler.run_nested() and DynamicSampler.sample_initial(), and n_effective_init for DynamicSampler.run_nested(), are deprecated ( #379 ; by @edbennett ) diff --git a/py/dynesty/bounding.py b/py/dynesty/bounding.py index 4055ab1a..4549e6f8 100644 --- a/py/dynesty/bounding.py +++ b/py/dynesty/bounding.py @@ -516,14 +516,14 @@ def monte_carlo_logvol(self, samples = [ self.sample(rstate=rstate, return_q=True) for i in range(ndraws) ] - qsum = sum((q for (x, idx, q) in samples)) - logvol = np.log(ndraws * 1. / qsum) + self.logvol_tot + qsum = sum((1. / q for (x, idx, q) in samples)) + logvol = np.log(qsum / ndraws) + self.logvol_tot if return_overlap: # Estimate the fractional amount of overlap with the # unit cube using the same set of samples. - qin = sum((q * unitcheck(x) for (x, idx, q) in samples)) - overlap = 1. * qin / qsum + qin = sum((1. / q * unitcheck(x) for (x, idx, q) in samples)) + overlap = qin / qsum return logvol, overlap else: return logvol From b843d5509f8bb55a67fc471cbb70ad1cb41e8c68 Mon Sep 17 00:00:00 2001 From: "Sergey E. Koposov" Date: Tue, 4 Oct 2022 02:23:28 +0100 Subject: [PATCH 3/3] update changelog --- CHANGELOG.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57be8296..11942d81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,10 @@ All notable changes to dynesty will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [2.0.0] -This is a major release with as several significant improvements. One is the implementation of the check-points to save progress and allow restarting of fits and a new simple interface to obtain equally weighted samples directly from results object. Also likelihood functions can now return additional computed quantities (blobs) that will be saved together with samples. +This is a major release with several significant improvements. +- One is the implementation of the check-points to save progress and allow restarting of fits [See here](https://dynesty.readthedocs.io/en/latest/quickstart.html#checkpointing) +- A new simple interface to obtain equally weighted samples directly from results object. [See here](https://dynesty.readthedocs.io/en/latest/crashcourse.html) +- Allow likelihood functions to return additional computed quantities (blobs) that will be saved together with samples. [See hre](https://dynesty.readthedocs.io/en/latest/quickstart.html#saving-auxialiary-information-from-log-likelihood-function) *IMPORTANT* This release includes some major refactoring of class structure in dynesty in to implement the check-pointing. While we haven't seen any breakage in our tests, it is possible that some of more-unusual workflows may be affected. Please submit a bug report on github if you see anything that doesn't look correct. Also, while every effort was made that checkpointing works correctly, it is possible that some corner-cases have been missed. Please report if you see any issues. @@ -21,7 +24,7 @@ This is a major release with as several significant improvements. One is the imp - Sampler.n_effective is no longer unnecessarily computed when sampling with an infinite limit on n_effective. ( #379 ; by @edbennett ) - In rare occasions, dynamic nested samplng fits with maxiter set could have failed with 'list index out of range' errors. That was addressed ( #392 ; by @segasai ) -- The Monte-Carlo volume calculations by RadFriends/SupFriends/MultiEllipsoid were inaccurate +- The Monte-Carlo volume calculations by RadFriends/SupFriends/MultiEllipsoid were inaccurate (fix # 398; #399 ; by @segasai ) ### Changed - Setting n_effective for Sampler.run_nested() and DynamicSampler.sample_initial(), and n_effective_init for DynamicSampler.run_nested(), are deprecated ( #379 ; by @edbennett )