-
-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add drop_first
option to HSGPPeriodic
#7115
base: main
Are you sure you want to change the base?
Changes from all commits
f606d10
47732db
f40aea8
c0b6890
f68313d
14f9c4c
8edd05f
c667927
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,18 +71,17 @@ | |
Xs: TensorLike, | ||
period: TensorLike, | ||
m: int, | ||
tl: ModuleType = np, | ||
): | ||
""" | ||
Calculate basis vectors for the cosine series expansion of the periodic covariance function. | ||
These are derived from the Taylor series representation of the covariance. | ||
""" | ||
w0 = (2 * np.pi) / period # angular frequency defining the periodicity | ||
m1 = tl.tile(w0 * Xs, m) | ||
m2 = tl.diag(tl.arange(0, m, 1)) | ||
w0 = (2 * pt.pi) / period # angular frequency defining the periodicity | ||
m1 = pt.tile(w0 * Xs, m) | ||
m2 = pt.diag(pt.arange(0, m, 1)) | ||
mw0x = m1 @ m2 | ||
phi_cos = tl.cos(mw0x) | ||
phi_sin = tl.sin(mw0x) | ||
phi_cos = pt.cos(mw0x) | ||
phi_sin = pt.sin(mw0x) | ||
return phi_cos, phi_sin | ||
|
||
|
||
|
@@ -121,8 +120,8 @@ | |
provided. Further information can be found in Ruitort-Mayol et al. | ||
drop_first: bool | ||
Default `False`. Sometimes the first basis vector is quite "flat" and very similar to | ||
the intercept term. When there is an intercept in the model, ignoring the first basis | ||
vector may improve sampling. This argument will be deprecated in future versions. | ||
the intercept term. When there is an intercept in the model, removing the first basis | ||
vector may improve sampling. | ||
parameterization: str | ||
Whether to use `centred` or `noncentered` parameterization when multiplying the | ||
basis by the coefficients. | ||
|
@@ -210,13 +209,6 @@ | |
if parameterization not in ["centered", "noncentered"]: | ||
raise ValueError("`parameterization` must be either 'centered' or 'noncentered'.") | ||
|
||
if drop_first: | ||
warnings.warn( | ||
"The drop_first argument will be deprecated in future versions." | ||
" See https://github.com/pymc-devs/pymc/pull/6877", | ||
DeprecationWarning, | ||
) | ||
|
||
self._drop_first = drop_first | ||
self._m = m | ||
self._m_star = int(np.prod(self._m)) | ||
|
@@ -474,11 +466,15 @@ | |
self, | ||
m: int, | ||
scale: Optional[Union[float, TensorLike]] = 1.0, | ||
drop_first=False, | ||
*, | ||
mean_func: Mean = Zero(), | ||
cov_func: Periodic, | ||
): | ||
arg_err_msg = "`m` must be a positive integer as the `Periodic` kernel approximation is only implemented for 1-dimensional case." | ||
arg_err_msg = ( | ||
"`m` must be a positive integer as the `Periodic` kernel approximation is " | ||
"only implemented for 1-dimensional case." | ||
) | ||
|
||
if not isinstance(m, int): | ||
raise ValueError(arg_err_msg) | ||
|
@@ -488,7 +484,8 @@ | |
|
||
if not isinstance(cov_func, Periodic): | ||
raise ValueError( | ||
"`cov_func` must be an instance of a `Periodic` kernel only. Use the `scale` parameter to control the variance." | ||
"`cov_func` must be an instance of a `Periodic` kernel only. Use the `scale` " | ||
"parameter to control the variance." | ||
) | ||
|
||
if cov_func.n_dims > 1: | ||
|
@@ -498,6 +495,7 @@ | |
|
||
self._m = m | ||
self.scale = scale | ||
self.drop_first = drop_first | ||
|
||
super().__init__(mean_func=mean_func, cov_func=cov_func) | ||
|
||
|
@@ -577,8 +575,7 @@ | |
ppc = pm.sample_posterior_predictive(idata, var_names=["f"]) | ||
""" | ||
Xs, _ = self.cov_func._slice(Xs) | ||
|
||
phi_cos, phi_sin = calc_basis_periodic(Xs, self.cov_func.period, self._m, tl=pt) | ||
phi_cos, phi_sin = calc_basis_periodic(Xs, self.cov_func.period, self._m) | ||
J = pt.arange(0, self._m, 1) | ||
# rescale basis coefficients by the sqrt variance term | ||
psd = self.scale * self.cov_func.power_spectral_density_approx(J) | ||
|
@@ -603,21 +600,28 @@ | |
(phi_cos, phi_sin), psd = self.prior_linearized(X - self._X_mean) | ||
|
||
m = self._m | ||
self._beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2 - 1)) | ||
# The first eigenfunction for the sine component is zero | ||
# and so does not contribute to the approximation. | ||
f = ( | ||
self.mean_func(X) | ||
+ phi_cos @ (psd * self._beta[:m]) # type: ignore | ||
+ phi_sin[..., 1:] @ (psd[1:] * self._beta[m:]) # type: ignore | ||
) | ||
|
||
self.f = pm.Deterministic(name, f, dims=dims) | ||
if self.drop_first: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder is we would like to inform the user when There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree this would be very cool. The basis vectors are made in pytensor. I tried using |
||
# Drop first sine term (all zeros), drop first cos term (all ones) | ||
beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2) - 2) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Which one is the old case with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Neither, now
True, need to double check this is all good, especially if someone uses an odd number There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm okay with the If you go against the maths, please add some comments to explain why. Otherwise pymc devs in 2034 might get confused when they look at appendix b in the original paper. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Love the optimism! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yup it does. OK, I made a dumb mistake. When sin(x) = 0, cos(x) is either 1 or -1 (been a while since trig class), not cos(x) always equals one like I wrote. So it only happens sometimes that the cosine term is all ones. Could be all -1's or be a mix of 1's and -1's. So To see what I'm trying to explain (sorry kind of poorly), try running the code below, but change
Samples of the prior predictive for the gp look good in all cases. import pymc as pm
import numpy as np
import matplotlib.pyplot as plt
import pytensor
import pytensor.tensor as pt
X = np.linspace(0, 10, 1000)[:, None]
with pm.Model() as model:
period = 1
scale = pm.HalfNormal("scale", 10)
cov_func = pm.gp.cov.Periodic(1, period=period, ls=1.0)
m = 200
gp = pm.gp.HSGPPeriodic(m=m, scale=scale, cov_func=cov_func)
X_mean = np.mean(X, axis=0)
X = pm.MutableData("X", X)
Xs = X - X_mean
# FROM pm.gp.hsgp_approx.calc_basis_periodic(Xs, cov_func.period, m=m)
w0 = (2 * pt.pi) / period # angular frequency defining the periodicity
m1 = pt.tile(w0 * Xs, m)
m2 = pt.diag(pt.arange(0, m, 1))
mw0x = m1 @ m2
(phi_cos, phi_sin), psd = gp.prior_linearized(Xs=Xs)
plt.plot(phi_cos[0, :].eval())
plt.plot(phi_sin[0, :].eval())
beta = pm.Normal(f"beta", size=m * 2)
beta_cos = beta[:m]
beta_sin = beta[m:]
cos_term = phi_cos @ (psd * beta_cos)
sin_term = phi_sin @ (psd * beta_sin)
f = pm.Deterministic("f", cos_term + sin_term)
#prior = pm.sample_prior_predictive()
#import arviz as az
#f = az.extract(prior.prior, var_names="f")
#plt.figure(figsize=(10, 4))
#plt.plot(X.eval().flatten(), f.mean(dim="sample").data)
#plt.xticks(np.arange(10)); There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm I stared at the equation again and I can't see how So There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yuuuuuup. You're right. I plotted it wrong, so sorry for spreading my confusion around! It looks like I found that the cosine term is somtimes all ones, so the issue with the extra intercept is real. But then plotted it wrong and proceeded to draw lots of incorrect conclusions from there. What the code should do is either:
Is that right @theorashid? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds right to me. Maybe remove the |
||
self._beta_cos = pt.concatenate(([0.0], beta[: m - 1])) | ||
self._beta_sin = pt.concatenate(([0.0], beta[m - 1 :])) | ||
|
||
else: | ||
# The first eigenfunction for the sine component is zero | ||
# and so does not contribute to the approximation. | ||
beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2) - 1) | ||
self._beta_cos = beta[:m] | ||
self._beta_sin = pt.concatenate(([0.0], beta[m:])) | ||
|
||
cos_term = phi_cos @ (psd * self._beta_cos) | ||
sin_term = phi_sin @ (psd * self._beta_sin) | ||
self.f = pm.Deterministic(name, self.mean_func(X) + cos_term + sin_term, dims=dims) | ||
return self.f | ||
|
||
def _build_conditional(self, Xnew): | ||
try: | ||
beta, X_mean = self._beta, self._X_mean | ||
X_mean = self._X_mean | ||
|
||
except AttributeError: | ||
raise ValueError( | ||
|
@@ -626,14 +630,15 @@ | |
|
||
Xnew, _ = self.cov_func._slice(Xnew) | ||
|
||
phi_cos, phi_sin = calc_basis_periodic(Xnew - X_mean, self.cov_func.period, self._m, tl=pt) | ||
phi_cos, phi_sin = calc_basis_periodic(Xnew - X_mean, self.cov_func.period, self._m) | ||
m = self._m | ||
J = pt.arange(0, m, 1) | ||
# rescale basis coefficients by the sqrt variance term | ||
psd = self.scale * self.cov_func.power_spectral_density_approx(J) | ||
|
||
phi = phi_cos @ (psd * beta[:m]) + phi_sin[..., 1:] @ (psd[1:] * beta[m:]) | ||
return self.mean_func(Xnew) + phi | ||
cos_term = phi_cos @ (psd * self._beta_cos) | ||
sin_term = phi_sin @ (psd * self._beta_sin) | ||
return self.mean_func(Xnew) + cos_term + sin_term | ||
|
||
def conditional(self, name: str, Xnew: TensorLike, dims: Optional[str] = None): # type: ignore | ||
R""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -229,35 +229,61 @@ def test_conditional(self, model, cov_func, X1, parameterization): | |
|
||
class TestHSGPPeriodic(_BaseFixtures): | ||
def test_parametrization(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To add some drop first tests |
||
err_msg = "`m` must be a positive integer as the `Periodic` kernel approximation is only implemented for 1-dimensional case." | ||
err_msg = ( | ||
"`m` must be a positive integer as the `Periodic` kernel approximation is only " | ||
"implemented for 1-dimensional case." | ||
) | ||
|
||
with pytest.raises(ValueError, match=err_msg): | ||
# `m` must be a positive integer, not a list | ||
cov_func = pm.gp.cov.Periodic(1, period=1, ls=0.1) | ||
cov_func = pm.gp.cov.Periodic(1, period=1, ls=1.0) | ||
pm.gp.HSGPPeriodic(m=[500], cov_func=cov_func) | ||
|
||
with pytest.raises(ValueError, match=err_msg): | ||
# `m`` must be a positive integer | ||
cov_func = pm.gp.cov.Periodic(1, period=1, ls=0.1) | ||
cov_func = pm.gp.cov.Periodic(1, period=1, ls=1.0) | ||
pm.gp.HSGPPeriodic(m=-1, cov_func=cov_func) | ||
|
||
with pytest.raises( | ||
ValueError, | ||
match="`cov_func` must be an instance of a `Periodic` kernel only. Use the `scale` parameter to control the variance.", | ||
match=( | ||
"`cov_func` must be an instance of a `Periodic` kernel only. Use the `scale` " | ||
"parameter to control the variance." | ||
), | ||
): | ||
# `cov_func` must be `Periodic` only | ||
cov_func = 5.0 * pm.gp.cov.Periodic(1, period=1, ls=0.1) | ||
cov_func = 5.0 * pm.gp.cov.Periodic(1, period=1, ls=1.0) | ||
pm.gp.HSGPPeriodic(m=500, cov_func=cov_func) | ||
|
||
with pytest.raises( | ||
ValueError, | ||
match="HSGP approximation for `Periodic` kernel only implemented for 1-dimensional case.", | ||
match=( | ||
"HSGP approximation for `Periodic` kernel only implemented for 1-dimensional case." | ||
), | ||
): | ||
cov_func = pm.gp.cov.Periodic(2, period=1, ls=[1, 2]) | ||
pm.gp.HSGPPeriodic(m=500, scale=0.5, cov_func=cov_func) | ||
|
||
@pytest.mark.parametrize("cov_func", [pm.gp.cov.Periodic(1, period=1, ls=1)]) | ||
@pytest.mark.parametrize("eta", [100.0]) | ||
@pytest.mark.parametrize("drop_first", [True, False]) | ||
def test_parametrization_drop_first(self, model, cov_func, X1, drop_first): | ||
n_basis = 101 | ||
with model: | ||
gp = pm.gp.HSGPPeriodic(m=n_basis, scale=1.0, drop_first=drop_first, cov_func=cov_func) | ||
gp.prior("f1", X1) | ||
first_cos_zero = gp._beta_cos[0].eval() == 0.0 | ||
first_sin_zero = gp._beta_sin[0].eval() == 0.0 | ||
if drop_first: | ||
assert ( | ||
first_cos_zero and first_sin_zero | ||
), "First element of both coefficient vectors should be zero." | ||
else: | ||
assert ( | ||
not first_cos_zero and first_sin_zero | ||
), "Only the first element of the sine coefficent vectors should be zero." | ||
|
||
@pytest.mark.parametrize("cov_func", [pm.gp.cov.Periodic(1, period=1, ls=1)]) | ||
@pytest.mark.parametrize("eta", [2.0]) | ||
@pytest.mark.xfail( | ||
reason="For `pm.gp.cov.Periodic`, this test does not pass.\ | ||
The mmd is around `0.0468`.\ | ||
|
@@ -266,25 +292,28 @@ def test_parametrization(self): | |
See https://github.com/pymc-devs/pymc/pull/6877/ for the full discussion." | ||
) | ||
def test_prior(self, model, cov_func, eta, X1, rng): | ||
"""Compare HSGPPeriodic prior to unapproximated GP prior, pm.gp.Latent. Draw samples from the | ||
prior and compare them using MMD two sample test. | ||
"""Compare HSGPPeriodic prior to unapproximated GP prior, pm.gp.Latent. Draw samples from | ||
the prior and compare them using MMD two sample test. | ||
""" | ||
with model: | ||
hsgp = pm.gp.HSGPPeriodic(m=200, scale=eta, cov_func=cov_func) | ||
hsgp = pm.gp.HSGPPeriodic(m=200, scale=eta, drop_first=False, cov_func=cov_func) | ||
f1 = hsgp.prior("f1", X=X1) | ||
|
||
gp = pm.gp.Latent(cov_func=eta**2 * cov_func) | ||
f2 = gp.prior("f2", X=X1) | ||
|
||
idata = pm.sample_prior_predictive(samples=1000, random_seed=rng) | ||
|
||
samples1 = az.extract(idata.prior["f1"])["f1"].values.T | ||
samples2 = az.extract(idata.prior["f2"])["f2"].values.T | ||
samples1 = az.extract(idata.prior, var_names="f1").values.T | ||
samples2 = az.extract(idata.prior, var_names="f2").values.T | ||
|
||
h0, mmd, critical_value, reject = two_sample_test( | ||
samples1, samples2, n_sims=500, alpha=0.01 | ||
) | ||
assert not reject, f"H0 was rejected, {mmd} even though HSGP and GP priors should match." | ||
assert not reject, ( | ||
f"H0 was rejected, MMD {mmd:.3f} > {critical_value:.3f} even though HSGP and GP priors " | ||
"should match." | ||
) | ||
|
||
@pytest.mark.parametrize("cov_func", [pm.gp.cov.Periodic(1, period=1, ls=1)]) | ||
def test_conditional_periodic(self, model, cov_func, X1): | ||
|
@@ -299,8 +328,8 @@ def test_conditional_periodic(self, model, cov_func, X1): | |
|
||
idata = pm.sample_prior_predictive(samples=1000) | ||
|
||
samples1 = az.extract(idata.prior["f"])["f"].values.T | ||
samples2 = az.extract(idata.prior["fc"])["fc"].values.T | ||
samples1 = az.extract(idata.prior, var_names="f").values.T | ||
samples2 = az.extract(idata.prior, var_names="fc").values.T | ||
|
||
h0, mmd, critical_value, reject = two_sample_test( | ||
samples1, samples2, n_sims=500, alpha=0.01 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remember to add it to the dostrings ;) Also type:
drop_first: bool = Flase