Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make coords and data always mutable #7047

Merged
merged 3 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ jobs:
tests/model/test_fgraph.py
tests/model/transform/test_basic.py
tests/model/transform/test_conditioning.py
tests/model/transform/test_optimization.py
tests/test_model_graph.py
tests/ode/test_ode.py
tests/ode/test_utils.py
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,16 @@ Model Conditioning
.. autosummary::
:toctree: generated/

change_value_transforms
do
observe
change_value_transforms
remove_value_transforms


Model Optimization
------------------
.. currentmodule:: pymc.model.transform.optimization
.. autosummary::
:toctree: generated/

freeze_dims_and_data
71 changes: 16 additions & 55 deletions pymc/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,29 +262,25 @@ def ConstantData(
*,
dims: Optional[Sequence[str]] = None,
coords: Optional[dict[str, Union[Sequence, np.ndarray]]] = None,
export_index_as_coords=False,
infer_dims_and_coords=False,
**kwargs,
) -> TensorConstant:
"""Alias for ``pm.Data(..., mutable=False)``.
"""Alias for ``pm.Data``.

Registers the ``value`` as a :class:`~pytensor.tensor.TensorConstant` with the model.
For more information, please reference :class:`pymc.Data`.
"""
if export_index_as_coords:
infer_dims_and_coords = export_index_as_coords
warnings.warn(
"Deprecation warning: 'export_index_as_coords; is deprecated and will be removed in future versions. Please use 'infer_dims_and_coords' instead.",
DeprecationWarning,
)
warnings.warn(
"ConstantData is deprecated. All Data variables are now mutable. Use Data instead.",
FutureWarning,
)

var = Data(
name,
value,
dims=dims,
coords=coords,
infer_dims_and_coords=infer_dims_and_coords,
mutable=False,
**kwargs,
)
return cast(TensorConstant, var)
Expand All @@ -296,29 +292,25 @@ def MutableData(
*,
dims: Optional[Sequence[str]] = None,
coords: Optional[dict[str, Union[Sequence, np.ndarray]]] = None,
export_index_as_coords=False,
infer_dims_and_coords=False,
**kwargs,
) -> SharedVariable:
"""Alias for ``pm.Data(..., mutable=True)``.
"""Alias for ``pm.Data``.

Registers the ``value`` as a :class:`~pytensor.compile.sharedvalue.SharedVariable`
with the model. For more information, please reference :class:`pymc.Data`.
"""
if export_index_as_coords:
infer_dims_and_coords = export_index_as_coords
warnings.warn(
"Deprecation warning: 'export_index_as_coords; is deprecated and will be removed in future versions. Please use 'infer_dims_and_coords' instead.",
DeprecationWarning,
)
warnings.warn(
"MutableData is deprecated. All Data variables are now mutable. Use Data instead.",
FutureWarning,
)

var = Data(
name,
value,
dims=dims,
coords=coords,
infer_dims_and_coords=infer_dims_and_coords,
mutable=True,
**kwargs,
)
return cast(SharedVariable, var)
Expand All @@ -330,7 +322,6 @@ def Data(
*,
dims: Optional[Sequence[str]] = None,
coords: Optional[dict[str, Union[Sequence, np.ndarray]]] = None,
export_index_as_coords=False,
infer_dims_and_coords=False,
mutable: Optional[bool] = None,
**kwargs,
Expand Down Expand Up @@ -373,15 +364,6 @@ def Data(
infer_dims_and_coords : bool, default=False
If True, the ``Data`` container will try to infer what the coordinates
and dimension names should be if there is an index in ``value``.
mutable : bool, optional
Switches between creating a :class:`~pytensor.compile.sharedvalue.SharedVariable`
(``mutable=True``) vs. creating a :class:`~pytensor.tensor.TensorConstant`
(``mutable=False``).
Consider using :class:`pymc.ConstantData` or :class:`pymc.MutableData` as less
verbose alternatives to ``pm.Data(..., mutable=...)``.
If this parameter is not specified, the value it takes will depend on the
version of the package. Since ``v4.1.0`` the default value is
``mutable=False``, with previous versions having ``mutable=True``.
**kwargs : dict, optional
Extra arguments passed to :func:`pytensor.shared`.

Expand All @@ -394,7 +376,7 @@ def Data(
>>> observed_data = [mu + np.random.randn(20) for mu in true_mu]

>>> with pm.Model() as model:
... data = pm.MutableData('data', observed_data[0])
... data = pm.Data('data', observed_data[0])
... mu = pm.Normal('mu', 0, 10)
... pm.Normal('y', mu=mu, sigma=1, observed=data)

Expand Down Expand Up @@ -430,19 +412,12 @@ def Data(
"Pass them directly to `observed` if you want to trigger auto-imputation"
)

if mutable is None:
if mutable is not None:
warnings.warn(
"The `mutable` kwarg was not specified. Before v4.1.0 it defaulted to `pm.Data(mutable=True)`,"
" which is equivalent to using `pm.MutableData()`."
" In v4.1.0 the default changed to `pm.Data(mutable=False)`, equivalent to `pm.ConstantData`."
" Use `pm.ConstantData`/`pm.MutableData` or pass `pm.Data(..., mutable=False/True)` to avoid this warning.",
UserWarning,
"Data is now always mutable. Specifying the `mutable` kwarg will raise an error in a future release",
FutureWarning,
)
mutable = False
if mutable:
x = pytensor.shared(arr, name, **kwargs)
else:
x = pt.as_tensor_variable(arr, name, **kwargs)
x = pytensor.shared(arr, name, **kwargs)

if isinstance(dims, str):
dims = (dims,)
Expand All @@ -453,24 +428,11 @@ def Data(
expected=x.ndim,
)

# Optionally infer coords and dims from the input value.
if export_index_as_coords:
infer_dims_and_coords = export_index_as_coords
warnings.warn(
"Deprecation warning: 'export_index_as_coords; is deprecated and will be removed in future versions. Please use 'infer_dims_and_coords' instead.",
DeprecationWarning,
)

if infer_dims_and_coords:
coords, dims = determine_coords(model, value, dims)

if dims:
if not mutable:
# Use the dimension lengths from before it was tensorified.
# These can still be tensors, but in many cases they are numeric.
xshape = np.shape(arr)
else:
xshape = x.shape
xshape = x.shape
# Register new dimension lengths
for d, dname in enumerate(dims):
if dname not in model.dim_lengths:
Expand All @@ -479,7 +441,6 @@ def Data(
# Note: Coordinate values can't be taken from
# the value, because it could be N-dimensional.
values=coords.get(dname, None),
mutable=mutable,
length=xshape[d],
)

Expand Down
4 changes: 2 additions & 2 deletions pymc/gp/hsgp_approx.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def prior_linearized(self, Xs: TensorLike):
eigenfunctions `phi`, and the square root of the power spectral density.

Correct results when using `prior_linearized` in tandem with `pm.set_data` and
`pm.MutableData` require two conditions. First, one must specify `L` instead of `c` when
`pm.Data` require two conditions. First, one must specify `L` instead of `c` when
the GP is constructed. If not, a RuntimeError is raised. Second, the `Xs` needs to be
zero-centered, so its mean must be subtracted. An example is given below.

Expand Down Expand Up @@ -290,7 +290,7 @@ def prior_linearized(self, Xs: TensorLike):
# First calculate the mean, then make X a shared variable, then subtract the mean.
# When X is mutated later, the correct mean will be subtracted.
X_mean = np.mean(X, axis=0)
X = pm.MutableData("X", X)
X = pm.Data("X", X)
Xs = X - X_mean

# Pass the zero-subtracted Xs in to the GP
Expand Down
76 changes: 42 additions & 34 deletions pymc/model/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,12 @@ def __init__(
self.name = self._validate_name(name)
self.check_bounds = check_bounds

if coords_mutable is not None:
warnings.warn(
"All coords are now mutable by default. coords_mutable will be removed in a future release.",
FutureWarning,
)

if self.parent is not None:
self.named_vars = treedict(parent=self.parent.named_vars)
self.named_vars_to_dims = treedict(parent=self.parent.named_vars_to_dims)
Expand Down Expand Up @@ -612,6 +618,7 @@ def compile_logp(
vars: Optional[Union[Variable, Sequence[Variable]]] = None,
jacobian: bool = True,
sum: bool = True,
**compile_kwargs,
) -> PointFunc:
"""Compiled log probability density function.

Expand All @@ -626,12 +633,13 @@ def compile_logp(
Whether to sum all logp terms or return elemwise logp for each variable.
Defaults to True.
"""
return self.compile_fn(self.logp(vars=vars, jacobian=jacobian, sum=sum))
return self.compile_fn(self.logp(vars=vars, jacobian=jacobian, sum=sum), **compile_kwargs)

def compile_dlogp(
self,
vars: Optional[Union[Variable, Sequence[Variable]]] = None,
jacobian: bool = True,
**compile_kwargs,
) -> PointFunc:
"""Compiled log probability density gradient function.

Expand All @@ -643,12 +651,13 @@ def compile_dlogp(
jacobian : bool
Whether to include jacobian terms in logprob graph. Defaults to True.
"""
return self.compile_fn(self.dlogp(vars=vars, jacobian=jacobian))
return self.compile_fn(self.dlogp(vars=vars, jacobian=jacobian), **compile_kwargs)

def compile_d2logp(
self,
vars: Optional[Union[Variable, Sequence[Variable]]] = None,
jacobian: bool = True,
**compile_kwargs,
) -> PointFunc:
"""Compiled log probability density hessian function.

Expand All @@ -660,7 +669,7 @@ def compile_d2logp(
jacobian : bool
Whether to include jacobian terms in logprob graph. Defaults to True.
"""
return self.compile_fn(self.d2logp(vars=vars, jacobian=jacobian))
return self.compile_fn(self.d2logp(vars=vars, jacobian=jacobian), **compile_kwargs)

def logp(
self,
Expand Down Expand Up @@ -948,7 +957,7 @@ def add_coord(
self,
name: str,
values: Optional[Sequence] = None,
mutable: bool = False,
mutable: Optional[bool] = None,
*,
length: Optional[Union[int, Variable]] = None,
):
Expand All @@ -969,6 +978,12 @@ def add_coord(
A scalar of the dimensions length.
Defaults to ``pytensor.tensor.constant(len(values))``.
"""
if mutable is not None:
warnings.warn(
"Coords are now always mutable. Specifying `mutable` will raise an error in a future release",
FutureWarning,
)

if name in {"draw", "chain", "__sample__"}:
raise ValueError(
"Dimensions can not be named `draw`, `chain` or `__sample__`, "
Expand All @@ -992,10 +1007,7 @@ def add_coord(
if length is None:
length = len(values)
if not isinstance(length, Variable):
if mutable:
length = pytensor.shared(length, name=name)
else:
length = pytensor.tensor.constant(length)
length = pytensor.shared(length, name=name)
assert length.type.ndim == 0
self._dim_lengths[name] = length
self._coords[name] = values
Expand Down Expand Up @@ -1026,8 +1038,6 @@ def set_dim(self, name: str, new_length: int, coord_values: Optional[Sequence] =
coord_values : array_like, optional
Optional sequence of coordinate values.
"""
if not isinstance(self.dim_lengths[name], SharedVariable):
raise ValueError(f"The dimension '{name}' is immutable.")
if coord_values is None and self.coords.get(name, None) is not None:
raise ValueError(
f"'{name}' has coord values. Pass `set_dim(..., coord_values=...)` to update them."
Expand Down Expand Up @@ -1076,7 +1086,7 @@ def set_data(
):
"""Changes the values of a data variable in the model.

In contrast to pm.MutableData().set_value, this method can also
In contrast to pm.Data().set_value, this method can also
update the corresponding coordinates.

Parameters
Expand All @@ -1094,7 +1104,7 @@ def set_data(
if not isinstance(shared_object, SharedVariable):
raise TypeError(
f"The variable `{name}` must be a `SharedVariable`"
" (created through `pm.MutableData()` or `pm.Data(mutable=True)`) to allow updating. "
" (created through `pm.Data()` or `pm.Data(mutable=True)`) to allow updating. "
f"The current type is: {type(shared_object)}"
)

Expand All @@ -1119,7 +1129,7 @@ def set_data(
length_changed = new_length != old_length

# Reject resizing if we already know that it would create shape problems.
# NOTE: If there are multiple pm.MutableData containers sharing this dim, but the user only
# NOTE: If there are multiple pm.Data containers sharing this dim, but the user only
# changes the values for one of them, they will run into shape problems nonetheless.
if length_changed:
if original_coords is not None:
Expand Down Expand Up @@ -1393,24 +1403,22 @@ def create_value_var(
else:
transform = _default_transform(rv_var.owner.op, rv_var)

if value_var is not None:
if transform is not None:
raise ValueError("Cannot use transform when providing a pre-defined value_var")
elif transform is None:
# Create value variable with the same type as the RV
value_var = rv_var.type()
value_var.name = rv_var.name
if pytensor.config.compute_test_value != "off":
value_var.tag.test_value = rv_var.tag.test_value
else:
# Create value variable with the same type as the transformed RV
value_var = transform.forward(rv_var, *rv_var.owner.inputs).type()
value_var.name = f"{rv_var.name}_{transform.name}__"
value_var.tag.transform = transform
if pytensor.config.compute_test_value != "off":
value_var.tag.test_value = transform.forward(
rv_var, *rv_var.owner.inputs
).tag.test_value
if value_var is None:
if transform is None:
# Create value variable with the same type as the RV
value_var = rv_var.type()
value_var.name = rv_var.name
if pytensor.config.compute_test_value != "off":
value_var.tag.test_value = rv_var.tag.test_value
else:
# Create value variable with the same type as the transformed RV
value_var = transform.forward(rv_var, *rv_var.owner.inputs).type()
value_var.name = f"{rv_var.name}_{transform.name}__"
value_var.tag.transform = transform
if pytensor.config.compute_test_value != "off":
value_var.tag.test_value = transform.forward(
rv_var, *rv_var.owner.inputs
).tag.test_value

_add_future_warning_tag(value_var)
rv_var.tag.value_var = value_var
Expand Down Expand Up @@ -1981,8 +1989,8 @@ def set_data(new_data, model=None, *, coords=None):
import pymc as pm

with pm.Model() as model:
x = pm.MutableData('x', [1., 2., 3.])
y = pm.MutableData('y', [1., 2., 3.])
x = pm.Data('x', [1., 2., 3.])
y = pm.Data('y', [1., 2., 3.])
beta = pm.Normal('beta', 0, 1)
obs = pm.Normal('obs', x * beta, 1, observed=y, shape=x.shape)
idata = pm.sample()
Expand Down Expand Up @@ -2011,7 +2019,7 @@ def set_data(new_data, model=None, *, coords=None):
data = rng.normal(loc=1.0, scale=2.0, size=100)

with pm.Model() as model:
y = pm.MutableData('y', data)
y = pm.Data('y', data)
theta = pm.Normal('theta', mu=0.0, sigma=10.0)
obs = pm.Normal('obs', theta, 2.0, observed=y, shape=y.shape)
idata = pm.sample()
Expand Down
Loading
Loading