Botorch closures #1439

Closed · wants to merge 1 commit

230 changes: 148 additions & 82 deletions botorch/fit.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion botorch/models/pairwise_gp.py
@@ -818,7 +818,7 @@ def forward(self, datapoints: Tensor) -> MultivariateNormal:

# self.utility might be None if an exception was raised and _update
# failed to be called during hyperparameter optimization
-# procedures (e.g., fit_gpytorch_scipy)
+# procedures (e.g., fit_gpytorch_mll_scipy)
if self.utility is None:
    self._update(transformed_dp)

18 changes: 18 additions & 0 deletions botorch/optim/__init__.py
@@ -4,6 +4,17 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from botorch.optim.closures import (
    ForwardBackwardClosure,
    get_loss_closure,
    get_loss_closure_with_grads,
)
from botorch.optim.core import (
    OptimizationResult,
    OptimizationStatus,
    scipy_minimize,
    torch_minimize,
)
from botorch.optim.initializers import initialize_q_batch, initialize_q_batch_nonneg
from botorch.optim.numpy_converter import module_to_array, set_params_with_array
from botorch.optim.optimize import (
@@ -18,15 +29,22 @@


__all__ = [
    "ForwardBackwardClosure",
    "get_loss_closure",
    "get_loss_closure_with_grads",
    "gen_batch_initial_conditions",
    "initialize_q_batch",
    "initialize_q_batch_nonneg",
    "OptimizationResult",
    "OptimizationStatus",
    "optimize_acqf",
    "optimize_acqf_cyclic",
    "optimize_acqf_discrete",
    "optimize_acqf_discrete_local_search",
    "optimize_acqf_mixed",
    "module_to_array",
    "scipy_minimize",
    "set_params_with_array",
    "torch_minimize",
    "ExpMAStoppingCriterion",
]
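
The expanded `botorch.optim` exports make the closure-based optimization path usable end to end. Below is a minimal sketch of that path on a toy objective; it assumes `scipy_minimize(closure, parameters)` accepts any closure returning a `(value, grads)` pair, as the types in this diff suggest — `botorch/optim/core.py` itself is not rendered here, so the exact signature is an assumption.

import torch
from botorch.optim import ForwardBackwardClosure, scipy_minimize

# Toy objective (illustrative only): minimize sum((x - 1)^2) over one tensor.
x = torch.zeros(3, dtype=torch.float64, requires_grad=True)
parameters = {"x": x}

def forward() -> torch.Tensor:
    return ((x - 1.0) ** 2).sum()

# Fuses the forward pass, reduction, and backward pass into a single call
# that returns (loss, gradients).
closure = ForwardBackwardClosure(forward=forward, parameters=parameters)
result = scipy_minimize(closure, parameters)  # x should converge to all ones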
22 changes: 22 additions & 0 deletions botorch/optim/closures/__init__.py
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from botorch.optim.closures.core import (
    ForwardBackwardClosure,
    NdarrayOptimizationClosure,
)
from botorch.optim.closures.model_closures import (
    get_loss_closure,
    get_loss_closure_with_grads,
)


__all__ = [
    "ForwardBackwardClosure",
    "get_loss_closure",
    "get_loss_closure_with_grads",
    "NdarrayOptimizationClosure",
]
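
For model fitting, `get_loss_closure_with_grads` is the entry point most callers would use. A hedged sketch of fitting a GP this way follows; the `SingleTaskGP`/`ExactMarginalLogLikelihood` setup is illustrative, and the `get_loss_closure_with_grads(mll, parameters=...)` signature is inferred from the exports above, since `model_closures.py` is not shown in this diff.

import torch
from botorch.models import SingleTaskGP
from botorch.optim import scipy_minimize
from botorch.optim.closures import get_loss_closure_with_grads
from gpytorch.mlls import ExactMarginalLogLikelihood

# Illustrative training data and model.
train_X = torch.rand(10, 2, dtype=torch.float64)
train_Y = train_X.sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(model.likelihood, model)

params = {n: p for n, p in mll.named_parameters() if p.requires_grad}
# Closure that evaluates the (negative) marginal log likelihood and returns
# it together with the gradients of `params`.
closure = get_loss_closure_with_grads(mll, parameters=params)
result = scipy_minimize(closure, params)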
184 changes: 184 additions & 0 deletions botorch/optim/closures/core.py
@@ -0,0 +1,184 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

"""Core methods for building closures in torch and interfacing with numpy."""

from __future__ import annotations

from functools import partial
from typing import Any, Callable, Dict, Optional, Sequence, Tuple

import torch
from botorch.optim.utils import (
    _handle_numerical_errors,
    get_tensors_as_ndarray_1d,
    set_tensors_from_ndarray_1d,
)
from botorch.optim.utils.numpy_utils import as_ndarray
from botorch.utils.context_managers import zero_grad_ctx
from numpy import float64 as np_float64, full as np_full, ndarray, zeros as np_zeros
from torch import Tensor


class ForwardBackwardClosure:
r"""Wrapper for fused forward and backward closures."""

def __init__(
self,
forward: Callable[[], Tensor],
parameters: Dict[str, Tensor],
backward: Callable[[Tensor], None] = Tensor.backward,
reducer: Optional[Callable[[Tensor], Tensor]] = torch.sum,
callback: Optional[Callable[[Tensor, Sequence[Optional[Tensor]]], None]] = None,
context_manager: Callable = None, # pyre-ignore [9]
) -> None:
r"""Initializes a ForwardBackwardClosure instance.

Args:
closure: Callable that returns a tensor.
parameters: A dictionary of tensors whose `grad` fields are to be returned.
backward: Callable that takes the (reduced) output of `forward` and sets the
`grad` attributes of tensors in `parameters`.
reducer: Optional callable used to reduce the output of the forward pass.
callback: Optional callable that takes the reduced output of `forward` and
the gradients of `parameters` as positional arguments.
context_manager: A ContextManager used to wrap each forward-backward call.
When passed as `None`, `context_manager` defaults to a `zero_grad_ctx`
that zeroes the gradients of `parameters` upon entry.
"""
if context_manager is None:
context_manager = partial(zero_grad_ctx, parameters)

self.forward = forward
self.backward = backward
self.parameters = parameters
self.reducer = reducer
self.callback = callback
self.context_manager = context_manager

def __call__(self, **kwargs: Any) -> Tuple[Tensor, Tuple[Optional[Tensor], ...]]:
with self.context_manager():
values = self.forward(**kwargs)
value = values if self.reducer is None else self.reducer(values)
self.backward(value)

grads = tuple(param.grad for param in self.parameters.values())
if self.callback:
self.callback(value, grads)

return value, grads


class NdarrayOptimizationClosure:
r"""Adds stateful behavior and a numpy.ndarray-typed API to a closure with an
expected return type Tuple[Tensor, Union[Tensor, Sequence[Optional[Tensor]]]]."""

def __init__(
self,
closure: Callable[[], Tuple[Tensor, Sequence[Optional[Tensor]]]],
parameters: Dict[str, Tensor],
as_array: Callable[[Tensor], ndarray] = None, # pyre-ignore [9]
as_tensor: Callable[[ndarray], Tensor] = torch.as_tensor,
get_state: Callable[[], ndarray] = None, # pyre-ignore [9]
set_state: Callable[[ndarray], None] = None, # pyre-ignore [9]
fill_value: float = 0.0,
persistent: bool = True,
) -> None:
r"""Initializes a NdarrayOptimizationClosure instance.

Args:
closure: A ForwardBackwardClosure instance.
parameters: A dictionary of tensors representing the closure's state.
Expected to correspond with the first `len(parameters)` optional
gradient tensors returned by `closure`.
as_array: Callable used to convert tensors to ndarrays.
as_tensor: Callable used to convert ndarrays to tensors.
get_state: Callable that returns the closure's state as an ndarray. When
passed as `None`, defaults to calling `get_tensors_as_ndarray_1d`
on `closure.parameters` while passing `as_array` (if given by the user).
set_state: Callable that takes a 1-dimensional ndarray and sets the
closure's state. When passed as `None`, `set_state` defaults to
calling `set_tensors_from_ndarray_1d` with `closure.parameters` and
a given ndarray while passing `as_tensor`.
fill_value: Fill value for parameters whose gradients are None. In most
cases, `fill_value` should either be zero or NaN.
persistent: Boolean specifying whether an ndarray should be retained
as a persistent buffer for gradients.
"""
if get_state is None:
# Note: Numpy supports copying data between ndarrays with different dtypes.
# Hence, our default behavior need not coerce the ndarray represenations of
# tensors in `parameters` to float64 when copying over data.
_as_array = as_ndarray if as_array is None else as_array
get_state = partial(
get_tensors_as_ndarray_1d, parameters, as_array=_as_array
)

if as_array is None: # per the note, do this after resolving `get_state`
as_array = partial(as_ndarray, dtype=np_float64)

if set_state is None:
set_state = partial(
set_tensors_from_ndarray_1d, parameters, as_tensor=as_tensor
)

self.closure = closure
self.parameters = parameters

self.as_array = as_ndarray
self.as_tensor = as_tensor
self._get_state = get_state
self._set_state = set_state

self.fill_value = fill_value
self.persistent = persistent
self._gradient_ndarray: Optional[ndarray] = None

def __call__(
self, state: Optional[ndarray] = None, **kwargs: Any
) -> Tuple[ndarray, ndarray]:
if state is not None:
self.state = state

try:
value_tensor, grad_tensors = self.closure(**kwargs)
value = self.as_array(value_tensor)
grads = self._get_gradient_ndarray(fill_value=self.fill_value)
index = 0
for param, grad in zip(self.parameters.values(), grad_tensors):
size = param.numel()
if grad is not None:
grads[index : index + size] = self.as_array(grad.view(-1))
index += size
except RuntimeError as e:
value, grads = _handle_numerical_errors(error=e, x=self.state)

return value, grads

@property
def state(self) -> ndarray:
return self._get_state()

@state.setter
def state(self, state: ndarray) -> None:
self._set_state(state)

def _get_gradient_ndarray(self, fill_value: Optional[float] = None) -> ndarray:
if self.persistent and self._gradient_ndarray is not None:
if fill_value is not None:
self._gradient_ndarray.fill(fill_value)
return self._gradient_ndarray

size = sum(param.numel() for param in self.parameters.values())
array = (
np_zeros(size)
if fill_value is None or fill_value == 0.0
else np_full(size, fill_value)
)
if self.persistent:
self._gradient_ndarray = array

return array
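
To make the numpy interop concrete, here is a sketch of driving `NdarrayOptimizationClosure` with `scipy.optimize.minimize` directly, which is presumably what `scipy_minimize` does internally; the toy objective and parameter names are illustrative only.

import torch
from botorch.optim.closures import ForwardBackwardClosure, NdarrayOptimizationClosure
from scipy.optimize import minimize

# Toy objective (illustrative only): minimize sum((x - 2)^2).
x = torch.zeros(4, dtype=torch.float64, requires_grad=True)
params = {"x": x}
inner = ForwardBackwardClosure(lambda: ((x - 2.0) ** 2).sum(), params)
closure = NdarrayOptimizationClosure(inner, parameters=params)

result = minimize(
    fun=closure,       # returns (value, grads) as float64 ndarrays
    x0=closure.state,  # current parameters flattened into a 1-d ndarray
    jac=True,          # gradients are supplied alongside the value
    method="L-BFGS-B",
)
closure.state = result.x  # write the optimum back into the torch tensors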