From b5f241a35c61eae7bf569a6d45f784b0a344f57f Mon Sep 17 00:00:00 2001 From: "Kevin J. Sung" Date: Fri, 26 Apr 2024 15:56:56 -0400 Subject: [PATCH 1/5] add stochastic reconfiguration --- python/ffsim/optimize/__init__.py | 4 + .../optimize/stochastic_reconfiguration.py | 225 ++++++++++++++++++ .../stochastic_reconfiguration_test.py | 118 +++++++++ 3 files changed, 347 insertions(+) create mode 100644 python/ffsim/optimize/stochastic_reconfiguration.py create mode 100644 tests/python/optimize/stochastic_reconfiguration_test.py diff --git a/python/ffsim/optimize/__init__.py b/python/ffsim/optimize/__init__.py index 594e4191a..8cb5b45c5 100644 --- a/python/ffsim/optimize/__init__.py +++ b/python/ffsim/optimize/__init__.py @@ -11,7 +11,11 @@ """Optimization algorithms.""" from ffsim.optimize.linear_method import minimize_linear_method +from ffsim.optimize.stochastic_reconfiguration import ( + minimize_stochastic_reconfiguration, +) __all__ = [ "minimize_linear_method", + "minimize_stochastic_reconfiguration", ] diff --git a/python/ffsim/optimize/stochastic_reconfiguration.py b/python/ffsim/optimize/stochastic_reconfiguration.py new file mode 100644 index 000000000..4253e19f0 --- /dev/null +++ b/python/ffsim/optimize/stochastic_reconfiguration.py @@ -0,0 +1,225 @@ +# (C) Copyright IBM 2024. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. +# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Stochastic reconfiguration for optimization of a variational ansatz.""" + +from __future__ import annotations + +import math +from typing import Any, Callable + +import numpy as np +import scipy.linalg +from scipy.optimize import OptimizeResult, minimize +from scipy.sparse.linalg import LinearOperator + +from ffsim.optimize._util import ( + WrappedCallable, + WrappedLinearOperator, + jacobian_finite_diff, + orthogonalize_columns, +) + + +def minimize_stochastic_reconfiguration( + params_to_vec: Callable[[np.ndarray], np.ndarray], + hamiltonian: LinearOperator, + x0: np.ndarray, + *, + maxiter: int = 1000, + variation: float = 1.0, + cond: float = 1e-8, + epsilon: float = 1e-8, + gtol: float = 1e-5, + optimize_hyperparameters: bool = True, + optimize_hyperparameters_args: dict | None = None, + callback: Callable[[OptimizeResult], Any] | None = None, +) -> OptimizeResult: + """Minimize the energy of a variational ansatz using stochastic reconfiguration. + + References: + + - `Generalized Lanczos algorithm for variational quantum Monte Carlo`_ + + Args: + params_to_vec: Function representing the wavefunction ansatz. It takes as input + a vector of real-valued parameters and outputs the state vector represented + by those parameters. + hamiltonian: The Hamiltonian representing the energy to be minimized. + x0: Initial guess for the parameters. + maxiter: Maximum number of optimization iterations to perform. + variation: Hyperparameter controlling the size of parameter variations + used in the linear expansion of the wavefunction. Its value must be + strictly between 0 and 1. A larger value results in larger variations. + cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to + solve for the parameter update. 
+ epsilon: Increment to use for approximating the gradient using + finite difference. + gtol: Convergence threshold for the norm of the projected gradient. + optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in + each iteration. Optimizing the hyperparameter incurs more function and + energy evaluations in each iteration, but may speed up convergence, leading + to fewer iterations overall. The optimization is performed using + `scipy.optimize.minimize`_. + optimize_hyperparameters_args: Arguments to use when calling + `scipy.optimize.minimize`_ to optimize the hyperparameters. The call is + constructed as + + .. code:: + + scipy.optimize.minimize(f, x0, **scipy_optimize_minimize_args) + + If not specified, the default value of `dict(method="L-BFGS-B")` will be + used. + + callback: A callable called after each iteration. It is called with the + signature + + .. code:: + + callback(intermediate_result: OptimizeResult) + + where ``intermediate_result`` is a `scipy.optimize.OptimizeResult`_ + with attributes ``x`` and ``fun``, the present values of the parameter + vector and objective function. For all iterations except for the last, + it also contains the ``jac`` attribute holding the present value of the + gradient, as well as ``regularization`` and ``variation`` attributes + holding the present values of the `regularization` and `variation` + hyperparameters. + + Returns: + The optimization result represented as a `scipy.optimize.OptimizeResult`_ + object. Note the following definitions of selected attributes: + + - ``x``: The final parameters of the optimization. + - ``fun``: The energy associated with the final parameters. That is, the + expectation value of the Hamiltonian with respect to the state vector + corresponding to the parameters. + - ``nfev``: The number of times the ``params_to_vec`` function was called. + - ``nlinop``: The number of times the ``hamiltonian`` linear operator was + applied to a vector. + + .. _Generalized Lanczos algorithm for variational quantum Monte Carlo: https://doi.org/10.1103/PhysRevB.64.024512 + .. _scipy.optimize.OptimizeResult: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.OptimizeResult.html#scipy.optimize.OptimizeResult + .. _scipy.linalg.lstsq: https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.lstsq.html + .. _scipy.optimize.minimize: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html + """ # noqa: E501 + if variation <= 0: + raise ValueError(f"variation must be positive. Got {variation}.") + if maxiter < 1: + raise ValueError(f"maxiter must be at least 1. 
Got {maxiter}.") + + if optimize_hyperparameters_args is None: + optimize_hyperparameters_args = dict(method="L-BFGS-B") + + variation_param = math.sqrt(variation) + params = x0.copy() + converged = False + intermediate_result = OptimizeResult( + x=None, fun=None, jac=None, nfev=0, njev=0, nit=0, nlinop=0 + ) + + params_to_vec = WrappedCallable(params_to_vec, intermediate_result) + hamiltonian = WrappedLinearOperator(hamiltonian, intermediate_result) + + for i in range(maxiter): + vec = params_to_vec(params) + jac = jacobian_finite_diff(params_to_vec, params, len(vec), epsilon=epsilon) + jac = orthogonalize_columns(jac, vec) + + energy, grad, overlap_mat = _sr_matrices(vec, jac, hamiltonian) + intermediate_result.njev += 1 + + if i > 0 and callback is not None: + intermediate_result.x = params + intermediate_result.fun = energy + intermediate_result.jac = grad + intermediate_result.overlap_mat = overlap_mat + intermediate_result.variation = variation + callback(intermediate_result) + + if np.linalg.norm(grad) < gtol: + converged = True + break + + if optimize_hyperparameters: + + def f(x: np.ndarray) -> float: + (variation_param,) = x + variation = variation_param**2 + param_update = _get_param_update( + grad, + overlap_mat, + variation, + cond=cond, + ) + vec = params_to_vec(params + param_update) + return np.vdot(vec, hamiltonian @ vec).real + + result = minimize( + f, + x0=[variation_param], + **optimize_hyperparameters_args, + ) + (variation_param,) = result.x + variation = variation_param**2 + + param_update = _get_param_update( + grad, + overlap_mat, + variation, + cond=cond, + ) + params = params + param_update + intermediate_result.nit += 1 + + vec = params_to_vec(params) + energy = np.vdot(vec, hamiltonian @ vec).real + + intermediate_result.x = params + intermediate_result.fun = energy + del intermediate_result.jac + if callback is not None: + callback(intermediate_result) + + if converged: + success = True + message = "Convergence: Norm of projected gradient <= gtol." + else: + success = False + message = "Stop: Total number of iterations reached limit." + + return OptimizeResult( + x=params, + success=success, + message=message, + fun=energy, + jac=grad, + nfev=intermediate_result.nfev, + njev=intermediate_result.njev, + nlinop=intermediate_result.nlinop, + nit=intermediate_result.nit, + ) + + +def _sr_matrices( + vec: np.ndarray, jac: np.ndarray, hamiltonian: LinearOperator +) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + energy = np.vdot(vec, hamiltonian @ vec) + grad = vec.conj() @ (hamiltonian @ jac) + overlap_mat = jac.T.conj() @ jac + return energy.real, 2 * grad.real, overlap_mat.real + + +def _get_param_update( + grad: np.ndarray, overlap_mat: np.ndarray, variation: float, cond: float +) -> np.ndarray: + x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond) + return x diff --git a/tests/python/optimize/stochastic_reconfiguration_test.py b/tests/python/optimize/stochastic_reconfiguration_test.py new file mode 100644 index 000000000..f5b57c88f --- /dev/null +++ b/tests/python/optimize/stochastic_reconfiguration_test.py @@ -0,0 +1,118 @@ +# (C) Copyright IBM 2024. +# +# This code is licensed under the Apache License, Version 2.0. You may +# obtain a copy of this license in the LICENSE.txt file in the root directory +# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0. 
+# +# Any modifications or derivative works of this code must retain this +# copyright notice, and modified files need to carry a notice indicating +# that they have been altered from the originals. + +"""Tests for stochastic reconfiguration optimization algorithm.""" + +from __future__ import annotations + +from collections import defaultdict + +import numpy as np +import pyscf +import pytest + +import ffsim + + +def test_minimize_stochastic_reconfiguration(): + # Build an H2 molecule + mol = pyscf.gto.Mole() + mol.build( + atom=[["H", (0, 0, 0)], ["H", (0, 0, 1.8)]], + basis="sto-6g", + ) + hartree_fock = pyscf.scf.RHF(mol) + hartree_fock.kernel() + + # Initialize parameters + n_reps = 2 + n_params = ffsim.UCJOperator.n_params(hartree_fock.mol.nao_nr(), n_reps) + rng = np.random.default_rng(1804) + x0 = rng.uniform(-10, 10, size=n_params) + + # Get molecular data and molecular Hamiltonian (one- and two-body tensors) + mol_data = ffsim.MolecularData.from_scf(hartree_fock) + norb = mol_data.norb + nelec = mol_data.nelec + mol_hamiltonian = mol_data.hamiltonian + hamiltonian = ffsim.linear_operator(mol_hamiltonian, norb=norb, nelec=nelec) + reference_state = ffsim.hartree_fock_state(norb, nelec) + + def params_to_vec(x: np.ndarray): + operator = ffsim.UCJOperator.from_parameters(x, norb=norb, n_reps=n_reps) + return ffsim.apply_unitary(reference_state, operator, norb=norb, nelec=nelec) + + def energy(x: np.ndarray): + vec = params_to_vec(x) + return np.real(np.vdot(vec, hamiltonian @ vec)) + + info = defaultdict(list) + + def callback(intermediate_result): + info["x"].append(intermediate_result.x) + info["fun"].append(intermediate_result.fun) + np.testing.assert_allclose( + energy(intermediate_result.x), intermediate_result.fun + ) + if hasattr(intermediate_result, "jac"): + info["jac"].append(intermediate_result.jac) + if hasattr(intermediate_result, "variation"): + info["variation"].append(intermediate_result.variation) + + # default optimization + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, x0=x0, hamiltonian=hamiltonian, callback=callback + ) + np.testing.assert_allclose(energy(result.x), result.fun) + np.testing.assert_allclose(result.fun, -0.970773) + np.testing.assert_allclose(info["fun"][0], -0.853501, atol=1e-5) + np.testing.assert_allclose(info["fun"][-1], -0.970773, atol=1e-5) + for params, fun in zip(info["x"], info["fun"]): + np.testing.assert_allclose(energy(params), fun) + assert result.nit <= 20 + assert result.nit < result.nlinop < result.nfev + + # optimization without optimizing hyperparameters + info = defaultdict(list) + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, + x0=x0, + hamiltonian=hamiltonian, + optimize_hyperparameters=False, + callback=callback, + ) + np.testing.assert_allclose(energy(result.x), result.fun) + np.testing.assert_allclose(result.fun, -0.970773) + for params, fun in zip(info["x"], info["fun"]): + np.testing.assert_allclose(energy(params), fun) + assert result.nit <= 30 + assert result.nit < result.nlinop < result.nfev + assert set(info["variation"]) == {1.0} + + # optimization with maxiter + info = defaultdict(list) + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, hamiltonian=hamiltonian, x0=x0, maxiter=3, callback=callback + ) + assert result.nit == 3 + assert len(info["x"]) == 3 + assert len(info["fun"]) == 3 + assert len(info["jac"]) == 2 + np.testing.assert_allclose(energy(result.x), result.fun) + + # test raising errors + with 
pytest.raises(ValueError, match="variation"): + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, x0=x0, hamiltonian=hamiltonian, variation=-0.1 + ) + with pytest.raises(ValueError, match="maxiter"): + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, x0=x0, hamiltonian=hamiltonian, maxiter=0 + ) From e2de1adc1127fbb7047d90a77a91406e1f7ee461 Mon Sep 17 00:00:00 2001 From: "Kevin J. Sung" Date: Fri, 10 May 2024 19:21:26 -0500 Subject: [PATCH 2/5] change cond default value from 1e-8 to 1e-4 --- python/ffsim/optimize/stochastic_reconfiguration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ffsim/optimize/stochastic_reconfiguration.py b/python/ffsim/optimize/stochastic_reconfiguration.py index 4253e19f0..30a0763bb 100644 --- a/python/ffsim/optimize/stochastic_reconfiguration.py +++ b/python/ffsim/optimize/stochastic_reconfiguration.py @@ -35,7 +35,7 @@ def minimize_stochastic_reconfiguration( *, maxiter: int = 1000, variation: float = 1.0, - cond: float = 1e-8, + cond: float = 1e-4, epsilon: float = 1e-8, gtol: float = 1e-5, optimize_hyperparameters: bool = True, From 9c87e696dca29892c892ff083c303f4f36f425e6 Mon Sep 17 00:00:00 2001 From: "Kevin J. Sung" Date: Sat, 11 May 2024 22:13:08 -0500 Subject: [PATCH 3/5] rename optimize hyperparameters arguments --- .../optimize/stochastic_reconfiguration.py | 36 +++++++++---------- .../stochastic_reconfiguration_test.py | 2 +- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/python/ffsim/optimize/stochastic_reconfiguration.py b/python/ffsim/optimize/stochastic_reconfiguration.py index 30a0763bb..c07370b57 100644 --- a/python/ffsim/optimize/stochastic_reconfiguration.py +++ b/python/ffsim/optimize/stochastic_reconfiguration.py @@ -34,12 +34,12 @@ def minimize_stochastic_reconfiguration( x0: np.ndarray, *, maxiter: int = 1000, - variation: float = 1.0, cond: float = 1e-4, epsilon: float = 1e-8, gtol: float = 1e-5, - optimize_hyperparameters: bool = True, - optimize_hyperparameters_args: dict | None = None, + variation: float = 1.0, + optimize_variation: bool = True, + optimize_kwargs: dict | None = None, callback: Callable[[OptimizeResult], Any] | None = None, ) -> OptimizeResult: """Minimize the energy of a variational ansatz using stochastic reconfiguration. @@ -55,26 +55,24 @@ def minimize_stochastic_reconfiguration( hamiltonian: The Hamiltonian representing the energy to be minimized. x0: Initial guess for the parameters. maxiter: Maximum number of optimization iterations to perform. - variation: Hyperparameter controlling the size of parameter variations - used in the linear expansion of the wavefunction. Its value must be - strictly between 0 and 1. A larger value results in larger variations. cond: `cond` argument to pass to `scipy.linalg.lstsq`_, which is called to solve for the parameter update. epsilon: Increment to use for approximating the gradient using finite difference. gtol: Convergence threshold for the norm of the projected gradient. - optimize_hyperparameters: Whether to optimize the `variation` hyperparameter in - each iteration. Optimizing the hyperparameter incurs more function and - energy evaluations in each iteration, but may speed up convergence, leading - to fewer iterations overall. The optimization is performed using - `scipy.optimize.minimize`_. - optimize_hyperparameters_args: Arguments to use when calling - `scipy.optimize.minimize`_ to optimize the hyperparameters. 
The call is
-            constructed as
+        variation: Hyperparameter controlling the size of parameter variations
+            used in the linear expansion of the wavefunction. Its value must be
+            strictly between 0 and 1. A larger value results in larger variations.
+        optimize_variation; Whether to optimize the `variation` hyperparameter
+            in each iteration. Optimizing hyperparameters incurs more function and
+            energy evaluations in each iteration, but may improve convergence.
+            The optimization is performed using `scipy.optimize.minimize`_.
+        optimize_kwargs: Arguments to use when calling `scipy.optimize.minimize`_ to
+            optimize hyperparameters. The call is constructed as

             .. code::

-                scipy.optimize.minimize(f, x0, **scipy_optimize_minimize_args)
+                scipy.optimize.minimize(f, x0, **optimize_kwargs)

             If not specified, the default value of `dict(method="L-BFGS-B")` will be
             used.
@@ -116,8 +114,8 @@ def minimize_stochastic_reconfiguration(
     if maxiter < 1:
         raise ValueError(f"maxiter must be at least 1. Got {maxiter}.")

-    if optimize_hyperparameters_args is None:
-        optimize_hyperparameters_args = dict(method="L-BFGS-B")
+    if optimize_kwargs is None:
+        optimize_kwargs = dict(method="L-BFGS-B")

     variation_param = math.sqrt(variation)
     params = x0.copy()
@@ -149,7 +147,7 @@ def minimize_stochastic_reconfiguration(
             converged = True
             break

-        if optimize_hyperparameters:
+        if optimize_variation:

             def f(x: np.ndarray) -> float:
                 (variation_param,) = x
@@ -166,7 +164,7 @@ def f(x: np.ndarray) -> float:
             result = minimize(
                 f,
                 x0=[variation_param],
-                **optimize_hyperparameters_args,
+                **optimize_kwargs,
             )
             (variation_param,) = result.x
             variation = variation_param**2
diff --git a/tests/python/optimize/stochastic_reconfiguration_test.py b/tests/python/optimize/stochastic_reconfiguration_test.py
index f5b57c88f..eb84d05c2 100644
--- a/tests/python/optimize/stochastic_reconfiguration_test.py
+++ b/tests/python/optimize/stochastic_reconfiguration_test.py
@@ -85,7 +85,7 @@ def callback(intermediate_result):
         params_to_vec,
         x0=x0,
         hamiltonian=hamiltonian,
-        optimize_hyperparameters=False,
+        optimize_variation=False,
         callback=callback,
     )

From 278fc7bc33db208d3bb5abe5bfc8afdfc13e0f3f Mon Sep 17 00:00:00 2001
From: "Kevin J. Sung"
Date: Mon, 13 May 2024 20:30:33 -0500
Subject: [PATCH 4/5] fix doc

---
 python/ffsim/optimize/stochastic_reconfiguration.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/ffsim/optimize/stochastic_reconfiguration.py b/python/ffsim/optimize/stochastic_reconfiguration.py
index c07370b57..9194b4b86 100644
--- a/python/ffsim/optimize/stochastic_reconfiguration.py
+++ b/python/ffsim/optimize/stochastic_reconfiguration.py
@@ -60,10 +60,10 @@ def minimize_stochastic_reconfiguration(
         epsilon: Increment to use for approximating the gradient using
             finite difference.
         gtol: Convergence threshold for the norm of the projected gradient.
         variation: Hyperparameter controlling the size of parameter variations
             used in the linear expansion of the wavefunction. Its value must be
-            strictly between 0 and 1. A larger value results in larger variations.
-        optimize_variation; Whether to optimize the `variation` hyperparameter
+            positive.
+        optimize_variation: Whether to optimize the `variation` hyperparameter
             in each iteration. Optimizing hyperparameters incurs more function and
             energy evaluations in each iteration, but may improve convergence.
             The optimization is performed using `scipy.optimize.minimize`_.

From 04eeafeb14cccaea2a85d108e7cd2da9840e2372 Mon Sep 17 00:00:00 2001
From: "Kevin J. Sung"
Date: Mon, 20 May 2024 21:32:19 -0500
Subject: [PATCH 5/5] add regularization hyperparameter

---
 .../optimize/stochastic_reconfiguration.py    | 83 +++++++++++++++++--
 .../stochastic_reconfiguration_test.py        | 48 ++++++++++-
 2 files changed, 124 insertions(+), 7 deletions(-)

diff --git a/python/ffsim/optimize/stochastic_reconfiguration.py b/python/ffsim/optimize/stochastic_reconfiguration.py
index 9194b4b86..f28aac33b 100644
--- a/python/ffsim/optimize/stochastic_reconfiguration.py
+++ b/python/ffsim/optimize/stochastic_reconfiguration.py
@@ -37,7 +37,9 @@ def minimize_stochastic_reconfiguration(
     cond: float = 1e-4,
     epsilon: float = 1e-8,
     gtol: float = 1e-5,
+    regularization: float = 0.0,
     variation: float = 1.0,
+    optimize_regularization: bool = True,
     optimize_variation: bool = True,
     optimize_kwargs: dict | None = None,
     callback: Callable[[OptimizeResult], Any] | None = None,
 ) -> OptimizeResult:
@@ -60,9 +62,16 @@ def minimize_stochastic_reconfiguration(
         epsilon: Increment to use for approximating the gradient using
             finite difference.
         gtol: Convergence threshold for the norm of the projected gradient.
+        regularization: Hyperparameter controlling regularization of the
+            overlap matrix. Its value must be nonnegative. A larger value results in
+            greater regularization.
         variation: Hyperparameter controlling the size of parameter variations
             used in the linear expansion of the wavefunction. Its value must be
             positive.
+        optimize_regularization: Whether to optimize the `regularization` hyperparameter
+            in each iteration. Optimizing hyperparameters incurs more function and
+            energy evaluations in each iteration, but may improve convergence.
+            The optimization is performed using `scipy.optimize.minimize`_.
         optimize_variation: Whether to optimize the `variation` hyperparameter
             in each iteration. Optimizing hyperparameters incurs more function and
             energy evaluations in each iteration, but may improve convergence.
@@ -117,7 +126,6 @@ def minimize_stochastic_reconfiguration( if optimize_kwargs is None: optimize_kwargs = dict(method="L-BFGS-B") - variation_param = math.sqrt(variation) params = x0.copy() converged = False intermediate_result = OptimizeResult( @@ -140,6 +148,7 @@ def minimize_stochastic_reconfiguration( intermediate_result.fun = energy intermediate_result.jac = grad intermediate_result.overlap_mat = overlap_mat + intermediate_result.regularization = regularization intermediate_result.variation = variation callback(intermediate_result) @@ -147,7 +156,58 @@ def minimize_stochastic_reconfiguration( converged = True break - if optimize_variation: + if optimize_regularization and optimize_variation: + + def f(x: np.ndarray) -> float: + (regularization_param, variation_param) = x + regularization = regularization_param**2 + variation = variation_param**2 + param_update = _get_param_update( + grad, + overlap_mat, + regularization=regularization, + variation=variation, + cond=cond, + ) + vec = params_to_vec(params + param_update) + return np.vdot(vec, hamiltonian @ vec).real + + regularization_param = math.sqrt(regularization) + variation_param = math.sqrt(variation) + result = minimize( + f, + x0=[regularization_param, variation_param], + **optimize_kwargs, + ) + (regularization_param, variation_param) = result.x + regularization = regularization_param**2 + variation = variation_param**2 + + elif optimize_regularization: + + def f(x: np.ndarray) -> float: + (regularization_param,) = x + regularization = regularization_param**2 + param_update = _get_param_update( + grad, + overlap_mat, + regularization=regularization, + variation=variation, + cond=cond, + ) + vec = params_to_vec(params + param_update) + return np.vdot(vec, hamiltonian @ vec).real + + regularization_param = math.sqrt(regularization) + result = minimize( + f, + x0=[regularization_param], + **optimize_kwargs, + ) + (regularization_param,) = result.x + regularization = regularization_param**2 + + elif optimize_variation: def f(x: np.ndarray) -> float: (variation_param,) = x @@ -155,12 +215,14 @@ def f(x: np.ndarray) -> float: param_update = _get_param_update( grad, overlap_mat, - variation, + regularization=regularization, + variation=variation, cond=cond, ) vec = params_to_vec(params + param_update) return np.vdot(vec, hamiltonian @ vec).real + variation_param = math.sqrt(variation) result = minimize( f, x0=[variation_param], @@ -172,7 +234,8 @@ def f(x: np.ndarray) -> float: param_update = _get_param_update( grad, overlap_mat, - variation, + regularization=regularization, + variation=variation, cond=cond, ) params = params + param_update @@ -217,7 +280,15 @@ def _sr_matrices( def _get_param_update( - grad: np.ndarray, overlap_mat: np.ndarray, variation: float, cond: float + grad: np.ndarray, + overlap_mat: np.ndarray, + regularization: float, + variation: float, + cond: float, ) -> np.ndarray: - x, _, _, _ = scipy.linalg.lstsq(overlap_mat, -0.5 * variation * grad, cond=cond) + x, _, _, _ = scipy.linalg.lstsq( + overlap_mat + regularization * np.eye(overlap_mat.shape[0]), + -0.5 * variation * grad, + cond=cond, + ) return x diff --git a/tests/python/optimize/stochastic_reconfiguration_test.py b/tests/python/optimize/stochastic_reconfiguration_test.py index eb84d05c2..834444dca 100644 --- a/tests/python/optimize/stochastic_reconfiguration_test.py +++ b/tests/python/optimize/stochastic_reconfiguration_test.py @@ -63,6 +63,8 @@ def callback(intermediate_result): ) if hasattr(intermediate_result, "jac"): 
info["jac"].append(intermediate_result.jac) + if hasattr(intermediate_result, "regularization"): + info["regularization"].append(intermediate_result.regularization) if hasattr(intermediate_result, "variation"): info["variation"].append(intermediate_result.variation) @@ -85,6 +87,9 @@ def callback(intermediate_result): params_to_vec, x0=x0, hamiltonian=hamiltonian, + regularization=1e-4, + variation=0.9, + optimize_regularization=False, optimize_variation=False, callback=callback, ) @@ -94,7 +99,48 @@ def callback(intermediate_result): np.testing.assert_allclose(energy(params), fun) assert result.nit <= 30 assert result.nit < result.nlinop < result.nfev - assert set(info["variation"]) == {1.0} + assert set(info["regularization"]) == {1e-4} + assert set(info["variation"]) == {0.9} + + # optimization without optimizing regularization + info = defaultdict(list) + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, + x0=x0, + hamiltonian=hamiltonian, + regularization=1e-4, + variation=0.9, + optimize_regularization=False, + callback=callback, + ) + np.testing.assert_allclose(energy(result.x), result.fun) + np.testing.assert_allclose(result.fun, -0.970773) + for params, fun in zip(info["x"], info["fun"]): + np.testing.assert_allclose(energy(params), fun) + assert result.nit <= 30 + assert result.nit < result.nlinop < result.nfev + assert set(info["regularization"]) == {1e-4} + assert len(set(info["variation"])) > 1 + + # optimization without optimizing variation + info = defaultdict(list) + result = ffsim.optimize.minimize_stochastic_reconfiguration( + params_to_vec, + x0=x0, + hamiltonian=hamiltonian, + regularization=1e-4, + variation=0.9, + optimize_variation=False, + callback=callback, + ) + np.testing.assert_allclose(energy(result.x), result.fun) + np.testing.assert_allclose(result.fun, -0.970773) + for params, fun in zip(info["x"], info["fun"]): + np.testing.assert_allclose(energy(params), fun) + assert result.nit <= 30 + assert result.nit < result.nlinop < result.nfev + assert set(info["regularization"]) != {1e-4} + assert set(info["variation"]) == {0.9} # optimization with maxiter info = defaultdict(list)