Merge pull request #8 from peekxc/hutchpp

Hutchpp
peekxc · Jan 19, 2024 · d1ea18f · d1ea18f
2 parents abab21c + 917e85f
commit d1ea18f
Show file tree

Hide file tree

Showing 10 changed files with 507 additions and 274 deletions.
diff --git a/.cirrus.yml b/.cirrus.yml
@@ -1,15 +1,20 @@
 env: 
   CIRRUS_CLONE_SUBMODULES: true
 
-
+# quay.io/pypa/manylinux2014_x86_64  # CentOS 7  (use GCC 10)
+# quay.io/pypa/manylinux_2_24_x86_64 # Debian    (unknown)
+# quay.io/pypa/manylinux_2_28_x86_64 # AlmaLinux (use clang)
 linux_task:
   container:
-    image: quay.io/pypa/manylinux_2_28_x86_64
+    # image: quay.io/pypa/manylinux_2_28_x86_64
+    image: quay.io/pypa/manylinux2014_x86_64
   only_if: changesInclude('.cirrus.yml', '**.{h,cpp,py}') 
   env:
-    CC: clang 
-    CXX: clang++
-    PATH: ${PATH}:/opt/python/cp310-cp310/bin
+    # CC: clang 
+    # CXX: clang++
+    CC: gcc 
+    CXX: g++
+    PATH: /opt/python/cp310-cp310/bin:${PATH}
   pip_cache:
     folder: ~/.cache/pip
     fingerprint_script: echo $PYTHON_VERSION

diff --git a/pyproject.toml b/pyproject.toml
@@ -44,11 +44,15 @@ test-requires = ["pytest", "pytest-cov", "pytest-benchmark", "bokeh"] # coverage
 test-command = "python -m pytest {package}/tests/ --cov={package} --benchmark-skip"
 build-verbosity = 1
 skip = "cp36-* pp* cp37-* *_ppc64le *_i686 *_s390x *-musllinux*" # todo: revisit musllinux
-manylinux-x86_64-image = "manylinux_2_28" # prefer the newer one
+# manylinux-x86_64-image = "manylinux_2_28" # prefer the newer one
+manylinux-x86_64-image = "manylinux2014"
+
 
 [tool.cibuildwheel.linux]
 before-build = "bash {project}/tools/cibw_linux.sh {project}"
-environment = { CC="clang", CXX="clang++" }
+environment = { CC="gcc", CXX="g++" } # for manylinux2014
+# environment = { CC="clang", CXX="clang++" } # for manylinux_2_28
+
 # before-build = ["ulimit -n 4096", "yum install -y clang", "yum install -y openblas"]
 
 [tool.cibuildwheel.macos]

diff --git a/src/primate/_trace.cpp b/src/primate/_trace.cpp
@@ -14,10 +14,12 @@ using namespace pybind11::literals;
 template< typename F >
 using py_array = py::array_t< F, py::array::f_style | py::array::forcecast >;
 
+
 // Template function for generating module definitions for a given Operator / precision 
 template< bool multithreaded, std::floating_point F, class Matrix, LinearOperator Wrapper >
 void _trace_wrapper(py::module& m){
   using ArrayF = Eigen::Array< F, Dynamic, 1 >;
+  using VectorF = Eigen::Matrix< F, Dynamic, 1 >;
 
   m.def("hutch", [](
     const Matrix& A, 
@@ -60,6 +62,21 @@ void _trace_wrapper(py::module& m){
       "matvec_time_us"_a = op.matvec_time
     );
   });
+
+  // Computes the trace of Q.T @ (A @ Q) including the inner terms q_i^T A q_i 
+  m.def("quad_sum", [](const Matrix& A, DenseMatrix< F > Q) -> py_array< F > {
+    const auto op = Wrapper(A);
+    F quad_sum = 0.0; 
+    const size_t N = static_cast< size_t >(Q.cols());
+    auto estimates = static_cast< ArrayF >(ArrayF::Zero(N));
+    auto y = static_cast< VectorF >(VectorF::Zero(Q.rows()));
+    for (size_t j = 0; j < N; ++j){
+      op.matvec(Q.col(j).data(), y.data());
+      estimates[j] = Q.col(j).adjoint().dot(y);
+      quad_sum += estimates[j];
+    }
+    return py::cast(estimates);
+  });
 }
 
 // const Matrix& A, std::function< F(F) > fun, int lanczos_degree, F lanczos_rtol, int _orth, int _ncv

diff --git a/src/primate/functional.py b/src/primate/functional.py
@@ -10,6 +10,7 @@
 from .diagonalize import lanczos
 
 ## Since Python has no support for inline creation of sized generators 
+## Based on "Estimating the Largest Eigenvalue by the Power and Lanczos Algorithms with a Random Start"
 class RelativeErrorBound():
   def __init__(self, n: int):
     self.base_num = 2.575 * np.log(n)

diff --git a/src/primate/meson.build b/src/primate/meson.build
@@ -27,6 +27,7 @@ python_sources = [
   'stats.py',
   'special.py',
   'functional.py',
+  'quadrature.py',
   '__init__.py'
 ]
 

diff --git a/src/primate/operator.py b/src/primate/operator.py
@@ -114,4 +114,20 @@ def _matvec(self, x: np.ndarray) -> np.ndarray:
 		self._z[:len(x)] = x
 		x_fft = np.fft.fft(self._z)
 		y = np.real(np.fft.ifft(self._dfft * x_fft))
-		return y[:len(x)]
+		return y[:len(x)]
+
+
+## For use with e.g. Hutch++ 
+class OrthComplement(LinearOperator):
+	def __init__(self, A: Union[LinearOperator, np.ndarray], Q: np.ndarray):
+		self.A = A 
+		self.Q = Q 
+		self.shape = A.shape
+		self.dtype = self.Q.dtype
+
+	def _deflate(self, w: np.ndarray) -> np.ndarray:
+		return w - self.Q @ (self.Q.T @ w)
+
+	def _matvec(self, x: np.ndarray) -> np.ndarray:
+		y = self._deflate(x)
+		return self._deflate(self.A @ y)
diff --git a/src/primate/quadrature.py b/src/primate/quadrature.py
@@ -0,0 +1,98 @@
+from typing import Union, Callable, Any
+from numbers import Integral
+import numpy as np
+from scipy.sparse.linalg import LinearOperator
+from scipy.linalg import solve_triangular
+from scipy.stats import t
+from scipy.special import erfinv
+from numbers import Real
+
+## Package imports
+from .random import _engine_prefixes, _engines, isotropic
+from .special import _builtin_matrix_functions
+from .operator import matrix_function
+import _lanczos
+import _trace
+import _orthogonalize
+
+
+def sl_gauss(
+	A: Union[LinearOperator, np.ndarray],
+	n: int = 150,
+	deg: int = 20,
+	pdf: str = "rademacher",
+	rng: str = "pcg",
+	seed: int = -1,
+	orth: int = 0,
+	num_threads: int = 0,
+) -> np.ndarray:
+	"""Stochastic Gaussian quadrature approximation.
+
+	Computes a set of sample nodes and weights for the degree-k orthogonal polynomial approximating the 
+	cumulative spectral measure of `A`. This function can be used to approximate the spectral density of `A`, 
+	or to approximate the spectral sum of any function applied to the spectrum of `A`.
+
+	Parameters
+	----------
+	A : ndarray, sparray, or LinearOperator
+	    real symmetric operator.
+	n : int, default=150
+	    Number of random vectors to sample for the quadrature estimate.
+	deg  : int, default=20
+	    Degree of the quadrature approximation.
+	rng : { 'splitmix64', 'xoshiro256**', 'pcg64', 'lcg64', 'mt64' }, default="pcg64"
+	    Random number generator to use (PCG64 by default).
+	seed : int, default=-1
+	    Seed to initialize the `rng` entropy source. Set `seed` > -1 for reproducibility.
+	pdf : { 'rademacher', 'normal' }, default="rademacher"
+	    Choice of zero-centered distribution to sample random vectors from.
+	orth: int, default=0
+		Number of additional Lanczos vectors to orthogonalize against when building the Krylov basis.
+	num_threads: int, default=0
+	    Number of threads to use to parallelize the computation. Setting `num_threads` < 1 to let OpenMP decide.
+
+	Returns
+	-------
+	trace_estimate : float
+	    Estimate of the trace of the matrix function $f(A)$.
+	info : dict, optional
+	    If 'info = True', additional information about the computation.
+
+	"""
+	attr_checks = [hasattr(A, "__matmul__"), hasattr(A, "matmul"), hasattr(A, "dot"), hasattr(A, "matvec")]
+	assert any(attr_checks), "Invalid operator; must have an overloaded 'matvec' or 'matmul' method"
+	assert hasattr(A, "shape") and len(A.shape) >= 2, "Operator must be at least two dimensional."
+	assert A.shape[0] == A.shape[1], "This function only works with square, symmetric matrices!"
+
+	## Choose the random number engine
+	assert rng in _engine_prefixes or rng in _engines, f"Invalid pseudo random number engine supplied '{str(rng)}'"
+	engine_id = _engine_prefixes.index(rng) if rng in _engine_prefixes else _engines.index(rng)
+
+	## Choose the distribution to sample random vectors from
+	assert pdf in ["rademacher", "normal"], f"Invalid distribution '{pdf}'; Must be one of 'rademacher' or 'normal'."
+	distr_id = ["rademacher", "normal"].index(pdf)
+
+	## Get the dtype; infer it if it's not available
+	f_dtype = (A @ np.zeros(A.shape[1])).dtype if not hasattr(A, "dtype") else A.dtype
+	assert (
+		f_dtype.type == np.float32 or f_dtype.type == np.float64
+	), "Only 32- or 64-bit floating point numbers are supported."
+
+	## Extract the machine precision for the given floating point type
+	lanczos_rtol = np.finfo(f_dtype).eps  # if lanczos_rtol is None else f_dtype.type(lanczos_rtol)
+
+	## Argument checking
+	m = A.shape[1]  # Dimension of the space
+	nv = int(n)  # Number of random vectors to generate
+	seed = int(seed)  # Seed should be an integer
+	deg = max(deg, 2)  # Must be at least 2
+	orth = m - 1 if orth < 0 else min(m - 1, orth)  # Number of additional vectors should be an integer
+	ncv = max(int(deg + orth), m)  # Number of Lanczos vectors to keep in memory
+	num_threads = int(num_threads)  # should be integer; if <= 0 will trigger max threads on C++ side
+
+	## Collect the arguments processed so far
+	sl_quad_args = (nv, distr_id, engine_id, seed, deg, lanczos_rtol, orth, ncv, num_threads)
+
+	## Make the actual call
+	quad_nw = _lanczos.stochastic_quadrature(A, *sl_quad_args)
+	return quad_nw