pyamg · lukeolson · Nov 9, 2018 · Nov 9, 2018 · Oct 27, 2023 · Mar 22, 2022
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
 
     - name: Install
-      run: pip install -e .
+      run: pip install -v -e .
 
     - name: Test
       run: |

diff --git a/README.md b/README.md
@@ -176,3 +176,35 @@ It is possible to list all of the versions of `pyamg` available on your platform
 ```
 conda search pyamg --channel conda-forge
 ```
+
+# OpenMP
+
+PyAMG handles OpenMP in the following way
+
+    - The `has_flag()` function of `pybind11` is called with either `-fopenmp` (Linux) or `-Xpreprocessor -fopenmp` (macOS); the flag is added to the build if the compiler accepts it.
+
+    - Every instance of OpenMP is limited to `#pragma` or `#ifdef _OPENMP`.  Each kernel in `amg_core` that has OpenMP should be buildable without.
+
+    - To test, try `export OMP_NUM_THREADS=4; python test_omp.py` in `scripts/`
+
+    - The AMG solve phase adds threading by replacing the sparse matrix-vector multiplications of `A`, `P`, and `R` when called as `ml.solve(...., openmp=True)`.
+
+#### MacOS
+    - To enable OpenMP on macOS, `brew install libomp`
+
+    - Then set environment variables, following https://scikit-learn.org/dev/developers/advanced_installation.html#macos-compilers-from-homebrew :
+    ```
+    export CC=/usr/bin/clang
+    export CXX=/usr/bin/clang++
+    export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp"
+    export CFLAGS="$CFLAGS -I$(brew --prefix libomp)/include"
+    export CXXFLAGS="$CXXFLAGS -I$(brew --prefix libomp)/include"
+    export LDFLAGS="$LDFLAGS -Wl,-rpath,$(brew --prefix libomp)/lib -L$(brew --prefix libomp)/lib -lomp"
+    ```
+
+    - Then `setup.py` will attempt to add `-Xpreprocessor -fopenmp` to the compiler and `-lomp` to the linker.
+
+#### Tips
+    - The build directory may need to be removed in order to force PyAMG to re-build an OMP-enabled version
+
+
diff --git a/pyamg/amg_core/__init__.py b/pyamg/amg_core/__init__.py
@@ -38,6 +38,8 @@
 from .air import (one_point_interpolation, approx_ideal_restriction_pass1,
                   approx_ideal_restriction_pass2, block_approx_ideal_restriction_pass2)
 
+from .sparse import csr_matvec, omp_info
+
 __all__ = [
     'apply_absolute_distance_filter',
     'apply_distance_filter',
@@ -112,4 +114,6 @@
     'approx_ideal_restriction_pass2',
-    'block_approx_ideal_restriction_pass2'
+    'block_approx_ideal_restriction_pass2',
     #
+    'csr_matvec',
+    'omp_info'
 ]
diff --git a/pyamg/amg_core/generate.sh b/pyamg/amg_core/generate.sh
@@ -6,3 +6,4 @@
 ./bindthem.py relaxation.h
 ./bindthem.py ruge_stuben.h
 ./bindthem.py smoothed_aggregation.h
+./bindthem.py sparse.h
diff --git a/pyamg/amg_core/instantiate.yml b/pyamg/amg_core/instantiate.yml
@@ -101,6 +101,7 @@ instantiate:
     - rs_classical_interpolation_pass1
     - cluster_node_incidence
     - print_it
+    - omp_info
 
 - types:
     - [int, float]

diff --git a/pyamg/amg_core/sparse.h b/pyamg/amg_core/sparse.h
@@ -0,0 +1,76 @@
+#include <complex>
+#include <iostream>
+#include <stdio.h>
+
+#ifdef _OPENMP
+#   include <omp.h>
+#endif
+
+//
+// Threaded SpMV
+//
+// y <- y + A * x
+//
+// Parameters
+// ----------
+// n_row, n_col : int
+//    dimensions of the n_row x n_col matrix A
+// Ap, Aj, Ax : array
+//    CSR pointer, index, and data vectors for matrix A
+// Xx : array
+//    input vector
+// Yx : array
+//    output vector; A * x is added to its existing contents in-place
+//
+// See Also
+// --------
+// csr_matvec
+//
+// Notes
+// -----
+// The ivdep hint requires GCC 4.9 or newer
+// Threading requires a compiler with OpenMP; without it the loop runs serially
+//
+template <class I, class T>
+void csr_matvec(const I n_row,
+                const I n_col,
+                const I Ap[], const int Ap_size,
+                const I Aj[], const int Aj_size,
+                const T Ax[], const int Ax_size,
+                const T Xx[], const int Xx_size,
+                      T Yx[], const int Yx_size)
+{
+    // Each row writes only its own Yx entry, so rows are split across threads.
+    // Everything declared inside the loop is private to the executing thread.
+    #pragma omp parallel for default(shared) schedule(static)
+    for(I row = 0; row < n_row; row++){
+        // start from the current output value: the kernel accumulates into Yx
+        T acc = Yx[row];
+        #pragma GCC ivdep
+        for(I k = Ap[row]; k < Ap[row+1]; k++){
+            acc += Ax[k] * Xx[Aj[k]];
+        }
+        Yx[row] = acc;
+    }
+}
+
+
+//
+// OMP analytics
+//
+template <class I>
+void omp_info(const I m)
+{
+    // Built without OpenMP, only the #else message below is compiled in.
+    #pragma omp parallel
+    {
+#ifdef _OPENMP
+        // declared inside the parallel region, so each thread has its own copy
+        const I tid = omp_get_thread_num();
+        const I nthreads = omp_get_num_threads();
+        printf("Thread %d of %d total threads\n", tid, nthreads);
+#else
+        printf("OpenMP not available.\n");
+#endif
+    }
+}
diff --git a/pyamg/amg_core/sparse_bind.cpp b/pyamg/amg_core/sparse_bind.cpp
@@ -0,0 +1,106 @@
+// DO NOT EDIT: this file is generated
+
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <pybind11/complex.h>
+
+#include "sparse.h"
+
+namespace py = pybind11;
+
+template <class I, class T>
+void _csr_matvec(
+            const I n_row,
+            const I n_col,
+      py::array_t<I> & Ap,
+      py::array_t<I> & Aj,
+      py::array_t<T> & Ax,
+      py::array_t<T> & Xx,
+      py::array_t<T> & Yx
+                 )
+{
+    auto py_Ap = Ap.unchecked();
+    auto py_Aj = Aj.unchecked();
+    auto py_Ax = Ax.unchecked();
+    auto py_Xx = Xx.unchecked();
+    auto py_Yx = Yx.mutable_unchecked();
+    const I *_Ap = py_Ap.data();
+    const I *_Aj = py_Aj.data();
+    const T *_Ax = py_Ax.data();
+    const T *_Xx = py_Xx.data();
+    T *_Yx = py_Yx.mutable_data();
+
+    return csr_matvec <I, T>(
+                    n_row,
+                    n_col,
+                      _Ap, Ap.shape(0),
+                      _Aj, Aj.shape(0),
+                      _Ax, Ax.shape(0),
+                      _Xx, Xx.shape(0),
+                      _Yx, Yx.shape(0)
+                             );
+}
+
+template <class I>
+void _omp_info(
+                const I m
+               )
+{
+    return omp_info <I>(
+                        m
+                        );
+}
+
+PYBIND11_MODULE(sparse, m) {
+    m.doc() = R"pbdoc(
+    Pybind11 bindings for sparse.h
+
+    Methods
+    -------
+    csr_matvec
+    omp_info
+    )pbdoc";
+
+    py::options options;
+    options.disable_function_signatures();
+
+    m.def("csr_matvec", &_csr_matvec<int, float>,
+        py::arg("n_row"), py::arg("n_col"), py::arg("Ap").noconvert(), py::arg("Aj").noconvert(), py::arg("Ax").noconvert(), py::arg("Xx").noconvert(), py::arg("Yx").noconvert());
+    m.def("csr_matvec", &_csr_matvec<int, double>,
+        py::arg("n_row"), py::arg("n_col"), py::arg("Ap").noconvert(), py::arg("Aj").noconvert(), py::arg("Ax").noconvert(), py::arg("Xx").noconvert(), py::arg("Yx").noconvert());
+    m.def("csr_matvec", &_csr_matvec<int, std::complex<float>>,
+        py::arg("n_row"), py::arg("n_col"), py::arg("Ap").noconvert(), py::arg("Aj").noconvert(), py::arg("Ax").noconvert(), py::arg("Xx").noconvert(), py::arg("Yx").noconvert());
+    m.def("csr_matvec", &_csr_matvec<int, std::complex<double>>,
+        py::arg("n_row"), py::arg("n_col"), py::arg("Ap").noconvert(), py::arg("Aj").noconvert(), py::arg("Ax").noconvert(), py::arg("Xx").noconvert(), py::arg("Yx").noconvert(),
+R"pbdoc(
+Threaded SpMV
+
+y <- A * x
+
+Parameters
+----------
+n_row, n_col : int
+   dimensions of the n_row x n_col matrix A
+Ap, Aj, Ax : array
+   CSR pointer, index, and data vectors for matrix A
+Xx : array
+   input vector
+Yy : array
+   output vector (modified in-place)
+
+See Also
+--------
+csr_matvec
+
+Notes
+-----
+Requires GCC 4.9 for ivdep
+Requires a compiler with OMP)pbdoc");
+
+    m.def("omp_info", &_omp_info<int>,
+        py::arg("m"),
+R"pbdoc(
+OMP analytics)pbdoc");
+
+}
+
diff --git a/pyamg/multilevel.py b/pyamg/multilevel.py
@@ -12,7 +12,7 @@
 from .util.utils import to_type
 from .util.params import set_tol
 from .relaxation import smoothing
-from .util import upcast
+from .util import upcast, sparse
 
 
 class MultilevelSolver:
@@ -354,8 +354,23 @@ def matvec(b):
 
         return LinearOperator(shape, matvec, dtype=dtype)
 
+    def _enable_omp(self):
+        """Enable OpenMP (if available) by calling pyamg.amg_core.sparse.csr_matvec.
+
+        See Also
+        --------
+        scipy.sparse.csr.csr_matvec, pyamg.amg_core.sparse.csr_matvec, pyamg.util.sparse.csr
+        """
+        for l in self.levels:
+            l.A = sparse.csr(l.A)
+            if hasattr(l, 'P'):
+                l.P = sparse.csr(l.P)
+            if hasattr(l, 'R'):
+                l.R = sparse.csr(l.R)
+
     def solve(self, b, x0=None, tol=1e-5, maxiter=100, cycle='V', accel=None,
-              callback=None, residuals=None, cycles_per_level=1, return_info=False):
+              callback=None, residuals=None, cycles_per_level=1, return_info=False,
+              openmp=False):
         """Execute multigrid cycling.
 
         Parameters
@@ -421,6 +436,9 @@ def solve(self, b, x0=None, tol=1e-5, maxiter=100, cycle='V', accel=None,
         >>> x = ml.solve(b, tol=1e-12, residuals=residuals) # standalone solver
 
         """
+        if openmp:
+            self._enable_omp()
+
         if x0 is None:
             x = np.zeros_like(b)
         else:

diff --git a/pyamg/util/__init__.py b/pyamg/util/__init__.py
@@ -3,10 +3,11 @@
 from . import linalg
 from . import utils
 from . import params
+from . import sparse
 
 from .utils import make_system, upcast
 
-__all__ = ['linalg', 'utils', 'params', 'make_system', 'upcast']
+__all__ = ['linalg', 'utils', 'params', 'make_system', 'upcast', 'sparse']
 
 __doc__ += """
 linalg.py provides some linear algebra functionality not yet found in scipy.

diff --git a/pyamg/util/sparse.py b/pyamg/util/sparse.py
@@ -0,0 +1,39 @@
+"""Sparse matrix interface to internal sparse matrix operations."""
+import numpy as np
+from scipy.sparse import csr_matrix
+
+try:
+    # scipy >=1.8
+    from scipy.sparse._sputils import upcast_char
+except ImportError:
+    # scipy <1.8
+    from scipy.sparse.sputils import upcast_char
+
+from .. import amg_core
+
+
+class csr(csr_matrix):  # noqa: N801
+    """CSR class to redefine operations.
+
+    The purpose of this class is to redefine the matvec in scipy.sparse
+    """
+
+    def _mul_vector(self, other):
+        """Matrix-vector multiplication.
+
+        Identical to scipy.sparse with an in internal call to
+        pyamg.amg_core.sparse.csr_matvec
+        """
+        M, N = self.shape
+
+        # output array
+        result = np.zeros(M, dtype=upcast_char(self.dtype.char,
+                                               other.dtype.char))
+
+        amg_core.csr_matvec(M, N, self.indptr, self.indices, self.data,
+                            other, result)
+
+        return result
+
+
+csr.__doc__ += csr_matrix.__doc__
diff --git a/pyamg/util/tests/test_sparse.py b/pyamg/util/tests/test_sparse.py
@@ -0,0 +1,41 @@
+"""Test sparse matrix operations."""
+import numpy as np
+import pyamg.gallery
+import pyamg.util
+import scipy.sparse
+
+from numpy.testing import TestCase, assert_array_almost_equal
+
+
+class TestScipy(TestCase):
+    """Test sparse matrix operations against scipy."""
+
+    def test_matvec(self):
+
+        # initialize a seed
+        np.random.seed(678)
+
+        # real
+        A = np.array([[100.0, 0, 0], [0, 101, 0], [0, 0, 99]])
+        A = scipy.sparse.csr_matrix(A)
+        A2 = pyamg.util.sparse.csr(A)
+        u = np.random.rand(A.shape[0])
+
+        assert_array_almost_equal(A * u, A2 * u)
+
+        # complex
+        A = np.array([[100+1.0j, 0, 0],
+                      [0, 101-1.0j, 0],
+                      [0, 0, 99+9.9j]])
+        A = scipy.sparse.csr_matrix(A)
+        A2 = pyamg.util.sparse.csr(A)
+        u = np.random.rand(A.shape[0]) + 1j * np.random.rand(A.shape[0])
+
+        assert_array_almost_equal(A * u, A2 * u)
+
+        # random
+        A = pyamg.gallery.sprand(20, 20, 6 / 20.0, format='csr')
+        A2 = pyamg.util.sparse.csr(A)
+        u = np.random.rand(A.shape[0])
+
+        assert_array_almost_equal(A * u, A2 * u)