Added linalg.pinv #48399
Changes from 11 commits
@@ -95,22 +95,70 @@ std::tuple<Tensor, Tensor> slogdet(const Tensor& self) {
   return std::make_tuple(det_sign, abslogdet_val);
 }

-Tensor pinverse(const Tensor& self, double rcond) {
-  TORCH_CHECK((at::isFloatingType(self.scalar_type()) || at::isComplexType(self.scalar_type())) && self.dim() >= 2,
-              "pinverse(", self.scalar_type(), "{", self.sizes(), "}): expected a tensor with 2 or more dimensions "
+Tensor linalg_pinv(const Tensor& input, const Tensor& rcond, bool hermitian) {
+  TORCH_CHECK((at::isFloatingType(input.scalar_type()) || at::isComplexType(input.scalar_type())) && input.dim() >= 2,
+              "linalg_pinv(", input.scalar_type(), "{", input.sizes(), "}): expected a tensor with 2 or more dimensions "
               "of floating types");
-  if (self.numel() == 0) {
+  if (input.numel() == 0) {
     // Match NumPy
-    auto self_sizes = self.sizes().vec();
-    std::swap(self_sizes[self.dim() - 1], self_sizes[self.dim() - 2]);
-    return at::empty(self_sizes, self.options());
+    auto input_sizes = input.sizes().vec();
+    std::swap(input_sizes[input.dim() - 1], input_sizes[input.dim() - 2]);
+    return at::empty(input_sizes, input.options());
   }
+  Tensor rcond_ = rcond;
Review comment: Can you elaborate on what's happening here, and add a comment elaborating?
Reply: It was not needed here and I removed it. b7a3eb3
+  if (rcond.dim() > 0) {
+    rcond_ = rcond.unsqueeze(-1);
+  }
-  Tensor U, S, V;
-  std::tie(U, S, V) = self.svd();
-  Tensor max_val = at::narrow(S, /*dim=*/-1, /*start=*/0, /*length=*/1);
-  Tensor S_pseudoinv = at::where(S > rcond * max_val, S.reciprocal(), at::zeros({}, S.options())).to(self.dtype());
-  // computes V.conj() @ diag(S_pseudoinv) @ U.T.conj()
-  return at::matmul(V.conj() * S_pseudoinv.unsqueeze(-2), U.transpose(-2, -1).conj());
+  // If not Hermitian use singular value decomposition, else use eigenvalue decomposition
+  if (!hermitian) {
+    // until https://github.com/pytorch/pytorch/issues/45821 is resolved
+    // svd() returns conjugated V for complex-valued input
+    Tensor U, S, V_conj;
+    // TODO: replace input.svd with linalg_svd
+    std::tie(U, S, V_conj) = input.svd();
+    Tensor max_val = at::narrow(S, /*dim=*/-1, /*start=*/0, /*length=*/1);  // singular values are sorted in descending order
+    Tensor S_pseudoinv = at::where(S > rcond_ * max_val, S.reciprocal(), at::zeros({}, S.options())).to(input.dtype());
Review comment: Parens around …
Review comment: Is the …
Reply: Yes, …
+    // computes V @ diag(S_pseudoinv) @ U.T.conj()
+    // TODO: replace V_conj.conj() -> V once https://github.com/pytorch/pytorch/issues/45821 is resolved
+    return at::matmul(V_conj.conj() * S_pseudoinv.unsqueeze(-2), U.conj().transpose(-2, -1));
+  } else {
+    Tensor S, U;
+    std::tie(S, U) = at::linalg_eigh(input);
+    // For Hermitian matrices, singular values equal to abs(eigenvalues)
+    Tensor S_abs = S.abs();
+    // eigenvalues are sorted in ascending order starting with negative values, we need a maximum value of abs(eigenvalues)
+    Tensor max_val = S_abs.amax(/*dim=*/-1, /*keepdim=*/true);
+    Tensor S_pseudoinv = at::where(S_abs > rcond_ * max_val, S.reciprocal(), at::zeros({}, S.options())).to(input.dtype());
+    // computes U @ diag(S_pseudoinv) @ U.conj().T
+    return at::matmul(U * S_pseudoinv.unsqueeze(-2), U.conj().transpose(-2, -1));
+  }
+}
+
+Tensor linalg_pinv(const Tensor& input, double rcond, bool hermitian) {
+  Tensor rcond_tensor = at::full({}, rcond, input.options().dtype(ScalarType::Double));
+  return at::linalg_pinv(input, rcond_tensor, hermitian);
+}
+
+// TODO: implement _out variant avoiding copy and using already allocated storage directly
+Tensor& linalg_pinv_out(Tensor& result, const Tensor& input, const Tensor& rcond, bool hermitian) {
+  TORCH_CHECK(result.scalar_type() == input.scalar_type(),
+              "result dtype ", result.scalar_type(), " does not match the expected dtype ", input.scalar_type());
+
+  Tensor result_tmp = at::linalg_pinv(input, rcond, hermitian);
+  at::native::resize_output(result, result_tmp.sizes());
+  result.copy_(result_tmp);
+  return result;
+}
|
||
Tensor& linalg_pinv_out(Tensor& result, const Tensor& input, double rcond, bool hermitian) { | ||
Tensor rcond_tensor = at::full({}, rcond, input.options().dtype(ScalarType::Double)); | ||
mruberry marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return at::linalg_pinv_out(result, input, rcond_tensor, hermitian); | ||
} | ||
|
||
Tensor pinverse(const Tensor& self, double rcond) { | ||
return at::linalg_pinv(self, rcond, /*hermitian=*/false); | ||
} | ||
|
||
Tensor& linalg_matrix_rank_out(Tensor& result, const Tensor& self, optional<double> tol, bool hermitian) { | ||
|
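The two branches above can be easier to follow in Python. The sketch below mirrors the same thresholded-SVD / eigendecomposition idea using public PyTorch calls; it is an illustration only, not the PR code: the helper name pinv_sketch is made up here, and it assumes real-valued input to sidestep the conjugated-V issue mentioned in the comments.

import torch

def pinv_sketch(A, rcond=1e-15, hermitian=False):
    # Illustrative helper (not the PR code): pseudo-inverse via thresholded SVD / eigh,
    # assuming a real-valued matrix (or batch of matrices) A.
    if not hermitian:
        U, S, V = torch.svd(A)                       # singular values sorted in descending order
        max_val = S[..., :1]                         # largest singular value of each matrix
        S_pinv = torch.where(S > rcond * max_val, S.reciprocal(), torch.zeros_like(S))
        # V @ diag(S_pinv) @ U^T
        return (V * S_pinv.unsqueeze(-2)) @ U.transpose(-2, -1)
    else:
        S, U = torch.linalg.eigh(A)                  # eigenvalues sorted in ascending order
        S_abs = S.abs()                              # |eigenvalues| play the role of singular values
        max_val = S_abs.amax(dim=-1, keepdim=True)
        S_pinv = torch.where(S_abs > rcond * max_val, S.reciprocal(), torch.zeros_like(S))
        # U @ diag(S_pinv) @ U^T
        return (U * S_pinv.unsqueeze(-2)) @ U.transpose(-2, -1)

A = torch.randn(3, 5, dtype=torch.float64)
print(torch.allclose(A @ pinv_sketch(A) @ A, A))     # expect True for a well-conditioned A

The batched case works the same way because the elementwise multiply against S_pinv.unsqueeze(-2) broadcasts over any leading batch dimensions, just like the at::matmul calls above.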
@@ -2064,6 +2064,105 @@ def run_test_singular_input(batch_dim, n):
        for params in [(1, 0), (2, 0), (2, 1), (4, 0), (4, 2), (10, 2)]:
            run_test_singular_input(*params)
    @precisionOverride({torch.float32: 1e-3, torch.complex64: 1e-3, torch.float64: 1e-7, torch.complex128: 1e-7})
    @skipCUDAIfNoMagma
    @skipCPUIfNoLapack
    @dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
    def test_pinv(self, device, dtype):
        from torch.testing._internal.common_utils import random_hermitian_pd_matrix

        def run_test_main(A, hermitian):
            # Testing against definition for pseudo-inverses
            A_pinv = torch.linalg.pinv(A, hermitian=hermitian)
            if A.numel() > 0:
                self.assertEqual(A, A @ A_pinv @ A, atol=self.precision, rtol=self.precision)
                self.assertEqual(A_pinv, A_pinv @ A @ A_pinv, atol=self.precision, rtol=self.precision)
Review comment: These lines probably want rtol = 0.
Reply: Mathematically yes, but it doesn't work with rtol=0. It fails for fp32 and complex64 and a larger rtol is needed for this test to pass. We do two matrix-matrix multiplications (…)
Review comment: OK, no worries.
Review comment: Note to self: we should support setting rtol and atol with the PrecisionOverride decorator.
                self.assertEqual(A @ A_pinv, (A @ A_pinv).conj().transpose(-2, -1))
                self.assertEqual(A_pinv @ A, (A_pinv @ A).conj().transpose(-2, -1))
            else:
                self.assertEqual(A.shape, A_pinv.shape[:-2] + (A_pinv.shape[-1], A_pinv.shape[-2]))

            # Check out= variant
Review comment: Too bad the OpInfo's won't test out= because we don't have multi-tensor out= testing (yet).
            out = torch.empty_like(A_pinv)
            ans = torch.linalg.pinv(A, hermitian=hermitian, out=out)
            self.assertEqual(ans, out)
            self.assertEqual(ans, A_pinv)

        def run_test_numpy(A, hermitian):
            # Check against NumPy output
            # Test float rcond, and specific value for each matrix
            rconds = [float(torch.rand(1)), torch.rand(A.shape[:-2], dtype=torch.double, device=device)]
            # Test broadcasting of rcond
            if A.ndim > 2:
                rconds.append(torch.rand(A.shape[-3], device=device))
            for rcond in rconds:
                actual = torch.linalg.pinv(A, rcond=rcond, hermitian=hermitian)
                numpy_rcond = rcond if isinstance(rcond, float) else rcond.cpu().numpy()
                expected = np.linalg.pinv(A.cpu().numpy(), rcond=numpy_rcond, hermitian=hermitian)
                self.assertEqual(actual, expected)

Review comment: Awesome enumeration over sizes.
        for sizes in [(5, 5), (3, 5, 5), (3, 2, 5, 5),  # square matrices
                      (3, 2), (5, 3, 2), (2, 5, 3, 2),  # fat matrices
                      (2, 3), (5, 2, 3), (2, 5, 2, 3),  # thin matrices
                      (0, 0), (0, 2), (2, 0), (3, 0, 0), (0, 3, 0), (0, 0, 3)]:  # zero numel matrices
            A = torch.randn(*sizes, dtype=dtype, device=device)
            hermitian = False
            run_test_main(A, hermitian)
            run_test_numpy(A, hermitian)
        # Check hermitian = True
        for sizes in [(5, 5), (3, 5, 5), (3, 2, 5, 5),  # square matrices
                      (0, 0), (3, 0, 0), ]:  # zero numel square matrices
            A = random_hermitian_pd_matrix(sizes[-1], *sizes[:-2], dtype=dtype, device=device)
            hermitian = True
            run_test_main(A, hermitian)
            run_test_numpy(A, hermitian)
    @skipCUDAIfNoMagma
    @skipCPUIfNoLapack
    @dtypes(torch.float64)
    def test_pinv_autograd(self, device, dtype):
Review comment: If this adds an OpInfo then I don't think a custom autograd test is needed?
Reply: Yes, it's not needed. I'll remove it.
        from torch.testing._internal.common_utils import random_fullrank_matrix_distinct_singular_value

        n = 5
        for batches in ([], [2], [2, 3]):
            # using .to(device) instead of device=device because @xwang233 claims it's faster
            a = random_fullrank_matrix_distinct_singular_value(n, *batches, dtype=dtype).to(device)
            a.requires_grad_()

            def func(a, hermitian):
                if hermitian:
                    a = a + a.conj().transpose(-2, -1)
                return torch.linalg.pinv(a, hermitian=hermitian)

            for hermitian in [False, True]:
                gradcheck(func, [a, hermitian])
                gradgradcheck(func, [a, hermitian])
    # TODO: RuntimeError: svd does not support automatic differentiation for outputs with complex dtype.
    # See https://github.com/pytorch/pytorch/pull/47761
    @unittest.expectedFailure
    @skipCUDAIfNoMagma
    @skipCPUIfNoLapack
    @dtypes(torch.complex128)
    def test_pinv_autograd_complex_xfailed(self, device, dtype):
Review comment: This failure test will be needed. There are other failure cases that need to be tested for, too. Like rcond having the wrong dtype or the sizes of the inputs being incorrect.
Reply: I added …
        from torch.testing._internal.common_utils import random_fullrank_matrix_distinct_singular_value

        n = 5
        batches = (2, 3)
        # using .to(device) instead of device=device because @xwang233 claims it's faster
        a = random_fullrank_matrix_distinct_singular_value(n, *batches, dtype=dtype).to(device)
        a.requires_grad_()

        def func(a, hermitian):
            if hermitian:
                a = a + a.conj().transpose(-2, -1)
            return torch.linalg.pinv(a, hermitian=hermitian)

        for hermitian in [False, True]:
            gradcheck(func, [a, hermitian])
            gradgradcheck(func, [a, hermitian])

    def solve_test_helper(self, A_dims, b_dims, device, dtype):
        from torch.testing._internal.common_utils import random_fullrank_matrix_distinct_singular_value
@@ -483,6 +483,79 @@
>>> tensor([ 11.6734+0.j, 105.1037+0.j, 10.1978+0.j])
""")

pinv = _add_docstr(_linalg.linalg_pinv, r"""
linalg.pinv(input, rcond=1e-15, hermitian=False) -> Tensor
Review comment: Note for @heitorschueroff, we should be sure to docs deprecate the old pinverse before 1.8.
Review comment: The NumPy version of this operator can throw a runtime error. The other linear algebra functions that we've recently added also explain when they may throw runtime errors. Is there something we can document here, too?
Reply: I added a note that if svd or eigh do not converge then the runtime error will be thrown. It's difficult to test these situations though.
Review comment: What makes that hard to test?
Reply: I can't find how to generate matrices that would make svd or eigh fail. They're quite robust.
Computes the pseudo-inverse (also known as the Moore-Penrose inverse) of a matrix :attr:`input`,
or of each matrix in a batched :attr:`input`.
The pseudo-inverse is computed using singular value decomposition (see :func:`torch.linalg.svd`) by default.
Review comment: Unfortunately torch.linalg.svd doesn't exist yet. This can reference torch.svd for now.
Reply: Sure I can change that.
Review comment: It is nice to dream... but we should probably keep all the references in the docs working ;)
If :attr:`hermitian` is ``True``, then :attr:`input` is assumed to be Hermitian (symmetric if real-valued),
and the computation of the pseudo-inverse is done by obtaining the eigenvalues and eigenvectors
Review comment: Overall this paragraph is very good. In the future we may want to elaborate on the precise computation using the outputs of the singular value decomposition or the eigenvalues and eigenvectors.
(see :func:`torch.linalg.eigh`).
The singular values (or the absolute eigenvalues when :attr:`hermitian` is ``True``) that are below
the specified :attr:`rcond` threshold are treated to be zero and discarded in the computation.

Supports input of ``float``, ``double``, ``cfloat`` and ``cdouble`` datatypes.
Review comment: We should verify that float16 and bfloat16 are not supported and throw the proper error message when given. See comments above.
.. note:: When given inputs on a CUDA device, this function synchronizes that device with the CPU.

Args:
    input (Tensor): the input matrix of size :math:`(m, n)` or the batch of matrices of size :math:`(*, m, n)`
                    where `*` is one or more batch dimensions.
    rcond (float, Tensor, optional): the tolerance value to determine the cutoff for small singular values. Default: 1e-15
Review comment: One challenge with clearly documenting rcond is explaining what different tensor shapes do. Do you think we can elaborate on this? In particular, the shape of rcond must be broadcastable to the singular value tensor returned from torch.svd, right? Is it worth including an rcond example where rcond isn't a scalar? Possibly by inspecting the singular value decomposition and then determining a cutoff?
Reply: Yes, rcond must be broadcastable to the singular values. I think it's a good enough condition to mention and doesn't need further explanation?
    hermitian(bool, optional): indicates whether :attr:`input` is Hermitian. Default: ``False``

Examples::

    >>> input = torch.randn(3, 5)
    >>> input
    tensor([[ 0.5495,  0.0979, -1.4092, -0.1128,  0.4132],
            [-1.1143, -0.3662,  0.3042,  1.6374, -0.9294],
            [-0.3269, -0.5745, -0.0382, -0.5922, -0.6759]])
    >>> torch.linalg.pinv(input)
    tensor([[ 0.0600, -0.1933, -0.2090],
            [-0.0903, -0.0817, -0.4752],
            [-0.7124, -0.1631, -0.2272],
            [ 0.1356,  0.3933, -0.5023],
            [-0.0308, -0.1725, -0.5216]])

    Batched linalg.pinv example
    >>> a = torch.randn(2, 6, 3)
    >>> b = torch.linalg.pinv(a)
    >>> torch.matmul(b, a)
    tensor([[[ 1.0000e+00,  1.6391e-07, -1.1548e-07],
             [ 8.3121e-08,  1.0000e+00, -2.7567e-07],
             [ 3.5390e-08,  1.4901e-08,  1.0000e+00]],

            [[ 1.0000e+00, -8.9407e-08,  2.9802e-08],
             [-2.2352e-07,  1.0000e+00,  1.1921e-07],
             [ 0.0000e+00,  8.9407e-08,  1.0000e+00]]])

    Hermitian input example
    >>> a = torch.randn(3, 3, dtype=torch.complex64)
    >>> a = a + a.t().conj()  # creates a Hermitian matrix
    >>> b = torch.linalg.pinv(a, hermitian=True)
    >>> torch.matmul(b, a)
    tensor([[ 1.0000e+00+0.0000e+00j, -1.1921e-07-2.3842e-07j,
              5.9605e-08-2.3842e-07j],
            [ 5.9605e-08+2.3842e-07j,  1.0000e+00+2.3842e-07j,
             -4.7684e-07+1.1921e-07j],
            [-1.1921e-07+0.0000e+00j, -2.3842e-07-2.9802e-07j,
              1.0000e+00-1.7897e-07j]])

    Non-default rcond example
Review comment: This example is tricky. Because we don't see the intermediate steps it's hard to understand the effect that rcond is having. The next example has the same issue. What are your thoughts? I think we can leave out these examples for now since this PR seems to be almost ready to merge otherwise, and maybe return to add them later?
Reply: The documentation says that the pseudo-inverse is calculated using SVD and it says that rcond determines which singular values should be set to zero. The first example demonstrates that using the default and some other value for rcond gives different results, as expected. Just using the facts from the docs I think it's understandable that different rconds in general give different results. I think we should keep this example because examples should contain code snippets demonstrating each variable. Studying math to really understand the effect of rcond (the topic of Tikhonov regularization) is definitely not needed here. We're on the level of "something in the input changes, something in the output should change as well". The purpose of the second rcond example (…) How about keeping these examples as is (that's working and valid code) and returning to them later to improve them?
Reply: @mruberry, so should we remove those rcond examples now and think about it later or keep them?
Review comment: No it's fine, if you like them let's keep them.
    >>> rcond = 0.5
    >>> a = torch.randn(3, 3)
    >>> torch.linalg.pinv(a)
    tensor([[ 0.2971, -0.4280, -2.0111],
            [-0.0090,  0.6426, -0.1116],
            [-0.7832, -0.2465,  1.0994]])
    >>> torch.linalg.pinv(a, rcond)
    tensor([[-0.2672, -0.2351, -0.0539],
            [-0.0211,  0.6467, -0.0698],
            [-0.4400, -0.3638, -0.0910]])
""")
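Following up on the review discussion about showing intermediate steps: one way a future doc revision could make the rcond cutoff visible is to print the singular values first. This is only a sketch of that idea, not part of the PR (the printed values depend on the random input, so none are shown here):

import torch

a = torch.randn(3, 3, dtype=torch.float64)
S = torch.svd(a).S                       # singular values, sorted in descending order
rcond = 0.5
print(S)                                 # inspect the spectrum
print(S > rcond * S[..., :1])            # which singular values survive the cutoff

# Components below rcond * S.max() are dropped, so a coarse rcond usually
# gives a different pseudo-inverse than the default rcond=1e-15.
print(torch.allclose(torch.linalg.pinv(a), torch.linalg.pinv(a, rcond=rcond)))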
tensorinv = _add_docstr(_linalg.linalg_tensorinv, r"""
linalg.tensorinv(input, ind=2, *, out=None) -> Tensor
Review comment: "of floating types" is misleading. The requirement is that the tensors have a floating point dtype or a complex dtype. Also, is this the right place to validate that input and rcond have the same dtype? Also also, what happens when the dtype is bfloat16 or half?
Review comment: Oh wait, I'm mistaken. It should not be that the dtype of input and rcond are the same. Shouldn't it be that rcond must always be the "value type" of input? That is, if input is double rcond should be double, but if input is complex double then rcond should be double, right?
Reply: I changed the torch check to accept only float, double, cfloat or cdouble types. As for rcond, I don't actually know how we should restrict its types. When only a float is given from the Python interface it always gets converted to double in C++ and then we always create a scalar tensor of type double. If a tensor is passed directly, then all types should be valid that allow multiplication with the max_val tensor (which can only be of type float or double) and that allow comparison with a tensor of type float or double. So I guess any floating and integer types (everything that is not complex) should be valid here for any allowed input?
Reply: I added a restriction for the rcond type not to accept complex types: 4de9786. And added tests to show that it works for all other types: 4ac1eaf.
Review comment: Your analysis makes sense. Restricting it to not be complex sounds good.
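To illustrate the conclusion of this thread, here is a small sketch of rcond passed as a real-valued tensor that broadcasts over the batch; the rejection of a complex rcond is assumed from the restriction described above, not verified against a specific release.

import torch

A = torch.randn(3, 5, 5, dtype=torch.complex128)           # batch of 3 matrices
# One cutoff per matrix; rcond stays real-valued even for complex input.
rcond = torch.tensor([1e-15, 1e-3, 0.1], dtype=torch.float64)
P = torch.linalg.pinv(A, rcond=rcond)
print(P.shape)                                              # torch.Size([3, 5, 5])

# A complex rcond should be rejected by the check referenced above.
try:
    torch.linalg.pinv(A, rcond=torch.tensor(1e-3 + 0j))
except RuntimeError as e:
    print("rejected:", e)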