Added linalg.cholesky #46083

Closed
wants to merge 62 commits into from
Changes from 14 commits
Commits
62 commits
6b7f14a
wip linalg.cholesky
IvanYashchuk Oct 7, 2020
f7a08f4
Added xfailed test case
IvanYashchuk Oct 8, 2020
32e10f8
Added cholesky to csrc/api/include/torch/linalg.h
IvanYashchuk Oct 8, 2020
3c4d5a4
Updated example in docs
IvanYashchuk Oct 9, 2020
307020e
Added random_hermitian_pd_matrix for the test
IvanYashchuk Oct 9, 2020
9e9e0c0
Use random_hermitian_pd_matrix in the test_torch/cholesky
IvanYashchuk Oct 9, 2020
ae4f3ee
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Oct 11, 2020
1798832
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Oct 13, 2020
e74de2c
No need for skip if numpy not found anymore
IvanYashchuk Oct 13, 2020
9343678
Added larger input case
IvanYashchuk Oct 13, 2020
604f0a8
Added assertRaises tests for cholesky
IvanYashchuk Oct 13, 2020
5f80042
Added a note to the docs that the error is given if the input is not
IvanYashchuk Oct 13, 2020
da4e88b
Enabled autograd for linalg_cholesky
IvanYashchuk Oct 13, 2020
efb725c
Added a note to the documentation about complex support
IvanYashchuk Oct 13, 2020
6297b6b
Added the out= variant
IvanYashchuk Oct 14, 2020
709273b
Moved error checks to a separate test
IvanYashchuk Oct 14, 2020
21cfca0
Added xfailed test for cholesky cuda autograd
IvanYashchuk Oct 14, 2020
31cbe75
Only complex128 is needed for autograd test
IvanYashchuk Oct 14, 2020
df0172e
Added a docstring for random_hermitian_pd_matrix
IvanYashchuk Oct 14, 2020
b54c7f8
Updated linalg.cholesky docs
IvanYashchuk Oct 14, 2020
7878183
Added a note on error message for batch of singular matrices
IvanYashchuk Oct 15, 2020
0d4a8c7
In tests compare norms of the resulting matrices
IvanYashchuk Oct 15, 2020
e4832d3
Added entry in overrides.py
IvanYashchuk Oct 15, 2020
e800b97
Added test case for batch singular input
IvanYashchuk Oct 15, 2020
8b58586
Moved tests for torch.cholesky to test_linalg.py
IvanYashchuk Oct 15, 2020
7aabce3
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Oct 15, 2020
12d11ce
Added a dispatch section with DefaultBackend in native_functions.yaml
IvanYashchuk Oct 15, 2020
97501c6
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Oct 15, 2020
29f94c9
gradgradcheck for cholesky now works
IvanYashchuk Oct 15, 2020
00c41ed
Updated documentation for linalg cholesky
IvanYashchuk Oct 19, 2020
34cef7d
Add one more assert for out= test
IvanYashchuk Oct 19, 2020
5d2230d
Updated test_cholesky_errors
IvanYashchuk Oct 19, 2020
63e922d
Added non contiguous test
IvanYashchuk Oct 19, 2020
6d64067
Make test_cholesky_autograd run on gpu for fp64
IvanYashchuk Oct 19, 2020
40935f1
Changed np.allclose -> torch.allclose
IvanYashchuk Oct 19, 2020
aaad340
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Oct 19, 2020
1d67cd0
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Oct 27, 2020
22be1a5
Fix long lines
IvanYashchuk Oct 28, 2020
d547b63
Remove unused import
IvanYashchuk Oct 28, 2020
fe89410
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Oct 29, 2020
9e2cfb4
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Nov 2, 2020
30239f2
Merge branch 'master' into linalg-cholesky
IvanYashchuk Nov 3, 2020
f723421
Use at::native::resize_output
IvanYashchuk Nov 3, 2020
1b46c19
Merge branch 'master' into linalg-cholesky
IvanYashchuk Nov 4, 2020
10d276a
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Nov 6, 2020
e224c97
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Nov 10, 2020
048316d
Added a warning about data movement for cuda inputs
IvanYashchuk Nov 10, 2020
0a13e1d
Use typed std::max
IvanYashchuk Nov 10, 2020
173fea1
Replaced torch.allclose with self.assertEqual
IvanYashchuk Nov 10, 2020
b26f52c
Removed unused import
IvanYashchuk Nov 10, 2020
ba3708e
Fix imports
IvanYashchuk Nov 10, 2020
5f0abff
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Nov 11, 2020
b670f26
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Nov 12, 2020
f2ee1f6
Finish merge
IvanYashchuk Nov 12, 2020
344053f
Merge branch 'master' into linalg-cholesky
Nov 12, 2020
869b40a
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Nov 12, 2020
d52b83b
Differentiation of complex cholesky on cuda now works for single input;
IvanYashchuk Nov 12, 2020
c98076b
Merge branch 'master' into linalg-cholesky
IvanYashchuk Nov 12, 2020
287c878
Batched matmul for complex on CUDA is implemented now; fix tests
IvanYashchuk Nov 13, 2020
03b29a3
Merge remote-tracking branch 'upstream/master' into linalg-cholesky
IvanYashchuk Nov 13, 2020
88e23c3
Remove redundant code from test_old_cholesky_batched
IvanYashchuk Nov 13, 2020
eb507e5
flake8 fix
IvanYashchuk Nov 13, 2020
8 changes: 7 additions & 1 deletion aten/src/ATen/native/BatchLinearAlgebra.cpp
@@ -534,11 +534,12 @@ static void apply_cholesky(Tensor& self, bool upper, std::vector<int64_t>& infos
auto self_matrix_stride = matrixStride(self);
auto batch_size = batchCount(self);
auto n = self.size(-2);
auto lda = std::max(int64_t{1}, n);

int info;
for (int64_t i = 0; i < batch_size; i++) {
scalar_t* self_working_ptr = &self_data[i * self_matrix_stride];
lapackCholesky<scalar_t>(uplo, n, self_working_ptr, n, &info);
lapackCholesky<scalar_t>(uplo, n, self_working_ptr, lda, &info);
infos[i] = info;
if (info != 0) {
return;
@@ -583,6 +584,11 @@ Tensor& cholesky_out(Tensor &result, const Tensor &self, bool upper) {
return result;
}

Tensor linalg_cholesky(const Tensor &self) {
squareCheckInputs(self);
return at::_cholesky_helper(self, /*upper=*/false).tril_();
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lu ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

template<typename scalar_t>
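For reference, a minimal Python sketch of the behavior this change exposes (assuming a build where torch.linalg.cholesky is available; _cholesky_helper is an internal helper, not public API):

import torch

A = torch.randn(3, 3, dtype=torch.float64)
A = A @ A.transpose(-2, -1) + torch.eye(3, dtype=torch.float64)  # make A symmetric positive-definite

L = torch.linalg.cholesky(A)     # always the lower-triangular factor (upper=false in the helper call)
assert torch.equal(L, L.tril())  # the upper triangle is zeroed out by tril_()
assert torch.allclose(L @ L.transpose(-2, -1), A)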
7 changes: 4 additions & 3 deletions aten/src/ATen/native/cuda/BatchLinearAlgebra.cu
@@ -1068,10 +1068,11 @@ AT_ERROR("cholesky: MAGMA library not found in "

auto self_data = self.data_ptr<scalar_t>();
magma_int_t n = magma_int_cast(self.size(-2), "self.size(-2)");
auto lda = std::max(magma_int_t{1}, n);

if (self.dim() == 2) {
magma_int_t info = 0;
magmaCholesky<scalar_t>(uplo, n, self_data, n, &info);
magmaCholesky<scalar_t>(uplo, n, self_data, lda, &info);
infos[0] = info;
} else {
auto self_mat_stride = matrixStride(self);
@@ -1102,14 +1103,14 @@ AT_ERROR("cholesky: MAGMA library not found in "
magma_int_t* info_array_cur = &info_array[mini_idx];

magmaCholeskyBatched<scalar_t>(
uplo, n, self_array_cur, n, info_array_cur, batch_limit, magma_queue);
uplo, n, self_array_cur, lda, info_array_cur, batch_limit, magma_queue);
}

// Compute whatever is left = batch_size - floor(batch_size / batch_limit) * batch_limit
// which concisely is equal to batch_size % batch_limit
if (batch_size % batch_limit != 0) {
magmaCholeskyBatched<scalar_t>(
uplo, n, &self_array[mini_idx], n, &info_array[mini_idx], batch_size % batch_limit, magma_queue);
uplo, n, &self_array[mini_idx], lda, &info_array[mini_idx], batch_size % batch_limit, magma_queue);
}

for (int64_t i = 0; i < batch_size; i++) {
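The lda = std::max(1, n) change matters for empty matrices: LAPACK and MAGMA require lda >= max(1, n), so n == 0 must still pass lda = 1. A minimal sketch of the degenerate shapes the new tests exercise (assuming empty inputs are supported, as the test_cholesky shapes below indicate):

import torch

for shape in [(0, 0), (3, 0, 0)]:   # a 0x0 matrix and a batch of 0x0 matrices
    A = torch.empty(shape, dtype=torch.float64)
    L = torch.linalg.cholesky(A)
    assert L.shape == A.shape        # an empty factor with the same shape is returned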
5 changes: 5 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -8284,6 +8284,11 @@
#
# See linalg_det as an example.

- func: linalg_cholesky(Tensor self) -> Tensor
python_module: linalg
use_c10_dispatcher: full
variants: function

# torch.linalg.det, alias for torch.det
- func: linalg_det(Tensor self) -> Tensor
python_module: linalg
1 change: 1 addition & 0 deletions docs/source/linalg.rst
@@ -12,5 +12,6 @@ Common linear algebra operations.
Functions
---------

.. autofunction:: cholesky
.. autofunction:: det
.. autofunction:: norm
89 changes: 87 additions & 2 deletions test/test_linalg.py
@@ -7,9 +7,10 @@
from torch.testing._internal.common_utils import \
(TestCase, run_tests, TEST_NUMPY, IS_MACOS, IS_WINDOWS, TEST_WITH_ASAN, make_tensor)
from torch.testing._internal.common_device_type import \
(instantiate_device_type_tests, dtypes, skipCUDAIfNoMagma, skipCPUIfNoLapack, precisionOverride)
(instantiate_device_type_tests, dtypes, dtypesIfCPU, dtypesIfCUDA,
onlyCUDA, onlyCPU, skipCUDAIfNoMagma, skipCPUIfNoLapack, precisionOverride)
from torch.testing._internal.jit_metaprogramming_utils import gen_script_fn_and_args
from torch.autograd import gradcheck
from torch.autograd import gradcheck, gradgradcheck

if TEST_NUMPY:
import numpy as np
@@ -922,6 +923,90 @@ def test_nuclear_norm_exceptions_old(self, device):
self.assertRaisesRegex(IndexError, "Dimension out of range", torch.norm, x, "nuc", (0, 2))


@precisionOverride({torch.float: 1e-2, torch.cfloat: 1e-4})
Collaborator:
The absolute tolerance for float32 is surprisingly large. What's going on there?

Collaborator Author:
I assume that NumPy could silently convert the input to fp64 and then back to fp32.
The CPU tests pass with a 1e-3 tolerance; the GPU only passes with 1e-2. That doesn't mean the decomposition is incorrect, it's still valid, but the individual entries can differ a bit across libraries/environments.
A similar thing was observed for inverse (#45034), where the results of cuSOLVER and MAGMA differ a lot for fp32.

Collaborator:
I think your analysis is probably correct. Would you verify what dtype NumPy computes in and add a comment here explaining the absolute tolerance value?

How significant a loss of precision is 1e-2 here? In terms of relative tolerance, I mean?
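For a rough sense of scale, here is a sketch (not part of this PR) that measures the relative error of a float32 factorization against a float64 reference; the exact figure depends on the random draw, the backend, and the conditioning of the input:

import torch

A64 = torch.randn(100, 100, dtype=torch.float64)
A64 = A64 @ A64.transpose(-2, -1) + 100 * torch.eye(100, dtype=torch.float64)  # well-conditioned SPD matrix

L64 = torch.linalg.cholesky(A64)                    # float64 reference
L32 = torch.linalg.cholesky(A64.to(torch.float32))  # float32 factorization

rel_err = (L32.to(torch.float64) - L64).norm() / L64.norm()
print(rel_err.item())  # roughly float32 machine epsilon scaled by the conditioning of A64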

@skipCUDAIfNoMagma
@skipCPUIfNoLapack
@dtypesIfCPU(torch.float32, torch.float64, torch.complex64, torch.complex128)
@dtypesIfCUDA(torch.float32, torch.float64)
def test_cholesky(self, device, dtype):
from torch.testing._internal.common_utils import random_hermitian_pd_matrix

def run_test(shape, batch):
A = random_hermitian_pd_matrix(shape, *batch, dtype=dtype, device=device)
expected_L = np.linalg.cholesky(A.cpu().numpy())
actual_L = torch.linalg.cholesky(A)
self.assertEqual(actual_L, expected_L)

shapes = (0, 3, 5)
batches = ((), (3, ), (2, 2))
larger_input_case = [(100, (5, ))]
for shape, batch in list(itertools.product(shapes, batches)) + larger_input_case:
run_test(shape, batch)

# cholesky requires the input to be a square matrix
A = torch.randn(2, 3, device=device, dtype=dtype)
with self.assertRaisesRegex(RuntimeError, r'must be batches of square matrices'):
torch.linalg.cholesky(A)
with self.assertRaisesRegex(np.linalg.LinAlgError, r'Last 2 dimensions of the array must be square'):
np.linalg.cholesky(A.cpu().numpy())

# cholesky requires the input to be a matrix
A = torch.randn(2, device=device, dtype=dtype)
with self.assertRaisesRegex(RuntimeError, r'must have at least 2 dimensions'):
torch.linalg.cholesky(A)
with self.assertRaisesRegex(np.linalg.LinAlgError, r'1-dimensional array given\. Array must be at least two-dimensional'):
np.linalg.cholesky(A.cpu().numpy())

# if the input matrix is singular, an error should be raised
A = torch.eye(3, 3, dtype=dtype, device=device)
A[-1, -1] = 0 # Now A is singular
with self.assertRaisesRegex(RuntimeError, r'U\(3,3\) is zero, singular U\.'):
torch.linalg.cholesky(A)
with self.assertRaisesRegex(np.linalg.LinAlgError, r'Matrix is not positive definite'):
np.linalg.cholesky(A.cpu().numpy())

# TODO: once there is more support for complex dtypes on GPU, they should be added to the above test,
# particularly when RuntimeError: _th_bmm_out not supported on CUDAType for ComplexFloat is fixed
@unittest.expectedFailure
@onlyCUDA
@skipCUDAIfNoMagma
@dtypes(torch.complex64, torch.complex128)
def test_cholesky_xfailed(self, device, dtype):
from torch.testing._internal.common_utils import random_hermitian_pd_matrix
A = random_hermitian_pd_matrix(3, dtype=dtype, device=device)
expected_L = np.linalg.cholesky(A.cpu().numpy())
actual_L = torch.linalg.cholesky(A)
Collaborator:
Does this produce the aforementioned RuntimeError? If so, then I think it's probably better to use a with self.assertRaisesRegex(...):, rather than expectedFailure.

Collaborator Author:
The idea is to use an xfail test (unittest.expectedFailure) because this is a bug that we can't fix right now.
I would expect to use self.assertRaisesRegex(...) (and consequently mark the test as passing) for intended errors that are not bugs.

self.assertEqual(actual_L, expected_L)

# TODO: enable CUDA tests once
Collaborator:
Maybe it would also be good to mention that once JIT and CUDA complex support are added, this test should be moved to test_autograd_and_jit. Or, if the tests in test_autograd_and_jit have been moved into common_methods_invocations.py by then, it should be moved there.

JIT and CUDA complex support can probably be done in future PRs.

Collaborator Author:
The test for torch.cholesky from test/test_autograd.py is not part of common_methods_invocations.py:method_tests. Maybe the reason is that it requires making the input Hermitian for gradcheck to work correctly:

def func(x):
    x = 0.5 * (x + x.transpose(-1, -2).conj()) # Make `x` Hermitian
    return torch.linalg.cholesky(x)

func(x) is being tested, not torch.linalg.cholesky(x) directly, so I don't know whether it can later be moved to test_autograd_and_jit/common_methods_invocations.py.

Collaborator:
As far as I understand, I don't think that prevents us from putting this in common_methods_invocations.py:method_tests. The second element of an entry in method_tests can be a constructing function, for instance: https://github.com/pytorch/pytorch/blob/master/torch/testing/_internal/common_methods_invocations.py#L521

So I think it would be possible to make a constructing function for cholesky too.
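For illustration, such a constructing function might look like the sketch below (hypothetical helper name, not part of this PR; the exact shape of a method_tests entry is not reproduced here):

import torch

def _make_cholesky_input(matrix_size, *batch_dims, dtype=torch.float64, device='cpu'):
    # Hypothetical constructor: returns a Hermitian positive-definite tensor so that
    # gradcheck can be run on torch.linalg.cholesky directly.
    a = torch.randn(*batch_dims, matrix_size, matrix_size, dtype=dtype, device=device)
    return a @ a.transpose(-2, -1).conj() + torch.eye(matrix_size, dtype=dtype, device=device)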

# RuntimeError: "triangular_solve_cuda" not implemented for 'ComplexDouble' is fixed
@onlyCPU
@skipCPUIfNoLapack
@dtypes(torch.float64, torch.complex128)
def test_cholesky_autograd(self, device, dtype):
def func(root):
x = 0.5 * (root + root.transpose(-1, -2).conj())
return torch.linalg.cholesky(x)

def run_test(shape):
root = torch.rand(*shape, dtype=dtype, device=device, requires_grad=True)
root = root + torch.eye(shape[-1], dtype=dtype, device=device)

gradcheck(func, root)
# TODO: gradgradcheck does not work correctly yet for complex
Collaborator:
@anjali411 has been working on better gradcheck support for complex; I'm not sure about the gradgradcheck status. However, I think @peterbell10 found a nice workaround for this issue. @peterbell10, can you comment?

Collaborator Author:
gradgradcheck works with #45737. Once it's merged, I'll update the code here.

Collaborator:
Sounds good.

if not dtype.is_complex:
gradgradcheck(func, root)

root = torch.rand(*shape, dtype=dtype, device=device)
root = torch.matmul(root, root.transpose(-1, -2).conj())
root.requires_grad_()
chol = torch.linalg.cholesky(root).sum().backward()
self.assertEqual(root.grad, root.grad.transpose(-1, -2).conj()) # Check the gradient is hermitian

shapes = ((3, 3), (4, 3, 2, 2))
for shape in shapes:
run_test(shape)

instantiate_device_type_tests(TestLinalg, globals())

if __name__ == '__main__':
14 changes: 2 additions & 12 deletions test/test_torch.py
@@ -7820,19 +7820,9 @@ def cholesky_test_helper(n, batch_dims, upper):
@dtypes(torch.float32, torch.float64, torch.complex64, torch.complex128)
@tf32_on_and_off(0.01)
def test_cholesky(self, device, dtype):
Collaborator:
Would you move this test to test_linalg.py? It will need a new name, like "test_torch_cholesky" or "test_old_cholesky."

Collaborator:
Also, is there a change to this test that would demonstrate your lda fix?

Also, if you spot other Cholesky tests, let's move them too.

from torch.testing._internal.common_utils import \
(random_symmetric_pd_matrix,
random_fullrank_matrix_distinct_singular_value)
from torch.testing._internal.common_utils import random_hermitian_pd_matrix

# This is a workaround while there is no support for complex random_symmetric_pd_matrix
if dtype.is_complex:
real_dtype = torch.float32 if dtype is torch.complex64 else torch.float64
A_real = random_fullrank_matrix_distinct_singular_value(10, dtype=real_dtype, device=device)
A_imag = random_fullrank_matrix_distinct_singular_value(10, dtype=real_dtype, device=device)
A = A_real + 1j * A_imag
A = A @ A.t().conj()
else:
A = random_symmetric_pd_matrix(10, dtype=dtype, device=device)
A = random_hermitian_pd_matrix(10, dtype=dtype, device=device)

# default Case
C = torch.cholesky(A)
3 changes: 3 additions & 0 deletions tools/autograd/derivatives.yaml
@@ -294,6 +294,9 @@
- name: cholesky(Tensor self, bool upper=False) -> Tensor
self: cholesky_backward(grad, upper, result)

- name: linalg_cholesky(Tensor self) -> Tensor
self: cholesky_backward(grad, false, result)

- name: cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor
self, input2: cholesky_solve_backward(grad, self, input2, result, upper)

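The new derivatives.yaml entry reuses cholesky_backward with upper=false. For reference, a Python sketch of the usual lower-triangular Cholesky backward formula (a sketch of the standard derivation for a single matrix, not a line-for-line copy of the C++ implementation):

import torch

def cholesky_backward_sketch(grad_L, L):
    # Given A = L @ L^H and the gradient grad_L w.r.t. L, return the gradient w.r.t. A.
    phi = L.transpose(-2, -1).conj() @ grad_L
    phi = phi.tril()
    phi.diagonal(dim1=-2, dim2=-1).mul_(0.5)            # halve the diagonal
    eye = torch.eye(L.shape[-1], dtype=L.dtype, device=L.device)
    L_inv = torch.triangular_solve(eye, L, upper=False).solution
    grad_A = L_inv.transpose(-2, -1).conj() @ phi @ L_inv
    return 0.5 * (grad_A + grad_A.transpose(-2, -1).conj())  # keep the result Hermitian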
2 changes: 1 addition & 1 deletion tools/autograd/gen_variable_type.py
@@ -162,7 +162,7 @@
'cosh', '__rmul__', 'sgn', 'asin', 'acos', 'sub', 'div', 'cat', 'view_as_complex',
'neg', 'complex', 'select', '_s_where', 'as_strided', 'slice', 'constant_pad_nd',
'unbind', 'split', 'split_with_sizes', 'unsafe_split', 'split_with_sizes_backward',
'dot', 'vdot', 'cholesky'
'dot', 'vdot', 'cholesky', 'linalg_cholesky'
}

# Some operators invalidate the grad_accumulator. Let's reset it.
18 changes: 18 additions & 0 deletions torch/csrc/api/include/torch/linalg.h
@@ -8,6 +8,10 @@ namespace linalg {
#ifndef DOXYGEN_SHOULD_SKIP_THIS
namespace detail {

inline Tensor cholesky(const Tensor& self) {
return torch::linalg_cholesky(self);
}

inline Tensor det(const Tensor& self) {
return torch::linalg_det(self);
}
@@ -31,6 +35,20 @@ inline Tensor& norm_out(Tensor& result, const Tensor& self, std::string ord, opt
} // namespace detail
#endif /* DOXYGEN_SHOULD_SKIP_THIS */

/// Cholesky decomposition
///
/// See https://pytorch.org/docs/master/linalg.html#torch.linalg.cholesky
///
/// Example:
/// ```
/// auto A = torch::randn({4, 4});
/// A = torch::matmul(A, A.t());
/// auto L = torch::linalg::cholesky(A);
/// assert(torch::allclose(torch::matmul(L, L.t()), A));
/// ```
inline Tensor cholesky(const Tensor& self) {
return detail::cholesky(self);
}

/// See the documentation of torch.linalg.det
inline Tensor linalg_det(const Tensor& self) {
45 changes: 45 additions & 0 deletions torch/linalg/__init__.py
@@ -8,6 +8,51 @@
# Note: This not only adds doc strings for functions in the linalg namespace, but
# also connects the torch.linalg Python namespace to the torch._C._linalg builtins.

cholesky = _add_docstr(_linalg.linalg_cholesky, r"""
linalg.cholesky(input) -> Tensor

Returns the Cholesky decomposition.
Collaborator:
I think combining the first few paragraphs would be helpful. What about something like:

"Computes the Cholesky decomposition of a Hermitian positive-definite matrix or the Cholesky decompositions of a batch of such matrices. If the matrices are real-valued then each of their Cholesky decompositions can be written as A = LL^t, for a lower triangular matrix L, and this function returns the matrix L for each input matrix. If the matrices are complex-valued then their Cholesky decompositions are A = L @ L.H for a lower triangular matrix L, where L.H is the conjugate transpose of L."


Computes the Cholesky decomposition of a Hermitian (or symmetric for real-valued matrices)
positive-definite matrix :math:`A`, or of batches of Hermitian positive-definite matrices.
The returned matrix ``L`` is lower-triangular, and
the decomposition has the form:

.. math::

A = LL^H

If :attr:`input` is a batch of Hermitian positive-definite
matrices, then the returned tensor will be composed of lower-triangular Cholesky factors
of each of the individual matrices.

.. note:: If :attr:`input` is not a Hermitian positive-definite matrix, a RuntimeError is raised
Collaborator:
This note also needs to account for the batching behavior. Maybe something like:

"If :attr:`input` is not a Hermitian positive-definite matrix, or if it's a batch of matrices and one of them is not a Hermitian positive-definite matrix, then a RuntimeError will be thrown."

As for the error behavior: does it specify which matrix in the batch was discovered to not be a Hermitian positive-definite matrix, too? Another option here would be to not elaborate about the content of the error message.

Collaborator Author:

Collaborator:
Great; let's be clear about that (and maybe even add a test for it erroring on the correct batch element).

saying that the input is singular and mentioning which minor of the input matrix is not positive-definite.

.. note::
Supports real and complex inputs.
Backpropagation for complex inputs is only supported on the CPU.

Collaborator (@mruberry, Nov 10, 2020):
Add a warning that this causes cross-device synchronization when called on CUDA inputs

cc @heitorschueroff who's looking into cross-device data movement in operations that use MAGMA

Args:
input (Tensor): the input tensor :math:`A` of size :math:`(*, n, n)` where `*` is zero or more
Collaborator:
Calling the tensor "A" here isn't helpful and possibly misleading if the input is a batch of matrices. Maybe you can say "a Hermitian positive-definite matrix or batch of such matrices"?

If multiple matrices are passed as input, does each of them have to have the same size? That requirement (or lack thereof) should be mentioned somewhere in the documentation.

Collaborator Author:
I'll remove the "A".
The input tensor must have a shape of the form (*, n, n), where the last two dimensions are equal, i.e. a batch of square matrices. There is a check for that, and tests. I could add that the matrices should be square, but that's already implied by calling a matrix positive-definite.

Collaborator Author:
And it's not technically possible to pass multiple matrices of different sizes as the input, since we don't support lists of tensors and we treat the last two dimensions of a tensor as the matrix we process.

Collaborator:
Right. Good point. I've been in too many discussions about nested tensors, which can be ragged, recently. My mistake.

batch dimensions consisting of Hermitian positive-definite matrices.

Example::

>>> a = torch.randn(2, 2, dtype=torch.complex128)
>>> a = torch.mm(a, a.t().conj()) # To make a Hermitian
IvanYashchuk marked this conversation as resolved.
Show resolved Hide resolved
>>> l = torch.linalg.cholesky(a)
IvanYashchuk marked this conversation as resolved.
Show resolved Hide resolved
>>> a
tensor([[2.5266+0.0000j, 1.9586-2.0626j],
[1.9586+2.0626j, 9.4160+0.0000j]], dtype=torch.complex128)
>>> l
tensor([[1.5895+0.0000j, 0.0000+0.0000j],
[1.2322+1.2976j, 2.4928+0.0000j]], dtype=torch.complex128)
>>> torch.mm(l, l.t().conj())
tensor([[2.5266+0.0000j, 1.9586-2.0626j],
[1.9586+2.0626j, 9.4160+0.0000j]], dtype=torch.complex128)
""")

det = _add_docstr(_linalg.linalg_det, r"""
linalg.det(input) -> Tensor

8 changes: 8 additions & 0 deletions torch/testing/_internal/common_utils.py
@@ -1528,6 +1528,14 @@ def random_symmetric_pd_matrix(matrix_size, *batch_dims, **kwargs):
+ torch.eye(matrix_size, dtype=dtype, device=device) * 1e-5


def random_hermitian_pd_matrix(matrix_size, *batch_dims, **kwargs):
Collaborator:
Add a comment describing what this function does and how to call it.

Collaborator (@mruberry, Oct 14, 2020):
I think the signature of this function should be: (matrix_size, *, batch_dims, dtype, device).

Since it's an internal testing function it probably shouldn't have default values. Each caller should be responsible for correctly setting dtype and device.

Collaborator Author:
Okay, I can change that. I was just following the other existing functions.

def random_symmetric_pd_matrix(matrix_size, *batch_dims, **kwargs):
    dtype = kwargs.get('dtype', torch.double)
    device = kwargs.get('device', 'cpu')
    A = torch.randn(*(batch_dims + (matrix_size, matrix_size)),
                    dtype=dtype, device=device)
    return torch.matmul(A, A.transpose(-2, -1))

In fact, all calls to random_symmetric_pd_matrix can be safely replaced by random_hermitian_pd_matrix, probably in a follow-up PR.

dtype = kwargs.get('dtype', torch.double)
device = kwargs.get('device', 'cpu')
A = torch.randn(*(batch_dims + (matrix_size, matrix_size)),
dtype=dtype, device=device)
return torch.matmul(A, A.transpose(-2, -1).conj())
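A short usage sketch for the new helper (assuming the signature shown above; the final check via torch.cholesky succeeds because A @ A^H is almost surely positive-definite for a random full-rank draw):

import torch
from torch.testing._internal.common_utils import random_hermitian_pd_matrix

A = random_hermitian_pd_matrix(4, 2, dtype=torch.complex128, device='cpu')  # a batch of two 4x4 matrices
assert A.shape == (2, 4, 4)
assert torch.allclose(A, A.transpose(-2, -1).conj())  # Hermitian by construction
torch.cholesky(A)  # raises only if a draw happens to be numerically singular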


def make_nonzero_det(A, sign=None, min_singular_value=0.1):
u, s, v = A.svd()
s.clamp_(min=min_singular_value)