
Sparse pca patch #5493

Merged: 32 commits from sparse_pca_patch into branch-23.08, Aug 2, 2023

Commits
4943285
Fixes for sparse PCA
Intron7 Jul 6, 2023
7c76ede
added support for 64bit indexing
Intron7 Jul 6, 2023
20ce7ec
Update python/cuml/prims/stats/covariance.py
Intron7 Jul 8, 2023
ba8ed16
Update python/cuml/prims/stats/covariance.py
Intron7 Jul 8, 2023
91eed9a
Update python/cuml/prims/stats/covariance.py
Intron7 Jul 8, 2023
67834aa
pre-commit changes
Intron7 Jul 8, 2023
c7a0324
switched to only import cov_sparse
Intron7 Jul 9, 2023
75885fa
updated `cov_sparse`
Intron7 Jul 9, 2023
00ef242
added `test_cov_sparse`
Intron7 Jul 9, 2023
572942c
pre-commit update
Intron7 Jul 9, 2023
39a95b9
Update python/cuml/prims/stats/covariance.py
Intron7 Jul 10, 2023
5650ce1
made `cov_sparse` private
Intron7 Jul 10, 2023
d08dff6
call `_cov_sparse` from `cov`
Intron7 Jul 10, 2023
25b8563
docstring fix
Intron7 Jul 10, 2023
39ded03
updated test
Intron7 Jul 10, 2023
0b293dc
fixed typo with `self.mean_`
Intron7 Jul 10, 2023
e8df915
added hint to issue #5475
Intron7 Jul 10, 2023
6a421e7
Merge branch 'branch-23.08' into sparse_pca_patch
Intron7 Jul 11, 2023
0024078
fixes issue number
Intron7 Jul 11, 2023
308e947
Merge branch 'branch-23.08' into sparse_pca_patch
Intron7 Jul 12, 2023
f5018fa
Merge branch 'rapidsai:branch-23.08' into sparse_pca_patch
Intron7 Jul 13, 2023
106b4c8
Merge branch 'branch-23.08' into sparse_pca_patch
Intron7 Jul 18, 2023
9a3fb01
Updated Tests
Intron7 Jul 24, 2023
a9155c1
improved csr kernel and added coo kernel
Intron7 Jul 24, 2023
7a58773
Merge branch 'rapidsai:branch-23.08' into sparse_pca_patch
Intron7 Jul 27, 2023
b9d73b3
fixed bug with `_cov_sparse`
Intron7 Jul 27, 2023
f744a6c
Merge branch 'branch-23.08' into sparse_pca_patch
Intron7 Jul 31, 2023
e24f187
Merge branch 'branch-23.08' into sparse_pca_patch
Intron7 Jul 31, 2023
a9c76b3
Merge branch 'branch-23.08' into sparse_pca_patch
Intron7 Aug 1, 2023
b321a51
Revert change to python/cuml/prims/stats/__init__.py .
csadorf Aug 1, 2023
cca3f81
Merge branch 'branch-23.08' into sparse_pca_patch
Intron7 Aug 1, 2023
1b9a2f7
added reference to cuml issue
Intron7 Aug 2, 2023
2 changes: 1 addition & 1 deletion python/cuml/prims/stats/__init__.py
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
131 changes: 131 additions & 0 deletions python/cuml/prims/stats/covariance.py
@@ -36,11 +36,42 @@
}
"""

mean_cov_kernel_str = r"""
(const int *indptr, const int *index, {0} *data, int nrows, int ncols, {0} *out, {0} *mean) {
int row = blockDim.x * blockIdx.x + threadIdx.x;
if(row >= nrows) return;
int start_idx = indptr[row];
int stop_idx = indptr[row+1];

for(int idx = start_idx; idx < stop_idx; idx++){
cjnolet (Member), Jul 18, 2023:
I have some concerns about this implementation, but the good news is that I think we have some options that are fairly trivial to fix. If you think about the way SIMT architectures like GPUs work at the hardware level, each warp (a grouping of 32 threads) is only able to execute a single instruction at a time. If two threads within a warp need to execute different instructions, the rest of the threads need to stall to wait for those instructions, even if they aren't executing anything. Because of this, we try to design our kernels so that the threads within each warp are 1) able to do a uniform amount of work, and 2) able to execute the same instructions as much as possible. Things like atomics and conditional branching can have an impact on this, which is called warp divergence.

The degree distributions (number of columns for each row) are almost never uniform and are most often highly skewed, sometimes even by power laws. Because of this, you cannot expect good performance by simply having each thread loop through the columns within each row. Sometimes folks perform a permutation of the matrix in order to sort the rows by their degree distributions. This can help a little, but it's not a feasible solution here because we can't afford to copy the data.

The other piece here is the atomics: they are expensive, and they also cause the warps to diverge because the amount of time each atomic takes to execute is non-deterministic and based on the number of competing concurrent writes. These collisions are going to compound with the means, and I would highly suggest removing the fused mean computation by using cupy to compute the mean instead.

Memory reads are also impacted by this model, because with each thread reading sequential memory locations from the sparse arrays, you aren't able to benefit from coalescing within each warp, since the threads won't be reading from sequential locations on each instruction cycle.

For CSR matrices, an efficient way to do this would be to schedule some number of warps per block (let's start with somewhere between 1 and 8) and have each warp work on its own row at a time. A block that contains multiple warps will need to wait for any straggler warps, but 1 warp per block could end up causing load-balancing issues. To get a little more intricate, we could perform a differencing of the indptr array, which would give us the degree of each row, and then perform a couple of kernel launches with different numbers of warps per block to make sure we're keeping the warps uniform (enough) for good performance. For a first pass, though, we can skip launching multiple kernels and just find a good block size that yields reasonable performance on power-law graphs.
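The indptr differencing mentioned above is a one-liner on the host. A small numpy sketch (an illustrative stand-in for the cupy arrays used in this PR; the bucketing threshold is arbitrary):

```python
import numpy as np

# Hand-built CSR row-pointer array for a 4-row matrix:
# row 1 is empty, row 2 holds 4 nonzeros.
indptr = np.array([0, 3, 3, 7, 8])

# Differencing indptr yields the degree (nonzero count) of each row.
degrees = np.diff(indptr)
print(degrees)  # -> [3 0 4 1]

# Rows could then be bucketed by degree so each kernel launch sees
# warps with roughly uniform work.
heavy_rows = np.flatnonzero(degrees > 3)
print(heavy_rows)  # -> [2]
```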

For COO: this is the easiest case, since we can essentially compute the output Gram matrix embarrassingly parallel: map each thread to an edge in the array and perform your atomicAdd.
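A CPU sketch of that COO scheme, with `np.add.at` standing in for `atomicAdd` and one conceptual thread per pair of entries sharing a row (illustrative only; the real kernel is a CUDA RawKernel on the GPU):

```python
import numpy as np

# COO triplets of a 3x4 sparse matrix (one "edge" per stored entry).
rows = np.array([0, 0, 1, 2, 2])
cols = np.array([1, 3, 2, 0, 3])
vals = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
ncols = 4

# Every pair of entries sharing a row contributes v_a * v_b to
# gram[col_a, col_b]; np.add.at mimics the kernel's atomicAdd.
gram = np.zeros((ncols, ncols))
for a in range(len(vals)):
    for b in range(len(vals)):
        if rows[a] == rows[b]:
            np.add.at(gram, (cols[a], cols[b]), vals[a] * vals[b])

# Dense reference: the accumulated gram must equal X.T @ X.
x = np.zeros((3, ncols))
x[rows, cols] = vals
assert np.allclose(gram, x.T @ x)
```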

I would also make sure to test the performance of this implementation with a power-law graph. You can use the RMAT generator in pylibraft to generate such a graph. The problem with cupy's sparse generator is that the resulting arrays will have a uniform degree distribution, and thus will not match real-world sparse datasets. Power-law is a worst case, so if we work well on those, uniform degree distributions will just yield better perf.

Further, since we are replacing a highly optimized primitive from cuSPARSE here, we should do our due diligence and gather some benchmarks to make sure we aren't introducing any significant regressions in the meantime.

Intron7 (Contributor, Author) replied:

Hey Corey, thank you so much for your insights and tips. I worked on new kernels for both COO and CSR. In my testing (so far) these are much faster than the cupy x.T.dot(x) versions, at least for matrix sizes where those still worked. I kept the reduced atomicAdds so that only the upper half of the matrix gets filled in. I'm currently getting the power-law graph benchmarks done.
So far, for cpx.scipy.sparse.random(100000, 2000, density=0.05, format="csr/coo", dtype=cp.float64, random_state=42), I'm going from around 800 ms for x.T.dot(x) to 8-10 ms for the RawKernels. For my real-world single-cell data they also perform really well. I think that 2-3 warps might be the most performant version.

Intron7 (Contributor, Author):

@cjnolet can we assume that the COO matrix is sorted?

cjnolet (Member):

@Intron7 I don't know that we can assume that with cupy/scipy. But if we use an element-wise kernel, we shouldn't necessarily have to assume that, should we?

Intron7 (Contributor, Author):

Dear @cjnolet,
here are the performance numbers you asked for. I tested my raw kernels with multiple batch sizes against the standard SpGEMM algorithms from CuPy. I ran each function 10 times and averaged the runtimes (in ms). The coo_kernel results include the x.sum_duplicates() runtime. For 50000000 edges and more, the standard libraries stop working. I updated the kernels in the branch.

Testing for 50000 edges
(51073, 16382)
csr 10.99555492401123
coo 17.003202438354492
csr_kernel 32 11.6835355758667
csr_kernel 64 9.113883972167969
csr_kernel 128 8.887648582458496
csr_kernel 256 36.73577308654785
csr_kernel 512 54.798269271850586
csr_kernel 1024 55.03363609313965
coo_kernel 32 61.06009483337402
coo_kernel 64 51.66192054748535
coo_kernel 128 61.94412708282471
coo_kernel 256 62.290191650390625
coo_kernel 512 62.026119232177734
coo_kernel 1024 61.32020950317383

Testing for 500000 edges
(29509, 16374)
csr 65.50588607788086
coo 71.40600681304932
csr_kernel 32 54.044485092163086
csr_kernel 64 60.14046669006348
csr_kernel 128 56.35478496551514
csr_kernel 256 46.6019868850708
csr_kernel 512 54.707956314086914
csr_kernel 1024 55.11133670806885
coo_kernel 32 63.839530944824226
coo_kernel 64 64.72692489624023
coo_kernel 128 65.07103443145752
coo_kernel 256 69.69590187072754
coo_kernel 512 64.99478816986084
coo_kernel 1024 64.7336483001709

Testing for 5000000 edges
(43418, 16377)
csr 360.93554496765137
coo 219.34540271759033
csr_kernel 32 34.18407440185547
csr_kernel 64 53.055429458618164
csr_kernel 128 58.417463302612305
csr_kernel 256 52.16631889343262
csr_kernel 512 49.57113265991211
csr_kernel 1024 54.834651947021484
coo_kernel 32 78.15330028533936
coo_kernel 64 76.80673599243164
coo_kernel 128 77.03337669372559
coo_kernel 256 73.25513362884521
coo_kernel 512 76.92813873291016
coo_kernel 1024 77.04839706420898

Testing for 50000000 edges
(64359, 16384)
csr_kernel 32 188.805890083313
csr_kernel 64 158.88402462005615
csr_kernel 128 87.86423206329346
csr_kernel 256 78.75776290893555
csr_kernel 512 76.42595767974854
csr_kernel 1024 70.73354721069336
coo_kernel 32 283.26284885406494
coo_kernel 64 282.31539726257324
coo_kernel 128 282.2613477706909
coo_kernel 256 282.2352886199951
coo_kernel 512 282.2505474090576
coo_kernel 1024 282.18557834625244

Testing for 500000000 edges
(11200, 16384)
csr_kernel 32 585.2307081222534
csr_kernel 64 414.9267911911011
csr_kernel 128 305.6136131286621
csr_kernel 256 256.9816827774048
csr_kernel 512 221.5327024459839
csr_kernel 1024 204.65679168701172
coo_kernel 32 3360.795545578003
coo_kernel 64 3355.547070503235
coo_kernel 128 3356.409192085266
coo_kernel 256 3355.93740940094
coo_kernel 512 3356.1617851257324
coo_kernel 1024 3356.2454223632812
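As an aside, the "run 10 times and average" methodology above can be sketched with a minimal CPU timing harness (the `bench` helper and the dense numpy workload are illustrative stand-ins; the actual benchmarks ran cupyx sparse kernels on a GPU, where you would also synchronize the device around the timed region):

```python
import time
import numpy as np

def bench(fn, reps=10):
    # Average wall-clock milliseconds over `reps` runs.
    t0 = time.perf_counter()
    for _ in range(reps):
        fn()
    return (time.perf_counter() - t0) / reps * 1000.0

# Toy stand-in workload for x.T.dot(x).
x = np.random.rand(512, 64)
ms = bench(lambda: x.T @ x)
print(f"x.T @ x: {ms:.3f} ms")
```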

int index1 = index[idx];
{0} data1 = data[idx];
long long int outidx = \
static_cast<long long int>(index1) * ncols + index1;
atomicAdd(&out[outidx], data1 * data1);
atomicAdd(&mean[index1], data1);
for(int idx2 = idx+1; idx2 < stop_idx; idx2++){
int index2 = index[idx2];
{0} data2 = data[idx2];
long long int outidx2 = \
static_cast<long long int>(index1) * ncols + index2;
atomicAdd(&out[outidx2], data1 * data2);
}
}
}
"""


def _cov_kernel(dtype):
return cuda_kernel_factory(cov_kernel_str, (dtype,), "cov_kernel")


def _mean_cov_kernel(dtype):
return cuda_kernel_factory(
mean_cov_kernel_str, (dtype,), "mean_cov_kernel"
)


@cuml.internals.api_return_any()
def cov(x, y, mean_x=None, mean_y=None, return_gram=False, return_mean=False):
"""
@@ -102,6 +133,15 @@ def cov(x, y, mean_x=None, mean_y=None, return_gram=False, return_mean=False):
"X and Y must have same shape %s != %s" % (x.shape, y.shape)
)

# Fix for cupy issue #7699: addressing problems with sparse matrix multiplication (spGEMM)
cjnolet (Member):
Please reference a cuml github issue here in a TODO (and create one if not already created) so that we can track it and know where to apply the fix in the code.

Intron7 (Contributor, Author):
#5475 is the cuml issue, and I can also reference it in the comment. Would that be sufficient, @cjnolet?

csadorf (Contributor):
@cjnolet I've asked that we reference the underlying issue here instead of the cuML issue that will be closed with this PR. Do you want a separate cuML issue that references cupy#7699?

cjnolet (Member):
@csadorf yes, I would prefer to reference the cuml issue (and also reference the cupy issue), since the cuml issue is local to the repository where the code is hosted and thus has a stronger link to tracking the work's progress.

Intron7 (Contributor, Author):
I added a reference to cuml issue #5475.

if (
x is y
and cupyx.scipy.sparse.issparse(x)
and mean_x is None
and mean_y is None
):
return _cov_sparse(x, return_gram=return_gram, return_mean=return_mean)

if mean_x is not None and mean_y is not None:
if mean_x.dtype != mean_y.dtype:
raise ValueError(
@@ -156,3 +196,94 @@
return cov_result, mean_x, mean_y
elif return_gram and return_mean:
return cov_result, gram_matrix, mean_x, mean_y


@cuml.internals.api_return_any()
def _cov_sparse(x, return_gram=False, return_mean=False):
"""
    Computes the mean and the covariance of matrix X, using
    the form Cov(X, X) = E(X^T X) - E(X) E(X)^T

This is a temporary fix for cupy issue #7699, where the
operation `x.T.dot(x)` did not work for larger
sparse matrices.

Parameters
----------

x : cupyx.scipy.sparse of size (m, n)
return_gram : boolean (default = False)
If True, gram matrix of the form (1 / n) * X.T.dot(X)
will be returned.
When True, a copy will be created
to store the results of the covariance.
When False, the local gram matrix result
will be overwritten
return_mean: boolean (default = False)
    If True, the Maximum Likelihood Estimate of the mean of X
    will be returned (twice, mirroring the interface of `cov`),
    of the form (1 / n) * sum(X)

Returns
-------

result : cov(X, X) when return_gram and return_mean are False
cov(X, X), gram(X, X) when return_gram is True,
return_mean is False
cov(X, X), mean(X), mean(X) when return_gram is False,
return_mean is True
cov(X, X), gram(X, X), mean(X), mean(X)
when return_gram is True and return_mean is True
"""
if not cupyx.scipy.sparse.isspmatrix_csr(x):
x = x.tocsr()
gram_matrix = cp.zeros((x.shape[1], x.shape[1]), dtype=x.data.dtype)
mean_x = cp.zeros((x.shape[1],), dtype=x.data.dtype)

block = (8,)
grid = (math.ceil(x.shape[0] / block[0]),)
compute_mean_cov = _mean_cov_kernel(x.data.dtype)
compute_mean_cov(
grid,
block,
(
x.indptr,
x.indices,
x.data,
x.shape[0],
x.shape[1],
gram_matrix,
mean_x,
),
)
gram_matrix = gram_matrix + gram_matrix.T
gram_matrix -= cp.diag(cp.diag(gram_matrix) / 2)
gram_matrix *= 1 / x.shape[0]
mean_x *= 1 / x.shape[0]

if return_gram:
cov_result = cp.zeros(
(gram_matrix.shape[0], gram_matrix.shape[0]),
dtype=gram_matrix.dtype,
)
else:
cov_result = gram_matrix

compute_cov = _cov_kernel(x.dtype)

block_size = (8, 8)
grid_size = (math.ceil(gram_matrix.shape[0] / 8),) * 2
compute_cov(
grid_size,
block_size,
(cov_result, gram_matrix, mean_x, mean_x, gram_matrix.shape[0]),
)

if not return_gram and not return_mean:
return cov_result
elif return_gram and not return_mean:
return cov_result, gram_matrix
elif not return_gram and return_mean:
return cov_result, mean_x, mean_x
elif return_gram and return_mean:
return cov_result, gram_matrix, mean_x, mean_x
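As a sanity check on the identity `_cov_sparse` relies on, here is a dense numpy sketch (illustrative, not part of the PR diff) verifying that the scaled Gram matrix minus the outer product of the mean reproduces the population covariance:

```python
import numpy as np

# Cov(X, X) = (1/n) * X.T @ X - outer(mean, mean), with ddof=0.
rng = np.random.default_rng(0)
x = rng.random((50, 4))
n = x.shape[0]

gram = x.T @ x / n       # (1/n) * Gram, as accumulated by the kernel
mean = x.mean(axis=0)    # MLE mean
cov = gram - np.outer(mean, mean)

assert np.allclose(cov, np.cov(x, rowvar=False, ddof=0))
```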
24 changes: 24 additions & 0 deletions python/cuml/tests/test_stats.py
@@ -15,6 +15,7 @@

from cuml.testing.utils import array_equal
from cuml.prims.stats import cov
from cuml.prims.stats.covariance import _cov_sparse
import pytest
from cuml.internals.safe_imports import gpu_only_import

@@ -43,3 +44,26 @@ def test_cov(nrows, ncols, sparse, dtype):
local_cov = cp.cov(x, rowvar=False, ddof=0)

assert array_equal(cov_result, local_cov, 1e-6, with_sign=True)


@pytest.mark.parametrize("nrows", [1000])
@pytest.mark.parametrize("ncols", [500, 1500])
@pytest.mark.parametrize("dtype", [cp.float32, cp.float64])
def test_cov_sparse(nrows, ncols, dtype):

x = cupyx.scipy.sparse.random(
nrows, ncols, density=0.07, format="csr", dtype=dtype
)
cov_result = _cov_sparse(x, return_mean=True)

# check cov
assert cov_result[0].shape == (ncols, ncols)

x = x.todense()
local_cov = cp.cov(x, rowvar=False, ddof=0)

assert array_equal(cov_result[0], local_cov, 1e-6, with_sign=True)

# check mean
local_mean = x.mean(axis=0)
assert array_equal(cov_result[1], local_mean, 1e-6, with_sign=True)