Skip to content

Commit

Permalink
adds test for zero genes to pca (#144)
Browse files Browse the repository at this point in the history
* adds test for zero genes to pca

* adds release note
  • Loading branch information
Intron7 committed Mar 21, 2024
1 parent 8033359 commit 8c91049
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 13 deletions.
1 change: 1 addition & 0 deletions docs/release-notes/0.10.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
```
* Fixes an issue where `pp.normalize` and `pp.log1p` now use `copy` and `inplace` corretly {pr}`129` {smaller}`S Dicks`
* changes the graph constructor for `tl.leiden` and `tl.louvain` {pr}`143` {smaller}`S Dicks`
* Added a test to handle zero features, that caused issues in the sparse `pp.pca` {pr}`144` {smaller}`S Dicks`

```{rubric} Removals
```
Expand Down
12 changes: 12 additions & 0 deletions src/rapids_singlecell/preprocessing/_kernels/_pca_sparse_kernel.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import cupy as cp
from cuml.common.kernel_utils import cuda_kernel_factory

cov_kernel_str = r"""
Expand Down Expand Up @@ -50,6 +51,17 @@
}
}
"""
check_zero_genes = r"""
extern "C" __global__ void check_zero_genes(const int* indices, int* genes, int nnz) {
int value = blockIdx.x * blockDim.x + threadIdx.x;
if(value >= nnz){
return;
}
atomicAdd(&genes[indices[value]], 1);
}
"""
_zero_genes_kernel = cp.RawKernel(check_zero_genes, "check_zero_genes")


def _cov_kernel(dtype):
Expand Down
24 changes: 24 additions & 0 deletions src/rapids_singlecell/preprocessing/_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ def fit(self, x):

if not isspmatrix_csr(x):
x = x.tocsr()
_check_matrix_for_zero_genes(x)
self.n_samples_ = x.shape[0]
self.n_features_in_ = x.shape[1] if x.ndim == 2 else 1
self.dtype = x.data.dtype
Expand Down Expand Up @@ -354,3 +355,26 @@ def _cov_sparse(x, return_gram=False, return_mean=False):
return cov_result, mean_x, mean_x
elif return_gram and return_mean:
return cov_result, gram_matrix, mean_x, mean_x


def _check_matrix_for_zero_genes(X):
gene_ex = cp.zeros(X.shape[1], dtype=cp.int32)

from ._kernels._pca_sparse_kernel import _zero_genes_kernel

block = (32,)
grid = (int(math.ceil(X.nnz / block[0])),)
_zero_genes_kernel(
grid,
block,
(
X.indices,
gene_ex,
X.nnz,
),
)
if cp.any(gene_ex == 0):
raise ValueError(
"There are genes with zero expression. "
"Please remove them before running PCA."
)
3 changes: 2 additions & 1 deletion src/rapids_singlecell/preprocessing/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ def _check_gpu_X(X):
else:
raise TypeError(
"The input is not a CuPy ndarray or CuPy sparse matrix. "
"Rapids-singlecell only supports GPU matrices, "
"Rapids-singlecell only supports GPU matrices in this function, "
"so your input must be either a CuPy ndarray or a CuPy sparse matrix. "
"Please checkout `rapids_singlecell.get.anndata_to_GPU` to convert your data to GPU. "
"If you're working with CPU-based matrices, please consider using Scanpy instead."
)
19 changes: 7 additions & 12 deletions tests/test_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,6 @@ def test_pca_sparse():
atol=1e-6,
)

def test_mask_highly_var_error():
"""Check if use_highly_variable=True throws an error if the annotation is missing."""
adata = AnnData(np.array(A_list).astype("float32"))
with pytest.warns(
FutureWarning,
match=r"Argument `use_highly_variable` is deprecated, consider using the mask argument\.",
), pytest.raises(
ValueError,
match=r"Did not find `adata\.var\['highly_variable'\]`\.",
):
sc.pp.pca(adata, use_highly_variable=True)

def test_mask_length_error():
"""Check error for n_obs / mask length mismatch."""
adata = AnnData(np.array(A_list).astype("float32"))
Expand Down Expand Up @@ -244,3 +232,10 @@ def test_pca_layer():
)
np.testing.assert_almost_equal(X_adata.obsm["X_pca"], layer_adata.obsm["X_pca"])
np.testing.assert_almost_equal(X_adata.varm["PCs"], layer_adata.varm["PCs"])

def test_pca_layer_mask():
adata = sc.datasets.pbmc3k()[:,1000].copy()
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
with pytest.raises(ValueError,match="There are genes with zero expression. Please remove them before running PCA."):
rsc.pp.pca(adata)

0 comments on commit 8c91049

Please sign in to comment.