Skip to content

Commit

Permalink
updates clip to work with masks (#158)
Browse files Browse the repository at this point in the history
* updates clip to work with masks

* adds release note
  • Loading branch information
Intron7 committed Apr 3, 2024
1 parent efff1bc commit 97fb0f7
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 15 deletions.
1 change: 1 addition & 0 deletions docs/release-notes/0.10.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
```{rubric} Bug fixes
```
* Updates the behavior of `_check_gpu_X` for `require_cf`. It now only works for `pearson_residuals` calculations and corrects instead of raising an error {pr}`154` {smaller}`S Dicks`
* Fixes the behavior of `pp.scale` with `mask_obs` and `max_value`. Now only the observations selected by the mask are clipped {pr}`158` {smaller}`S Dicks`

```{rubric} Misc
```
Expand Down
48 changes: 33 additions & 15 deletions src/rapids_singlecell/preprocessing/_scale.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,21 @@ def scale(

if isinstance(X, cp.ndarray):
X, means, std = scale_array(
X, mask_obs=mask_obs, zero_center=zero_center, inplace=inplace
X,
mask_obs=mask_obs,
zero_center=zero_center,
inplace=inplace,
max_value=max_value,
)
else:
X, means, std = scale_sparse(
X, mask_obs=mask_obs, zero_center=zero_center, inplace=inplace
X,
mask_obs=mask_obs,
zero_center=zero_center,
inplace=inplace,
max_value=max_value,
)

if max_value:
if zero_center:
X = cp.clip(X, a_min=-max_value, a_max=max_value)
else:
if isinstance(X, sparse.spmatrix):
X.data[X.data > max_value] = max_value
else:
X[X > max_value] = max_value

if inplace:
_set_obs_rep(adata, X, layer=layer, obsm=obsm)
adata.var[str_mean_std[0]] = means.get()
Expand All @@ -114,7 +113,7 @@ def scale(
return X


def scale_array(X, *, mask_obs=None, zero_center=True, inplace=True):
def scale_array(X, *, mask_obs=None, zero_center=True, inplace=True, max_value=None):
if not inplace:
X = X.copy()
if mask_obs is not None:
Expand All @@ -129,14 +128,26 @@ def scale_array(X, *, mask_obs=None, zero_center=True, inplace=True):
if zero_center:
X -= mean
X /= std
if max_value:
if zero_center:
X = cp.clip(X, a_min=-max_value, a_max=max_value)
else:
X[X > max_value] = max_value

return X, mean, std


def scale_sparse(X, *, mask_obs=None, zero_center=True, inplace=True):
def scale_sparse(X, *, mask_obs=None, zero_center=True, inplace=True, max_value=None):
if zero_center:
X = X.toarray()
# inplace is True because we copied with `toarray`
return scale_array(X, mask_obs=mask_obs, zero_center=zero_center, inplace=True)
return scale_array(
X,
mask_obs=mask_obs,
zero_center=zero_center,
inplace=True,
max_value=max_value,
)
else:
if mask_obs is not None:
# checking inplace because we are going to update the matrix
Expand All @@ -147,7 +158,10 @@ def scale_sparse(X, *, mask_obs=None, zero_center=True, inplace=True):
X = X.copy()

scale_rv = scale_sparse(
X[mask_obs, :], zero_center=zero_center, inplace=True
X[mask_obs, :],
zero_center=zero_center,
inplace=True,
max_value=max_value,
)
X_sub, mean, std = scale_rv
mask_array = cp.where(cp.array(mask_obs))[0].astype(cp.int32)
Expand Down Expand Up @@ -188,4 +202,8 @@ def scale_sparse(X, *, mask_obs=None, zero_center=True, inplace=True):
)
else:
raise ValueError("The sparse matrix must be a CSR or CSC matrix")

if max_value:
X.data[X.data > max_value] = max_value

return X, mean, std
37 changes: 37 additions & 0 deletions tests/test_scaling.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
[1, 0, 1, 0],
[0, 0, 0, 0],
]) # with gene std 1,0,1,0 and center 0,0,0,0
X_scaled_original_clipped = np.array([
[-1, 1, 0, 0],
[1, 1, 1, 0],
[0, 1, 1, 0],
]) # with gene std 1,0,1,0 and center 0,2,1,0

X_for_mask = np.array([
[27, 27, 27, 27],
Expand Down Expand Up @@ -56,6 +61,16 @@
[27, 27, 27, 27],
[27, 27, 27, 27],
])
X_scaled_for_mask_clipped = np.array([
[27, 27, 27, 27],
[27, 27, 27, 27],
[-1, 1, 0, 0],
[1, 1, 1, 0],
[0, 1, 1, 0],
[27, 27, 27, 27],
[27, 27, 27, 27],
])


@pytest.mark.parametrize("dtype", ["float32", "float64"])
def test_scale_simple(dtype):
Expand Down Expand Up @@ -121,3 +136,25 @@ def test_clip(zero_center):
if zero_center:
assert adata.X.min() >= -1
assert adata.X.max() <= 1

@pytest.mark.parametrize(
    ("mask_obs", "X", "X_scaled", "X_clipped"),
    [
        # no mask: every observation is scaled (and clipped in the second half)
        (None, X_original, X_scaled_original, X_scaled_original_clipped),
        # boolean mask selecting the three middle rows of a 7-row matrix
        (
            np.array((0, 0, 1, 1, 1, 0, 0), dtype=bool),
            X_for_mask,
            X_scaled_for_mask,
            X_scaled_for_mask_clipped,
        ),
    ],
)
def test_scale_sparse(*, mask_obs, X, X_scaled, X_clipped):
    """Check `rsc.pp.scale` on CSR input without zero-centering.

    The same float32 CSR-backed AnnData is scaled twice, each time on a fresh
    object returned by `rsc.get.anndata_to_GPU(..., copy=True)`:

    1. with `zero_center=False` only, expecting `X_scaled`;
    2. with `zero_center=False` and `max_value=1`, expecting `X_clipped`.

    For the masked case the expected arrays keep the rows outside `mask_obs`
    at their original values, so the second check also verifies that clipping
    does not touch unmasked observations.
    """
    adata = AnnData(csr_matrix(X).astype(np.float32))
    adata0 = rsc.get.anndata_to_GPU(adata,copy= True)
    # plain scaling without centering and without clipping
    rsc.pp.scale(adata0, mask_obs=mask_obs, zero_center=False)
    cp.testing.assert_allclose(adata0.X.toarray(), X_scaled)
    # test scaling with zero_center=False and clipping via max_value=1
    adata1 = rsc.get.anndata_to_GPU(adata,copy= True)
    rsc.pp.scale(adata1, zero_center=False, mask_obs=mask_obs, max_value=1)
    cp.testing.assert_allclose(adata1.X.toarray(), X_clipped)

0 comments on commit 97fb0f7

Please sign in to comment.