From c6a7ae18e041bb23983dc0130dc2188571b447b0 Mon Sep 17 00:00:00 2001 From: Severin Dicks <37635888+Intron7@users.noreply.github.com> Date: Thu, 14 Mar 2024 13:02:26 +0100 Subject: [PATCH] Scale clips low (#2913) Co-authored-by: Philipp A --- docs/release-notes/1.10.0.md | 1 + scanpy/preprocessing/_simple.py | 5 ++++- scanpy/tests/test_scaling.py | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/release-notes/1.10.0.md b/docs/release-notes/1.10.0.md index afe72f87bf..9373c27370 100644 --- a/docs/release-notes/1.10.0.md +++ b/docs/release-notes/1.10.0.md @@ -25,6 +25,7 @@ * {func}`scanpy.external.pp.harmony_integrate` now runs with 64 bit floats improving reproducibility {pr}`2655` {smaller}`S Dicks` * {func}`scanpy.tl.rank_genes_groups` no longer warns that it's default was changed from t-test_overestim_var to t-test {pr}`2798` {smaller}`L Heumos` * `scanpy.pp.calculate_qc_metrics` now allows `qc_vars` to be passed as a string {pr}`2859` {smaller}`N Teyssier` +* {func}`scanpy.pp.scale` now also clips `np.ndarray` at `-max_value` when zero-centering {pr}`2913` {smaller}`S Dicks` ```{rubric} Docs ``` diff --git a/scanpy/preprocessing/_simple.py b/scanpy/preprocessing/_simple.py index 7fba1a71f8..8b21588669 100644 --- a/scanpy/preprocessing/_simple.py +++ b/scanpy/preprocessing/_simple.py @@ -871,7 +871,10 @@ def scale_array( # do the clipping if max_value is not None: logg.debug(f"... 
clipping at max_value {max_value}") - X[X > max_value] = max_value + if zero_center: + X = np.clip(X, a_min=-max_value, a_max=max_value) + else: + X[X > max_value] = max_value if return_mean_std: return X, mean, std else: diff --git a/scanpy/tests/test_scaling.py b/scanpy/tests/test_scaling.py index d6b004c1ff..ba425e4408 100644 --- a/scanpy/tests/test_scaling.py +++ b/scanpy/tests/test_scaling.py @@ -104,3 +104,12 @@ def test_mask_string(): sc.pp.scale(adata, mask_obs="some cells") assert np.array_equal(adata.X, X_centered_for_mask) assert "mean of some cells" in adata.var.keys() + + +@pytest.mark.parametrize("zero_center", [True, False]) +def test_clip(zero_center): + adata = sc.datasets.pbmc3k() + sc.pp.scale(adata, max_value=1, zero_center=zero_center) + if zero_center: + assert adata.X.min() >= -1 + assert adata.X.max() <= 1