Skip to content

Commit

Permalink
allow sc.pp.subsample in backed mode when returning a copy (#2624)
Browse files Browse the repository at this point in the history
* allow backed subsampling when returning a copy

* no extra copying, added test

* added description in release-notes

* more informative error message for copy=False

* Update scanpy/tests/test_preprocessing.py

Co-authored-by: Isaac Virshup <ivirshup@gmail.com>

* inverted branching

---------

Co-authored-by: Eljas <eljas.roellin@gmail.com>
Co-authored-by: Isaac Virshup <ivirshup@gmail.com>
  • Loading branch information
3 people committed Aug 24, 2023
1 parent b658b5c commit 2de9121
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/release-notes/1.10.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
```

* {func}`scanpy.pp.pca` and {func}`scanpy.pp.regress_out` now accept a layer argument {pr}`2588` {smaller}`S Dicks`
* {func}`scanpy.pp.subsample` with `copy=True` can now be called in backed mode {pr}`2624` {smaller}`E Roellin`

```{rubric} Docs
```
Expand Down
14 changes: 11 additions & 3 deletions scanpy/preprocessing/_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,10 +898,18 @@ def subsample(
raise ValueError('Either pass `n_obs` or `fraction`.')
obs_indices = np.random.choice(old_n_obs, size=new_n_obs, replace=False)
if isinstance(data, AnnData):
if copy:
return data[obs_indices].copy()
if data.isbacked:
if copy:
return data[obs_indices].to_memory()
else:
raise NotImplementedError(
"Inplace subsampling is not implemented for backed objects."
)
else:
data._inplace_subset_obs(obs_indices)
if copy:
return data[obs_indices].copy()
else:
data._inplace_subset_obs(obs_indices)
else:
X = data
return X[obs_indices], obs_indices
Expand Down
16 changes: 16 additions & 0 deletions scanpy/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,22 @@ def test_subsample_copy():
assert sc.pp.subsample(adata, fraction=0.1, copy=True).shape == (20, 10)


def test_subsample_copy_backed(tmp_path):
    """Check ``sc.pp.subsample`` on an AnnData that has a backing file.

    With ``copy=True`` the call must succeed, return 40 of the 200
    observations, and yield the same ``X`` as subsampling an in-memory
    AnnData built from the same matrix. With ``copy=False`` it must raise
    ``NotImplementedError``.
    """
    matrix = np.random.rand(200, 10).astype(np.float32)
    in_memory = AnnData(matrix.copy())
    on_disk = AnnData(matrix.copy())
    # Attach a backing file so this object takes the backed code path
    # (the path the test name refers to).
    on_disk.filename = tmp_path / 'test.h5ad'

    # Returning a copy from the backed object should not throw an error.
    first_backed_sample = sc.pp.subsample(on_disk, n_obs=40, copy=True)
    assert first_backed_sample.shape == (40, 10)

    # The in-memory and backed objects are expected to give identical data.
    memory_sample = sc.pp.subsample(in_memory, n_obs=40, copy=True)
    second_backed_sample = sc.pp.subsample(on_disk, n_obs=40, copy=True)
    np.testing.assert_array_equal(memory_sample.X, second_backed_sample.X)

    # In-place subsampling of the backed object is expected to be rejected.
    with pytest.raises(NotImplementedError):
        sc.pp.subsample(on_disk, n_obs=40, copy=False)


def test_scale():
adata = pbmc68k_reduced()
adata.X = adata.raw.X
Expand Down

0 comments on commit 2de9121

Please sign in to comment.