Skip to content

Commit

Permalink
Backport PR #1432: (fix): optimize subsetting dask array
Browse files Browse the repository at this point in the history
  • Loading branch information
ilan-gold committed Mar 23, 2024
1 parent 4fbabda commit 253ee7c
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines.yml
Expand Up @@ -61,7 +61,7 @@ jobs:
condition: eq(variables['DEPENDENCIES_VERSION'], 'minimum')
- script: |
uv pip install --system --compile --pre "anndata[dev,test] @ ." "scanpy>=1.10.0rc1"
uv pip install -v --system --compile --pre "anndata[dev,test] @ ." "scanpy>=1.10.0rc1"
displayName: "Install dependencies release candidates"
condition: eq(variables['DEPENDENCIES_VERSION'], 'pre-release')
Expand Down
9 changes: 2 additions & 7 deletions anndata/_core/index.py
Expand Up @@ -147,15 +147,10 @@ def _subset(a: np.ndarray | pd.DataFrame, subset_idx: Index):

@_subset.register(DaskArray)
def _subset_dask(a: DaskArray, subset_idx: Index):
if all(isinstance(x, cabc.Iterable) for x in subset_idx):
if len(subset_idx) > 1 and all(isinstance(x, cabc.Iterable) for x in subset_idx):
if isinstance(a._meta, csc_matrix):
return a[:, subset_idx[1]][subset_idx[0], :]
elif isinstance(a._meta, spmatrix):
return a[subset_idx[0], :][:, subset_idx[1]]
else:
# TODO: this may have been working for some cases?
subset_idx = np.ix_(*subset_idx)
return a.vindex[subset_idx]
return a[subset_idx[0], :][:, subset_idx[1]]
return a[subset_idx]


Expand Down
2 changes: 2 additions & 0 deletions docs/release-notes/0.10.7.md
Expand Up @@ -10,3 +10,5 @@

```{rubric} Performance
```

* Remove `vindex` for subsetting `dask.array.Array` because of its slowness and memory consumption {user}`ilan-gold` {pr}`1432`

0 comments on commit 253ee7c

Please sign in to comment.