Skip to content

Commit

Permalink
Merge pull request #672 from michalk8/feat/autocorr-obs
Browse files Browse the repository at this point in the history
Feat/autocorr obs
  • Loading branch information
LLehner committed Apr 2, 2023
2 parents 21436bc + a3fdef8 commit 0cd835d
Show file tree
Hide file tree
Showing 12 changed files with 113 additions and 55 deletions.
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ repos:
rev: v1.0.0
hooks:
- id: mypy
additional_dependencies:
[numpy==1.21.0, scipy, pandas, types-requests]
additional_dependencies: [numpy, pandas, types-requests]
exclude: .scripts/ci/download_data.py|squidpy/datasets/_(dataset|image).py # See https://github.com/pre-commit/mirrors-mypy/issues/33
- repo: https://github.com/psf/black
rev: 23.1.0
Expand Down
3 changes: 3 additions & 0 deletions docs/source/release/changelog/672.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add :attr:`attr` option to :func:`squidpy.gr.spatial_autocorr` to select values from :attr:`anndata.AnnData.obs`
or :attr:`anndata.AnnData.obsm.`
`@michalk8 <https://github.com/michalk8>`__
12 changes: 12 additions & 0 deletions docs/source/release/notes-dev.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,15 @@ Bugfixes
and :func:`squidpy.read.visium`.
`@michalk8 <https://github.com/michalk8>`__
`#665 <https://github.com/scverse/squidpy/pull/665>`__


Squidpy dev (2023-04-01)
========================

Miscellaneous
-------------

- Add :attr:`attr` option to :func:`squidpy.gr.spatial_autocorr` to select values from :attr:`anndata.AnnData.obs`
or :attr:`anndata.AnnData.obsm.`
`@michalk8 <https://github.com/michalk8>`__
`#672 <https://github.com/scverse/squidpy/pull/672>`__
2 changes: 1 addition & 1 deletion squidpy/gr/_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def spatial_neighbors(

if library_key is not None:
mats: List[Tuple[spmatrix, spmatrix]] = []
ixs = []
ixs = [] # type: ignore[var-annotated]
for lib in libs:
ixs.extend(np.where(adata.obs[library_key] == lib)[0])
mats.append(_build_fun(adata[adata.obs[library_key] == lib]))
Expand Down
6 changes: 3 additions & 3 deletions squidpy/gr/_ligrec.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ def extractor(res: Sequence[TempResult]) -> TempResult:

return parallelize( # type: ignore[no-any-return]
_analysis_helper,
np.arange(n_perms, dtype=np.int32),
np.arange(n_perms, dtype=np.int32).tolist(),
n_jobs=n_jobs,
unit="permutation",
extractor=extractor,
Expand Down Expand Up @@ -816,13 +816,13 @@ def _analysis_helper(
# keep it f64, because we're setting NaN
res = np.zeros((len(interactions), len(interaction_clusters)), dtype=np.float64)
numba_parallel = (
(np.prod(res.shape) >= 2**20 or clustering.shape[0] >= 2**15) if numba_parallel is None else numba_parallel
(np.prod(res.shape) >= 2**20 or clustering.shape[0] >= 2**15) if numba_parallel is None else numba_parallel # type: ignore[assignment]
)

fn_key = f"_test_{n_cls}_{int(return_means)}_{bool(numba_parallel)}"
if fn_key not in globals():
exec(
compile(_create_template(n_cls, return_means=return_means, parallel=numba_parallel), "", "exec"), globals()
compile(_create_template(n_cls, return_means=return_means, parallel=numba_parallel), "", "exec"), globals() # type: ignore[arg-type]
)
_test = globals()[fn_key]

Expand Down
4 changes: 2 additions & 2 deletions squidpy/gr/_nhood.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def nhood_enrichment(

perms = parallelize(
_nhood_enrichment_helper,
collection=np.arange(n_perms),
collection=np.arange(n_perms).tolist(),
extractor=np.vstack,
n_jobs=n_jobs,
backend=backend,
Expand Down Expand Up @@ -332,7 +332,7 @@ def interaction_matrix(

g_data = g.data if weights else np.broadcast_to(1, shape=len(g.data))
dtype = int if pd.api.types.is_bool_dtype(g.dtype) or pd.api.types.is_integer_dtype(g.dtype) else float
output = np.zeros((n_cats, n_cats), dtype=dtype)
output = np.zeros((n_cats, n_cats), dtype=dtype) # type: ignore[var-annotated]

_interaction_matrix(g_data, g.indices, g.indptr, cats.cat.codes.to_numpy(), output)

Expand Down
73 changes: 55 additions & 18 deletions squidpy/gr/_ppatterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,13 @@
def spatial_autocorr(
adata: AnnData,
connectivity_key: str = Key.obsp.spatial_conn(),
genes: str | Sequence[str] | None = None,
genes: str | int | Sequence[str] | Sequence[int] | None = None,
mode: Literal["moran", "geary"] = SpatialAutocorr.MORAN.s, # type: ignore[assignment]
transformation: bool = True,
n_perms: int | None = None,
two_tailed: bool = False,
corr_method: str | None = "fdr_bh",
attr: Literal["obs", "X", "obsm"] = "X",
layer: str | None = None,
seed: int | None = None,
use_raw: bool = False,
Expand All @@ -80,11 +81,16 @@ def spatial_autocorr(
%(adata)s
%(conn_key)s
genes
List of gene names, as stored in :attr:`anndata.AnnData.var_names`, used to compute global
spatial autocorrelation statistic.
Depending on the ``attr``:
- if ``attr = 'X'``, it corresponds to genes stored in :attr:`anndata.AnnData.var_names`.
If `None`, it's computed :attr:`anndata.AnnData.var` ``['highly_variable']``,
if present. Otherwise, it's computed for all genes.
- if ``attr = 'obs'``, it corresponds to a list of columns in :attr:`anndata.AnnData.obs`.
If `None`, use all numerical columns.
- if ``attr = 'obsm'``, it corresponds to indices in :attr:`anndata.AnnData.obsm` ``['{{layer}}']``.
If `None`, all indices are used.
If `None`, it's computed :attr:`anndata.AnnData.var` ``['highly_variable']``, if present. Otherwise,
it's computed for all genes.
mode
Mode of score calculation:
Expand All @@ -99,8 +105,13 @@ def spatial_autocorr(
two_tailed
If `True`, p-values are two-tailed, otherwise they are one-tailed.
%(corr_method)s
use_raw
Whether to access :attr:`anndata.AnnData.raw`. Only used when ``attr = 'X'``.
layer
Depending on ``attr``:
Layer in :attr:`anndata.AnnData.layers` to use. If `None`, use :attr:`anndata.AnnData.X`.
attr
Which attribute of :class:`~anndata.AnnData` to access. See ``genes`` parameter for more information.
%(seed)s
%(copy)s
%(parallelize)s
Expand All @@ -127,20 +138,46 @@ def spatial_autocorr(
"""
_assert_connectivity_key(adata, connectivity_key)

if genes is None:
if "highly_variable" in adata.var:
genes = adata[:, adata.var["highly_variable"]].var_names.values
else:
genes = adata.var_names.values
if use_raw:
def extract_X(adata: AnnData, genes: str | Sequence[str] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
if genes is None:
if "highly_variable" in adata.var:
genes = adata[:, adata.var["highly_variable"]].var_names.values
else:
genes = adata.var_names.values
elif isinstance(genes, str):
genes = [genes]

if not use_raw:
return _get_obs_rep(adata[:, genes], use_raw=False, layer=layer).T, genes
if adata.raw is None:
raise AttributeError("No `.raw` attribute found. Try specifying `use_raw=False`.")
genes = list(set(genes) & set(adata.raw.var_names))
vals = adata.raw[:, genes].X.T
return adata.raw[:, genes].X.T, genes

def extract_obs(adata: AnnData, cols: str | Sequence[str] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
if cols is None:
df = adata.obs.select_dtypes(include=np.number)
return df.T.to_numpy(), df.columns
if isinstance(cols, str):
cols = [cols]
return adata.obs[cols].T.to_numpy(), cols

def extract_obsm(adata: AnnData, ixs: int | Sequence[int] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
if layer not in adata.obsm:
raise KeyError(f"Key `{layer!r}` not found in `adata.obsm`.")
if ixs is None:
ixs = np.arange(adata.obsm[layer].shape[1]) # type: ignore[assignment]
ixs = list(np.ravel([ixs]))
return adata.obsm[layer][:, ixs].T, ixs

if attr == "X":
vals, index = extract_X(adata, genes) # type: ignore[arg-type]
elif attr == "obs":
vals, index = extract_obs(adata, genes) # type: ignore[arg-type]
elif attr == "obsm":
vals, index = extract_obsm(adata, genes) # type: ignore[arg-type]
else:
vals = _get_obs_rep(adata[:, genes], use_raw=False, layer=layer).T

genes = _assert_non_empty_sequence(genes, name="genes")
raise NotImplementedError(f"Extracting from `adata.{attr}` is not yet implemented.")

mode = SpatialAutocorr(mode) # type: ignore[assignment]
if TYPE_CHECKING:
Expand Down Expand Up @@ -187,7 +224,7 @@ def spatial_autocorr(
with np.errstate(divide="ignore"):
pval_results = _p_value_calc(score, score_perms, g, params)

df = pd.DataFrame({params["stat"]: score, **pval_results}, index=genes)
df = pd.DataFrame({params["stat"]: score, **pval_results}, index=index)

if corr_method is not None:
for pv in filter(lambda x: "pval" in x, df.columns):
Expand Down Expand Up @@ -372,8 +409,8 @@ def co_occurrence(
n_splits = max(min(n_splits, n_obs), 1)

# split array and labels
spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s))
labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s))
spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s)) # type: ignore[arg-type]
labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s)) # type: ignore[arg-type]
# create idx array including unique combinations and self-comparison
x, y = np.triu_indices_from(np.empty((n_splits, n_splits))) # type: ignore[arg-type]
idx_splits = [(i, j) for i, j in zip(x, y)]
Expand Down
4 changes: 2 additions & 2 deletions squidpy/gr/_sepal.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def sepal(

score = parallelize(
_score_helper,
collection=np.arange(len(genes)),
collection=np.arange(len(genes)).tolist(),
extractor=np.hstack,
use_ixs=False,
n_jobs=n_jobs,
Expand Down Expand Up @@ -183,7 +183,7 @@ def _score_helper(

score, sparse = [], issparse(vals)
for i in ixs:
conc = vals[:, i].A.flatten() if sparse else vals[:, i].copy()
conc = vals[:, i].A.flatten() if sparse else vals[:, i].copy() # type: ignore[union-attr]
time_iter = _diffusion(conc, fun, n_iter, sat, sat_idx, unsat, unsat_idx, dt=dt, thresh=thresh)
score.append(dt * time_iter)

Expand Down
2 changes: 1 addition & 1 deletion squidpy/im/_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -972,7 +972,7 @@ def show(
raise ValueError("No channels have been selected.")
arr = arr[{arr.dims[-1]: channel}]
else:
channel = np.arange(arr.shape[-1])
channel = np.arange(arr.shape[-1]) # type: ignore[assignment]
if TYPE_CHECKING:
assert isinstance(channel, Sequence)

Expand Down
2 changes: 1 addition & 1 deletion squidpy/pl/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ def _heatmap(
if method is not None:
row_order, col_order, _, col_link = _dendrogram(adata.X, method, optimal_ordering=adata.n_obs <= 1500)
else:
row_order = col_order = np.arange(len(adata.uns[Key.uns.colors(key)]))
row_order = col_order = np.arange(len(adata.uns[Key.uns.colors(key)])).tolist()

row_order = row_order[::-1]
row_labels = adata.obs[key][row_order]
Expand Down
33 changes: 32 additions & 1 deletion tests/graph/test_ppatterns.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any, Literal

import numpy as np
import pytest
from anndata import AnnData
Expand Down Expand Up @@ -56,7 +58,7 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod
df_2 = spatial_autocorr(dummy_adata, mode=mode, copy=True, n_jobs=n_jobs, seed=42, n_perms=50)

idx_df = df_1.index.values
idx_adata = dummy_adata[:, dummy_adata.var.highly_variable.values].var_names.values
idx_adata = dummy_adata[:, dummy_adata.var["highly_variable"].values].var_names.values

if mode == "moran":
UNS_KEY = MORAN_K
Expand All @@ -80,6 +82,35 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod
assert_frame_equal(df_1, df_2)


@pytest.mark.parametrize(
"attr,layer,genes",
[
("X", None, None),
("obs", None, None),
("obs", None, "foo"),
("obsm", "spatial", None),
("obsm", "spatial", [1, 0]),
],
)
def test_spatial_autocorr_attr(dummy_adata: AnnData, attr: Literal["X", "obs", "obsm"], layer: str, genes: Any):
if attr == "obs":
if isinstance(genes, str):
dummy_adata.obs[genes] = np.random.RandomState(42).normal(size=(dummy_adata.n_obs,))
index = [genes]
else:
index = dummy_adata.obs.select_dtypes(include=np.number).columns
elif attr == "X":
index = dummy_adata.var_names if genes is None else genes
elif attr == "obsm":
index = np.arange(dummy_adata.obsm[layer].shape[1]) if genes is None else genes

spatial_autocorr(dummy_adata, attr=attr, mode="moran", layer=layer, genes=genes)

df = dummy_adata.uns[MORAN_K]
np.testing.assert_array_equal(np.isfinite(df), True)
np.testing.assert_array_equal(sorted(df.index), sorted(index))


def test_co_occurrence(adata: AnnData):
"""
check co_occurrence score and shape
Expand Down
24 changes: 0 additions & 24 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -201,30 +201,6 @@ commands =
sphinx-build --color -b html {toxinidir}/docs/source {toxinidir}/docs/build/html
python -c 'import pathlib; print(f"Documentation is available under:", pathlib.Path(f"{toxinidir}") / "docs" / "build" / "html" / "index.html")'

[testenv:news]
description = Create news fragment from a PR.
basepython = python3.9
skip_install = true
deps =
requests
commands =
python {toxinidir}/docs/source/create_news_fragment.py {posargs:}

[testenv:update-dev-notes]
description = Update release notes.
basepython = python3.9
deps =
towncrier
usedevelop = true
allowlist_externals =
rm
git
commands =
rm -f {toxinidir}/docs/source/release/notes-dev.rst
towncrier build --yes --version="dev"
git reset -- {toxinidir}/docs/source/release/changelog/
git checkout -- {toxinidir}/docs/source/release/changelog/

[testenv:build-release-notes]
description = Build release notes. Used when a new release happens.
basepython = python3.9
Expand Down

0 comments on commit 0cd835d

Please sign in to comment.