Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/autocorr obs #672

Merged
merged 16 commits into from
Apr 2, 2023
Merged
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Expand Up @@ -10,8 +10,7 @@ repos:
rev: v1.0.0
hooks:
- id: mypy
additional_dependencies:
[numpy==1.21.0, scipy, pandas, types-requests]
additional_dependencies: [numpy, pandas, types-requests]
exclude: .scripts/ci/download_data.py|squidpy/datasets/_(dataset|image).py # See https://github.com/pre-commit/mirrors-mypy/issues/33
- repo: https://github.com/psf/black
rev: 23.1.0
Expand Down
3 changes: 3 additions & 0 deletions docs/source/release/changelog/672.misc.rst
@@ -0,0 +1,3 @@
Add :attr:`attr` option to :func:`squidpy.gr.spatial_autocorr` to select values from :attr:`anndata.AnnData.obs`
or :attr:`anndata.AnnData.obsm.`
`@michalk8 <https://github.com/michalk8>`__
12 changes: 12 additions & 0 deletions docs/source/release/notes-dev.rst
Expand Up @@ -12,3 +12,15 @@ Bugfixes
and :func:`squidpy.read.visium`.
`@michalk8 <https://github.com/michalk8>`__
`#665 <https://github.com/scverse/squidpy/pull/665>`__


Squidpy dev (2023-04-01)
========================

Miscellaneous
-------------

- Add :attr:`attr` option to :func:`squidpy.gr.spatial_autocorr` to select values from :attr:`anndata.AnnData.obs`
or :attr:`anndata.AnnData.obsm.`
`@michalk8 <https://github.com/michalk8>`__
`#672 <https://github.com/scverse/squidpy/pull/672>`__
2 changes: 1 addition & 1 deletion squidpy/gr/_build.py
Expand Up @@ -142,7 +142,7 @@ def spatial_neighbors(

if library_key is not None:
mats: List[Tuple[spmatrix, spmatrix]] = []
ixs = []
ixs = [] # type: ignore[var-annotated]
for lib in libs:
ixs.extend(np.where(adata.obs[library_key] == lib)[0])
mats.append(_build_fun(adata[adata.obs[library_key] == lib]))
Expand Down
6 changes: 3 additions & 3 deletions squidpy/gr/_ligrec.py
Expand Up @@ -741,7 +741,7 @@ def extractor(res: Sequence[TempResult]) -> TempResult:

return parallelize( # type: ignore[no-any-return]
_analysis_helper,
np.arange(n_perms, dtype=np.int32),
np.arange(n_perms, dtype=np.int32).tolist(),
n_jobs=n_jobs,
unit="permutation",
extractor=extractor,
Expand Down Expand Up @@ -816,13 +816,13 @@ def _analysis_helper(
# keep it f64, because we're setting NaN
res = np.zeros((len(interactions), len(interaction_clusters)), dtype=np.float64)
numba_parallel = (
(np.prod(res.shape) >= 2**20 or clustering.shape[0] >= 2**15) if numba_parallel is None else numba_parallel
(np.prod(res.shape) >= 2**20 or clustering.shape[0] >= 2**15) if numba_parallel is None else numba_parallel # type: ignore[assignment]
)

fn_key = f"_test_{n_cls}_{int(return_means)}_{bool(numba_parallel)}"
if fn_key not in globals():
exec(
compile(_create_template(n_cls, return_means=return_means, parallel=numba_parallel), "", "exec"), globals()
compile(_create_template(n_cls, return_means=return_means, parallel=numba_parallel), "", "exec"), globals() # type: ignore[arg-type]
)
_test = globals()[fn_key]

Expand Down
4 changes: 2 additions & 2 deletions squidpy/gr/_nhood.py
Expand Up @@ -176,7 +176,7 @@ def nhood_enrichment(

perms = parallelize(
_nhood_enrichment_helper,
collection=np.arange(n_perms),
collection=np.arange(n_perms).tolist(),
extractor=np.vstack,
n_jobs=n_jobs,
backend=backend,
Expand Down Expand Up @@ -332,7 +332,7 @@ def interaction_matrix(

g_data = g.data if weights else np.broadcast_to(1, shape=len(g.data))
dtype = int if pd.api.types.is_bool_dtype(g.dtype) or pd.api.types.is_integer_dtype(g.dtype) else float
output = np.zeros((n_cats, n_cats), dtype=dtype)
output = np.zeros((n_cats, n_cats), dtype=dtype) # type: ignore[var-annotated]

_interaction_matrix(g_data, g.indices, g.indptr, cats.cat.codes.to_numpy(), output)

Expand Down
73 changes: 55 additions & 18 deletions squidpy/gr/_ppatterns.py
Expand Up @@ -56,12 +56,13 @@
def spatial_autocorr(
adata: AnnData,
connectivity_key: str = Key.obsp.spatial_conn(),
genes: str | Sequence[str] | None = None,
genes: str | int | Sequence[str] | Sequence[int] | None = None,
mode: Literal["moran", "geary"] = SpatialAutocorr.MORAN.s, # type: ignore[assignment]
transformation: bool = True,
n_perms: int | None = None,
two_tailed: bool = False,
corr_method: str | None = "fdr_bh",
attr: Literal["obs", "X", "obsm"] = "X",
layer: str | None = None,
seed: int | None = None,
use_raw: bool = False,
Expand All @@ -80,11 +81,16 @@ def spatial_autocorr(
%(adata)s
%(conn_key)s
genes
List of gene names, as stored in :attr:`anndata.AnnData.var_names`, used to compute global
spatial autocorrelation statistic.
Depending on the ``attr``:

- if ``attr = 'X'``, it corresponds to genes stored in :attr:`anndata.AnnData.var_names`.
If `None`, it's computed :attr:`anndata.AnnData.var` ``['highly_variable']``,
if present. Otherwise, it's computed for all genes.
- if ``attr = 'obs'``, it corresponds to a list of columns in :attr:`anndata.AnnData.obs`.
If `None`, use all numerical columns.
- if ``attr = 'obsm'``, it corresponds to indices in :attr:`anndata.AnnData.obsm` ``['{{layer}}']``.
If `None`, all indices are used.

If `None`, it's computed :attr:`anndata.AnnData.var` ``['highly_variable']``, if present. Otherwise,
it's computed for all genes.
mode
Mode of score calculation:

Expand All @@ -99,8 +105,13 @@ def spatial_autocorr(
two_tailed
If `True`, p-values are two-tailed, otherwise they are one-tailed.
%(corr_method)s
use_raw
Whether to access :attr:`anndata.AnnData.raw`. Only used when ``attr = 'X'``.
layer
Depending on ``attr``:
Layer in :attr:`anndata.AnnData.layers` to use. If `None`, use :attr:`anndata.AnnData.X`.
attr
Which attribute of :class:`~anndata.AnnData` to access. See ``genes`` parameter for more information.
%(seed)s
%(copy)s
%(parallelize)s
Expand All @@ -127,20 +138,46 @@ def spatial_autocorr(
"""
_assert_connectivity_key(adata, connectivity_key)

if genes is None:
if "highly_variable" in adata.var:
genes = adata[:, adata.var["highly_variable"]].var_names.values
else:
genes = adata.var_names.values
if use_raw:
def extract_X(adata: AnnData, genes: str | Sequence[str] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
if genes is None:
if "highly_variable" in adata.var:
genes = adata[:, adata.var["highly_variable"]].var_names.values
else:
genes = adata.var_names.values
elif isinstance(genes, str):
genes = [genes]

if not use_raw:
return _get_obs_rep(adata[:, genes], use_raw=False, layer=layer).T, genes
if adata.raw is None:
raise AttributeError("No `.raw` attribute found. Try specifying `use_raw=False`.")
genes = list(set(genes) & set(adata.raw.var_names))
vals = adata.raw[:, genes].X.T
return adata.raw[:, genes].X.T, genes

def extract_obs(adata: AnnData, cols: str | Sequence[str] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
if cols is None:
df = adata.obs.select_dtypes(include=np.number)
return df.T.to_numpy(), df.columns
if isinstance(cols, str):
cols = [cols]
return adata.obs[cols].T.to_numpy(), cols

def extract_obsm(adata: AnnData, ixs: int | Sequence[int] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
if layer not in adata.obsm:
raise KeyError(f"Key `{layer!r}` not found in `adata.obsm`.")
if ixs is None:
ixs = np.arange(adata.obsm[layer].shape[1]) # type: ignore[assignment]
ixs = list(np.ravel([ixs]))
return adata.obsm[layer][:, ixs].T, ixs

if attr == "X":
vals, index = extract_X(adata, genes) # type: ignore[arg-type]
elif attr == "obs":
vals, index = extract_obs(adata, genes) # type: ignore[arg-type]
elif attr == "obsm":
vals, index = extract_obsm(adata, genes) # type: ignore[arg-type]
else:
vals = _get_obs_rep(adata[:, genes], use_raw=False, layer=layer).T

genes = _assert_non_empty_sequence(genes, name="genes")
raise NotImplementedError(f"Extracting from `adata.{attr}` is not yet implemented.")

mode = SpatialAutocorr(mode) # type: ignore[assignment]
if TYPE_CHECKING:
Expand Down Expand Up @@ -187,7 +224,7 @@ def spatial_autocorr(
with np.errstate(divide="ignore"):
pval_results = _p_value_calc(score, score_perms, g, params)

df = pd.DataFrame({params["stat"]: score, **pval_results}, index=genes)
df = pd.DataFrame({params["stat"]: score, **pval_results}, index=index)

if corr_method is not None:
for pv in filter(lambda x: "pval" in x, df.columns):
Expand Down Expand Up @@ -372,8 +409,8 @@ def co_occurrence(
n_splits = max(min(n_splits, n_obs), 1)

# split array and labels
spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s))
labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s))
spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s)) # type: ignore[arg-type]
labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s)) # type: ignore[arg-type]
# create idx array including unique combinations and self-comparison
x, y = np.triu_indices_from(np.empty((n_splits, n_splits))) # type: ignore[arg-type]
idx_splits = [(i, j) for i, j in zip(x, y)]
Expand Down
4 changes: 2 additions & 2 deletions squidpy/gr/_sepal.py
Expand Up @@ -129,7 +129,7 @@ def sepal(

score = parallelize(
_score_helper,
collection=np.arange(len(genes)),
collection=np.arange(len(genes)).tolist(),
extractor=np.hstack,
use_ixs=False,
n_jobs=n_jobs,
Expand Down Expand Up @@ -183,7 +183,7 @@ def _score_helper(

score, sparse = [], issparse(vals)
for i in ixs:
conc = vals[:, i].A.flatten() if sparse else vals[:, i].copy()
conc = vals[:, i].A.flatten() if sparse else vals[:, i].copy() # type: ignore[union-attr]
time_iter = _diffusion(conc, fun, n_iter, sat, sat_idx, unsat, unsat_idx, dt=dt, thresh=thresh)
score.append(dt * time_iter)

Expand Down
2 changes: 1 addition & 1 deletion squidpy/im/_container.py
Expand Up @@ -972,7 +972,7 @@ def show(
raise ValueError("No channels have been selected.")
arr = arr[{arr.dims[-1]: channel}]
else:
channel = np.arange(arr.shape[-1])
channel = np.arange(arr.shape[-1]) # type: ignore[assignment]
if TYPE_CHECKING:
assert isinstance(channel, Sequence)

Expand Down
2 changes: 1 addition & 1 deletion squidpy/pl/_utils.py
Expand Up @@ -550,7 +550,7 @@ def _heatmap(
if method is not None:
row_order, col_order, _, col_link = _dendrogram(adata.X, method, optimal_ordering=adata.n_obs <= 1500)
else:
row_order = col_order = np.arange(len(adata.uns[Key.uns.colors(key)]))
row_order = col_order = np.arange(len(adata.uns[Key.uns.colors(key)])).tolist()

row_order = row_order[::-1]
row_labels = adata.obs[key][row_order]
Expand Down
33 changes: 32 additions & 1 deletion tests/graph/test_ppatterns.py
@@ -1,3 +1,5 @@
from typing import Any, Literal

import numpy as np
import pytest
from anndata import AnnData
Expand Down Expand Up @@ -56,7 +58,7 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod
df_2 = spatial_autocorr(dummy_adata, mode=mode, copy=True, n_jobs=n_jobs, seed=42, n_perms=50)

idx_df = df_1.index.values
idx_adata = dummy_adata[:, dummy_adata.var.highly_variable.values].var_names.values
idx_adata = dummy_adata[:, dummy_adata.var["highly_variable"].values].var_names.values

if mode == "moran":
UNS_KEY = MORAN_K
Expand All @@ -80,6 +82,35 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod
assert_frame_equal(df_1, df_2)


@pytest.mark.parametrize(
"attr,layer,genes",
[
("X", None, None),
("obs", None, None),
("obs", None, "foo"),
("obsm", "spatial", None),
("obsm", "spatial", [1, 0]),
],
)
def test_spatial_autocorr_attr(dummy_adata: AnnData, attr: Literal["X", "obs", "obsm"], layer: str, genes: Any):
if attr == "obs":
if isinstance(genes, str):
dummy_adata.obs[genes] = np.random.RandomState(42).normal(size=(dummy_adata.n_obs,))
index = [genes]
else:
index = dummy_adata.obs.select_dtypes(include=np.number).columns
elif attr == "X":
index = dummy_adata.var_names if genes is None else genes
elif attr == "obsm":
index = np.arange(dummy_adata.obsm[layer].shape[1]) if genes is None else genes

spatial_autocorr(dummy_adata, attr=attr, mode="moran", layer=layer, genes=genes)

df = dummy_adata.uns[MORAN_K]
np.testing.assert_array_equal(np.isfinite(df), True)
np.testing.assert_array_equal(sorted(df.index), sorted(index))


def test_co_occurrence(adata: AnnData):
"""
check co_occurrence score and shape
Expand Down
24 changes: 0 additions & 24 deletions tox.ini
Expand Up @@ -201,30 +201,6 @@ commands =
sphinx-build --color -b html {toxinidir}/docs/source {toxinidir}/docs/build/html
python -c 'import pathlib; print(f"Documentation is available under:", pathlib.Path(f"{toxinidir}") / "docs" / "build" / "html" / "index.html")'

[testenv:news]
description = Create news fragment from a PR.
basepython = python3.9
skip_install = true
deps =
requests
commands =
python {toxinidir}/docs/source/create_news_fragment.py {posargs:}

[testenv:update-dev-notes]
description = Update release notes.
basepython = python3.9
deps =
towncrier
usedevelop = true
allowlist_externals =
rm
git
commands =
rm -f {toxinidir}/docs/source/release/notes-dev.rst
towncrier build --yes --version="dev"
git reset -- {toxinidir}/docs/source/release/changelog/
git checkout -- {toxinidir}/docs/source/release/changelog/

[testenv:build-release-notes]
description = Build release notes. Used when a new release happens.
basepython = python3.9
Expand Down