Merge pull request #672 from michalk8/feat/autocorr-obs

Feat/autocorr obs
scverse · Apr 2, 2023 · 0cd835d · 0cd835d
2 parents 21436bc + a3fdef8
commit 0cd835d
Show file tree

Hide file tree

Showing 12 changed files with 113 additions and 55 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,8 +10,7 @@ repos:
       rev: v1.0.0
       hooks:
           - id: mypy
-            additional_dependencies:
-                [numpy==1.21.0, scipy, pandas, types-requests]
+            additional_dependencies: [numpy, pandas, types-requests]
             exclude: .scripts/ci/download_data.py|squidpy/datasets/_(dataset|image).py # See https://github.com/pre-commit/mirrors-mypy/issues/33
     - repo: https://github.com/psf/black
       rev: 23.1.0

diff --git a/docs/source/release/changelog/672.misc.rst b/docs/source/release/changelog/672.misc.rst
@@ -0,0 +1,3 @@
+Add :attr:`attr` option to :func:`squidpy.gr.spatial_autocorr` to select values from :attr:`anndata.AnnData.obs`
+or :attr:`anndata.AnnData.obsm.`
+`@michalk8 <https://github.com/michalk8>`__
diff --git a/docs/source/release/notes-dev.rst b/docs/source/release/notes-dev.rst
@@ -12,3 +12,15 @@ Bugfixes
   and :func:`squidpy.read.visium`.
   `@michalk8 <https://github.com/michalk8>`__
   `#665 <https://github.com/scverse/squidpy/pull/665>`__
+
+
+Squidpy dev (2023-04-01)
+========================
+
+Miscellaneous
+-------------
+
+- Add :attr:`attr` option to :func:`squidpy.gr.spatial_autocorr` to select values from :attr:`anndata.AnnData.obs`
+  or :attr:`anndata.AnnData.obsm.`
+  `@michalk8 <https://github.com/michalk8>`__
+  `#672 <https://github.com/scverse/squidpy/pull/672>`__
diff --git a/squidpy/gr/_build.py b/squidpy/gr/_build.py
@@ -142,7 +142,7 @@ def spatial_neighbors(
 
     if library_key is not None:
         mats: List[Tuple[spmatrix, spmatrix]] = []
-        ixs = []
+        ixs = []  # type: ignore[var-annotated]
         for lib in libs:
             ixs.extend(np.where(adata.obs[library_key] == lib)[0])
             mats.append(_build_fun(adata[adata.obs[library_key] == lib]))

diff --git a/squidpy/gr/_ligrec.py b/squidpy/gr/_ligrec.py
@@ -741,7 +741,7 @@ def extractor(res: Sequence[TempResult]) -> TempResult:
 
     return parallelize(  # type: ignore[no-any-return]
         _analysis_helper,
-        np.arange(n_perms, dtype=np.int32),
+        np.arange(n_perms, dtype=np.int32).tolist(),
         n_jobs=n_jobs,
         unit="permutation",
         extractor=extractor,
@@ -816,13 +816,13 @@ def _analysis_helper(
     # keep it f64, because we're setting NaN
     res = np.zeros((len(interactions), len(interaction_clusters)), dtype=np.float64)
     numba_parallel = (
-        (np.prod(res.shape) >= 2**20 or clustering.shape[0] >= 2**15) if numba_parallel is None else numba_parallel
+        (np.prod(res.shape) >= 2**20 or clustering.shape[0] >= 2**15) if numba_parallel is None else numba_parallel  # type: ignore[assignment]
     )
 
     fn_key = f"_test_{n_cls}_{int(return_means)}_{bool(numba_parallel)}"
     if fn_key not in globals():
         exec(
-            compile(_create_template(n_cls, return_means=return_means, parallel=numba_parallel), "", "exec"), globals()
+            compile(_create_template(n_cls, return_means=return_means, parallel=numba_parallel), "", "exec"), globals()  # type: ignore[arg-type]
         )
     _test = globals()[fn_key]
 

diff --git a/squidpy/gr/_nhood.py b/squidpy/gr/_nhood.py
@@ -176,7 +176,7 @@ def nhood_enrichment(
 
     perms = parallelize(
         _nhood_enrichment_helper,
-        collection=np.arange(n_perms),
+        collection=np.arange(n_perms).tolist(),
         extractor=np.vstack,
         n_jobs=n_jobs,
         backend=backend,
@@ -332,7 +332,7 @@ def interaction_matrix(
 
     g_data = g.data if weights else np.broadcast_to(1, shape=len(g.data))
     dtype = int if pd.api.types.is_bool_dtype(g.dtype) or pd.api.types.is_integer_dtype(g.dtype) else float
-    output = np.zeros((n_cats, n_cats), dtype=dtype)
+    output = np.zeros((n_cats, n_cats), dtype=dtype)  # type: ignore[var-annotated]
 
     _interaction_matrix(g_data, g.indices, g.indptr, cats.cat.codes.to_numpy(), output)
 

diff --git a/squidpy/gr/_ppatterns.py b/squidpy/gr/_ppatterns.py
@@ -56,12 +56,13 @@
 def spatial_autocorr(
     adata: AnnData,
     connectivity_key: str = Key.obsp.spatial_conn(),
-    genes: str | Sequence[str] | None = None,
+    genes: str | int | Sequence[str] | Sequence[int] | None = None,
     mode: Literal["moran", "geary"] = SpatialAutocorr.MORAN.s,  # type: ignore[assignment]
     transformation: bool = True,
     n_perms: int | None = None,
     two_tailed: bool = False,
     corr_method: str | None = "fdr_bh",
+    attr: Literal["obs", "X", "obsm"] = "X",
     layer: str | None = None,
     seed: int | None = None,
     use_raw: bool = False,
@@ -80,11 +81,16 @@ def spatial_autocorr(
     %(adata)s
     %(conn_key)s
     genes
-        List of gene names, as stored in :attr:`anndata.AnnData.var_names`, used to compute global
-        spatial autocorrelation statistic.
+        Depending on the ``attr``:
+
+            - if ``attr = 'X'``, it corresponds to genes stored in :attr:`anndata.AnnData.var_names`.
+              If `None`, it's computed :attr:`anndata.AnnData.var` ``['highly_variable']``,
+              if present. Otherwise, it's computed for all genes.
+            - if ``attr = 'obs'``, it corresponds to a list of columns in :attr:`anndata.AnnData.obs`.
+              If `None`, use all numerical columns.
+            - if ``attr = 'obsm'``, it corresponds to indices in :attr:`anndata.AnnData.obsm` ``['{{layer}}']``.
+              If `None`, all indices are used.
 
-        If `None`, it's computed :attr:`anndata.AnnData.var` ``['highly_variable']``, if present. Otherwise,
-        it's computed for all genes.
     mode
         Mode of score calculation:
 
@@ -99,8 +105,13 @@ def spatial_autocorr(
     two_tailed
         If `True`, p-values are two-tailed, otherwise they are one-tailed.
     %(corr_method)s
+    use_raw
+        Whether to access :attr:`anndata.AnnData.raw`. Only used when ``attr = 'X'``.
     layer
+        Depending on ``attr``:
         Layer in :attr:`anndata.AnnData.layers` to use. If `None`, use :attr:`anndata.AnnData.X`.
+    attr
+        Which attribute of :class:`~anndata.AnnData` to access. See ``genes`` parameter for more information.
     %(seed)s
     %(copy)s
     %(parallelize)s
@@ -127,20 +138,46 @@ def spatial_autocorr(
     """
     _assert_connectivity_key(adata, connectivity_key)
 
-    if genes is None:
-        if "highly_variable" in adata.var:
-            genes = adata[:, adata.var["highly_variable"]].var_names.values
-        else:
-            genes = adata.var_names.values
-    if use_raw:
+    def extract_X(adata: AnnData, genes: str | Sequence[str] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
+        if genes is None:
+            if "highly_variable" in adata.var:
+                genes = adata[:, adata.var["highly_variable"]].var_names.values
+            else:
+                genes = adata.var_names.values
+        elif isinstance(genes, str):
+            genes = [genes]
+
+        if not use_raw:
+            return _get_obs_rep(adata[:, genes], use_raw=False, layer=layer).T, genes
         if adata.raw is None:
             raise AttributeError("No `.raw` attribute found. Try specifying `use_raw=False`.")
         genes = list(set(genes) & set(adata.raw.var_names))
-        vals = adata.raw[:, genes].X.T
+        return adata.raw[:, genes].X.T, genes
+
+    def extract_obs(adata: AnnData, cols: str | Sequence[str] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
+        if cols is None:
+            df = adata.obs.select_dtypes(include=np.number)
+            return df.T.to_numpy(), df.columns
+        if isinstance(cols, str):
+            cols = [cols]
+        return adata.obs[cols].T.to_numpy(), cols
+
+    def extract_obsm(adata: AnnData, ixs: int | Sequence[int] | None) -> tuple[NDArrayA | spmatrix, Sequence[Any]]:
+        if layer not in adata.obsm:
+            raise KeyError(f"Key `{layer!r}` not found in `adata.obsm`.")
+        if ixs is None:
+            ixs = np.arange(adata.obsm[layer].shape[1])  # type: ignore[assignment]
+        ixs = list(np.ravel([ixs]))
+        return adata.obsm[layer][:, ixs].T, ixs
+
+    if attr == "X":
+        vals, index = extract_X(adata, genes)  # type: ignore[arg-type]
+    elif attr == "obs":
+        vals, index = extract_obs(adata, genes)  # type: ignore[arg-type]
+    elif attr == "obsm":
+        vals, index = extract_obsm(adata, genes)  # type: ignore[arg-type]
     else:
-        vals = _get_obs_rep(adata[:, genes], use_raw=False, layer=layer).T
-
-    genes = _assert_non_empty_sequence(genes, name="genes")
+        raise NotImplementedError(f"Extracting from `adata.{attr}` is not yet implemented.")
 
     mode = SpatialAutocorr(mode)  # type: ignore[assignment]
     if TYPE_CHECKING:
@@ -187,7 +224,7 @@ def spatial_autocorr(
     with np.errstate(divide="ignore"):
         pval_results = _p_value_calc(score, score_perms, g, params)
 
-    df = pd.DataFrame({params["stat"]: score, **pval_results}, index=genes)
+    df = pd.DataFrame({params["stat"]: score, **pval_results}, index=index)
 
     if corr_method is not None:
         for pv in filter(lambda x: "pval" in x, df.columns):
@@ -372,8 +409,8 @@ def co_occurrence(
     n_splits = max(min(n_splits, n_obs), 1)
 
     # split array and labels
-    spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s))
-    labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s))
+    spatial_splits = tuple(s for s in np.array_split(spatial, n_splits, axis=0) if len(s))  # type: ignore[arg-type]
+    labs_splits = tuple(s for s in np.array_split(labs, n_splits, axis=0) if len(s))  # type: ignore[arg-type]
     # create idx array including unique combinations and self-comparison
     x, y = np.triu_indices_from(np.empty((n_splits, n_splits)))  # type: ignore[arg-type]
     idx_splits = [(i, j) for i, j in zip(x, y)]

diff --git a/squidpy/gr/_sepal.py b/squidpy/gr/_sepal.py
@@ -129,7 +129,7 @@ def sepal(
 
     score = parallelize(
         _score_helper,
-        collection=np.arange(len(genes)),
+        collection=np.arange(len(genes)).tolist(),
         extractor=np.hstack,
         use_ixs=False,
         n_jobs=n_jobs,
@@ -183,7 +183,7 @@ def _score_helper(
 
     score, sparse = [], issparse(vals)
     for i in ixs:
-        conc = vals[:, i].A.flatten() if sparse else vals[:, i].copy()
+        conc = vals[:, i].A.flatten() if sparse else vals[:, i].copy()  # type: ignore[union-attr]
         time_iter = _diffusion(conc, fun, n_iter, sat, sat_idx, unsat, unsat_idx, dt=dt, thresh=thresh)
         score.append(dt * time_iter)
 

diff --git a/squidpy/im/_container.py b/squidpy/im/_container.py
@@ -972,7 +972,7 @@ def show(
                 raise ValueError("No channels have been selected.")
             arr = arr[{arr.dims[-1]: channel}]
         else:
-            channel = np.arange(arr.shape[-1])
+            channel = np.arange(arr.shape[-1])  # type: ignore[assignment]
         if TYPE_CHECKING:
             assert isinstance(channel, Sequence)
 

diff --git a/squidpy/pl/_utils.py b/squidpy/pl/_utils.py
@@ -550,7 +550,7 @@ def _heatmap(
     if method is not None:
         row_order, col_order, _, col_link = _dendrogram(adata.X, method, optimal_ordering=adata.n_obs <= 1500)
     else:
-        row_order = col_order = np.arange(len(adata.uns[Key.uns.colors(key)]))
+        row_order = col_order = np.arange(len(adata.uns[Key.uns.colors(key)])).tolist()
 
     row_order = row_order[::-1]
     row_labels = adata.obs[key][row_order]

diff --git a/tests/graph/test_ppatterns.py b/tests/graph/test_ppatterns.py
@@ -1,3 +1,5 @@
+from typing import Any, Literal
+
 import numpy as np
 import pytest
 from anndata import AnnData
@@ -56,7 +58,7 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod
     df_2 = spatial_autocorr(dummy_adata, mode=mode, copy=True, n_jobs=n_jobs, seed=42, n_perms=50)
 
     idx_df = df_1.index.values
-    idx_adata = dummy_adata[:, dummy_adata.var.highly_variable.values].var_names.values
+    idx_adata = dummy_adata[:, dummy_adata.var["highly_variable"].values].var_names.values
 
     if mode == "moran":
         UNS_KEY = MORAN_K
@@ -80,6 +82,35 @@ def test_spatial_autocorr_reproducibility(dummy_adata: AnnData, n_jobs: int, mod
     assert_frame_equal(df_1, df_2)
 
 
+@pytest.mark.parametrize(
+    "attr,layer,genes",
+    [
+        ("X", None, None),
+        ("obs", None, None),
+        ("obs", None, "foo"),
+        ("obsm", "spatial", None),
+        ("obsm", "spatial", [1, 0]),
+    ],
+)
+def test_spatial_autocorr_attr(dummy_adata: AnnData, attr: Literal["X", "obs", "obsm"], layer: str, genes: Any):
+    if attr == "obs":
+        if isinstance(genes, str):
+            dummy_adata.obs[genes] = np.random.RandomState(42).normal(size=(dummy_adata.n_obs,))
+            index = [genes]
+        else:
+            index = dummy_adata.obs.select_dtypes(include=np.number).columns
+    elif attr == "X":
+        index = dummy_adata.var_names if genes is None else genes
+    elif attr == "obsm":
+        index = np.arange(dummy_adata.obsm[layer].shape[1]) if genes is None else genes
+
+    spatial_autocorr(dummy_adata, attr=attr, mode="moran", layer=layer, genes=genes)
+
+    df = dummy_adata.uns[MORAN_K]
+    np.testing.assert_array_equal(np.isfinite(df), True)
+    np.testing.assert_array_equal(sorted(df.index), sorted(index))
+
+
 def test_co_occurrence(adata: AnnData):
     """
     check co_occurrence score and shape

diff --git a/tox.ini b/tox.ini
@@ -201,30 +201,6 @@ commands =
     sphinx-build --color -b html {toxinidir}/docs/source {toxinidir}/docs/build/html
     python -c 'import pathlib; print(f"Documentation is available under:", pathlib.Path(f"{toxinidir}") / "docs" / "build" / "html" / "index.html")'
 
-[testenv:news]
-description = Create news fragment from a PR.
-basepython = python3.9
-skip_install = true
-deps =
-    requests
-commands =
-    python {toxinidir}/docs/source/create_news_fragment.py {posargs:}
-
-[testenv:update-dev-notes]
-description = Update release notes.
-basepython = python3.9
-deps =
-    towncrier
-usedevelop = true
-allowlist_externals =
-    rm
-    git
-commands =
-    rm -f {toxinidir}/docs/source/release/notes-dev.rst
-    towncrier build --yes --version="dev"
-    git reset -- {toxinidir}/docs/source/release/changelog/
-    git checkout -- {toxinidir}/docs/source/release/changelog/
-
 [testenv:build-release-notes]
 description = Build release notes. Used when a new release happens.
 basepython = python3.9