From f51a30f0aa9f7236e6051c1cef04427f8771cf7f Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 20 Apr 2026 17:20:16 +0200 Subject: [PATCH] WIP base without `X_` --- pyproject.toml | 3 +- src/scanpy/_settings/presets.py | 22 +++++++++- src/scanpy/experimental/pp/_normalization.py | 25 +++++++----- src/scanpy/experimental/pp/_recipes.py | 22 +++++----- src/scanpy/neighbors/__init__.py | 20 ++-------- src/scanpy/tools/_diffmap.py | 18 +++++++-- src/scanpy/tools/_dpt.py | 12 ++++-- src/scanpy/tools/_draw_graph.py | 42 +++++++++++++++----- 8 files changed, 109 insertions(+), 55 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b1e586eab7..8450b7984a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -272,7 +272,8 @@ filterwarnings = [ "ignore:FNV hashing is not implemented in Numba.*:UserWarning", # we want to see and eventually fix these "default::numba.core.errors.NumbaPerformanceWarning", - "default:.*TSNE.*random.*to.*pca:FutureWarning", # we should set init=obsm["X_pca"] or so + # we should set init=obsm["X_pca"] or so + "default:.*TSNE.*random.*to.*pca:FutureWarning", # matplotlib <3.11 uses old pyparsing APIs "ignore::pyparsing.warnings.PyparsingDeprecationWarning", # igraph vs leidenalg warning diff --git a/src/scanpy/_settings/presets.py b/src/scanpy/_settings/presets.py index bef0280b39..5b2d828fdd 100644 --- a/src/scanpy/_settings/presets.py +++ b/src/scanpy/_settings/presets.py @@ -74,10 +74,14 @@ class HVGPreset(NamedTuple): return_df: bool -class PcaPreset(NamedTuple): +class BasicEmbeddingPreset(NamedTuple): key_added: str | None +# replace once they diverge +PcaPreset = DiffmapPreset = DrawGraphPreset = BasicEmbeddingPreset + + class RankGenesGroupsPreset(NamedTuple): method: DETest mask_var: str | None @@ -181,6 +185,22 @@ def pca() -> Mapping[Preset, PcaPreset]: Preset.ScanpyV2Preview: PcaPreset(key_added="pca"), } + @preset_property + def diffmap() -> Mapping[Preset, DiffmapPreset]: + """Settings for :func:`~scanpy.tl.diffmap`.""" # 
noqa: D401 + return { + Preset.ScanpyV1: DiffmapPreset(key_added=None), + Preset.ScanpyV2Preview: DiffmapPreset(key_added="diffmap"), + } + + @preset_property + def draw_graph() -> Mapping[Preset, DrawGraphPreset]: + """Settings for :func:`~scanpy.tl.draw_graph`.""" # noqa: D401 + return { + Preset.ScanpyV1: DrawGraphPreset(key_added=None), + Preset.ScanpyV2Preview: DrawGraphPreset(key_added="graph_{layout}"), + } + @preset_property def rank_genes_groups() -> Mapping[Preset, RankGenesGroupsPreset]: """Correlation method for :func:`~scanpy.tl.rank_genes_groups`.""" diff --git a/src/scanpy/experimental/pp/_normalization.py b/src/scanpy/experimental/pp/_normalization.py index 551df22be4..142dd6620d 100644 --- a/src/scanpy/experimental/pp/_normalization.py +++ b/src/scanpy/experimental/pp/_normalization.py @@ -7,6 +7,7 @@ from anndata import AnnData from ... import logging as logg +from ... import settings from ..._compat import CSBase, warn from ..._settings import Default from ..._utils import _doc_params, check_nonnegative_integers, view_to_actual @@ -207,19 +208,23 @@ def normalize_pearson_residuals_pca( `.uns['pearson_residuals_normalization']['clip']` The used value of the clipping parameter. - `.obsm['X_pca']` + `.obsm[kwargs_pca.get('key_added', 'X_pca')]` PCA representation of data after gene selection (if applicable) and Pearson residual normalization. - `.varm['PCs']` + `.varm[kwargs_pca.get('key_added', 'PCs')]` The principal components containing the loadings. When `inplace=True` and `mask_var is not None`, this will contain empty rows for the genes not selected. - `.uns['pca']['variance_ratio']` + `.uns[kwargs_pca.get('key_added', 'pca')]['variance_ratio']` Ratio of explained variance. - `.uns['pca']['variance']` + `.uns[kwargs_pca.get('key_added', 'pca')]['variance']` Explained variance, equivalent to the eigenvalues of the covariance matrix. 
""" + key_added = kwargs_pca.get("key_added", settings.preset.pca.key_added) + key_obsm, key_varm, key_uns = ( + ("X_pca", "PCs", "pca") if key_added is None else [key_added] * 3 + ) if isinstance(mask_var, Default): mask_var = "highly_variable" if "highly_variable" in adata.var else None mask_var = _check_mask(adata, mask_var, "var") @@ -236,19 +241,19 @@ def normalize_pearson_residuals_pca( adata_pca, theta=theta, clip=clip, check_values=check_values ) pca(adata_pca, n_comps=n_comps, rng=rng, **kwargs_pca) - n_comps = adata_pca.obsm["X_pca"].shape[1] # might be None + n_comps = adata_pca.obsm[key_obsm].shape[1] # might be None if inplace: norm_settings = adata_pca.uns["pearson_residuals_normalization"] norm_dict = dict(**norm_settings, pearson_residuals_df=adata_pca.to_df()) if mask_var is not None: - adata.varm["PCs"] = np.zeros(shape=(adata.n_vars, n_comps)) - adata.varm["PCs"][mask_var] = adata_pca.varm["PCs"] + adata.varm[key_varm] = np.zeros(shape=(adata.n_vars, n_comps)) + adata.varm[key_varm][mask_var] = adata_pca.varm[key_varm] else: - adata.varm["PCs"] = adata_pca.varm["PCs"] - adata.uns["pca"] = adata_pca.uns["pca"] + adata.varm[key_varm] = adata_pca.varm[key_varm] + adata.uns[key_uns] = adata_pca.uns[key_uns] adata.uns["pearson_residuals_normalization"] = norm_dict - adata.obsm["X_pca"] = adata_pca.obsm["X_pca"] + adata.obsm[key_obsm] = adata_pca.obsm[key_obsm] return None else: return adata_pca diff --git a/src/scanpy/experimental/pp/_recipes.py b/src/scanpy/experimental/pp/_recipes.py index 61db8e8a5e..a1b5634b0e 100644 --- a/src/scanpy/experimental/pp/_recipes.py +++ b/src/scanpy/experimental/pp/_recipes.py @@ -5,7 +5,7 @@ import numpy as np -from ... import experimental +from ... 
import experimental, settings from ..._utils import _doc_params from ..._utils.random import _accepts_legacy_random_state from ...experimental._docs import ( @@ -103,18 +103,22 @@ def recipe_pearson_residuals( # noqa: PLR0913 `.uns['pearson_residuals_normalization']['clip']` The used value of the clipping parameter. - `.obsm['X_pca']` + `.obsm[kwargs_pca.get('key_added', 'X_pca')]` PCA representation of data after gene selection and Pearson residual normalization. - `.varm['PCs']` + `.varm[kwargs_pca.get('key_added', 'PCs')]` The principal components containing the loadings. When `inplace=True` this will contain empty rows for the genes not selected during HVG selection. - `.uns['pca']['variance_ratio']` + `.uns[kwargs_pca.get('key_added', 'pca')]['variance_ratio']` Ratio of explained variance. - `.uns['pca']['variance']` + `.uns[kwargs_pca.get('key_added', 'pca')]['variance']` Explained variance, equivalent to the eigenvalues of the covariance matrix. """ + key_added = kwargs_pca.get("key_added", settings.preset.pca.key_added) + key_obsm, key_varm, key_uns = ( + ("X_pca", "PCs", "pca") if key_added is None else [key_added] * 3 + ) hvg_args = dict( flavor="pearson_residuals", n_top_genes=n_top_genes, @@ -145,11 +149,11 @@ def recipe_pearson_residuals( # noqa: PLR0913 **normalization_param, pearson_residuals_df=adata_pca.to_df() ) - adata.uns["pca"] = adata_pca.uns["pca"] - adata.varm["PCs"] = np.zeros(shape=(adata.n_vars, n_comps)) - adata.varm["PCs"][adata.var["highly_variable"]] = adata_pca.varm["PCs"] + adata.uns[key_uns] = adata_pca.uns[key_uns] + adata.varm[key_varm] = np.zeros(shape=(adata.n_vars, n_comps)) + adata.varm[key_varm][adata.var["highly_variable"]] = adata_pca.varm[key_varm] adata.uns["pearson_residuals_normalization"] = normalization_dict - adata.obsm["X_pca"] = adata_pca.obsm["X_pca"] + adata.obsm[key_obsm] = adata_pca.obsm[key_obsm] return None else: return adata_pca, hvg diff --git a/src/scanpy/neighbors/__init__.py 
b/src/scanpy/neighbors/__init__.py index 239822e3f2..9c584373fb 100644 --- a/src/scanpy/neighbors/__init__.py +++ b/src/scanpy/neighbors/__init__.py @@ -320,20 +320,6 @@ class FlatTree(NamedTuple): # noqa: D101 indices: None -def _backwards_compat_get_full_x_diffmap(adata: AnnData) -> np.ndarray: - if "X_diffmap0" in adata.obs: - return np.c_[adata.obs["X_diffmap0"].values[:, None], adata.obsm["X_diffmap"]] - else: - return adata.obsm["X_diffmap"] - - -def _backwards_compat_get_full_eval(adata: AnnData): - if "X_diffmap0" in adata.obs: - return np.r_[1, adata.uns["diffmap_evals"]] - else: - return adata.uns["diffmap_evals"] - - def _make_forest_dict(forest): d = {} props = ("hyperplanes", "offsets", "children", "indices") @@ -482,9 +468,13 @@ def count_nonzero(a: np.ndarray | CSRBase) -> int: self._connected_components = connected_components(self._connectivities) self._number_connected_components = self._connected_components[0] - if "X_diffmap" in adata.obsm: - self._eigen_values = _backwards_compat_get_full_eval(adata) - self._eigen_basis = _backwards_compat_get_full_x_diffmap(adata) + dm_key = next((k for k in ("diffmap", "X_diffmap") if k in adata.obsm), None) + if dm_key is not None: + # select by key membership: the truth value of an array is ambiguous; + # eigenvalues share the embedding's key unless the legacy names are used + evals_key = "diffmap_evals" if dm_key == "X_diffmap" else dm_key + self._eigen_values = adata.uns[evals_key] + self._eigen_basis = adata.obsm[dm_key] if n_dcs is not None: if n_dcs > len(self._eigen_values): msg = ( diff --git a/src/scanpy/tools/_diffmap.py b/src/scanpy/tools/_diffmap.py index 90f2976837..0f61348d5c 100644 --- a/src/scanpy/tools/_diffmap.py +++ b/src/scanpy/tools/_diffmap.py @@ -5,6 +5,7 @@ import numpy as np from .._docs import doc_rng +from .._settings import Default, settings from .._utils import _doc_params from .._utils.random import _accepts_legacy_random_state from ._dpt import _diffmap @@ -22,6 +23,7 @@ def diffmap( n_comps: int = 15, *, neighbors_key: str | None = None, + key_added: str | None | Default = Default(preset=("diffmap", "key_added")), rng: SeedLike | RNGLike | None = None, copy: bool = False, ) -> AnnData | None: @@ -55,6 +57,8 @@ def diffmap( 
.obsp[.uns[neighbors_key]['connectivities_key']] and .obsp[.uns[neighbors_key]['distances_key']] for connectivities and distances, respectively. + key_added + Control where the embedding and eigenvalues are stored. {rng} copy Return a copy instead of writing to adata. @@ -63,11 +67,11 @@ def diffmap( ------- Returns `None` if `copy=False`, else returns an `AnnData` object. Sets the following fields: - `adata.obsm['X_diffmap']` : :class:`numpy.ndarray` (dtype `float`) + `adata.obsm['X_diffmap' | key_added]` : :class:`numpy.ndarray` (dtype `float`) Diffusion map representation of data, which is the right eigen basis of the transition matrix with eigenvectors as columns. - `adata.uns['diffmap_evals']` : :class:`numpy.ndarray` (dtype `float`) + `adata.uns['diffmap_evals' | key_added]` : :class:`numpy.ndarray` (dtype `float`) Array of size (number of eigen vectors). Eigenvalues of transition matrix. @@ -82,6 +86,8 @@ def diffmap( rng = np.random.default_rng(rng) if neighbors_key is None: neighbors_key = "neighbors" + if isinstance(key_added, Default): + key_added = settings.preset.diffmap.key_added if neighbors_key not in adata.uns: msg = "You need to run `pp.neighbors` first to compute a neighborhood graph." @@ -90,5 +96,11 @@ def diffmap( msg = "Provide any value greater than 2 for `n_comps`. 
" raise ValueError(msg) adata = adata.copy() if copy else adata - _diffmap(adata, n_comps=n_comps, neighbors_key=neighbors_key, rng=rng) + _diffmap( + adata, + n_comps=n_comps, + neighbors_key=neighbors_key, + key_added=key_added, + rng=rng, + ) return adata if copy else None diff --git a/src/scanpy/tools/_dpt.py b/src/scanpy/tools/_dpt.py index 74d7fe66ae..a53b984d60 100644 --- a/src/scanpy/tools/_dpt.py +++ b/src/scanpy/tools/_dpt.py @@ -22,21 +22,25 @@ def _diffmap( n_comps: int = 15, *, neighbors_key: str | None, + key_added: str | None, rng: np.random.Generator, ) -> None: + obsm_key, uns_key = ( + ("X_diffmap", "diffmap_evals") if key_added is None else ((key_added,) * 2) + ) start = logg.info(f"computing Diffusion Maps using {n_comps=}(=n_dcs)") dpt = DPT(adata, neighbors_key=neighbors_key) dpt.compute_transitions() dpt.compute_eigen(n_comps=n_comps, rng=rng) - adata.obsm["X_diffmap"] = dpt.eigen_basis - adata.uns["diffmap_evals"] = dpt.eigen_values + adata.obsm[obsm_key] = dpt.eigen_basis + adata.uns[uns_key] = dpt.eigen_values logg.info( " finished", time=start, deep=( "added\n" - " 'X_diffmap', diffmap coordinates (adata.obsm)\n" - " 'diffmap_evals', eigenvalues of transition matrix (adata.uns)" + f" {obsm_key!r}, diffmap coordinates (adata.obsm)\n" + f" {uns_key!r}, eigenvalues of transition matrix (adata.uns)" ), ) diff --git a/src/scanpy/tools/_draw_graph.py b/src/scanpy/tools/_draw_graph.py index bdbb37d0be..339668a00c 100644 --- a/src/scanpy/tools/_draw_graph.py +++ b/src/scanpy/tools/_draw_graph.py @@ -5,6 +5,9 @@ import numpy as np +from scanpy._compat import warn +from scanpy._settings import Default + from .. import _utils from .. 
import logging as logg from .._docs import doc_rng @@ -42,10 +45,12 @@ def draw_graph( # noqa: PLR0913 rng: SeedLike | RNGLike | None = None, n_jobs: int | None = None, adjacency: CSBase | None = None, - key_added_ext: str | None = None, + key_added: str | Default = Default(preset=("draw_graph", "key_added")), neighbors_key: str | None = None, obsp: str | None = None, copy: bool = False, + # deprecated + key_added_ext: str | None = None, **kwds, ) -> AnnData | None: """Force-directed graph drawing :cite:p:`Islam2011,Jacomy2014,Chippada2018`. @@ -86,10 +91,10 @@ def draw_graph( # noqa: PLR0913 Applies to layouts with random initialization like `'fr'`. adjacency Sparse adjacency matrix of the graph, defaults to neighbors connectivities. - key_added_ext - By default, append `layout`. + key_added + Template for the key. If `None`, use `'X_draw_graph_{layout}'` for `obsm` (replacing `'{layout}'` with the passed `layout`). proceed - Continue computation, starting off with 'X_draw_graph_`layout`'. + Continue computation, starting off with `f'X_draw_graph_{layout}'`. init_pos `'paga'`/`True`, `None`/`False`, or any valid 2d-`.obsm` key. Use precomputed coordinates for initialization. @@ -113,7 +118,7 @@ def draw_graph( # noqa: PLR0913 ------- Returns `None` if `copy=False`, else returns an `AnnData` object. Sets the following fields: - `adata.obsm['X_draw_graph_[layout | key_added_ext]']` : :class:`numpy.ndarray` (dtype `float`) + `adata.obsm[('X_draw_graph_{layout}' | key_added).format(layout=layout)]` : :class:`numpy.ndarray` (dtype `float`) Coordinates of graph layout. E.g. for `layout='fa'` (the default), the field is called `'X_draw_graph_fa'`. `key_added_ext` overwrites `layout`. 
`adata.uns['draw_graph']`: :class:`dict` @@ -121,6 +126,7 @@ def draw_graph( # noqa: PLR0913 """ start = logg.info(f"drawing single-cell graph using layout {layout!r}") + key_obsm, key_uns = _get_keys_added(key_added, layout, key_added_ext) rng = np.random.default_rng(rng) meta_random_state = ( dict(random_state=rng.arg) if isinstance(rng, _LegacyRng) else {} @@ -161,18 +167,44 @@ def draw_graph( # noqa: PLR0913 else: ig_layout = g.layout(layout, **kwds) positions = np.array(ig_layout.coords) - adata.uns["draw_graph"] = {} - adata.uns["draw_graph"]["params"] = dict(layout=layout, **meta_random_state) - key_added = f"X_draw_graph_{key_added_ext or layout}" - adata.obsm[key_added] = positions + adata.uns[key_uns] = {} + adata.uns[key_uns]["params"] = dict(layout=layout, **meta_random_state) + adata.obsm[key_obsm] = positions logg.info( " finished", time=start, - deep=f"added\n {key_added!r}, graph_drawing coordinates (adata.obsm)", + deep="added" + f"\n {key_obsm!r}, draw_graph coordinates (adata.obsm)" + f"\n {key_uns!r}, draw_graph parameters (adata.uns)", ) return adata if copy else None +def _get_keys_added( + key_added: str | None | Default, layout: str, key_added_ext: str | None +) -> tuple[str, str]: + """Resolve the `.obsm` and `.uns` keys that `draw_graph` writes to. + + `key_added` is a template in which `'{layout}'` is replaced by `layout` + (or by the deprecated `key_added_ext`); `None` selects the legacy keys. + """ + if key_added_ext is not None: + msg = "Passing `key_added_ext` is deprecated, use `key_added`’s template functionality instead." + warn(msg, category=FutureWarning) + suffix = key_added_ext + else: + suffix = layout + if isinstance(key_added, Default): + # the default follows the active preset, as in `diffmap` + from .._settings import settings + + key_added = settings.preset.draw_graph.key_added + if key_added is None: + return f"X_draw_graph_{suffix}", "draw_graph" + key_added = key_added.format(layout=suffix) + return key_added, key_added + + def fa2_positions( adjacency: CSBase | np.ndarray, init_coords: np.ndarray, **kwds ) -> list[tuple[float, float]]: