diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..71408d39 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +# EditorConfig is awesome: https://EditorConfig.org + +# top-most EditorConfig file +root = true + +[*] +end_of_line = lf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true +max_line_length = 88 + +[*.py] +indent_size = 4 +indent_style = space diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..863f9b3f --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,19 @@ +## Changes + + +* ... + +## Bug fixes + + +* ... + +## New + + +* ... + +## Related issues + + +Closes # diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47a36de4..349a6373 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,10 +26,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - name: Setup Python 3.7 + - name: Setup Python 3.8 uses: actions/setup-python@v2 with: - python-version: 3.7 + python-version: 3.8 - name: Install dependencies run: | python -m pip install --upgrade pip @@ -42,19 +42,37 @@ jobs: # Run unit tests test: needs: linting - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} strategy: + fail-fast: false matrix: - python-version: [3.7] + os: [ubuntu-latest] + python-version: [3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: pip install -e '.[dev]' + - name: Unit tests + timeout-minutes: 60 + run: python -m pytest --durations=25 --ignore=tests/datasets/test_datasets.py --hypothesis-profile=ci --cov=scvelo -vv + + test-dataset-downloads: + needs: test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 - name: 
Install dependencies run: | pip install -e . pip install hypothesis pytest pytest-cov - - name: Unit tests - run: python -m pytest --cov=scvelo + - name: Test dataset downloads + timeout-minutes: 60 + run: python -m pytest tests/datasets/test_datasets.py -vv diff --git a/.gitignore b/.gitignore index 56a467b5..bf2ff2df 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,5 @@ docs/source/scvelo* # Files generated by unit tests .hypothesis/ +.coverage* +htmlcov/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3c5e1dd8..cfc56bee 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,14 +1,14 @@ repos: -- repo: https://github.com/ambv/black - rev: 20.8b1 +- repo: https://github.com/psf/black + rev: 22.3.0 hooks: - id: black -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/PyCQA/flake8 rev: 3.8.4 hooks: - id: flake8 - repo: https://github.com/pycqa/isort - rev: 5.7.0 + rev: 5.10.1 hooks: - id: isort name: isort (python) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 06b0d014..502d5cb5 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -21,7 +21,15 @@ You can now clone your fork of scVelo and install the development mode git clone https://github.com/YOUR-USER-NAME/scvelo.git cd scvelo - git checkout --track origin/develop + + +Unix Systems +^^^^^^^^^^^^ + +The installation can be completed with + +.. code:: bash + pip install -e '.[dev]' The last line can, alternatively, be replaced by @@ -31,16 +39,26 @@ The last line can, alternatively, be replaced by pip install -r requirements-dev.txt -Finally, to make sure your code follows our code style guideline, install pre-commit: +Windows +^^^^^^^ + +If running Windows, `hnswlib` needs to be installed via `conda`, and the installation with pip is slightly adjusted: .. 
code:: bash - pre-commit install + conda install -c conda-forge hnswlib + pip install -e .[dev] Coding style ^^^^^^^^^^^^ +Finally, to make sure your code follows our code style guideline, install pre-commit: + +.. code:: bash + + pre-commit install + Our code follows `black` and `flake8` coding style. Code formatting (`black`, `isort`) is automated through pre-commit hooks. In addition, we require that - functions are fully type-annotated. @@ -69,9 +87,9 @@ The docstrings of scVelo largely follow the `numpy`-style. New docstrings should Submitting pull requests ^^^^^^^^^^^^^^^^^^^^^^^^ -New features and bug fixes are added to the code base through a pull request (PR). To implement a feature or bug fix, create a branch from `develop`. For hotfixes use `master` as base. The existence of bugs suggests insufficient test coverage. As such, bug fixes should, ideally, include a unit test or extend an existing one. Please ensure that +New features and bug fixes are added to the code base through a pull request (PR). To implement a feature or bug fix, create a branch from `master`. The existence of bugs suggests insufficient test coverage. As such, bug fixes should, ideally, include a unit test or extend an existing one. Please ensure that -- branch names have the prefix `feat/`, `bug/` or `hotfix/`. +- branch names have the prefix `feat/` or `fix/`. - your code follows the project conventions. - newly added functions are unit tested. - all tests pass locally. 
diff --git a/docs/requirements.txt b/docs/requirements.txt index 99078be6..4674d613 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -6,8 +6,9 @@ typing_extensions importlib_metadata sphinx_rtd_theme>=0.3 sphinx_autodoc_typehints<=1.6 +Jinja2<3.1 # converting notebooks to html ipykernel sphinx>=1.7,<4.0 -nbsphinx>=0.7,<0.8.7 \ No newline at end of file +nbsphinx>=0.7,<0.8.7 diff --git a/docs/source/_key_contributors.rst b/docs/source/_key_contributors.rst new file mode 100644 index 00000000..036d99e5 --- /dev/null +++ b/docs/source/_key_contributors.rst @@ -0,0 +1,7 @@ +.. sidebar:: Key Contributors + + * `Philipp Weiler`_: lead developer since 2021, maintainer + * `Volker Bergen`_: lead developer 2018-2021, initial conception + +.. _Philipp Weiler: https://twitter.com/PhilippWeiler7 +.. _Volker Bergen: https://twitter.com/volkerbergen diff --git a/docs/source/conf.py b/docs/source/conf.py index 64e7750b..b90d007f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -148,7 +148,7 @@ def setup(app): - app.add_stylesheet("custom.css") + app.add_css_file("custom.css") # -- Options for other output ------------------------------------------ diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index e70b105b..bcba545d 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -6,7 +6,7 @@ Once you are set, the following tutorials go straight into analysis of RNA veloc latent time, driver identification and many more. First of all, the input data for scVelo are two count matrices of pre-mature (unspliced) and mature (spliced) abundances, -which can be obtained from standard sequencing protocols, using the `velocyto`_ or `loompy/kallisto`_ +which can be obtained from standard sequencing protocols, using the `velocyto`_ or `kallisto`_ counting pipeline. scVelo workflow at a glance @@ -89,4 +89,4 @@ For every tool module there is a plotting counterpart, which allows you to exami .. 
_`velocyto`: http://velocyto.org/velocyto.py/tutorial/cli.html -.. _`loompy/kallisto`: https://linnarssonlab.org/loompy/kallisto/index.html +.. _`kallisto`: https://www.kallistobus.tools/tutorials/kb_velocity/python/kb_velocity/#generate-rna-velocity-count-matrices diff --git a/docs/source/index.rst b/docs/source/index.rst index 372bf016..266153ba 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -7,6 +7,8 @@ scVelo - RNA velocity generalized through dynamical modeling :width: 300px :align: left +.. include:: _key_contributors.rst + **scVelo** is a scalable toolkit for RNA velocity analysis in single cells, based on `Bergen et al. (Nature Biotech, 2020) `_. @@ -16,7 +18,7 @@ scVelo generalizes the concept of RNA velocity by relaxing previously made assumptions with a stochastic and a dynamical model that solves the full transcriptional dynamics. It thereby adapts RNA velocity to widely varying specifications such as non-stationary populations. -scVelo is compatible with scanpy_ and hosts efficient implementations of all RNA velocity models. +scVelo is compatible with Scanpy_ and hosts efficient implementations of all RNA velocity models. scVelo's key applications ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -104,7 +106,7 @@ For further information visit `scvelo.org `_. .. |travis| image:: https://travis-ci.org/theislab/scvelo.svg?branch=master :target: https://travis-ci.org/theislab/scvelo -.. _scanpy: https://scanpy.readthedocs.io +.. _Scanpy: https://scanpy.readthedocs.io .. _calendly: https://calendly.com/scvelo @@ -123,4 +125,4 @@ For further information visit `scvelo.org `_. .. 
|dim| raw:: html - \ No newline at end of file + diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst index 5e812e86..d3d205bb 100644 --- a/docs/source/release_notes.rst +++ b/docs/source/release_notes.rst @@ -4,13 +4,33 @@ Release Notes ============= +Version 0.2.5 :small:`Oct 14, 2022` +----------------------------------- + +Changes: + +- Catch non-positive parameter values and raise a `ValueError` if necessary (`PR 614 `_). +- `get_mean_var` uses the same size parameter for mean and variance (`PR 698 `_). + +Bugfixes: + +- `filter_genes` now works with `adata.layers['unspliced']` being sparse and `adata.layers['spliced']` dense (`PR 537 `_). +- `show_proportions` actually considers the layer `"ambiguous"` if present (`PR 587 `_). +- Fix calculation of Pearson's correlation in `csr_vcorrcoef` (`PR 679 `_). +- Fix `get_mean_var` to work with sparse input and `ignore_zeros=True` (`PR 698 `_). +- Fix bug in neighbor calculation (`PR 797 `_). +- Fix `optimization.py::get_weight` to work with numeric, non-integer values (`PR 839 `_). +- Fix inference with `fit_scaling=False` (`PR 848 `_). +- Fix saving of velocity embedding stream (`PR 900 `_). +- Fix Pandas' display precision when passed to `get_df` (`PR 907 `_). + Version 0.2.4 :small:`Aug 26, 2021` ----------------------------------- Perspectives: -- Landing page and two notebooks accompanying the perspectives manuscript at MSB. -- New datasets: Gastrulation, bone marrow, and PBMCs. +- Landing page and two notebooks accompanying the perspectives manuscript at MSB. +- New datasets: Gastrulation, bone marrow, and PBMCs. New capabilities: @@ -25,7 +45,7 @@ New capabilities: Bugfixes: - Pinned `sphinx<4.0` and `nbsphinx<0.8.7`. -- Fix IPython import at CLI. +- Fix IPython import at CLI. 
Version 0.2.3 :small:`Feb 13, 2021` diff --git a/pytest.ini b/pytest.ini index 67a1c148..69c38322 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,6 +1,4 @@ [pytest] python_files = test_*.py -testpaths = - tests - scvelo +testpaths = tests/ xfail_strict = true diff --git a/requirements-dev.txt b/requirements-dev.txt index af20ddc2..534bfa29 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,13 +2,15 @@ -e . -black==20.8b1 +black==22.3.0 hnswlib hypothesis flake8==3.8.4 -isort==5.7.0 +isort==5.10.1 louvain +magic-impute pre-commit>=2.9.0 pybind11 pytest>=6.2.2 +pytest-cov python-igraph diff --git a/requirements.txt b/requirements.txt index c1ad4bc3..f597e9b7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,6 @@ umap-learn>=0.3.10 # removed numba warnings (v0.3.10) numba>=0.41.0 numpy>=1.17 # extension/speedup in .nan_to_num, .exp (v1.17) scipy>=1.4.1 # introduced PCA sparsity support (v1.4) -pandas>=0.23 # merging/sorting extensions (v0.23) +pandas>=0.23, !=1.4.0 # merging/sorting extensions (v0.23) scikit-learn>=0.21.2 # bugfix in .utils.sparsefuncs (v0.21.2) -matplotlib>=3.3.0 # normalize in pie (v3.3.0) \ No newline at end of file +matplotlib>=3.3.0 # normalize in pie (v3.3.0) diff --git a/scvelo/__init__.py b/scvelo/__init__.py index 76cc70d1..272477f1 100644 --- a/scvelo/__init__.py +++ b/scvelo/__init__.py @@ -2,7 +2,12 @@ from anndata import AnnData from scanpy import read, read_loom -from scvelo import datasets, logging, pl, pp, settings, tl, utils +from scvelo import datasets, logging +from scvelo import plotting as pl +from scvelo import preprocessing as pp +from scvelo import settings +from scvelo import tools as tl +from scvelo import utils from scvelo.core import get_df from scvelo.plotting.gridspec import GridSpec from scvelo.preprocessing.neighbors import Neighbors @@ -13,6 +18,10 @@ from scvelo.tools.velocity import Velocity from scvelo.tools.velocity_graph import VelocityGraph +import sys # isort:skip + 
+sys.modules.update({f"{__name__}.{m}": globals()[m] for m in ["tl", "pp", "pl"]}) + try: from setuptools_scm import get_version diff --git a/scvelo/core/__init__.py b/scvelo/core/__init__.py index 156eeb6e..67f36563 100644 --- a/scvelo/core/__init__.py +++ b/scvelo/core/__init__.py @@ -12,16 +12,18 @@ set_modality, show_proportions, ) -from ._arithmetic import clipped_log, invert, prod_sum, sum +from ._arithmetic import clipped_log, invert, multiply, prod_sum, sum from ._linear_models import LinearRegression from ._metrics import l2_norm from ._models import SplicingDynamics from ._parallelize import get_n_jobs, parallelize +from ._utils import deprecated_arg_names __all__ = [ "clean_obs_names", "cleanup", "clipped_log", + "deprecated_arg_names", "get_df", "get_initial_size", "get_modality", @@ -33,6 +35,7 @@ "make_dense", "make_sparse", "merge", + "multiply", "parallelize", "prod_sum", "set_initial_size", diff --git a/scvelo/core/_anndata.py b/scvelo/core/_anndata.py index d77bbb7d..a9beab70 100644 --- a/scvelo/core/_anndata.py +++ b/scvelo/core/_anndata.py @@ -14,13 +14,17 @@ from scvelo import logging as logg from ._arithmetic import sum +from ._utils import deprecated_arg_names +@deprecated_arg_names( + {"data": "adata", "copy": "inplace", "ID_length": "id_length", "base": "alphabet"} +) def clean_obs_names( - data: AnnData, - base: str = "[AGTCBDHKMNRSVWY]", - ID_length: int = 12, - copy: bool = False, + adata: AnnData, + alphabet: str = "[AGTCBDHKMNRSVWY]", + id_length: int = 12, + inplace: bool = True, ) -> Optional[AnnData]: """Clean up the obs_names. @@ -31,14 +35,14 @@ def clean_obs_names( Arguments --------- - data + adata Annotated data matrix. - base + alphabet Genetic code letters to be identified. - ID_length + id_length Length of the Genetic Codes in the samples. - copy - Return a copy instead of writing to adata. + inplace + Whether to update `adata` inplace or not. 
Returns ------- @@ -50,44 +54,27 @@ def clean_obs_names( names of the identified sample batches """ - def get_base_list(name, base): - base_list = base - while re.search(base_list + base, name) is not None: - base_list += base - if len(base_list) == 0: - raise ValueError("Encountered an invalid ID in obs_names: ", name) - return base_list - - adata = data.copy() if copy else data - - names = adata.obs_names - base_list = get_base_list(names[0], base) - - if len(np.unique([len(name) for name in adata.obs_names])) == 1: - start, end = re.search(base_list, names[0]).span() - newIDs = [name[start:end] for name in names] - start, end = 0, len(newIDs[0]) - for i in range(end - ID_length): - if np.any([ID[i] not in base for ID in newIDs]): - start += 1 - if np.any([ID[::-1][i] not in base for ID in newIDs]): - end -= 1 - - newIDs = [ID[start:end] for ID in newIDs] - prefixes = [names[i].replace(newIDs[i], "") for i in range(len(names))] + if not inplace: + adata = adata.copy() + + if adata.obs_names.map(len).unique().size == 1: + start, end = re.search(alphabet * id_length, adata.obs_names[0]).span() + new_obs_names = [obs_name[start:end] for obs_name in adata.obs_names] + + prefixes = [ + obs_name.replace(new_obs_name, "") + for obs_name, new_obs_name in zip(adata.obs_names, new_obs_names) + ] else: - prefixes, newIDs = [], [] - for name in names: - match = re.search(base_list, name) - newID = ( - re.search(get_base_list(name, base), name).group() - if match is None - else match.group() - ) - newIDs.append(newID) - prefixes.append(name.replace(newID, "")) + prefixes, new_obs_names = [], [] + for obs_name in adata.obs_names: + start, end = re.search(alphabet * id_length, adata.obs_names[0]).span() + new_obs_names.append(obs_name[start:end]) + prefixes.append(obs_name.replace(obs_name[start:end], "")) + + adata.obs_names = new_obs_names + adata.obs_names_make_unique() - adata.obs_names = newIDs if len(prefixes[0]) > 0 and len(np.unique(prefixes)) > 1: 
adata.obs["sample_batch"] = ( pd.Categorical(prefixes) @@ -95,31 +82,32 @@ def get_base_list(name, base): else prefixes ) - adata.obs_names_make_unique() - return adata if copy else None + if not inplace: + return adata +@deprecated_arg_names({"data": "adata", "copy": "inplace"}) def cleanup( - data: AnnData, + adata: AnnData, clean: Union[ - Literal["layers", "obs", "var", "uns"], + Literal["layers", "obs", "var", "uns", "all"], List[Literal["layers", "obs", "var", "uns"]], ] = "layers", keep: Optional[Union[str, List[str]]] = None, - copy: bool = False, + inplace: bool = True, ) -> Optional[AnnData]: """Delete not needed attributes. Arguments --------- - data + adata Annotated data matrix. clean Which attributes to consider for freeing memory. keep Which attributes to keep. - copy - Return a copy instead of writing to adata. + inplace + Whether to update `adata` inplace or not. Returns ------- @@ -127,13 +115,14 @@ def cleanup( Returns or updates `adata` with selection of attributes kept. 
""" - adata = data.copy() if copy else data + if not inplace: + adata = adata.copy() verify_dtypes(adata) keep = list([keep] if isinstance(keep, str) else {} if keep is None else keep) - keep.extend(["spliced", "unspliced", "Ms", "Mu", "clusters", "neighbors"]) + keep.extend(["unspliced", "spliced", "Mu", "Ms", "clusters", "neighbors"]) - ann_dict = { + attributes_to_remove = { "obs": adata.obs_keys(), "var": adata.var_keys(), "uns": adata.uns_keys(), @@ -141,16 +130,22 @@ def cleanup( } if "all" not in clean: - ann_dict = {ann: values for (ann, values) in ann_dict.items() if ann in clean} + attributes_to_remove = { + attr: attr_keys + for (attr, attr_keys) in attributes_to_remove.items() + if attr in clean + } - for (ann, values) in ann_dict.items(): - for value in values: - if value not in keep: - del getattr(adata, ann)[value] + for (attr, attr_keys) in attributes_to_remove.items(): + for key in attr_keys: + if key not in keep: + del getattr(adata, attr)[key] - return adata if copy else None + if not inplace: + return adata +# TODO: Add unit test for `precision` argument def get_df( data: AnnData, keys: Optional[Union[str, List[str]]] = None, @@ -193,7 +188,7 @@ def get_df( """ if precision is not None: - pd.set_option("precision", precision) + pd.set_option("display.precision", precision) if isinstance(data, AnnData): keys, keys_split = ( @@ -365,7 +360,7 @@ def get_initial_size( return None -def get_modality(adata: AnnData, modality: str) -> Union[ndarray, spmatrix]: +def get_modality(adata: AnnData, modality: Optional[str]) -> Union[ndarray, spmatrix]: """Extract data of one modality. Arguments @@ -381,7 +376,7 @@ def get_modality(adata: AnnData, modality: str) -> Union[ndarray, spmatrix]: Retrieved modality from :class:`~anndata.AnnData` object. 
""" - if modality == "X": + if modality in ["X", None]: return adata.X elif modality in adata.layers.keys(): return adata.layers[modality] @@ -392,24 +387,24 @@ def get_modality(adata: AnnData, modality: str) -> Union[ndarray, spmatrix]: return adata.obsm[modality] -# TODO: Generalize to arbitray modality -def get_size(adata: AnnData, layer: Optional[str] = None) -> ndarray: - """Get counts per observation in a layer. +@deprecated_arg_names({"layer": "modality"}) +def get_size(adata: AnnData, modality: Optional[str] = None) -> ndarray: + """Get counts per observation in a modality. Arguments --------- adata Annotated data matrix. - layer - Name of later for which to retrieve initial size. + modality + Name of modality for which to retrieve size. Returns ------- np.ndarray - Initial counts per observation in the specified layer. + Counts per observation in the specified modality. """ - X = adata.X if layer is None else adata.layers[layer] + X = get_modality(adata=adata, modality=modality) return sum(X, axis=1) @@ -488,7 +483,9 @@ def make_sparse( return adata if not inplace else None -def merge(adata: AnnData, ldata: AnnData, copy: bool = True) -> Optional[AnnData]: +def merge( + adata: AnnData, ldata: AnnData, copy: bool = True, **kwargs +) -> Optional[AnnData]: """Merge two annotated data matrices. 
Arguments @@ -524,8 +521,10 @@ def merge(adata: AnnData, ldata: AnnData, copy: bool = True) -> Optional[AnnData common_vars = pd.unique(adata.var_names.intersection(ldata.var_names)) if len(common_obs) == 0: - clean_obs_names(adata) - clean_obs_names(ldata) + if "id_length" in kwargs: + id_length = kwargs.get("id_length") + clean_obs_names(adata, id_length=id_length) + clean_obs_names(ldata, id_length=id_length) common_obs = adata.obs_names.intersection(ldata.obs_names) if copy: @@ -629,7 +628,7 @@ def set_initial_size(adata: AnnData, layers: Optional[str] = None) -> None: """ if layers is None: - layers = ["spliced", "unspliced"] + layers = ["unspliced", "spliced"] verify_dtypes(adata) layers = [ layer @@ -705,7 +704,7 @@ def show_proportions( """ if layers is None: - layers = ["spliced", "unspliced", "ambigious"] + layers = ["unspliced", "spliced", "ambiguous"] layers_keys = [key for key in layers if key in adata.layers.keys()] counts_layers = [sum(adata.layers[key], axis=1) for key in layers_keys] if use_raw: diff --git a/scvelo/core/_arithmetic.py b/scvelo/core/_arithmetic.py index 037de5f2..92bf9998 100644 --- a/scvelo/core/_arithmetic.py +++ b/scvelo/core/_arithmetic.py @@ -49,6 +49,32 @@ def invert(x: ndarray) -> ndarray: return x_inv +def multiply( + a: Union[ndarray, spmatrix], b: Union[ndarray, spmatrix] +) -> Union[ndarray, spmatrix]: + """Point-wise multiplication of arrays or sparse matrices. + + Arguments + --------- + a + First array/sparse matrix. + b + Second array/sparse matrix. + + Returns + ------- + Union[ndarray, spmatrix] + Point-wise product of `a` and `b`. 
+ """ + + if issparse(a): + return a.multiply(b) + elif issparse(b): + return b.multiply(a) + else: + return a * b + + def prod_sum( a1: Union[ndarray, spmatrix], a2: Union[ndarray, spmatrix], axis: Optional[int] ) -> ndarray: diff --git a/scvelo/core/_base.py b/scvelo/core/_base.py index 4990eaaa..e84b2bd9 100644 --- a/scvelo/core/_base.py +++ b/scvelo/core/_base.py @@ -29,8 +29,6 @@ def get_solution( a `numpy.ndarray` of form `(n_steps, n_vars)`. """ - return - @abstractmethod def get_steady_states( self, stacked: True, with_keys: False @@ -50,5 +48,3 @@ def get_steady_states( Union[Dict[str, ndarray], Tuple[ndarray], ndarray] Steady state of system. """ - - return diff --git a/scvelo/core/_linear_models.py b/scvelo/core/_linear_models.py index 89796761..969236ca 100644 --- a/scvelo/core/_linear_models.py +++ b/scvelo/core/_linear_models.py @@ -122,7 +122,7 @@ def fit(self, x: ndarray, y: ndarray): if self.fit_intercept: _x = sum(x, axis=0) / n_obs _y = sum(y, axis=0) / n_obs - self.coef_ = (_xy / n_obs - _x * _y) / (_xx / n_obs - _x ** 2) + self.coef_ = (_xy / n_obs - _x * _y) / (_xx / n_obs - _x**2) self.intercept_ = _y - self.coef_ * _x if self.positive_intercept: diff --git a/scvelo/core/_models.py b/scvelo/core/_models.py index c38c6688..73188eba 100644 --- a/scvelo/core/_models.py +++ b/scvelo/core/_models.py @@ -131,7 +131,9 @@ def get_steady_states( Steady state of system. 
""" - if (self.beta > 0) and (self.gamma > 0): + if (self.beta <= 0) or (self.gamma <= 0): + raise ValueError("Both `beta` and `gamma` need to be strictly positive.") + else: unspliced = self.alpha / self.beta spliced = self.alpha / self.gamma diff --git a/scvelo/core/_utils.py b/scvelo/core/_utils.py new file mode 100644 index 00000000..1cd3957f --- /dev/null +++ b/scvelo/core/_utils.py @@ -0,0 +1,42 @@ +import warnings +from functools import wraps +from typing import Mapping + + +# Modified from https://github.com/scverse/scanpy/blob/master/scanpy/_utils/__init__.py +def deprecated_arg_names(arg_mapping: Mapping[str, str]): + """ + Decorator which marks a functions keyword arguments as deprecated. It will + result in a warning being emitted when the deprecated keyword argument is + used, and the function being called with the new argument. + Parameters + ---------- + arg_mapping + Mapping from deprecated argument name to current argument name. + """ + + def decorator(func): + @wraps(func) + def func_wrapper(*args, **kwargs): + warnings.simplefilter("always", DeprecationWarning) # turn off filter + for old, new in arg_mapping.items(): + if old in kwargs: + warnings.warn( + f"Keyword argument '{old}' has been " + f"deprecated in favour of '{new}'. 
" + f"'{old}' will be removed in a future version.", + category=DeprecationWarning, + stacklevel=2, + ) + val = kwargs.pop(old) + if old == "copy": + kwargs[new] = not val + else: + kwargs[new] = val + # reset filter + warnings.simplefilter("default", DeprecationWarning) + return func(*args, **kwargs) + + return func_wrapper + + return decorator diff --git a/scvelo/core/tests/test_anndata.py b/scvelo/core/tests/test_anndata.py deleted file mode 100644 index b73cc941..00000000 --- a/scvelo/core/tests/test_anndata.py +++ /dev/null @@ -1,136 +0,0 @@ -import hypothesis.strategies as st -from hypothesis import given - -import numpy as np -from numpy.testing import assert_array_equal -from scipy.sparse import issparse - -from anndata import AnnData - -from scvelo.core import get_modality, make_dense, make_sparse, set_modality -from .test_base import get_adata, TestBase - - -class TestGetModality(TestBase): - @given(adata=get_adata()) - def test_get_modality(self, adata: AnnData): - modality_to_get = self._subset_modalities(adata, 1)[0] - modality_retrieved = get_modality(adata=adata, modality=modality_to_get) - - if modality_to_get == "X": - assert_array_equal(adata.X, modality_retrieved) - elif modality_to_get in adata.layers: - assert_array_equal(adata.layers[modality_to_get], modality_retrieved) - else: - assert_array_equal(adata.obsm[modality_to_get], modality_retrieved) - - -class TestMakeDense(TestBase): - @given( - adata=get_adata(sparse_entries=True), - inplace=st.booleans(), - n_modalities=st.integers(min_value=0), - ) - def test_make_dense(self, adata: AnnData, inplace: bool, n_modalities: int): - modalities_to_densify = self._subset_modalities(adata, n_modalities) - - returned_adata = make_dense( - adata=adata, modalities=modalities_to_densify, inplace=inplace - ) - - if inplace: - assert returned_adata is None - assert np.all( - [ - not issparse(get_modality(adata=adata, modality=modality)) - for modality in modalities_to_densify - ] - ) - else: - assert 
isinstance(returned_adata, AnnData) - assert np.all( - [ - not issparse(get_modality(adata=returned_adata, modality=modality)) - for modality in modalities_to_densify - ] - ) - assert np.all( - [ - issparse(get_modality(adata=adata, modality=modality)) - for modality in modalities_to_densify - ] - ) - - -class TestMakeSparse(TestBase): - @given( - adata=get_adata(), - inplace=st.booleans(), - n_modalities=st.integers(min_value=0), - ) - def test_make_sparse(self, adata: AnnData, inplace: bool, n_modalities: int): - modalities_to_make_sparse = self._subset_modalities(adata, n_modalities) - - returned_adata = make_sparse( - adata=adata, modalities=modalities_to_make_sparse, inplace=inplace - ) - - if inplace: - assert returned_adata is None - assert np.all( - [ - issparse(get_modality(adata=adata, modality=modality)) - for modality in modalities_to_make_sparse - if modality != "X" - ] - ) - else: - assert isinstance(returned_adata, AnnData) - assert np.all( - [ - issparse(get_modality(adata=returned_adata, modality=modality)) - for modality in modalities_to_make_sparse - if modality != "X" - ] - ) - assert np.all( - [ - not issparse(get_modality(adata=adata, modality=modality)) - for modality in modalities_to_make_sparse - if modality != "X" - ] - ) - - -class TestSetModality(TestBase): - @given(adata=get_adata(), inplace=st.booleans()) - def test_set_modality(self, adata: AnnData, inplace: bool): - modality_to_set = self._subset_modalities(adata, 1)[0] - - if (modality_to_set == "X") or (modality_to_set in adata.layers): - new_value = np.random.randn(adata.n_obs, adata.n_vars) - else: - new_value = np.random.randn( - adata.n_obs, np.random.randint(low=1, high=10000) - ) - - returned_adata = set_modality( - adata=adata, new_value=new_value, modality=modality_to_set, inplace=inplace - ) - - if inplace: - assert returned_adata is None - if modality_to_set == "X": - assert_array_equal(adata.X, new_value) - elif modality_to_set in adata.layers: - 
assert_array_equal(adata.layers[modality_to_set], new_value) - else: - assert_array_equal(adata.obsm[modality_to_set], new_value) - else: - assert isinstance(returned_adata, AnnData) - if modality_to_set == "X": - assert_array_equal(returned_adata.X, new_value) - elif modality_to_set in adata.layers: - assert_array_equal(returned_adata.layers[modality_to_set], new_value) - else: - assert_array_equal(returned_adata.obsm[modality_to_set], new_value) diff --git a/scvelo/core/tests/test_base.py b/scvelo/core/tests/test_base.py deleted file mode 100644 index 13095a52..00000000 --- a/scvelo/core/tests/test_base.py +++ /dev/null @@ -1,196 +0,0 @@ -import random -from typing import List, Optional, Union - -import hypothesis.strategies as st -from hypothesis import given -from hypothesis.extra.numpy import arrays - -import numpy as np -from scipy.sparse import csr_matrix, issparse - -from anndata import AnnData - - -# TODO: Add possibility to generate adata object with floats as counts -@st.composite -def get_adata( - draw, - n_obs: Optional[int] = None, - n_vars: Optional[int] = None, - min_obs: Optional[int] = 1, - max_obs: Optional[int] = 100, - min_vars: Optional[int] = 1, - max_vars: Optional[int] = 100, - layer_keys: Optional[Union[List, str]] = None, - min_layers: Optional[int] = 2, - max_layers: int = 2, - obsm_keys: Optional[Union[List, str]] = None, - min_obsm: Optional[int] = 2, - max_obsm: Optional[int] = 2, - sparse_entries: bool = False, -) -> AnnData: - """Generate an AnnData object. - - The largest possible value of a numerical entry is `1e5`. - - Arguments - --------- - n_obs: - Number of observations. If set to `None`, a random integer between `1` and - `max_obs` will be drawn. Defaults to `None`. - n_vars: - Number of variables. If set to `None`, a random integer between `1` and - `max_vars` will be drawn. Defaults to `None`. - min_obs: - Minimum number of observations. If set to `None`, there is no lower limit. - Defaults to `1`. 
- max_obs: - Maximum number of observations. If set to `None`, there is no upper limit. - Defaults to `100`. - min_vars: - Minimum number of variables. If set to `None`, there is no lower limit. - Defaults to `1`. - max_vars: - Maximum number of variables. If set to `None`, there is no upper limit. - Defaults to `100`. - layer_keys: - Names of layers. If set to `None`, layers will be named at random. Defaults - to `None`. - min_layers: - Minimum number of layers. Is set to the number of provided layer names if - `layer_keys` is not `None`. Defaults to `2`. - max_layers: Maximum number of layers. Is set to the number of provided layer - names if `layer_keys` is not `None`. Defaults to `2`. - obsm_keys: - Names of multi-dimensional observations annotation. If set to `None`, names - will be generated at random. Defaults to `None`. - min_obsm: - Minimum number of multi-dimensional observations annotation. Is set to the - number of keys if `obsm_keys` is not `None`. Defaults to `2`. - max_obsm: - Maximum number of multi-dimensional observations annotation. Is set to the - number of keys if `obsm_keys` is not `None`. Defaults to `2`. - sparse_entries: - Whether or not to make AnnData entries sparse. - - Returns - ------- - AnnData - Generated :class:`~anndata.AnnData` object. 
- """ - - if n_obs is None: - n_obs = draw(st.integers(min_value=min_obs, max_value=max_obs)) - if n_vars is None: - n_vars = draw(st.integers(min_value=min_vars, max_value=max_vars)) - - if isinstance(layer_keys, str): - layer_keys = [layer_keys] - if isinstance(obsm_keys, str): - obsm_keys = [obsm_keys] - - if layer_keys is not None: - min_layers = len(layer_keys) - max_layers = len(layer_keys) - if obsm_keys is not None: - min_obsm = len(obsm_keys) - max_obsm = len(obsm_keys) - - X = draw( - arrays( - dtype=int, - elements=st.integers(min_value=0, max_value=1e2), - shape=(n_obs, n_vars), - ) - ) - - layers = draw( - st.dictionaries( - st.text(min_size=1) if layer_keys is None else st.sampled_from(layer_keys), - arrays( - dtype=int, - elements=st.integers(min_value=0, max_value=1e2), - shape=(n_obs, n_vars), - ), - min_size=min_layers, - max_size=max_layers, - ) - ) - - obsm = draw( - st.dictionaries( - st.text(min_size=1) if obsm_keys is None else st.sampled_from(obsm_keys), - arrays( - dtype=int, - elements=st.integers(min_value=0, max_value=1e2), - shape=st.tuples( - st.integers(min_value=n_obs, max_value=n_obs), - st.integers(min_value=min_vars, max_value=max_vars), - ), - ), - min_size=min_obsm, - max_size=max_obsm, - ) - ) - - # Make keys for layers and obsm unique - for key in set(layers.keys()).intersection(obsm.keys()): - layers[f"{key}_"] = layers.pop(key) - - if sparse_entries: - layers = {key: csr_matrix(val) for key, val in layers.items()} - obsm = {key: csr_matrix(val) for key, val in obsm.items()} - return AnnData(X=csr_matrix(X), layers=layers, obsm=obsm) - else: - return AnnData(X=X, layers=layers, obsm=obsm) - - -class TestAdataGeneration: - @given(adata=get_adata()) - def test_default_adata_generation(self, adata: AnnData): - assert type(adata) is AnnData - - @given(adata=get_adata(sparse_entries=True)) - def test_sparse_adata_generation(self, adata: AnnData): - assert type(adata) is AnnData - assert issparse(adata.X) - assert 
np.all([issparse(adata.layers[layer]) for layer in adata.layers]) - assert np.all([issparse(adata.obsm[name]) for name in adata.obsm]) - - @given( - adata=get_adata( - n_obs=2, n_vars=2, layer_keys=["unspliced", "spliced"], obsm_keys="X_umap" - ) - ) - def test_custom_adata_generation(self, adata: AnnData): - assert adata.X.shape == (2, 2) - assert len(adata.layers) == 2 - assert len(adata.obsm) == 1 - assert set(adata.layers.keys()) == {"unspliced", "spliced"} - assert set(adata.obsm.keys()) == {"X_umap"} - - -class TestBase: - def _subset_modalities( - self, - adata: AnnData, - n_modalities: int, - from_layers: bool = True, - from_obsm: bool = True, - ): - """Subset modalities of an AnnData object.""" - - modalities = ["X"] - if from_layers: - modalities += list(adata.layers.keys()) - if from_obsm: - modalities += list(adata.obsm.keys()) - return random.sample(modalities, min(len(modalities), n_modalities)) - - def _convert_to_float(self, adata: AnnData): - """Convert AnnData entries in `layer` and `obsm` into floats.""" - - for layer in adata.layers: - adata.layers[layer] = adata.layers[layer].astype(float) - for obs in adata.obsm: - adata.obsm[obs] = adata.obsm[obs].astype(float) diff --git a/scvelo/core/tests/test_metrics.py b/scvelo/core/tests/test_metrics.py deleted file mode 100644 index 81f54070..00000000 --- a/scvelo/core/tests/test_metrics.py +++ /dev/null @@ -1,24 +0,0 @@ -from hypothesis import given -from hypothesis import strategies as st -from hypothesis.extra.numpy import arrays - -import numpy as np -from numpy import ndarray - -from scvelo.core import l2_norm - - -# TODO: Extend test to generate sparse inputs as well -@given( - a=arrays( - float, - shape=st.integers(min_value=1, max_value=100), - elements=st.floats(max_value=1e3, allow_infinity=False, allow_nan=False), - ), - axis=st.integers(min_value=0, max_value=1), -) -def test_l2_norm(a: ndarray, axis: int): - if a.ndim == 1: - np.allclose(np.linalg.norm(a), l2_norm(a, axis=axis)) - else: - 
np.allclose(np.linalg.norm(a, axis=axis), l2_norm(a, axis=axis)) diff --git a/scvelo/core/tests/test_models.py b/scvelo/core/tests/test_models.py deleted file mode 100644 index bca814fb..00000000 --- a/scvelo/core/tests/test_models.py +++ /dev/null @@ -1,90 +0,0 @@ -from typing import List - -import pytest -from hypothesis import given -from hypothesis import strategies as st -from hypothesis.extra.numpy import arrays - -import numpy as np -from numpy import ndarray -from scipy.integrate import odeint - -from scvelo.core import SplicingDynamics - - -class TestSplicingDynamics: - @given( - alpha=st.floats(min_value=0, allow_infinity=False), - beta=st.floats(min_value=0, max_value=1, exclude_min=True), - gamma=st.floats(min_value=0, max_value=1, exclude_min=True), - initial_state=st.lists( - st.floats(min_value=0, allow_infinity=False), min_size=2, max_size=2 - ), - t=arrays( - float, - shape=st.integers(min_value=1, max_value=100), - elements=st.floats( - min_value=0, max_value=1e3, allow_infinity=False, allow_nan=False - ), - ), - with_keys=st.booleans(), - ) - def test_output_form( - self, - alpha: float, - beta: float, - gamma: float, - initial_state: List[float], - t: ndarray, - with_keys: bool, - ): - if beta == gamma: - gamma = gamma + 1e-6 - - splicing_dynamics = SplicingDynamics( - alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state - ) - solution = splicing_dynamics.get_solution(t=t, with_keys=with_keys) - - if not with_keys: - assert type(solution) == ndarray - assert solution.shape == (len(t), 2) - else: - assert len(solution) == 2 - assert type(solution) == dict - assert list(solution.keys()) == ["u", "s"] - assert all([len(var) == len(t) for var in solution.values()]) - - # TODO: Check how / if hypothesis can be used instead. 
- @pytest.mark.parametrize( - "alpha, beta, gamma, initial_state", - [ - (5, 0.5, 0.4, [0, 1]), - ], - ) - def test_solution(self, alpha, beta, gamma, initial_state): - def model(y, t, alpha, beta, gamma): - dydt = np.zeros(2) - dydt[0] = alpha - beta * y[0] - dydt[1] = beta * y[0] - gamma * y[1] - - return dydt - - t = np.linspace(0, 20, 10000) - splicing_dynamics = SplicingDynamics( - alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state - ) - exact_solution = splicing_dynamics.get_solution(t=t) - - numerical_solution = odeint( - model, - np.array(initial_state), - t, - args=( - alpha, - beta, - gamma, - ), - ) - - assert np.allclose(numerical_solution, exact_solution) diff --git a/scvelo/datasets/__init__.py b/scvelo/datasets/__init__.py new file mode 100644 index 00000000..b90a8c72 --- /dev/null +++ b/scvelo/datasets/__init__.py @@ -0,0 +1,29 @@ +from ._datasets import ( + bonemarrow, + dentategyrus, + dentategyrus_lamanno, + forebrain, + gastrulation, + gastrulation_e75, + gastrulation_erythroid, + pancreas, + pancreatic_endocrinogenesis, + pbmc68k, + toy_data, +) +from ._simulate import simulation + +__all__ = [ + "bonemarrow", + "dentategyrus", + "dentategyrus_lamanno", + "forebrain", + "gastrulation", + "gastrulation_e75", + "gastrulation_erythroid", + "pancreas", + "pancreatic_endocrinogenesis", + "pbmc68k", + "simulation", + "toy_data", +] diff --git a/scvelo/datasets.py b/scvelo/datasets/_datasets.py similarity index 52% rename from scvelo/datasets.py rename to scvelo/datasets/_datasets.py index f4dd8fd3..48aecf07 100644 --- a/scvelo/datasets.py +++ b/scvelo/datasets/_datasets.py @@ -1,44 +1,50 @@ -"""Builtin Datasets. 
-""" +import warnings +from pathlib import Path +from typing import Optional, Union import numpy as np import pandas as pd -from anndata import AnnData from scanpy import read -from scvelo.core import cleanup, SplicingDynamics -from .read_load import load +from scvelo.core import cleanup +from scvelo.read_load import load url_datadir = "https://github.com/theislab/scvelo_notebooks/raw/master/" -def toy_data(n_obs=None): - """ - Randomly sampled from the Dentate Gyrus dataset. +def bonemarrow( + file_path: Optional[ + Union[str, Path] + ] = "data/BoneMarrow/human_cd34_bone_marrow.h5ad" +): + """Human bone marrow. - Arguments - --------- - n_obs: `int` (default: `None`) - Size of the sampled dataset + Data from `Setty et al. (2019) `__. + + The bone marrow is the primary site of new blood cell production or haematopoiesis. + It is composed of hematopoietic cells, marrow adipose tissue, and supportive stromal + cells. + + This dataset served to detect important landmarks of hematopoietic differentiation, + to identify key transcription factors that drive lineage fate choice and to closely + track when cells lose plasticity. + + .. image:: https://user-images.githubusercontent.com/31883718/118402252-68bd7480-b669-11eb-9ef3-5f992b74a2d3.png + :width: 600px Returns ------- Returns `adata` object - """ - - adata_dg = dentategyrus() + """ # noqa E501 - if n_obs is not None: - indices = np.random.choice(adata_dg.n_obs, n_obs) - adata = adata_dg[indices] - else: - adata = adata_dg - adata.obs_names_make_unique() - return adata.copy() + url = "https://ndownloader.figshare.com/files/27686835" + adata = read(file_path, backup_url=url, sparse=True, cache=True) + adata.var_names_make_unique() + return adata -def dentategyrus(adjusted=True): +def dentategyrus(file_path: Optional[Union[str, Path]] = None, adjusted=True): """Dentate Gyrus neurogenesis. Data from `Hochgerner et al. (2018) `__. @@ -56,20 +62,27 @@ def dentategyrus(adjusted=True): .. 
image:: https://user-images.githubusercontent.com/31883718/79433223-255b8700-7fcd-11ea-8ecf-3dc9eb1a6159.png :width: 600px + Arguments + --------- + file_path + Path where to save dataset and read it from. + Returns ------- Returns `adata` object """ # noqa E501 + if file_path is None and adjusted: + file_path = "data/DentateGyrus/10X43_1.h5ad" + elif file_path is None: + file_path = "data/DentateGyrus/10X43_1.loom" + if adjusted: - filename = "data/DentateGyrus/10X43_1.h5ad" url = f"{url_datadir}data/DentateGyrus/10X43_1.h5ad" - adata = read(filename, backup_url=url, sparse=True, cache=True) - + adata = read(file_path, backup_url=url, sparse=True, cache=True) else: - filename = "data/DentateGyrus/10X43_1.loom" url = "http://pklab.med.harvard.edu/velocyto/DG1/10X43_1.loom" - adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True) + adata = read(file_path, backup_url=url, cleanup=True, sparse=True, cache=True) cleanup(adata, clean="all", keep={"spliced", "unspliced", "ambiguous"}) url_louvain = f"{url_datadir}data/DentateGyrus/DG_clusters.npy" @@ -87,65 +100,17 @@ def dentategyrus(adjusted=True): return adata -def forebrain(): - """Developing human forebrain. - - From `La Manno et al. (2018) `__. - - Forebrain tissue of a human week 10 embryo, focusing on glutamatergic neuronal - lineage, obtained from elective routine abortions (10 weeks post-conception). - - Returns - ------- - Returns `adata` object - """ # noqa E501 - - filename = "data/ForebrainGlut/hgForebrainGlut.loom" - url = "http://pklab.med.harvard.edu/velocyto/hgForebrainGlut/hgForebrainGlut.loom" - adata = read(filename, backup_url=url, cleanup=True, sparse=True, cache=True) - adata.var_names_make_unique() - return adata - - -def pancreas(): - """Pancreatic endocrinogenesis. - - Data from `Bastidas-Ponce et al. (2019) `__. - - Pancreatic epithelial and Ngn3-Venus fusion (NVF) cells during secondary transition - with transcriptome profiles sampled from embryonic day 15.5. 
- - Endocrine cells are derived from endocrine progenitors located in the pancreatic - epithelium. Endocrine commitment terminates in four major fates: glucagon- producing - α-cells, insulin-producing β-cells, somatostatin-producing δ-cells and - ghrelin-producing ε-cells. - - .. image:: https://user-images.githubusercontent.com/31883718/67709134-a0989480-f9bd-11e9-8ae6-f6391f5d95a0.png - :width: 600px - - Returns - ------- - Returns `adata` object - """ # noqa E501 - - filename = "data/Pancreas/endocrinogenesis_day15.h5ad" - url = f"{url_datadir}data/Pancreas/endocrinogenesis_day15.h5ad" - adata = read(filename, backup_url=url, sparse=True, cache=True) - adata.var_names_make_unique() - return adata - - -pancreatic_endocrinogenesis = pancreas # restore old conventions - - -def dentategyrus_lamanno(): +def dentategyrus_lamanno( + file_path: Optional[Union[str, Path]] = "data/DentateGyrus/DentateGyrus.loom" +): """Dentate Gyrus neurogenesis. From `La Manno et al. (2018) `__. The experiment from the developing mouse hippocampus comprises two time points (P0 and P5) and reveals the complex manifold with multiple branching lineages - towards astrocytes, oligodendrocyte precursors (OPCs), granule neurons and pyramidal neurons. + towards astrocytes, oligodendrocyte precursors (OPCs), granule neurons and pyramidal + neurons. .. 
image:: https://user-images.githubusercontent.com/31883718/118401264-49bce380-b665-11eb-8678-e7570ede13d6.png :width: 600px @@ -154,9 +119,9 @@ def dentategyrus_lamanno(): ------- Returns `adata` object """ # noqa E501 - filename = "data/DentateGyrus/DentateGyrus.loom" + url = "http://pklab.med.harvard.edu/velocyto/DentateGyrus/DentateGyrus.loom" - adata = read(filename, backup_url=url, sparse=True, cache=True) + adata = read(file_path, backup_url=url, sparse=True, cache=True) adata.var_names_make_unique() adata.obsm["X_tsne"] = np.column_stack([adata.obs["TSNE1"], adata.obs["TSNE2"]]) adata.obs["clusters"] = adata.obs["ClusterName"] @@ -181,19 +146,47 @@ def dentategyrus_lamanno(): return adata -def gastrulation(): +def forebrain(file_path: Union[str, Path] = "data/ForebrainGlut/hgForebrainGlut.loom"): + """Developing human forebrain. + + From `La Manno et al. (2018) `__. + + Forebrain tissue of a human week 10 embryo, focusing on glutamatergic neuronal + lineage, obtained from elective routine abortions (10 weeks post-conception). + + Arguments + --------- + file_path + Path where to save dataset and read it from. + + Returns + ------- + Returns `adata` object + """ + + url = "http://pklab.med.harvard.edu/velocyto/hgForebrainGlut/hgForebrainGlut.loom" + adata = read(file_path, backup_url=url, cleanup=True, sparse=True, cache=True) + adata.var_names_make_unique() + return adata + + +def gastrulation( + file_path: Optional[Union[str, Path]] = "data/Gastrulation/gastrulation.h5ad" +): """Mouse gastrulation. Data from `Pijuan-Sala et al. (2019) `__. Gastrulation represents a key developmental event during which embryonic pluripotent - cells diversify into lineage-specific precursors that will generate the adult organism. + cells diversify into lineage-specific precursors that will generate the adult + organism. This data contains the erythrocyte lineage from Pijuan-Sala et al. (2019). 
- The experiment reveals the molecular map of mouse gastrulation and early organogenesis. - It comprises transcriptional profiles of 116,312 single cells from mouse embryos - collected at nine sequential time points ranging from 6.5 to 8.5 days post-fertilization. - It served to explore the complex events involved in the convergence of visceral and primitive streak-derived endoderm. + The experiment reveals the molecular map of mouse gastrulation and early + organogenesis. It comprises transcriptional profiles of 116,312 single cells from + mouse embryos collected at nine sequential time points ranging from 6.5 to 8.5 days + post-fertilization. It served to explore the complex events involved in the + convergence of visceral and primitive streak-derived endoderm. .. image:: https://user-images.githubusercontent.com/31883718/130636066-3bae153e-1626-4d11-8f38-6efab5b81c1c.png :width: 600px @@ -202,20 +195,23 @@ def gastrulation(): ------- Returns `adata` object """ # noqa E501 - filename = "data/Gastrulation/gastrulation.h5ad" + url = "https://ndownloader.figshare.com/files/28095525" - adata = read(filename, backup_url=url, sparse=True, cache=True) + adata = read(file_path, backup_url=url, sparse=True, cache=True) adata.var_names_make_unique() return adata -def gastrulation_e75(): +def gastrulation_e75( + file_path: Optional[Union[str, Path]] = "data/Gastrulation/gastrulation_e75.h5ad" +): """Mouse gastrulation subset to E7.5. Data from `Pijuan-Sala et al. (2019) `__. Gastrulation represents a key developmental event during which embryonic pluripotent - cells diversify into lineage-specific precursors that will generate the adult organism. + cells diversify into lineage-specific precursors that will generate the adult + organism. .. 
image:: https://user-images.githubusercontent.com/31883718/130636292-7f2a599b-ded4-4616-99d7-604d2f324531.png :width: 600px @@ -224,20 +220,23 @@ def gastrulation_e75(): ------- Returns `adata` object """ # noqa E501 - filename = "data/Gastrulation/gastrulation_e75.h5ad" + url = "https://ndownloader.figshare.com/files/30439878" - adata = read(filename, backup_url=url, sparse=True, cache=True) + adata = read(file_path, backup_url=url, sparse=True, cache=True) adata.var_names_make_unique() return adata -def gastrulation_erythroid(): +def gastrulation_erythroid( + file_path: Optional[Union[str, Path]] = "data/Gastrulation/erythroid_lineage.h5ad" +): """Mouse gastrulation subset to erythroid lineage. Data from `Pijuan-Sala et al. (2019) `__. Gastrulation represents a key developmental event during which embryonic pluripotent - cells diversify into lineage-specific precursors that will generate the adult organism. + cells diversify into lineage-specific precursors that will generate the adult + organism. .. image:: https://user-images.githubusercontent.com/31883718/118402002-40814600-b668-11eb-8bfc-dbece2b2b34e.png :width: 600px @@ -246,50 +245,70 @@ def gastrulation_erythroid(): ------- Returns `adata` object """ # noqa E501 - filename = "data/Gastrulation/erythroid_lineage.h5ad" + url = "https://ndownloader.figshare.com/files/27686871" - adata = read(filename, backup_url=url, sparse=True, cache=True) + adata = read(file_path, backup_url=url, sparse=True, cache=True) adata.var_names_make_unique() return adata -def bonemarrow(): - """Human bone marrow. +def pancreas(file_path: Union[str, Path] = "data/Pancreas/endocrinogenesis_day15.h5ad"): + """Pancreatic endocrinogenesis. - Data from `Setty et al. (2019) `__. + Data from `Bastidas-Ponce et al. (2019) `__. - The bone marrow is the primary site of new blood cell production or haematopoiesis. - It is composed of hematopoietic cells, marrow adipose tissue, and supportive stromal cells.
+ Pancreatic epithelial and Ngn3-Venus fusion (NVF) cells during secondary transition + with transcriptome profiles sampled from embryonic day 15.5. - This dataset served to detect important landmarks of hematopoietic differentiation, to - identify key transcription factors that drive lineage fate choice and to closely track when cells lose plasticity. + Endocrine cells are derived from endocrine progenitors located in the pancreatic + epithelium. Endocrine commitment terminates in four major fates: glucagon-producing + α-cells, insulin-producing β-cells, somatostatin-producing δ-cells and + ghrelin-producing ε-cells. - .. image:: https://user-images.githubusercontent.com/31883718/118402252-68bd7480-b669-11eb-9ef3-5f992b74a2d3.png + .. image:: https://user-images.githubusercontent.com/31883718/67709134-a0989480-f9bd-11e9-8ae6-f6391f5d95a0.png :width: 600px + Arguments + --------- + file_path + Path where to save dataset and read it from. + Returns ------- Returns `adata` object """ # noqa E501 - filename = "data/BoneMarrow/human_cd34_bone_marrow.h5ad" - url = "https://ndownloader.figshare.com/files/27686835" - adata = read(filename, backup_url=url, sparse=True, cache=True) + + url = f"{url_datadir}data/Pancreas/endocrinogenesis_day15.h5ad" + adata = read(file_path, backup_url=url, sparse=True, cache=True) adata.var_names_make_unique() return adata -def pbmc68k(): +def pancreatic_endocrinogenesis(): + warnings.warn( + "`scvelo.datasets.pancreatic_endocrinogenesis` is deprecated since scVelo " + "v0.2.5 and will be removed in a future version. Please use " + "`scvelo.datasets.pancreas` instead.", + DeprecationWarning, + stacklevel=2, + ) + + return pancreas() + + +def pbmc68k(file_path: Optional[Union[str, Path]] = "data/PBMC/pbmc68k.h5ad"): """Peripheral blood mononuclear cells. Data from `Zheng et al. (2017) `__. - This experiment contains 68k peripheral blood mononuclear cells (PBMC) measured using 10X.
+ This experiment contains 68k peripheral blood mononuclear cells (PBMC) measured + using 10X. PBMCs are a diverse mixture of highly specialized immune cells. They originate from hematopoietic stem cells (HSCs) that reside in the bone marrow and give rise to all blood cells of the immune system (hematopoiesis). - HSCs give rise to myeloid (monocytes, macrophages, granulocytes, megakaryocytes, dendritic cells, erythrocytes) - and lymphoid (T cells, B cells, NK cells) lineages. + HSCs give rise to myeloid (monocytes, macrophages, granulocytes, megakaryocytes, + dendritic cells, erythrocytes) and lymphoid (T cells, B cells, NK cells) lineages. .. image:: https://user-images.githubusercontent.com/31883718/118402351-e1243580-b669-11eb-8256-4a49c299da3d.png :width: 600px @@ -298,161 +317,38 @@ def pbmc68k(): ------- Returns `adata` object """ # noqa E501 - filename = "data/PBMC/pbmc68k.h5ad" + url = "https://ndownloader.figshare.com/files/27686886" - adata = read(filename, backup_url=url, sparse=True, cache=True) + adata = read(file_path, backup_url=url, sparse=True, cache=True) adata.var_names_make_unique() return adata -def simulation( - n_obs=300, - n_vars=None, - alpha=None, - beta=None, - gamma=None, - alpha_=None, - t_max=None, - noise_model="normal", - noise_level=1, - switches=None, - random_seed=0, +# TODO: Remove function and add subsetting functionality for each dataset +def toy_data( + file_path: Union[str, Path] = "data/DentateGyrus/10X43_1.h5ad", n_obs=None ): - """Simulation of mRNA splicing kinetics. - - - Simulated mRNA metabolism with transcription, splicing and degradation. - The parameters for each reaction are randomly sampled from a log-normal distribution - and time events follow the Poisson law. The total time spent in a transcriptional - state is varied between two and ten hours. + """ + Randomly sampled from the Dentate Gyrus dataset. - .. 
image:: https://user-images.githubusercontent.com/31883718/79432471-16c0a000-7fcc-11ea-8d62-6971bcf4181a.png - :width: 600px + Arguments + --------- + file_path + Path where to save dataset and read it from. + n_obs: `int` (default: `None`) + Size of the sampled dataset Returns ------- Returns `adata` object - """ # noqa E501 - - from .tools.dynamical_model_utils import vectorize - - np.random.seed(random_seed) - - def draw_poisson(n): - from random import seed, uniform # draw from poisson - - seed(random_seed) - t = np.cumsum([-0.1 * np.log(uniform(0, 1)) for _ in range(n - 1)]) - return np.insert(t, 0, 0) # prepend t0=0 - - def simulate_dynamics(tau, alpha, beta, gamma, u0, s0, noise_model, noise_level): - ut, st = SplicingDynamics( - alpha=alpha, beta=beta, gamma=gamma, initial_state=[u0, s0] - ).get_solution(tau, stacked=False) - if noise_model == "normal": # add noise - ut += np.random.normal( - scale=noise_level * np.percentile(ut, 99) / 10, size=len(ut) - ) - st += np.random.normal( - scale=noise_level * np.percentile(st, 99) / 10, size=len(st) - ) - ut, st = np.clip(ut, 0, None), np.clip(st, 0, None) - return ut, st - - def simulate_gillespie(alpha, beta, gamma): - # update rules: - # transcription (u+1,s), splicing (u-1,s+1), degradation (u,s-1), nothing (u,s) - update_rule = np.array([[1, 0], [-1, 1], [0, -1], [0, 0]]) - - def update(props): - if np.sum(props) > 0: - props /= np.sum(props) - p_cumsum = props.cumsum() - p = np.random.rand() - i = 0 - while p > p_cumsum[i]: - i += 1 - return update_rule[i] - - u, s = np.zeros(len(alpha)), np.zeros(len(alpha)) - for i, alpha_i in enumerate(alpha): - u_, s_ = (u[i - 1], s[i - 1]) if i > 0 else (0, 0) - du, ds = update(props=np.array([alpha_i, beta * u_, gamma * s_])) - u[i], s[i] = (u_ + du, s_ + ds) - return u, s - - alpha = 5 if alpha is None else alpha - beta = 0.5 if beta is None else beta - gamma = 0.3 if gamma is None else gamma - alpha_ = 0 if alpha_ is None else alpha_ - - t = draw_poisson(n_obs) - if 
t_max is not None: - t *= t_max / np.max(t) - t_max = np.max(t) - - def cycle(array, n_vars=None): - if isinstance(array, (np.ndarray, list, tuple)): - return ( - array if n_vars is None else array * int(np.ceil(n_vars / len(array))) - ) - else: - return [array] if n_vars is None else [array] * n_vars - - # switching time point obtained as fraction of t_max rounded down - switches = ( - cycle([0.4, 0.7, 1, 0.1], n_vars) - if switches is None - else cycle(switches, n_vars) - ) - t_ = np.array([np.max(t[t < t_i * t_max]) for t_i in switches]) - - noise_level = cycle(noise_level, len(switches) if n_vars is None else n_vars) - - n_vars = min(len(switches), len(noise_level)) if n_vars is None else n_vars - U = np.zeros(shape=(len(t), n_vars)) - S = np.zeros(shape=(len(t), n_vars)) - - def is_list(x): - return isinstance(x, (tuple, list, np.ndarray)) - - for i in range(n_vars): - alpha_i = alpha[i] if is_list(alpha) and len(alpha) != n_obs else alpha - beta_i = beta[i] if is_list(beta) and len(beta) != n_obs else beta - gamma_i = gamma[i] if is_list(gamma) and len(gamma) != n_obs else gamma - tau, alpha_vec, u0_vec, s0_vec = vectorize( - t, t_[i], alpha_i, beta_i, gamma_i, alpha_=alpha_, u0=0, s0=0 - ) + """ - if noise_model == "gillespie": - U[:, i], S[:, i] = simulate_gillespie(alpha_vec, beta, gamma) - else: - U[:, i], S[:, i] = simulate_dynamics( - tau, - alpha_vec, - beta_i, - gamma_i, - u0_vec, - s0_vec, - noise_model, - noise_level[i], - ) - - if is_list(alpha) and len(alpha) == n_obs: - alpha = np.nan - if is_list(beta) and len(beta) == n_obs: - beta = np.nan - if is_list(gamma) and len(gamma) == n_obs: - gamma = np.nan - - obs = {"true_t": t.round(2)} - var = { - "true_t_": t_[:n_vars], - "true_alpha": np.ones(n_vars) * alpha, - "true_beta": np.ones(n_vars) * beta, - "true_gamma": np.ones(n_vars) * gamma, - "true_scaling": np.ones(n_vars), - } - layers = {"unspliced": U, "spliced": S} + adata_dg = dentategyrus(file_path=file_path) - return AnnData(S, obs, var, 
layers=layers) + if n_obs is not None: + indices = np.random.choice(adata_dg.n_obs, n_obs) + adata = adata_dg[indices] + else: + adata = adata_dg + adata.obs_names_make_unique() + return adata.copy() diff --git a/scvelo/datasets/_simulate.py b/scvelo/datasets/_simulate.py new file mode 100644 index 00000000..4172df53 --- /dev/null +++ b/scvelo/datasets/_simulate.py @@ -0,0 +1,195 @@ +import warnings + +import numpy as np + +from anndata import AnnData + +from scvelo.core import invert, SplicingDynamics + + +# TODO Use `SplicingDynamics` +def unspliced(tau, u0, alpha, beta): + expu = np.exp(-beta * tau) + return u0 * expu + alpha / beta * (1 - expu) + + +def spliced(tau, s0, u0, alpha, beta, gamma): + c = (alpha - u0 * beta) * invert(gamma - beta) + expu, exps = np.exp(-beta * tau), np.exp(-gamma * tau) + return s0 * exps + alpha / gamma * (1 - exps) + c * (exps - expu) + + +def vectorize(t, t_, alpha, beta, gamma=None, alpha_=0, u0=0, s0=0, sorted=False): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + o = np.array(t < t_, dtype=int) + tau = t * o + (t - t_) * (1 - o) + + u0_ = unspliced(t_, u0, alpha, beta) + s0_ = spliced(t_, s0, u0, alpha, beta, gamma if gamma is not None else beta / 2) + + # vectorize u0, s0 and alpha + u0 = u0 * o + u0_ * (1 - o) + s0 = s0 * o + s0_ * (1 - o) + alpha = alpha * o + alpha_ * (1 - o) + + if sorted: + idx = np.argsort(t) + tau, alpha, u0, s0 = tau[idx], alpha[idx], u0[idx], s0[idx] + return tau, alpha, u0, s0 + + +def simulation( + n_obs=300, + n_vars=None, + alpha=None, + beta=None, + gamma=None, + alpha_=None, + t_max=None, + noise_model="normal", + noise_level=1, + switches=None, + random_seed=0, +): + """Simulation of mRNA splicing kinetics. + + + Simulated mRNA metabolism with transcription, splicing and degradation. + The parameters for each reaction are randomly sampled from a log-normal distribution + and time events follow the Poisson law. 
The total time spent in a transcriptional + state is varied between two and ten hours. + + .. image:: https://user-images.githubusercontent.com/31883718/79432471-16c0a000-7fcc-11ea-8d62-6971bcf4181a.png + :width: 600px + + Returns + ------- + Returns `adata` object + """ # noqa E501 + + np.random.seed(random_seed) + + def draw_poisson(n): + from random import seed, uniform # draw from poisson + + seed(random_seed) + t = np.cumsum([-0.1 * np.log(uniform(0, 1)) for _ in range(n - 1)]) + return np.insert(t, 0, 0) # prepend t0=0 + + def simulate_dynamics(tau, alpha, beta, gamma, u0, s0, noise_model, noise_level): + ut, st = SplicingDynamics( + alpha=alpha, beta=beta, gamma=gamma, initial_state=[u0, s0] + ).get_solution(tau, stacked=False) + if noise_model == "normal": # add noise + ut += np.random.normal( + scale=noise_level * np.percentile(ut, 99) / 10, size=len(ut) + ) + st += np.random.normal( + scale=noise_level * np.percentile(st, 99) / 10, size=len(st) + ) + ut, st = np.clip(ut, 0, None), np.clip(st, 0, None) + return ut, st + + def simulate_gillespie(alpha, beta, gamma): + # update rules: + # transcription (u+1,s), splicing (u-1,s+1), degradation (u,s-1), nothing (u,s) + update_rule = np.array([[1, 0], [-1, 1], [0, -1], [0, 0]]) + + def update(props): + if np.sum(props) > 0: + props /= np.sum(props) + p_cumsum = props.cumsum() + p = np.random.rand() + i = 0 + while p > p_cumsum[i]: + i += 1 + return update_rule[i] + + u, s = np.zeros(len(alpha)), np.zeros(len(alpha)) + for i, alpha_i in enumerate(alpha): + u_, s_ = (u[i - 1], s[i - 1]) if i > 0 else (0, 0) + + if (alpha_i == 0) and (u_ == 0) and (s_ == 0): + du, ds = 0, 0 + else: + du, ds = update(props=np.array([alpha_i, beta * u_, gamma * s_])) + + u[i], s[i] = (u_ + du, s_ + ds) + return u, s + + alpha = 5 if alpha is None else alpha + beta = 0.5 if beta is None else beta + gamma = 0.3 if gamma is None else gamma + alpha_ = 0 if alpha_ is None else alpha_ + + t = draw_poisson(n_obs) + if t_max is not None: + 
t *= t_max / np.max(t) + t_max = np.max(t) + + def cycle(array, n_vars=None): + if isinstance(array, (np.ndarray, list, tuple)): + return ( + array if n_vars is None else array * int(np.ceil(n_vars / len(array))) + ) + else: + return [array] if n_vars is None else [array] * n_vars + + # switching time point obtained as fraction of t_max rounded down + switches = ( + cycle([0.4, 0.7, 1, 0.1], n_vars) + if switches is None + else cycle(switches, n_vars) + ) + t_ = np.array([np.max(t[t < t_i * t_max]) for t_i in switches]) + + noise_level = cycle(noise_level, len(switches) if n_vars is None else n_vars) + + n_vars = min(len(switches), len(noise_level)) if n_vars is None else n_vars + U = np.zeros(shape=(len(t), n_vars)) + S = np.zeros(shape=(len(t), n_vars)) + + def is_list(x): + return isinstance(x, (tuple, list, np.ndarray)) + + for i in range(n_vars): + alpha_i = alpha[i] if is_list(alpha) and len(alpha) != n_obs else alpha + beta_i = beta[i] if is_list(beta) and len(beta) != n_obs else beta + gamma_i = gamma[i] if is_list(gamma) and len(gamma) != n_obs else gamma + tau, alpha_vec, u0_vec, s0_vec = vectorize( + t, t_[i], alpha_i, beta_i, gamma_i, alpha_=alpha_, u0=0, s0=0 + ) + + if noise_model == "gillespie": + U[:, i], S[:, i] = simulate_gillespie(alpha_vec, beta, gamma) + else: + U[:, i], S[:, i] = simulate_dynamics( + tau, + alpha_vec, + beta_i, + gamma_i, + u0_vec, + s0_vec, + noise_model, + noise_level[i], + ) + + if is_list(alpha) and len(alpha) == n_obs: + alpha = np.nan + if is_list(beta) and len(beta) == n_obs: + beta = np.nan + if is_list(gamma) and len(gamma) == n_obs: + gamma = np.nan + + obs = {"true_t": t.round(2)} + var = { + "true_t_": t_[:n_vars], + "true_alpha": np.ones(n_vars) * alpha, + "true_beta": np.ones(n_vars) * beta, + "true_gamma": np.ones(n_vars) * gamma, + "true_scaling": np.ones(n_vars), + } + layers = {"unspliced": U, "spliced": S} + + return AnnData(S, obs, var, layers=layers) diff --git a/scvelo/pl.py b/scvelo/pl.py deleted file 
mode 100644 index f6c2497a..00000000 --- a/scvelo/pl.py +++ /dev/null @@ -1 +0,0 @@ -from scvelo.plotting import * # noqa diff --git a/scvelo/plotting/utils.py b/scvelo/plotting/utils.py index 7f6ec1b7..db255bd1 100644 --- a/scvelo/plotting/utils.py +++ b/scvelo/plotting/utils.py @@ -1033,7 +1033,7 @@ def plot_linfit( mu_x, mu_y = (0, 0) else: mu_x, mu_y = np.mean(x), np.mean(y) - slope = (np.mean(x * y) - mu_x * mu_y) / (np.mean(x ** 2) - mu_x ** 2) + slope = (np.mean(x * y) - mu_x * mu_y) / (np.mean(x**2) - mu_x**2) offset = mu_y - slope * mu_x if isinstance(add_linfit, str) and "intercept" in add_linfit: @@ -1506,7 +1506,7 @@ def log_fmt(x, pos): if xscale == "log": if xticks is None: lspace = np.linspace(-10, 10, 21) - ticks = [a for a in [10 ** a for a in lspace] if bmin < a < bmax] + ticks = [a for a in [10**a for a in lspace] if bmin < a < bmax] ax.set_xticks(ticks) ax.xaxis.set_major_formatter(log_fmt) ax.minorticks_off() diff --git a/scvelo/plotting/velocity.py b/scvelo/plotting/velocity.py index d68229a9..41c9f060 100644 --- a/scvelo/plotting/velocity.py +++ b/scvelo/plotting/velocity.py @@ -227,7 +227,7 @@ def velocity( beta = _adata.var[f"{fit}_beta"] if f"{fit}_offset" in adata.var.keys(): offset = _adata.var[f"{fit}_offset"] - x = np.array(2 * (ss - s ** 2) - s) + x = np.array(2 * (ss - s**2) - s) y = np.array(2 * (us - u * s) + u + 2 * s * offset / beta) kwargs["xlabel"] = r"2 $\Sigma_s - \langle s \rangle$" kwargs["ylabel"] = r"2 $\Sigma_{us} + \langle u \rangle$" diff --git a/scvelo/plotting/velocity_embedding_grid.py b/scvelo/plotting/velocity_embedding_grid.py index c8bd86f9..73c84f6c 100644 --- a/scvelo/plotting/velocity_embedding_grid.py +++ b/scvelo/plotting/velocity_embedding_grid.py @@ -77,7 +77,7 @@ def compute_velocity_on_grid( ns = int(np.sqrt(len(V_grid[:, 0]))) V_grid = V_grid.T.reshape(2, ns, ns) - mass = np.sqrt((V_grid ** 2).sum(0)) + mass = np.sqrt((V_grid**2).sum(0)) min_mass = 10 ** (min_mass - 6) # default min_mass = 1e-5 
min_mass = np.clip(min_mass, None, np.max(mass) * 0.9) cutoff = mass.reshape(V_grid[0].shape) < min_mass diff --git a/scvelo/plotting/velocity_embedding_stream.py b/scvelo/plotting/velocity_embedding_stream.py index 5a7acd89..e62fbcd2 100644 --- a/scvelo/plotting/velocity_embedding_stream.py +++ b/scvelo/plotting/velocity_embedding_stream.py @@ -152,7 +152,7 @@ def velocity_embedding_stream( adjust_for_stream=True, cutoff_perc=cutoff_perc, ) - lengths = np.sqrt((V_grid ** 2).sum(0)) + lengths = np.sqrt((V_grid**2).sum(0)) linewidth = 1 if linewidth is None else linewidth linewidth *= 2 * lengths / lengths[~np.isnan(lengths)].max() @@ -259,7 +259,8 @@ def velocity_embedding_stream( zorder=0, **scatter_kwargs, ) - + if save is True: + save = "embedding_stream" savefig_or_show(dpi=dpi, save=save, show=show) if show is False: return ax diff --git a/scvelo/pp.py b/scvelo/pp.py deleted file mode 100644 index c0870c14..00000000 --- a/scvelo/pp.py +++ /dev/null @@ -1 +0,0 @@ -from scvelo.preprocessing import * # noqa diff --git a/scvelo/preprocessing/neighbors.py b/scvelo/preprocessing/neighbors.py index b7f9e850..826c3647 100644 --- a/scvelo/preprocessing/neighbors.py +++ b/scvelo/preprocessing/neighbors.py @@ -1,5 +1,8 @@ import warnings from collections import Counter +from typing import Dict, Optional + +from typing_extensions import Literal import numpy as np import pandas as pd @@ -14,25 +17,145 @@ from scvelo.core import get_initial_size +def _get_hnsw_neighbors( + adata: AnnData, + use_rep: str, + n_pcs: int, + n_neighbors: int, + num_threads: int, + **kwargs, +): + X = adata.X if use_rep == "X" else adata.obsm[use_rep] + neighbors = FastNeighbors(n_neighbors=n_neighbors, num_threads=num_threads) + neighbors.fit(X if n_pcs is None else X[:, :n_pcs], **kwargs) + + return neighbors + + +def _get_scanpy_neighbors(adata: AnnData, **kwargs): + logg.switch_verbosity("off", module="scanpy") + with warnings.catch_warnings(): # ignore numba warning (umap/issues/252) + 
warnings.simplefilter("ignore") + neighbors = Neighbors(adata) + neighbors.compute_neighbors(write_knn_indices=True, **kwargs) + logg.switch_verbosity("on", module="scanpy") + + return neighbors + + +def _get_sklearn_neighbors( + adata: AnnData, use_rep: str, n_pcs: Optional[int], n_neighbors: int, **kwargs +): + from sklearn.neighbors import NearestNeighbors + + # TODO: Use `scv.core.get_modality` + X = adata.X if use_rep == "X" else adata.obsm[use_rep] + neighbors = NearestNeighbors(n_neighbors=n_neighbors - 1, **kwargs) + neighbors.fit(X if n_pcs is None else X[:, :n_pcs]) + knn_distances, neighbors.knn_indices = neighbors.kneighbors() + knn_distances, neighbors.knn_indices = set_diagonal( + knn_distances, neighbors.knn_indices + ) + neighbors.distances, neighbors.connectivities = compute_connectivities_umap( + neighbors.knn_indices, knn_distances, X.shape[0], n_neighbors=n_neighbors + ) + + return neighbors + + +def _get_rep(adata: AnnData, use_rep: str, n_pcs: int): + if use_rep is None: + rep = "X" if adata.n_vars < 50 or n_pcs == 0 else "X_pca" + elif use_rep not in adata.obsm.keys() and f"X_{use_rep}" in adata.obsm.keys(): + rep = f"X_{use_rep}" + else: + rep = use_rep + + if (rep == "X") and (n_pcs is not None): + logg.warn( + f"Unexpected pair of parameters: `use_rep='X'` but `n_pcs={n_pcs}`. " + f"This will only consider the frist {n_pcs} variables when calculating the " + "neighbor graph. To use all of `X`, pass `n_pcs=None`." 
+ ) + + return rep + + +def _set_neighbors_data( + adata: AnnData, + neighbors, + n_neighbors: int, + method: str, + metric: str, + n_pcs: int, + use_rep: str, +): + adata.uns["neighbors"] = {} + try: + adata.obsp["distances"] = neighbors.distances + adata.obsp["connectivities"] = neighbors.connectivities + adata.uns["neighbors"]["connectivities_key"] = "connectivities" + adata.uns["neighbors"]["distances_key"] = "distances" + except Exception: + adata.uns["neighbors"]["distances"] = neighbors.distances + adata.uns["neighbors"]["connectivities"] = neighbors.connectivities + + if hasattr(neighbors, "knn_indices"): + adata.uns["neighbors"]["indices"] = neighbors.knn_indices + adata.uns["neighbors"]["params"] = { + "n_neighbors": n_neighbors, + "method": method, + "metric": metric, + "n_pcs": n_pcs, + "use_rep": use_rep, + } + + +def _set_pca(adata, n_pcs: Optional[int], use_highly_variable: bool): + if ( + "X_pca" not in adata.obsm.keys() + or n_pcs is not None + and n_pcs > adata.obsm["X_pca"].shape[1] + ): + if use_highly_variable and "highly_variable" in adata.var.keys(): + n_vars = np.sum(adata.var["highly_variable"]) + else: + n_vars = adata.n_vars + + n_comps = min(30 if n_pcs is None else n_pcs, n_vars - 1, adata.n_obs - 1) + use_highly_variable &= "highly_variable" in adata.var.keys() + pca( + adata, + n_comps=n_comps, + use_highly_variable=use_highly_variable, + svd_solver="arpack", + ) + elif n_pcs is None and adata.obsm["X_pca"].shape[1] < 10: + logg.warn( + f"Neighbors are computed on {adata.obsm['X_pca'].shape[1]} " + "principal components only." 
+ ) + + def neighbors( - adata, - n_neighbors=30, - n_pcs=None, - use_rep=None, - use_highly_variable=True, - knn=True, - random_state=0, - method="umap", - metric="euclidean", - metric_kwds=None, - num_threads=-1, - copy=False, + adata: AnnData, + n_neighbors: int = 30, + n_pcs: Optional[int] = None, + use_rep: Optional[str] = None, + use_highly_variable: bool = True, + knn: bool = True, + random_state: int = 0, + method: Literal["umap", "sklearn", "hnsw", "gauss", "rapids"] = "umap", + metric: str = "euclidean", + metric_kwds: Optional[Dict] = None, + num_threads: int = -1, + copy: bool = False, ): """ Compute a neighborhood graph of observations. The neighbor graph methods (umap, hnsw, sklearn) only differ in runtime and - yield the same result as scanpy [Wolf18]_. Connectivities are computed with + yield the same result as Scanpy [Wolf18]_. Connectivities are computed with adaptive kernel width as proposed in Haghverdi et al. 2016 (doi:10.1038/nmeth.3971). Parameters @@ -87,36 +210,10 @@ def neighbors( adata = adata.copy() if copy else adata - if use_rep is None: - use_rep = "X" if adata.n_vars < 50 or n_pcs == 0 else "X_pca" - n_pcs = None if use_rep == "X" else n_pcs - elif use_rep not in adata.obsm.keys() and f"X_{use_rep}" in adata.obsm.keys(): - use_rep = f"X_{use_rep}" + use_rep = _get_rep(adata=adata, use_rep=use_rep, n_pcs=n_pcs) if use_rep == "X_pca": - if ( - "X_pca" not in adata.obsm.keys() - or n_pcs is not None - and n_pcs > adata.obsm["X_pca"].shape[1] - ): - n_vars = ( - np.sum(adata.var["highly_variable"]) - if use_highly_variable and "highly_variable" in adata.var.keys() - else adata.n_vars - ) - n_comps = min(30 if n_pcs is None else n_pcs, n_vars - 1, adata.n_obs - 1) - use_highly_variable &= "highly_variable" in adata.var.keys() - pca( - adata, - n_comps=n_comps, - use_highly_variable=use_highly_variable, - svd_solver="arpack", - ) - elif n_pcs is None and adata.obsm["X_pca"].shape[1] < 10: - logg.warn( - f"Neighbors are computed on 
{adata.obsm['X_pca'].shape[1]} " - f"principal components only." - ) + _set_pca(adata=adata, n_pcs=n_pcs, use_highly_variable=use_highly_variable) n_duplicate_cells = len(get_duplicate_cells(adata)) if n_duplicate_cells > 0: @@ -131,71 +228,53 @@ def neighbors( logg.info("computing neighbors", r=True) if method == "sklearn": - from sklearn.neighbors import NearestNeighbors - - X = adata.X if use_rep == "X" else adata.obsm[use_rep] - neighbors = NearestNeighbors( - n_neighbors=n_neighbors - 1, + neighbors = _get_sklearn_neighbors( + adata=adata, + use_rep=use_rep, + n_neighbors=n_neighbors, + n_pcs=n_pcs, metric=metric, metric_params=metric_kwds, n_jobs=num_threads, ) - neighbors.fit(X if n_pcs is None else X[:, :n_pcs]) - knn_distances, neighbors.knn_indices = neighbors.kneighbors() - knn_distances, neighbors.knn_indices = set_diagonal( - knn_distances, neighbors.knn_indices - ) - neighbors.distances, neighbors.connectivities = compute_connectivities_umap( - neighbors.knn_indices, knn_distances, X.shape[0], n_neighbors=n_neighbors - ) - elif method == "hnsw": - X = adata.X if use_rep == "X" else adata.obsm[use_rep] - neighbors = FastNeighbors(n_neighbors=n_neighbors, num_threads=num_threads) - neighbors.fit( - X if n_pcs is None else X[:, :n_pcs], + neighbors = _get_hnsw_neighbors( + adata=adata, + use_rep=use_rep, + n_pcs=n_pcs, + n_neighbors=n_neighbors, + num_threads=num_threads, metric=metric, random_state=random_state, **metric_kwds, ) - + elif method in ["umap", "gauss", "rapids"]: + neighbors = _get_scanpy_neighbors( + adata=adata, + n_neighbors=n_neighbors, + knn=knn, + n_pcs=n_pcs, + method=method, + use_rep=use_rep, + random_state=random_state, + metric=metric, + metric_kwds=metric_kwds, + ) else: - logg.switch_verbosity("off", module="scanpy") - with warnings.catch_warnings(): # ignore numba warning (umap/issues/252) - warnings.simplefilter("ignore") - neighbors = Neighbors(adata) - neighbors.compute_neighbors( - n_neighbors=n_neighbors, - knn=knn, - 
n_pcs=n_pcs, - method=method, - use_rep=use_rep, - random_state=random_state, - metric=metric, - metric_kwds=metric_kwds, - write_knn_indices=True, - ) - logg.switch_verbosity("on", module="scanpy") - - adata.uns["neighbors"] = {} - try: - adata.obsp["distances"] = neighbors.distances - adata.obsp["connectivities"] = neighbors.connectivities - adata.uns["neighbors"]["connectivities_key"] = "connectivities" - adata.uns["neighbors"]["distances_key"] = "distances" - except Exception: - adata.uns["neighbors"]["distances"] = neighbors.distances - adata.uns["neighbors"]["connectivities"] = neighbors.connectivities + raise ValueError( + f"Provided `method={method}`. Admissible values are `'umap'`, `'sklearn'`, " + "`'hnsw'`, `'gauss'`, and `'rapids'`." + ) - if hasattr(neighbors, "knn_indices"): - adata.uns["neighbors"]["indices"] = neighbors.knn_indices - adata.uns["neighbors"]["params"] = { - "n_neighbors": n_neighbors, - "method": method, - "metric": metric, - "n_pcs": n_pcs, - "use_rep": use_rep, - } + _set_neighbors_data( + adata=adata, + neighbors=neighbors, + n_neighbors=n_neighbors, + method=method, + metric=metric, + n_pcs=n_pcs, + use_rep=use_rep, + ) logg.info(" finished", time=True, end=" " if settings.verbosity > 2 else "\n") logg.hint( diff --git a/scvelo/preprocessing/utils.py b/scvelo/preprocessing/utils.py index fca3db11..70026628 100644 --- a/scvelo/preprocessing/utils.py +++ b/scvelo/preprocessing/utils.py @@ -11,6 +11,7 @@ from scvelo.core import cleanup as _cleanup from scvelo.core import get_initial_size as _get_initial_size from scvelo.core import get_size as _get_size +from scvelo.core import multiply from scvelo.core import set_initial_size as _set_initial_size from scvelo.core import show_proportions as _show_proportions from scvelo.core import sum @@ -233,14 +234,9 @@ def filter_genes( X = adata.layers[layer] else: # shared counts/cells Xs, Xu = adata.layers["spliced"], adata.layers["unspliced"] - nonzeros = ( - (Xs > 0).multiply(Xu > 0) if 
issparse(Xs) else (Xs > 0) * (Xu > 0) - ) - X = ( - nonzeros.multiply(Xs) + nonzeros.multiply(Xu) - if issparse(nonzeros) - else nonzeros * (Xs + Xu) - ) + + nonzeros = multiply(Xs > 0, Xu > 0) + X = multiply(nonzeros, Xs) + multiply(nonzeros, Xu) gene_subset = np.ones(adata.n_vars, dtype=bool) @@ -282,13 +278,20 @@ def get_mean_var(X, ignore_zeros=False, perc=None): data = X.data if issparse(X) else X mask_nans = np.isnan(data) | np.isinf(data) | np.isneginf(data) - n_nonzeros = (X != 0).sum(0) - n_counts = n_nonzeros if ignore_zeros else X.shape[0] + if issparse(X): + n_nonzeros = X.getnnz(axis=0) + else: + n_nonzeros = (X != 0).sum(axis=0) + + if ignore_zeros: + n_counts = n_nonzeros + else: + n_counts = X.shape[0] if mask_nans.sum() > 0: if issparse(X): - data[np.isnan(data) | np.isinf(data) | np.isneginf(data)] = 0 - n_nans = n_nonzeros - (X != 0).sum(0) + data[mask_nans] = 0 + n_nans = (n_nonzeros - (X != 0).sum(0)).A1 else: X[mask_nans] = 0 n_nans = mask_nans.sum(0) @@ -298,7 +301,10 @@ def get_mean_var(X, ignore_zeros=False, perc=None): if np.size(perc) < 2: perc = [perc, 100] if perc < 50 else [0, perc] lb, ub = np.percentile(data, perc) - data = np.clip(data, lb, ub) + if issparse(X): + X.data = np.clip(data, lb, ub) + else: + X = np.clip(data, lb, ub) if issparse(X): mean = (X.sum(0) / n_counts).A1 @@ -306,11 +312,13 @@ def get_mean_var(X, ignore_zeros=False, perc=None): else: mean = X.sum(0) / n_counts mean_sq = np.multiply(X, X).sum(0) / n_counts - n_cells = np.clip(X.shape[0], 2, None) # to avoid division by zero - var = (mean_sq - mean ** 2) * (n_cells / (n_cells - 1)) + + n_counts = np.clip(n_counts, 2, None) # to avoid division by zero + var = (mean_sq - mean**2) * (n_counts / (n_counts - 1)) mean = np.nan_to_num(mean) var = np.nan_to_num(var) + return mean, var @@ -496,16 +504,22 @@ def csr_vcorrcoef(X, y): mu_x = np.ravel(np.mean(X, axis=-1)) mu_y = np.ravel(np.mean(y, axis=-1)) nom = X.dot(y) - X.dot(np.repeat(mu_y, len(y))) - mu_x * np.sum(y - 
mu_y) + + if X.ndim == 1: + n_features = len(X) + else: + n_features = X.shape[1] + denom_x = ( np.ravel(np.sum(X.multiply(X), axis=-1)) if issparse(X) else np.sum(X * X, axis=-1) ) - denom_x = denom_x - np.ravel(np.sum(X, axis=-1)) * mu_x + mu_x ** 2 + denom_x = denom_x - 2 * np.ravel(np.sum(X, axis=-1)) * mu_x + n_features * mu_x**2 denom_y = ( np.ravel(np.sum(y * y, axis=-1)) - - (np.ravel(np.sum(y, axis=-1)) * mu_y) - + mu_y ** 2 + - 2 * (np.ravel(np.sum(y, axis=-1)) * mu_y) + + n_features * mu_y**2 ) return nom / np.sqrt(denom_x * denom_y) diff --git a/scvelo/tl.py b/scvelo/tl.py deleted file mode 100644 index 5d71ecb5..00000000 --- a/scvelo/tl.py +++ /dev/null @@ -1 +0,0 @@ -from scvelo.tools import * # noqa diff --git a/scvelo/tools/dynamical_model.py b/scvelo/tools/dynamical_model.py index cf24fe06..942b0953 100644 --- a/scvelo/tools/dynamical_model.py +++ b/scvelo/tools/dynamical_model.py @@ -34,7 +34,13 @@ def initialize(self): if self.std_u == 0 or self.std_s == 0: self.std_u = self.std_s = 1 _scaling = self.fit_scaling - scaling = self.std_u / self.std_s if isinstance(_scaling, bool) else _scaling + if isinstance(_scaling, bool) and _scaling: + scaling = self.std_u / self.std_s + elif isinstance(_scaling, bool): + scaling = 1 + else: + scaling = _scaling + u, u_w = u / scaling, u_w / scaling # initialize beta and gamma from extreme quantiles of s @@ -93,8 +99,9 @@ def initialize(self): self.t, self.tau, self.o = self.get_time_assignment() self.loss = [self.get_loss()] - self.initialize_scaling(sight=0.5) - self.initialize_scaling(sight=0.1) + if self.fit_scaling: + self.initialize_scaling(sight=0.5) + self.initialize_scaling(sight=0.1) self.steady_state_ratio = self.gamma / self.beta @@ -113,7 +120,8 @@ def fit(self, assignment_mode=None): # pre-train with explicit time assignment self.fit_t_and_alpha() - self.fit_scaling_() + if self.fit_scaling: + self.fit_scaling_() self.fit_rates() self.fit_t_() diff --git a/scvelo/tools/dynamical_model_utils.py 
b/scvelo/tools/dynamical_model_utils.py index 8d929d55..bbb9a079 100644 --- a/scvelo/tools/dynamical_model_utils.py +++ b/scvelo/tools/dynamical_model_utils.py @@ -62,7 +62,7 @@ def convolve(x, weights=None): def linreg(u, s): # linear regression fit - ss_ = s.multiply(s).sum(0) if issparse(s) else (s ** 2).sum(0) + ss_ = s.multiply(s).sum(0) if issparse(s) else (s**2).sum(0) us_ = s.multiply(u).sum(0) if issparse(s) else (s * u).sum(0) return us_ / ss_ @@ -300,7 +300,7 @@ def compute_divergence( distu, distu_ = (u - ut) / std_u, (u - ut_) / std_u dists, dists_ = (s - st) / std_s, (s - st_) / std_s - res = np.array([distu_ ** 2 + dists_ ** 2, distu ** 2 + dists ** 2]) + res = np.array([distu_**2 + dists_**2, distu**2 + dists**2]) if connectivities is not None and connectivities is not False: res = ( np.array([connectivities.dot(r) for r in res]) @@ -342,8 +342,8 @@ def compute_divergence( elif mode == "outside_of_trajectory": return np.sign(distu) * np.sign(distu_) == 1 - distx = distu ** 2 + dists ** 2 - distx_ = distu_ ** 2 + dists_ ** 2 + distx = distu**2 + dists**2 + distx_ = distu_**2 + dists_**2 res, varx = np.array([distx_, distx]), 1 # default vals; @@ -353,8 +353,8 @@ def compute_divergence( varu = np.nanvar(distu * o + distu_ + (1 - o), axis=0) vars = np.nanvar(dists * o + dists_ + (1 - o), axis=0) - distx = distu ** 2 / varu + dists ** 2 / vars - distx_ = distu_ ** 2 / varu + dists_ ** 2 / vars + distx = distu**2 / varu + dists**2 / vars + distx_ = distu_**2 / varu + dists_**2 / vars varx = varu * vars @@ -383,10 +383,10 @@ def compute_divergence( sign = np.sign(dists * o + dists_ * (1 - o)) varx = np.mean(dist, axis=0) - np.mean(sign * np.sqrt(dist), axis=0) ** 2 if kernel_width is not None: - varx *= kernel_width ** 2 + varx *= kernel_width**2 res /= varx elif kernel_width is not None: - res /= kernel_width ** 2 + res /= kernel_width**2 if reg_time is not None and len(reg_time) == len(distu_): o = np.argmin(res, axis=0) @@ -509,7 +509,7 @@ def 
compute_divergence( distu = distu * (o == 1) + distu_ * (o == 0) dists = dists * (o == 1) + dists_ * (o == 0) - res = distu ** 2 + dists ** 2 + res = distu**2 + dists**2 elif mode == "gene_likelihood": o = np.argmin(res, axis=0) @@ -530,7 +530,7 @@ def compute_divergence( distu *= idx dists *= idx - distx = distu ** 2 + dists ** 2 + distx = distu**2 + dists**2 # compute variance / equivalent to np.var(np.sign(sdiff) * np.sqrt(distx)) varx = ( @@ -705,9 +705,9 @@ def curve_dists( # match each curve point to nearest observation dist, dist_ = np.zeros(len(curve_t)), np.zeros(len(curve_t_)) for i, ci in enumerate(curve_t): - dist[i] = np.min(np.sum((x_obs - ci) ** 2 / std_x ** 2, 1)) + dist[i] = np.min(np.sum((x_obs - ci) ** 2 / std_x**2, 1)) for i, ci in enumerate(curve_t_): - dist_[i] = np.min(np.sum((x_obs - ci) ** 2 / std_x ** 2, 1)) + dist_[i] = np.min(np.sum((x_obs - ci) ** 2 / std_x**2, 1)) return dist, dist_ @@ -1055,13 +1055,13 @@ def get_residuals_linear(self, **kwargs): def get_residuals(self, **kwargs): udiff, sdiff, reg = self.get_dists(**kwargs) - return np.sign(sdiff) * np.sqrt(udiff ** 2 + sdiff ** 2) + return np.sign(sdiff) * np.sqrt(udiff**2 + sdiff**2) def get_distx(self, noise_model="normal", regularize=True, **kwargs): udiff, sdiff, reg = self.get_dists(**kwargs) - distx = udiff ** 2 + sdiff ** 2 + distx = udiff**2 + sdiff**2 if regularize: - distx += reg ** 2 + distx += reg**2 return np.sqrt(distx) if noise_model == "laplace" else distx def get_se(self, **kwargs): @@ -1091,7 +1091,7 @@ def get_loglikelihood(self, varx=None, noise_model="normal", **kwargs): kwargs.update({"weighted": "upper"}) udiff, sdiff, reg = self.get_dists(**kwargs) - distx = udiff ** 2 + sdiff ** 2 + reg ** 2 + distx = udiff**2 + sdiff**2 + reg**2 eucl_distx = np.sqrt(distx) n = np.clip(len(distx) - len(self.u) * 0.01, 2, None) @@ -1136,7 +1136,7 @@ def get_variance(self, **kwargs): if "weighted" not in kwargs: kwargs.update({"weighted": "upper"}) udiff, sdiff, reg = 
self.get_dists(**kwargs) - distx = udiff ** 2 + sdiff ** 2 + distx = udiff**2 + sdiff**2 return np.mean(distx) - np.mean(np.sign(sdiff) * np.sqrt(distx)) ** 2 def get_ut(self, **kwargs): @@ -1590,8 +1590,8 @@ def initialize_diff_kinetics(self, clusters): def get_orth_fit(self, **kwargs): kwargs["weighted"] = True # include inner vals for orthogonal regression u, s = self.get_reads(**kwargs) - a, b = np.sum(s * u), np.sum(u ** 2 - s ** 2) - orth_beta = (b + ((b ** 2 + 4 * a ** 2) ** 0.5)) / (2 * a) + a, b = np.sum(s * u), np.sum(u**2 - s**2) + orth_beta = (b + ((b**2 + 4 * a**2) ** 0.5)) / (2 * a) return orth_beta def get_orth_distx(self, orth_beta=None, **kwargs): @@ -1600,10 +1600,10 @@ def get_orth_distx(self, orth_beta=None, **kwargs): u, s = self.get_reads(**kwargs) if orth_beta is None: orth_beta = self.get_orth_fit(**kwargs) - s_real = np.array((s + (orth_beta * u)) / (1 + orth_beta ** 2)) + s_real = np.array((s + (orth_beta * u)) / (1 + orth_beta**2)) sdiff = np.array(s_real - s) / self.std_s udiff = np.array(orth_beta * s_real - u) / self.std_u * self.scaling - return udiff ** 2 + sdiff ** 2 + return udiff**2 + sdiff**2 def get_pval(self, model="dynamical", **kwargs): # assuming var-scaled udiff, sdiff follow N(0,1), diff --git a/scvelo/tools/optimization.py b/scvelo/tools/optimization.py index 86bcf72e..e6acdb4d 100644 --- a/scvelo/tools/optimization.py +++ b/scvelo/tools/optimization.py @@ -1,3 +1,4 @@ +import numbers import warnings import numpy as np @@ -15,7 +16,7 @@ def get_weight(x, y=None, perc=95): y = y.A xy_norm = xy_norm / np.clip(np.max(xy_norm, axis=0), 1e-3, None) xy_norm += y / np.clip(np.max(y, axis=0), 1e-3, None) - if isinstance(perc, int): + if isinstance(perc, numbers.Number): weights = xy_norm >= np.percentile(xy_norm, perc, axis=0) else: lb, ub = np.percentile(xy_norm, perc, axis=0) @@ -50,7 +51,7 @@ def leastsq_NxN(x, y, fit_offset=False, perc=None, constraint_positive_offset=Tr n_obs = x.shape[0] if weights is None else sum(weights, 
axis=0) x_ = sum(x, axis=0) / n_obs y_ = sum(y, axis=0) / n_obs - gamma = (xy_ / n_obs - x_ * y_) / (xx_ / n_obs - x_ ** 2) + gamma = (xy_ / n_obs - x_ * y_) / (xx_ / n_obs - x_**2) offset = y_ - gamma * x_ # fix negative offsets: diff --git a/scvelo/tools/rank_velocity_genes.py b/scvelo/tools/rank_velocity_genes.py index 324fd9cd..ee0b88dd 100644 --- a/scvelo/tools/rank_velocity_genes.py +++ b/scvelo/tools/rank_velocity_genes.py @@ -11,13 +11,20 @@ def get_mean_var(X, ignore_zeros=False, perc=None): data = X.data if issparse(X) else X mask_nans = np.isnan(data) | np.isinf(data) | np.isneginf(data) - n_nonzeros = (X != 0).sum(0) - n_counts = n_nonzeros if ignore_zeros else X.shape[0] + if issparse(X): + n_nonzeros = X.getnnz(axis=0) + else: + n_nonzeros = (X != 0).sum(axis=0) + + if ignore_zeros: + n_counts = n_nonzeros + else: + n_counts = X.shape[0] if mask_nans.sum() > 0: if issparse(X): - data[np.isnan(data) | np.isinf(data) | np.isneginf(data)] = 0 - n_nans = n_nonzeros - (X != 0).sum(0) + data[mask_nans] = 0 + n_nans = (n_nonzeros - (X != 0).sum(0)).A1 else: X[mask_nans] = 0 n_nans = mask_nans.sum(0) @@ -27,7 +34,10 @@ def get_mean_var(X, ignore_zeros=False, perc=None): if np.size(perc) < 2: perc = [perc, 100] if perc < 50 else [0, perc] lb, ub = np.percentile(data, perc) - data = np.clip(data, lb, ub) + if issparse(X): + X.data = np.clip(data, lb, ub) + else: + X = np.clip(data, lb, ub) if issparse(X): mean = (X.sum(0) / n_counts).A1 @@ -35,11 +45,13 @@ def get_mean_var(X, ignore_zeros=False, perc=None): else: mean = X.sum(0) / n_counts mean_sq = np.multiply(X, X).sum(0) / n_counts - n_cells = np.clip(X.shape[0], 2, None) # to avoid division by zero - var = (mean_sq - mean ** 2) * (n_cells / (n_cells - 1)) + + n_counts = np.clip(n_counts, 2, None) # to avoid division by zero + var = (mean_sq - mean**2) * (n_counts / (n_counts - 1)) mean = np.nan_to_num(mean) var = np.nan_to_num(var) + return mean, var diff --git a/scvelo/tools/transition_matrix.py 
b/scvelo/tools/transition_matrix.py index 5279c86d..eea8401b 100644 --- a/scvelo/tools/transition_matrix.py +++ b/scvelo/tools/transition_matrix.py @@ -145,13 +145,13 @@ def transition_matrix( diffusion_kernel = dists_emb.copy() diffusion_kernel.data = np.exp( - -0.5 * dists_emb.data ** 2 / scale_diffusion ** 2 + -0.5 * dists_emb.data**2 / scale_diffusion**2 ) T = T.multiply(diffusion_kernel) # combine velocity kernel & diffusion kernel if 0 < weight_diffusion < 1: # add diffusion kernel (Brownian motion - like) diffusion_kernel.data = np.exp( - -0.5 * dists_emb.data ** 2 / (scale_diffusion / 2) ** 2 + -0.5 * dists_emb.data**2 / (scale_diffusion / 2) ** 2 ) T = (1 - weight_diffusion) * T + weight_diffusion * diffusion_kernel diff --git a/scvelo/tools/utils.py b/scvelo/tools/utils.py index 2465a102..e38ae7eb 100644 --- a/scvelo/tools/utils.py +++ b/scvelo/tools/utils.py @@ -505,7 +505,7 @@ def vcorrcoef(X, y, mode="pearsons", axis=-1): Xm = np.array(X - (np.nanmean(X, -1)[:, None] if X.ndim > 1 else np.nanmean(X, -1))) ym = np.array(y - (np.nanmean(y, -1)[:, None] if y.ndim > 1 else np.nanmean(y, -1))) corr = np.nansum(Xm * ym, -1) / np.sqrt( - np.nansum(Xm ** 2, -1) * np.nansum(ym ** 2, -1) + np.nansum(Xm**2, -1) * np.nansum(ym**2, -1) ) return corr diff --git a/scvelo/tools/velocity.py b/scvelo/tools/velocity.py index eadc53fb..f47b40c4 100644 --- a/scvelo/tools/velocity.py +++ b/scvelo/tools/velocity.py @@ -162,7 +162,7 @@ def compute_stochastic( self._residual -= self._offset _residual2 = (cov_us - 2 * _Ms * _Mu) - self._gamma[idx] * ( - var_ss - 2 * _Ms ** 2 + var_ss - 2 * _Ms**2 ) if fit_offset: _residual2 += 2 * self._offset[idx] * _Ms diff --git a/scvelo/tools/velocity_embedding.py b/scvelo/tools/velocity_embedding.py index d62c46ef..0d3aeef6 100644 --- a/scvelo/tools/velocity_embedding.py +++ b/scvelo/tools/velocity_embedding.py @@ -46,15 +46,17 @@ def velocity_embedding( """Projects the single cell velocities into any embedding. 
Given normalized difference of the embedding positions - :math: - `\\tilde \\delta_{ij} = \\frac{x_j-x_i}{\\left\\lVert x_j-x_i \\right\\rVert}`. + + .. math:: + \\tilde \\delta_{ij} = \\frac{x_j-x_i}{\\left\\lVert x_j-x_i \\right\\rVert}, + the projections are obtained as expected displacements with respect to the transition matrix :math:`\\tilde \\pi_{ij}` as .. math:: \\tilde \\nu_i = E_{\\tilde \\pi_{i\\cdot}} [\\tilde \\delta_{i \\cdot}] - = \\sum_{j \\neq i} \\left( \\tilde \\pi_{ij} - \\frac1n \\right) \\tilde \\ - delta_{ij}. + = \\sum_{j \\neq i} \\left( \\tilde \\pi_{ij} - \\frac1n \\right) \\tilde + \\delta_{ij}. Arguments diff --git a/scvelo/tools/velocity_graph.py b/scvelo/tools/velocity_graph.py index a1a5be4b..f8dff063 100644 --- a/scvelo/tools/velocity_graph.py +++ b/scvelo/tools/velocity_graph.py @@ -228,7 +228,7 @@ def _compute_cosines(self, obs_idx, queue): if self.compute_uncertainties: dX /= l2_norm(dX)[:, None] uncertainties.extend( - np.nansum(dX ** 2 * moments[obs_id][None, :], 1) + np.nansum(dX**2 * moments[obs_id][None, :], 1) ) vals.extend(val) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/_data/dentategyrus_100obs.h5ad b/tests/_data/dentategyrus_100obs.h5ad new file mode 100644 index 00000000..f2bdf065 Binary files /dev/null and b/tests/_data/dentategyrus_100obs.h5ad differ diff --git a/tests/_data/dentategyrus_100obs_preprocessed.h5ad b/tests/_data/dentategyrus_100obs_preprocessed.h5ad new file mode 100644 index 00000000..9e4121b2 Binary files /dev/null and b/tests/_data/dentategyrus_100obs_preprocessed.h5ad differ diff --git a/tests/_data/dentategyrus_50obs.h5ad b/tests/_data/dentategyrus_50obs.h5ad new file mode 100644 index 00000000..0911c2cc Binary files /dev/null and b/tests/_data/dentategyrus_50obs.h5ad differ diff --git a/tests/_data/dentategyrus_50obs_preprocessed.h5ad b/tests/_data/dentategyrus_50obs_preprocessed.h5ad new file mode 100644 index 
00000000..33b852f9 Binary files /dev/null and b/tests/_data/dentategyrus_50obs_preprocessed.h5ad differ diff --git a/tests/_data/pancreas_100obs.h5ad b/tests/_data/pancreas_100obs.h5ad new file mode 100644 index 00000000..bda9a708 Binary files /dev/null and b/tests/_data/pancreas_100obs.h5ad differ diff --git a/tests/_data/pancreas_100obs_preprocessed.h5ad b/tests/_data/pancreas_100obs_preprocessed.h5ad new file mode 100644 index 00000000..2ae0da85 Binary files /dev/null and b/tests/_data/pancreas_100obs_preprocessed.h5ad differ diff --git a/tests/_data/pancreas_50obs.h5ad b/tests/_data/pancreas_50obs.h5ad new file mode 100644 index 00000000..b69aa87f Binary files /dev/null and b/tests/_data/pancreas_50obs.h5ad differ diff --git a/tests/_data/pancreas_50obs_preprocessed.h5ad b/tests/_data/pancreas_50obs_preprocessed.h5ad new file mode 100644 index 00000000..d36edc58 Binary files /dev/null and b/tests/_data/pancreas_50obs_preprocessed.h5ad differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..e7c2a1ee Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..46283999 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_first_moment.npy 
b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_first_moment.npy new file mode 100644 index 00000000..ee8aac04 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_second_moment.npy new file mode 100644 index 00000000..920777af Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..b596ac6e Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..5a114ed6 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_first_moment.npy new file mode 100644 index 00000000..e05560dd Binary files /dev/null and 
b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_second_moment.npy new file mode 100644 index 00000000..aecb3c48 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..9048b19f Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..a70433f1 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_first_moment.npy new file mode 100644 index 00000000..6b8bbc40 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_first_moment.npy differ diff --git 
a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_second_moment.npy new file mode 100644 index 00000000..c7232aa9 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..a34db0d6 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..579f94ed Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_first_moment.npy new file mode 100644 index 00000000..2e3f04af Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_second_moment.npy new file 
mode 100644 index 00000000..cde75e35 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..ad4025be Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..e49e4141 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_first_moment.npy new file mode 100644 index 00000000..e1a93a65 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_second_moment.npy new file mode 100644 index 00000000..fd499640 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_second_moment.npy differ diff --git 
a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..21d03e94 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..f049d280 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_first_moment.npy new file mode 100644 index 00000000..9433ef49 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_second_moment.npy new file mode 100644 index 00000000..47f2532c Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_first_moment.npy 
b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..f376f3c4 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..47fa4275 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_first_moment.npy new file mode 100644 index 00000000..e8abd980 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_second_moment.npy new file mode 100644 index 00000000..4c288153 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..3b0dd3fa Binary files /dev/null and 
b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..a96554bf Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_first_moment.npy new file mode 100644 index 00000000..2f0623c1 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_second_moment.npy new file mode 100644 index 00000000..0106fddd Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..ac04bdd5 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy differ diff --git 
a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..09174fc6 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_first_moment.npy new file mode 100644 index 00000000..75dea5a6 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_second_moment.npy new file mode 100644 index 00000000..3db383a1 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..6ffb55d8 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_second_moment.npy new file mode 100644 
index 00000000..e147cf7f Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_first_moment.npy new file mode 100644 index 00000000..67d8777c Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_second_moment.npy new file mode 100644 index 00000000..b0f91b5c Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..f4d8bb94 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..24fa8d88 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy differ diff --git 
a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_first_moment.npy new file mode 100644 index 00000000..e8b523d6 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_second_moment.npy new file mode 100644 index 00000000..16181fbe Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy new file mode 100644 index 00000000..54620258 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy new file mode 100644 index 00000000..00d86cc1 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_first_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_first_moment.npy new file mode 100644 index 
00000000..b328deea Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_first_moment.npy differ diff --git a/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_second_moment.npy b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_second_moment.npy new file mode 100644 index 00000000..ebf9a281 Binary files /dev/null and b/tests/_data/test_moments/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_second_moment.npy differ diff --git a/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=100-layer=spliced_magic_impute.npy b/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=100-layer=spliced_magic_impute.npy new file mode 100644 index 00000000..76f3b1e3 Binary files /dev/null and b/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=100-layer=spliced_magic_impute.npy differ diff --git a/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=100-layer=unspliced_magic_impute.npy b/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=100-layer=unspliced_magic_impute.npy new file mode 100644 index 00000000..5d013b9a Binary files /dev/null and b/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=100-layer=unspliced_magic_impute.npy differ diff --git a/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=50-layer=spliced_magic_impute.npy b/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=50-layer=spliced_magic_impute.npy new file mode 100644 index 00000000..34ed77cd Binary files /dev/null and b/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=50-layer=spliced_magic_impute.npy differ diff --git a/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=50-layer=unspliced_magic_impute.npy 
b/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=50-layer=unspliced_magic_impute.npy new file mode 100644 index 00000000..a8346feb Binary files /dev/null and b/tests/_data/test_moments/magic_impute/dataset=dentategyrus-n_obs=50-layer=unspliced_magic_impute.npy differ diff --git a/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=100-layer=spliced_magic_impute.npy b/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=100-layer=spliced_magic_impute.npy new file mode 100644 index 00000000..5566669f Binary files /dev/null and b/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=100-layer=spliced_magic_impute.npy differ diff --git a/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=100-layer=unspliced_magic_impute.npy b/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=100-layer=unspliced_magic_impute.npy new file mode 100644 index 00000000..44d65b83 Binary files /dev/null and b/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=100-layer=unspliced_magic_impute.npy differ diff --git a/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=50-layer=spliced_magic_impute.npy b/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=50-layer=spliced_magic_impute.npy new file mode 100644 index 00000000..e2ef46c3 Binary files /dev/null and b/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=50-layer=spliced_magic_impute.npy differ diff --git a/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=50-layer=unspliced_magic_impute.npy b/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=50-layer=unspliced_magic_impute.npy new file mode 100644 index 00000000..613e4e2d Binary files /dev/null and b/tests/_data/test_moments/magic_impute/dataset=pancreas-n_obs=50-layer=unspliced_magic_impute.npy differ diff --git a/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_spliced.npy 
b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_spliced.npy new file mode 100644 index 00000000..97df7e2b Binary files /dev/null and b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_spliced.npy differ diff --git a/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_unspliced.npy b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_unspliced.npy new file mode 100644 index 00000000..314bb2eb Binary files /dev/null and b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_unspliced.npy differ diff --git a/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_spliced.npy b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_spliced.npy new file mode 100644 index 00000000..c86fbacb Binary files /dev/null and b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_spliced.npy differ diff --git a/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_unspliced.npy b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_unspliced.npy new file mode 100644 index 00000000..20a2c1cd Binary files /dev/null and b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_unspliced.npy differ diff --git a/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_spliced.npy b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_spliced.npy new file mode 100644 index 00000000..92f11125 Binary files /dev/null and b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_spliced.npy differ diff --git a/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_unspliced.npy b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_unspliced.npy new file mode 100644 index 00000000..9f24a5c7 Binary files /dev/null and 
b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_unspliced.npy differ diff --git a/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_spliced.npy b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_spliced.npy new file mode 100644 index 00000000..cc5b717b Binary files /dev/null and b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_spliced.npy differ diff --git a/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_unspliced.npy b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_unspliced.npy new file mode 100644 index 00000000..0b114e6d Binary files /dev/null and b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_unspliced.npy differ diff --git a/tests/_data/test_moments/second_order_moments/dataset=dentategyrus-n_obs=100-mode=connectivities_second_moment_mixed.npy b/tests/_data/test_moments/second_order_moments/dataset=dentategyrus-n_obs=100-mode=connectivities_second_moment_mixed.npy new file mode 100644 index 00000000..23eba865 Binary files /dev/null and b/tests/_data/test_moments/second_order_moments/dataset=dentategyrus-n_obs=100-mode=connectivities_second_moment_mixed.npy differ diff --git a/tests/_data/test_moments/second_order_moments/dataset=dentategyrus-n_obs=50-mode=connectivities_second_moment_mixed.npy b/tests/_data/test_moments/second_order_moments/dataset=dentategyrus-n_obs=50-mode=connectivities_second_moment_mixed.npy new file mode 100644 index 00000000..995d55ec Binary files /dev/null and b/tests/_data/test_moments/second_order_moments/dataset=dentategyrus-n_obs=50-mode=connectivities_second_moment_mixed.npy differ diff --git a/tests/_data/test_moments/second_order_moments/dataset=pancreas-n_obs=100-mode=connectivities_second_moment_mixed.npy b/tests/_data/test_moments/second_order_moments/dataset=pancreas-n_obs=100-mode=connectivities_second_moment_mixed.npy new file mode 100644 index 
00000000..47166805 Binary files /dev/null and b/tests/_data/test_moments/second_order_moments/dataset=pancreas-n_obs=100-mode=connectivities_second_moment_mixed.npy differ diff --git a/tests/_data/test_moments/second_order_moments/dataset=pancreas-n_obs=50-mode=connectivities_second_moment_mixed.npy b/tests/_data/test_moments/second_order_moments/dataset=pancreas-n_obs=50-mode=connectivities_second_moment_mixed.npy new file mode 100644 index 00000000..e8292d08 Binary files /dev/null and b/tests/_data/test_moments/second_order_moments/dataset=pancreas-n_obs=50-mode=connectivities_second_moment_mixed.npy differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..33a8053c Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..650081c9 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..5c5e905e Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..2cae02a8 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..1f5c4142 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..e9c53ba2 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..9d292c43 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..7b277a5c Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..7959c420 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..c7618011 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..187d4c10 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..a81c829d Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..eca4fbc5 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..25b27616 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..bfbb615c Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..4d036095 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..2f6571f8 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..db18dc9e Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..bbc65582 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..9f655eef Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..be6279fd Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..93ac19d4 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..ce7dae9b Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..c390bf65 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..b2aa64c4 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..df67fc73 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..6720fea4 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..25174180 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..d0acadc7 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..f11f2fe1 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..35f32522 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..da5de584 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..73a90c95 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..f5f8c2e8 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..0f2c2a6a Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..47a2747a Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..a97a1591 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..36571f32 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..3ca88d12 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git 
a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..6071a132 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..85aacf5f Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..5d50530c Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..f7f5357c Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git 
a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..65927a3f Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..d0991497 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..d8a5fc62 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..2eb4d33b Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git 
a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..32c7dbef Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..344bf595 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..811d1601 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..dd87a1d1 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..c1418b3c Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..330e3cfb Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..e1990b34 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..7e69fc1c Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 
00000000..07587a16 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..214c667e Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..876beb7b Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..580affbe Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..2d95ac38 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..3bf3c493 Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..df59b17a Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..c1d8d1bd Binary files /dev/null and b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..1ae964f2 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_hnsw_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..645fbee3 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..8f073a34 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..049545e3 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..266dc48d Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..645fbee3 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..8f073a34 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..049545e3 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..266dc48d Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..dad57b18 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..00ba3b9d Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..c3beab72 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..8730d94f Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..b294c3d4 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..67bc3b37 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..9fd91478 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..7dd3fcc7 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..85bca2dc Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..b61a970c Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..1fe27ef6 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..08f0e0ac Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..85bca2dc Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..b61a970c Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..1fe27ef6 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..08f0e0ac Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..79f02c38 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..59d62eab Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..b793b5af Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..eceacce9 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..2906d3e8 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..4cbe4798 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..185b9a70 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..6f4f6170 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..317fa07d Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..13289269 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..b9569951 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..5aade55b Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..317fa07d Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..13289269 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..b9569951 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..5aade55b Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz 
differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..6899684b Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..8dc0f2a2 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..b420c841 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..030d4a40 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git 
a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..cda8a343 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..620c08e4 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..0d06426f Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..90fb600b Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git 
a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..3639e54b Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..c664bee9 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..c5644c31 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..e25b273a Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..3639e54b Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..c664bee9 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..c5644c31 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..e25b273a Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz 
new file mode 100644 index 00000000..bf01a921 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..ad01fb9b Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..b89b7e8c Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..1e95fbd4 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..ae0c5adb Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..6a9c7bc6 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..7003d6f5 Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..5498549a Binary files /dev/null and b/tests/_data/test_neighbors/_get_scanpy_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..34ff6091 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..5ed88923 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..a55ee2ca Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..726f232f Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..c7e6c3c5 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..6013139c Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..5b313727 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..8c614055 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..a3a3b3c7 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..e1d03ccc Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..07f270ae Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..45d9bb0c Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..b294c3d4 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..67bc3b37 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..9fd91478 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..7dd3fcc7 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..495ddcfb Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..8cf8ffac Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..974c263c Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..330fdaeb Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..eaf4f843 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..afd16aec Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..abf5b71a Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..551dcd66 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..8aa1f8c7 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..baeb592e Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..b793b5af Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..eceacce9 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..2906d3e8 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..4cbe4798 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..185b9a70 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..6f4f6170 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=dentategyrus-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..89b241f3 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..42d3e988 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..abdc52db Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..2a887c8e Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..b25edb46 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..dbec9052 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..b1431156 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..dbc7a232 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..21008359 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..eba85a3d Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..fcfb34bc Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..57c5c0e4 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..cda8a343 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..620c08e4 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..0d06426f Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..90fb600b Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=100-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..736c427a Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..3c64d0ea Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..073a4ec1 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..a5369ac2 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..da88746d Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..5761d0ef Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..7f8afd24 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..f283bca9 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..13b12ff0 Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz new file mode 100644 index 00000000..ac8c8b7d Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..b89b7e8c Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz new file mode 100644 index 00000000..1e95fbd4 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=15-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz new file mode 100644 index 00000000..ae0c5adb Binary files /dev/null and 
b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz new file mode 100644 index 00000000..6a9c7bc6 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=15_distances.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz new file mode 100644 index 00000000..7003d6f5 Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_connectivites.npz differ diff --git a/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz new file mode 100644 index 00000000..5498549a Binary files /dev/null and b/tests/_data/test_neighbors/_get_sklearn_neighbors/dataset=pancreas-n_obs=50-rep='X_pca'-n_pcs=30-n_neighbors=30_distances.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=15-mode=connectivities.npz b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=15-mode=connectivities.npz new file mode 100644 index 00000000..c10ecdf8 Binary files /dev/null and 
b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=15-mode=connectivities.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=15-mode=distances.npz b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=15-mode=distances.npz new file mode 100644 index 00000000..cab3d463 Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=15-mode=distances.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=None-mode=connectivities.npz b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=None-mode=connectivities.npz new file mode 100644 index 00000000..d752d11a Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=None-mode=connectivities.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=None-mode=distances.npz b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=None-mode=distances.npz new file mode 100644 index 00000000..2578f5a3 Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=100-n_neighbors=None-mode=distances.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=15-mode=connectivities.npz b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=15-mode=connectivities.npz new file mode 100644 index 00000000..e2fc7c3c Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=15-mode=connectivities.npz differ diff --git 
a/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=15-mode=distances.npz b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=15-mode=distances.npz new file mode 100644 index 00000000..168ef376 Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=15-mode=distances.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=None-mode=connectivities.npz b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=None-mode=connectivities.npz new file mode 100644 index 00000000..9d34800f Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=None-mode=connectivities.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=None-mode=distances.npz b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=None-mode=distances.npz new file mode 100644 index 00000000..10d49ce5 Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=dentategyrus-n_obs=50-n_neighbors=None-mode=distances.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=15-mode=connectivities.npz b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=15-mode=connectivities.npz new file mode 100644 index 00000000..8ff712f3 Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=15-mode=connectivities.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=15-mode=distances.npz b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=15-mode=distances.npz new file mode 100644 index 
00000000..509ce4ae Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=15-mode=distances.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=None-mode=connectivities.npz b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=None-mode=connectivities.npz new file mode 100644 index 00000000..b001997b Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=None-mode=connectivities.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=None-mode=distances.npz b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=None-mode=distances.npz new file mode 100644 index 00000000..061f8df1 Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=100-n_neighbors=None-mode=distances.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=15-mode=connectivities.npz b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=15-mode=connectivities.npz new file mode 100644 index 00000000..0ee69034 Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=15-mode=connectivities.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=15-mode=distances.npz b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=15-mode=distances.npz new file mode 100644 index 00000000..6110e86d Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=15-mode=distances.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=None-mode=connectivities.npz 
b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=None-mode=connectivities.npz new file mode 100644 index 00000000..ab1d6a9a Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=None-mode=connectivities.npz differ diff --git a/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=None-mode=distances.npz b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=None-mode=distances.npz new file mode 100644 index 00000000..320ae06d Binary files /dev/null and b/tests/_data/test_neighbors/get_connectivities/dataset=pancreas-n_obs=50-n_neighbors=None-mode=distances.npz differ diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..b72e1a07 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,429 @@ +from datetime import timedelta +from typing import Tuple, Union + +import pytest +from hypothesis import settings + +import scanpy as sc +from anndata import AnnData + +settings.register_profile("ci", deadline=timedelta(milliseconds=500)) + + +# TODO: Make datasets smaller (less variables) +_dentategyrus_50obs = sc.read("tests/_data/dentategyrus_50obs.h5ad") +_dentategyrus_50obs_preprocessed = sc.read( + "tests/_data/dentategyrus_50obs_preprocessed.h5ad" +) +_dentategyrus_100obs = sc.read("tests/_data/dentategyrus_100obs.h5ad") +_dentategyrus_100obs_preprocessed = sc.read( + "tests/_data/dentategyrus_100obs_preprocessed.h5ad" +) +_pancreas_50obs = sc.read("tests/_data/pancreas_50obs.h5ad") +_pancreas_50obs_preprocessed = sc.read("tests/_data/pancreas_50obs_preprocessed.h5ad") +_pancreas_100obs = sc.read("tests/_data/pancreas_100obs.h5ad") +_pancreas_100obs_preprocessed = sc.read("tests/_data/pancreas_100obs_preprocessed.h5ad") + + +@pytest.fixture +def dentategyrus_50obs() -> AnnData: + return _dentategyrus_50obs.copy() + + +@pytest.fixture +def dentategyrus_50obs_preprocessed() -> AnnData: + 
"""Preprocessed dentategyrus dataset with 50 observations. + + The data has been preprocessed using + + .. code:: python + import scanpy as sc + import scvelo as scv + + adata = sc.read(f"tests/_data/dentategyrus_50obs.h5ad") + + scv.pp.filter_and_normalize( + adata, + min_shared_counts=20, + n_top_genes=200, + retain_genes=None, + subset_highly_variable=True, + flavor="seurat", + log=True, + layers_normalize=None, + copy=False, + ) + scv.pp.neighbors( + adata, + n_neighbors=30, + n_pcs=None, + use_rep=None, + use_highly_variable=True, + knn=True, + random_state=0, + method="umap", + metric="euclidean", + metric_kwds=None, + num_threads=-1, + copy=False, + ) + scv.pp.moments( + adata, + n_neighbors=30, + n_pcs=None, + mode="connectivities", + method="umap", + use_rep=None, + use_highly_variable=True, + copy=False, + ) + """ + + return _dentategyrus_50obs_preprocessed.copy() + + +@pytest.fixture +def dentategyrus_100obs() -> AnnData: + return _dentategyrus_100obs.copy() + + +@pytest.fixture +def dentategyrus_100obs_preprocessed() -> AnnData: + """Preprocessed dentategyrus dataset with 100 observations. + + The data has been preprocessed using + + .. 
code:: python + import scanpy as sc + import scvelo as scv + + adata = sc.read(f"tests/_data/dentategyrus_100obs.h5ad") + + scv.pp.filter_and_normalize( + adata, + min_shared_counts=20, + n_top_genes=200, + retain_genes=None, + subset_highly_variable=True, + flavor="seurat", + log=True, + layers_normalize=None, + copy=False, + ) + scv.pp.neighbors( + adata, + n_neighbors=30, + n_pcs=None, + use_rep=None, + use_highly_variable=True, + knn=True, + random_state=0, + method="umap", + metric="euclidean", + metric_kwds=None, + num_threads=-1, + copy=False, + ) + scv.pp.moments( + adata, + n_neighbors=30, + n_pcs=None, + mode="connectivities", + method="umap", + use_rep=None, + use_highly_variable=True, + copy=False, + ) + """ + + return _dentategyrus_100obs_preprocessed.copy() + + +@pytest.fixture +def pancreas_50obs() -> AnnData: + return _pancreas_50obs.copy() + + +@pytest.fixture +def pancreas_50obs_preprocessed() -> AnnData: + """Preprocessed pancreas dataset with 50 observations. + + The data has been preprocessed using + + .. 
code:: python + import scanpy as sc + import scvelo as scv + + adata = sc.read(f"tests/_data/pancreas_50obs.h5ad") + + scv.pp.filter_and_normalize( + adata, + min_shared_counts=20, + n_top_genes=200, + retain_genes=None, + subset_highly_variable=True, + flavor="seurat", + log=True, + layers_normalize=None, + copy=False, + ) + scv.pp.neighbors( + adata, + n_neighbors=30, + n_pcs=None, + use_rep=None, + use_highly_variable=True, + knn=True, + random_state=0, + method="umap", + metric="euclidean", + metric_kwds=None, + num_threads=-1, + copy=False, + ) + scv.pp.moments( + adata, + n_neighbors=30, + n_pcs=None, + mode="connectivities", + method="umap", + use_rep=None, + use_highly_variable=True, + copy=False, + ) + """ + + return _pancreas_50obs_preprocessed.copy() + + +@pytest.fixture +def pancreas_100obs() -> AnnData: + return _pancreas_100obs.copy() + + +@pytest.fixture +def pancreas_100obs_preprocessed() -> AnnData: + """Preprocessed pancreas dataset with 100 observations. + + The data has been preprocessed using + + .. code:: python + import scanpy as sc + import scvelo as scv + + adata = sc.read(f"tests/_data/pancreas_100obs.h5ad") + + scv.pp.filter_and_normalize( + adata, + min_shared_counts=20, + n_top_genes=200, + retain_genes=None, + subset_highly_variable=True, + flavor="seurat", + log=True, + layers_normalize=None, + copy=False, + ) + scv.pp.neighbors( + adata, + n_neighbors=30, + n_pcs=None, + use_rep=None, + use_highly_variable=True, + knn=True, + random_state=0, + method="umap", + metric="euclidean", + metric_kwds=None, + num_threads=-1, + copy=False, + ) + scv.pp.moments( + adata, + n_neighbors=30, + n_pcs=None, + mode="connectivities", + method="umap", + use_rep=None, + use_highly_variable=True, + copy=False, + ) + """ + + return _pancreas_100obs_preprocessed.copy() + + +def _get_dentategyrus_50obs( + raw: bool, preprocessed: bool +) -> Union[AnnData, Tuple[AnnData, AnnData]]: + """Get AnnData of dentategyrus dataset with 50 observations. 
+ + Parameters + ---------- + raw + Boolean identifier whether or not to return raw dataset. + preprocessed + Boolean identifier whether or not to return preprocessed dataset. + + Returns + ------- + Union[AnnData, Tuple[AnnData, AnnData]] + Specified version of dataset. + """ + + if raw and not preprocessed: + return _dentategyrus_50obs.copy() + elif not raw and preprocessed: + return _dentategyrus_50obs_preprocessed.copy() + elif raw and preprocessed: + return _dentategyrus_50obs.copy(), _dentategyrus_50obs_preprocessed.copy() + + +def _get_dentategyrus_100obs( + raw: bool, preprocessed: bool +) -> Union[AnnData, Tuple[AnnData, AnnData]]: + """Get AnnData of dentategyrus dataset with 100 observations. + + Parameters + ---------- + raw + Boolean identifier whether or not to return raw dataset. + preprocessed + Boolean identifier whether or not to return preprocessed dataset. + + Returns + ------- + Union[AnnData, Tuple[AnnData, AnnData]] + Specified version of dataset. + """ + + if raw and not preprocessed: + return _dentategyrus_100obs.copy() + elif not raw and preprocessed: + return _dentategyrus_100obs_preprocessed.copy() + elif raw and preprocessed: + return _dentategyrus_100obs.copy(), _dentategyrus_100obs_preprocessed.copy() + + +def _get_dentategyrus_adata( + n_obs: int, raw: bool, preprocessed: bool +) -> Union[AnnData, Tuple[AnnData, AnnData]]: + """Get AnnData of raw or preprocessed dentategyrus dataset. + + Parameters + ---------- + n_obs + Number of observations of dataset to return. + raw + Boolean identifier whether or not to return raw dataset. + preprocessed + Boolean identifier whether or not to return preprocessed dataset. + + Returns + ------- + Union[AnnData, Tuple[AnnData, AnnData]] + Specified version of dataset. 
+ """ + + if n_obs == 50: + return _get_dentategyrus_50obs(raw=raw, preprocessed=preprocessed) + elif n_obs == 100: + return _get_dentategyrus_100obs(raw=raw, preprocessed=preprocessed) + + +def _get_pancreas_50obs( + raw: bool, preprocessed: bool +) -> Union[AnnData, Tuple[AnnData, AnnData]]: + """Get AnnData of pancreas dataset with 50 observations. + + Parameters + ---------- + raw + Boolean identifier whether or not to return raw dataset. + preprocessed + Boolean identifier whether or not to return preprocessed dataset. + + Returns + ------- + Union[AnnData, Tuple[AnnData, AnnData]] + Specified version of dataset. + """ + + if raw and not preprocessed: + return _pancreas_50obs.copy() + elif not raw and preprocessed: + return _pancreas_50obs_preprocessed.copy() + elif raw and preprocessed: + return _pancreas_50obs.copy(), _pancreas_50obs_preprocessed.copy() + + +def _get_pancreas_100obs( + raw: bool, preprocessed: bool +) -> Union[AnnData, Tuple[AnnData, AnnData]]: + """Get AnnData of raw or preprocessed pancreas dataset with 100 observations. + + Parameters + ---------- + raw + Boolean identifier whether or not to return raw dataset. + preprocessed + Boolean identifier whether or not to return preprocessed dataset. + + Returns + ------- + Union[AnnData, Tuple[AnnData, AnnData]] + Specified version of dataset. + """ + + if raw and not preprocessed: + return _pancreas_100obs.copy() + elif not raw and preprocessed: + return _pancreas_100obs_preprocessed.copy() + elif raw and preprocessed: + return _pancreas_100obs.copy(), _pancreas_100obs_preprocessed.copy() + + +def _get_pancreas_adata( + n_obs: int, raw: bool, preprocessed: bool +) -> Union[AnnData, Tuple[AnnData, AnnData]]: + """Get AnnData of raw or preprocessed pancreas dataset. + + Parameters + ---------- + n_obs + Number of observations of dataset to return. + raw + Boolean identifier whether or not to return raw dataset. + preprocessed + Boolean identifier whether or not to return preprocessed dataset. 
+ + Returns + ------- + Union[AnnData, Tuple[AnnData, AnnData]] + Specified version of dataset. + """ + + if n_obs == 50: + return _get_pancreas_50obs(raw=raw, preprocessed=preprocessed) + elif n_obs == 100: + return _get_pancreas_100obs(raw=raw, preprocessed=preprocessed) + + +@pytest.fixture +def adata(): + """Fixture to easily use available datasets in unit tests. + + The fixture returns a function to load the AnnData objects of a specified dataset + (`"pancreas"` or `"dentategyrus"`). The function is then used in the unit test to + load the needed version(s) (raw or preprocessed) of the dataset. + """ + + def _get_adata(dataset: str, n_obs: int, raw: bool, preprocessed: bool): + if dataset == "pancreas": + return _get_pancreas_adata(n_obs=n_obs, raw=raw, preprocessed=preprocessed) + elif dataset == "dentategyrus": + return _get_dentategyrus_adata( + n_obs=n_obs, raw=raw, preprocessed=preprocessed + ) + + return _get_adata diff --git a/scvelo/core/tests/__init__.py b/tests/core/__init__.py similarity index 100% rename from scvelo/core/tests/__init__.py rename to tests/core/__init__.py diff --git a/tests/core/test_anndata.py b/tests/core/test_anndata.py new file mode 100644 index 00000000..ebb5a6df --- /dev/null +++ b/tests/core/test_anndata.py @@ -0,0 +1,1198 @@ +from typing import Dict, List, Optional, Tuple, Union + +import hypothesis.strategies as st +import pytest +from hypothesis import given + +import numpy as np +import pandas as pd +from numpy.testing import assert_array_equal +from scipy.sparse import csr_matrix, issparse + +from anndata import AnnData + +from scvelo.core import ( + clean_obs_names, + cleanup, + get_df, + get_initial_size, + get_modality, + get_size, + make_dense, + make_sparse, + merge, + set_initial_size, + set_modality, + show_proportions, + sum, +) +from scvelo.core._anndata import obs_df, var_df +from .test_base import get_adata, TestBase + + +# TODO: Make more sophisticated +class TestCleanObsNames: + @pytest.mark.parametrize( + 
"obs_names, obs_names_cleaned, id_length", + [ + ( + ["sample1_ABCD", "sample2_ABCD", "sample3_DCBA"], + ["ABCD", "ABCD-1", "DCBA"], + 4, + ), + ( + ["sample1_ABCD0815", "sample2_AMNC0707", "sample3_AAAA0902"], + ["ABCD", "AMNC", "AAAA"], + 4, + ), + ( + [ + "possorted_genome_bam_H66NQ:AAAGAACAGACATATGx", + "possorted_genome_bam_7YCS2:AAACCCAGTCGGCTACx", + "possorted_genome_bam_10UXK:AAAGGATGTGAATGATx", + ], + ["AAAGAACAGACATATG", "AAACCCAGTCGGCTAC", "AAAGGATGTGAATGAT"], + 16, + ), + ], + ) + @pytest.mark.parametrize("inplace", (True, False)) + def test_equal_obs_id_length( + self, + obs_names: List[str], + obs_names_cleaned: List[str], + id_length: int, + inplace: bool, + ): + adata = AnnData(np.eye(3)) + adata.obs_names = obs_names + + _adata = clean_obs_names(adata, inplace=inplace, id_length=id_length) + + if inplace: + assert _adata is None + else: + assert isinstance(_adata, AnnData) + adata = _adata + + assert (adata.obs_names == obs_names_cleaned).all() + assert "sample_batch" in adata.obs + assert adata.obs["sample_batch"].str.startswith(("sample", "possorted")).all() + + @pytest.mark.parametrize( + "obs_names, obs_names_cleaned", + [ + ( + ["sample1_ABCDE0815", "sample2_AMNC0707", "sample3_AAAA0902"], + ["ABCD", "AMNC", "AAAA"], + ) + ], + ) + @pytest.mark.parametrize("inplace", (True, False)) + def test_different_obs_id_length( + self, + obs_names: List[str], + obs_names_cleaned: List[str], + inplace: bool, + ): + adata = AnnData(np.eye(3)) + adata.obs_names = obs_names + + _adata = clean_obs_names(adata, inplace=inplace, id_length=4) + + if inplace: + assert _adata is None + else: + assert isinstance(_adata, AnnData) + adata = _adata + + assert (adata.obs_names == obs_names_cleaned).all() + assert "sample_batch" in adata.obs + assert adata.obs["sample_batch"].str.startswith("sample").all() + + +class TestCleanup(TestBase): + @given(adata=get_adata(max_obs=5, max_vars=5), inplace=st.booleans()) + def test_cleanup_all(self, adata: AnnData, inplace: bool): 
+ returned_adata = cleanup(adata=adata, clean="all", inplace=inplace) + + if not inplace: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + assert len(adata.layers) == 0 + assert len(adata.uns) == 0 + assert len(adata.obs.columns) == 0 + assert len(adata.var.columns) == 0 + + @given(adata=get_adata(max_obs=5, max_vars=5), inplace=st.booleans()) + def test_cleanup_default_clean_w_random_adata(self, adata: AnnData, inplace: bool): + n_obs_cols = len(adata.obs.columns) + n_var_cols = len(adata.var.columns) + n_uns_slots = len(adata.uns) + + returned_adata = cleanup(adata=adata, inplace=inplace) + if not inplace: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + assert len(adata.layers) == len( + set(adata.layers).intersection(["unspliced", "spliced", "Mu", "Ms"]) + ) + assert len(adata.uns) == n_uns_slots + assert len(adata.obs.columns) == n_obs_cols + assert len(adata.var.columns) == n_var_cols + + @given( + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["unspliced", "spliced", "Ms", "Mu", "random"], + ), + inplace=st.booleans(), + ) + def test_cleanup_default_clean(self, adata: AnnData, inplace: bool): + n_obs_cols = len(adata.obs.columns) + n_var_cols = len(adata.var.columns) + n_uns_slots = len(adata.uns) + + returned_adata = cleanup(adata=adata, inplace=inplace) + + if not inplace: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + assert len(adata.layers) == 4 + assert len(adata.uns) == n_uns_slots + assert len(adata.obs.columns) == n_obs_cols + assert len(adata.var.columns) == n_var_cols + + @given( + adata=get_adata(max_obs=5, max_vars=5), + inplace=st.booleans(), + n_modalities=st.integers(min_value=0), + n_cols=st.integers(min_value=0), + ) + def test_cleanup_some( + self, adata: AnnData, inplace: bool, n_modalities: int, n_cols: int + ): + layers_to_keep = 
self._subset_modalities( + adata, + n_modalities, + from_obsm=False, + ) + obs_cols_to_keep = self._subset_columns(adata, n_cols=n_cols, from_var=False) + var_cols_to_keep = self._subset_columns(adata, n_cols=n_cols, from_obs=False) + + # Update in case adata.layers, adata.obs, adata.var share same keys + layers_to_keep += set(adata.layers).intersection(obs_cols_to_keep) + layers_to_keep += set(adata.layers).intersection(var_cols_to_keep) + + obs_cols_to_keep += set(adata.obs.columns).intersection(var_cols_to_keep) + obs_cols_to_keep += set(adata.obs.columns).intersection(layers_to_keep) + + var_cols_to_keep += set(adata.var.columns).intersection(obs_cols_to_keep) + var_cols_to_keep += set(adata.var.columns).intersection(layers_to_keep) + + returned_adata = cleanup( + adata=adata, + keep=layers_to_keep + obs_cols_to_keep + var_cols_to_keep, + clean="all", + inplace=inplace, + ) + + if not inplace: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + assert set(adata.layers.keys()) == set(layers_to_keep).difference({"X"}) + assert set(adata.obs.columns) == set(obs_cols_to_keep) + assert set(adata.var.columns) == set(var_cols_to_keep) + + +class TestGetDf: + @given( + data=st.data(), + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["layer_1", "layer_2"], + ), + modality=st.sampled_from([None, "X", "layer_1", "layer_2"]), + ) + def test_indexed_by_obs_names(self, data, adata: AnnData, modality: Optional[None]): + adata.var_names = [f"var_{var_id}" for var_id in adata.var_names] + + index = data.draw( + st.lists( + st.sampled_from(adata.obs_names.to_list()), + min_size=1, + max_size=len(adata.obs_names), + unique=True, + ) + ) + keys = data.draw( + st.lists( + st.sampled_from(adata.var_names.to_list()), + min_size=1, + max_size=len(adata.var_names), + unique=True, + ) + ) + + df = get_df(adata, keys=keys, index=index, layer=modality) + + assert isinstance(df, pd.DataFrame) + assert (df.index == 
index).all() + assert (df.columns == keys).all() + np.testing.assert_equal( + get_modality(adata=adata[index, keys], modality=modality), df.values + ) + + @given( + data=st.data(), + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["layer_1", "layer_2"], + ), + modality=st.sampled_from([None, "X", "layer_1", "layer_2"]), + ) + def test_indexed_by_var_names(self, data, adata: AnnData, modality: Optional[None]): + adata.obs_names = [f"obs_{obs_id}" for obs_id in adata.obs_names] + adata.var_names = [f"var_{var_id}" for var_id in adata.var_names] + + index = data.draw( + st.lists( + st.sampled_from(adata.var_names.to_list()), + min_size=1, + max_size=len(adata.var_names), + unique=True, + ) + ) + keys = data.draw( + st.lists( + st.sampled_from(adata.obs_names.to_list()), + min_size=1, + max_size=len(adata.obs_names), + unique=True, + ) + ) + + df = get_df(adata, keys=keys, index=index, layer=modality) + + assert isinstance(df, pd.DataFrame) + assert (df.index == index).all() + assert (df.columns == keys).all() + np.testing.assert_equal( + get_modality(adata=adata[keys, index], modality=modality).T, df.values + ) + + @given( + data=st.data(), + adata=get_adata( + max_obs=5, + max_vars=5, + ), + ) + def test_sorted_values(self, data, adata: AnnData): + adata.obs_names = [f"obs_{obs_id}" for obs_id in adata.obs_names] + adata.var_names = [f"var_{var_id}" for var_id in adata.var_names] + + sort_values = data.draw(st.sampled_from([True, False] + [*adata.var_names])) + + df = get_df( + adata, index=adata.obs_names, keys=adata.var_names, sort_values=sort_values + ) + + assert isinstance(df, pd.DataFrame) + if isinstance(sort_values, str): + assert (np.diff(df[sort_values]) <= 0).all() + elif sort_values: + assert (np.diff(df.values[:, 0]) <= 0).all() + + @given( + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["layer_1"], + obsm_keys=["obsm_1"], + obs_col_names=["obs_col_1"], + var_col_names=["var_col_1"], + ), + ) + def test_keys_not_present(self, adata: 
AnnData): + with pytest.raises( + ValueError, + match=( + "'not_existing_column_name' not found in any of obs, var, obsm, varm, " + "uns, layers, obsp, varp." + ), + ): + _ = get_df(adata, keys="not_existing_column_name") + + @given( + data=st.data(), + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["layer_1"], + obsm_keys=["obsm_1"], + varm_keys=["varm_1"], + var_col_names=["var_col_1"], + obs_col_names=["obs_col_1", "obs_col_2"], + ), + keys_as_string=st.booleans(), + ) + def test_from_obs(self, data, adata: AnnData, keys_as_string: bool): + adata.obs_names = [f"obs_{obs_id}" for obs_id in adata.obs_names] + adata.var_names = [f"var_{var_id}" for var_id in adata.var_names] + + if keys_as_string: + keys = data.draw(st.sampled_from([*adata.obs.columns])) + else: + keys = data.draw( + st.lists( + st.sampled_from(adata.obs.columns.to_list()), + min_size=1, + max_size=len(adata.obs.columns), + unique=True, + ) + ) + + df = get_df(adata, keys=keys) + + assert isinstance(df, pd.DataFrame) + assert (df.columns == keys).all() + if isinstance(keys, str): + assert (df == adata.obs[[keys]]).values.all() + else: + assert (df == adata.obs[keys]).values.all() + + @pytest.mark.parametrize("uns_name", ("neighbors", "random_name")) + def test_from_uns(self, uns_name: str): + adata = AnnData( + np.eye(2), + uns={uns_name: 5 * np.eye(2)}, + obs=pd.DataFrame({"obs_col_1": ["a", "b"], "obs_col_2": [0, 0]}), + ) + + df = get_df(adata, keys=uns_name) + + assert isinstance(df, pd.DataFrame) + np.testing.assert_array_equal(df.values, 5 * np.eye(2)) + + @pytest.mark.parametrize("categorical", (True, False)) + @pytest.mark.parametrize("shape", ((2, 2), (2, 1))) + def test_from_uns_with_categorical_column(self, categorical: bool, shape: Tuple): + adata = AnnData( + np.eye(*shape), + uns={"random_name": 5 * np.eye(*shape)}, + obs=pd.DataFrame({"obs_col_1": ["a", "b"], "obs_col_2": [0, 0]}), + ) + + if categorical: + adata.obs["obs_col_1"] = adata.obs["obs_col_1"].astype("category") 
+ + df = get_df(adata, keys="random_name") + + assert isinstance(df, pd.DataFrame) + if categorical: + assert (df.index == adata.obs["obs_col_1"].values).all() + if shape[0] == shape[1]: + assert (df.columns == adata.obs["obs_col_1"].values).all() + + @given( + data=st.data(), + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["layer_1"], + obsm_keys=["obsm_1"], + varm_keys=["varm_1"], + obs_col_names=["obs_col_1"], + var_col_names=["var_col_1", "var_col_2"], + ), + keys_as_string=st.booleans(), + ) + def test_from_var(self, data, adata: AnnData, keys_as_string: bool): + adata.obs_names = [f"obs_{obs_id}" for obs_id in adata.obs_names] + adata.var_names = [f"var_{var_id}" for var_id in adata.var_names] + + if keys_as_string: + keys = data.draw(st.sampled_from([*adata.var.columns])) + else: + keys = data.draw( + st.lists( + st.sampled_from(adata.var.columns.to_list()), + min_size=1, + max_size=len(adata.var.columns), + unique=True, + ) + ) + + df = get_df(adata, keys=keys) + + assert isinstance(df, pd.DataFrame) + assert (df.columns == keys).all() + if isinstance(keys, str): + assert (df == adata.var[[keys]]).values.all() + else: + assert (df == adata.var[keys]).values.all() + + @given( + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["layer_1", "layer_2"], + obsm_keys=["obsm_1"], + varm_keys=["varm_1"], + var_col_names=["var_col_1", "var_col_2"], + ), + layer_name=st.sampled_from(["layer_1", "layer_2"]), + ) + def test_keys_as_layer(self, adata: AnnData, layer_name: str): + adata.var_names = [f"var_{var_id}" for var_id in adata.var_names] + + df = get_df(adata, keys=layer_name) + + assert isinstance(df, pd.DataFrame) + assert (df.columns == adata.var_names).all() + assert (df.values == adata.layers[layer_name]).all() + + @pytest.mark.parametrize("keys", (None, "col_1", "col_2", ["col_1", "col_2"])) + def test_data_as_data_frame(self, keys): + df = get_df(data=pd.DataFrame(np.eye(2), columns=["col_1", "col_2"]), keys=keys) + + assert 
isinstance(df, pd.DataFrame) + if isinstance(keys, str): + assert df.shape == (2, 1) + else: + assert df.shape == (2, 2) + + @pytest.mark.parametrize("dropna", (True, False, "any", "all")) + def test_dropna(self, dropna: Union[bool, str]): + df = get_df( + data=pd.DataFrame( + np.array([[1, np.nan, 0], [np.nan, 1, 0], [np.nan, np.nan, np.nan]]), + columns=["col_1", "col_2", "col_3"], + ), + dropna=dropna, + ) + + assert isinstance(df, pd.DataFrame) + assert (df.columns == ["col_1", "col_2", "col_3"]).all() + if dropna == "all": + np.testing.assert_equal( + df.values, np.array([[1, np.nan, 0], [np.nan, 1, 0]]) + ) + elif dropna or dropna == "any": + assert ( + df == pd.DataFrame(columns=["col_1", "col_2", "col_3"]) + ).values.all() + else: + np.testing.assert_equal( + df.values, + np.array([[1, np.nan, 0], [np.nan, 1, 0], [np.nan, np.nan, np.nan]]), + ) + + def test_index_from_obs_col(self): + adata = AnnData( + X=np.eye(2), + layers={"layer_1": 2 * np.eye(2), "layer_2": 3 * np.eye(2)}, + obs=pd.DataFrame({"obs_col_1": ["a", "b"]}), + ) + adata.var_names = ["var_name_1", "var_name_2"] + + df = get_df(adata, keys="layer_1", index="obs_col_1") + + assert isinstance(df, pd.DataFrame) + np.testing.assert_array_equal(df.values, 2 * np.eye(2)) + assert (df.index == adata.obs["obs_col_1"]).all() + assert (df.columns == ["var_name_1", "var_name_2"]).all() + + def test_columns_from_obs(self): + adata = AnnData( + X=np.eye(2), + varm={"varm_1": 2 * np.eye(2)}, + obs=pd.DataFrame({"obs_col_1": ["a", "b"]}), + ) + adata.var_names = ["var_name_1", "var_name_2"] + + df = get_df(adata, keys="varm_1", columns="obs_col_1") + + assert isinstance(df, pd.DataFrame) + np.testing.assert_array_equal(df.values, 2 * np.eye(2)) + assert (df.index == ["var_name_1", "var_name_2"]).all() + assert (df.columns == adata.obs["obs_col_1"]).all() + + @pytest.mark.parametrize("sparse", (True, False)) + @pytest.mark.parametrize("index", (None, ["index_1", "index_2"])) + 
@pytest.mark.parametrize("columns", (None, ["col_1", "col_2"])) + def test_data_as_array( + self, index: Optional[List[str]], columns: Optional[List[str]], sparse: bool + ): + if sparse: + data = csr_matrix(np.eye(2)) + else: + data = np.eye(2) + + df = get_df(data, index=index, columns=columns) + + assert isinstance(df, pd.DataFrame) + if index is None: + assert (df.index == [0, 1]).all() + else: + assert (df.index == ["index_1", "index_2"]).all() + if columns is None: + assert (df.columns == [0, 1]).all() + else: + assert (df.columns == ["col_1", "col_2"]).all() + + +class TestGetInitialSize(TestBase): + @given( + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["unspliced", "spliced", "ambiguous"], + obs_col_names=[ + "initial_size", + "initial_size_unspliced", + "initial_size_spliced", + "initial_size_ambiguous", + ], + ), + by_total_size=st.booleans(), + layer=st.sampled_from([None, "X", "unspliced", "spliced", "ambiguous"]), + ) + def test_get_initial_size( + self, adata: AnnData, layer: Optional[None], by_total_size: bool + ): + initial_size = get_initial_size( + adata=adata, layer=layer, by_total_size=by_total_size + ) + + if by_total_size: + assert np.allclose( + initial_size, + adata.obs["initial_size_unspliced"] + adata.obs["initial_size_spliced"], + ) + elif layer in adata.layers: + assert np.allclose(initial_size, adata.obs[f"initial_size_{layer}"]) + else: + assert np.allclose(initial_size, adata.obs["initial_size"]) + + @given( + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["unspliced", "spliced", "ambiguous"], + ), + layer=st.text(min_size=2, max_size=5), + ) + def test_not_existing_modality(self, adata: AnnData, layer: str): + initial_size = get_initial_size(adata=adata, layer=layer) + + assert initial_size is None + + @given( + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["unspliced", "spliced", "ambiguous"], + ), + layer=st.sampled_from([None, "X", "unspliced", "spliced", "ambiguous"]), + ) + def 
test_initial_size_not_in_adata_obs(self, adata: AnnData, layer: Optional[str]): + initial_size = get_initial_size(adata=adata, layer=layer) + + if layer in [None, "X"]: + np.testing.assert_allclose(initial_size, get_size(adata=adata)) + else: + np.testing.assert_allclose(initial_size, get_size(adata=adata, layer=layer)) + + +class TestGetModality(TestBase): + @given(adata=get_adata(max_obs=5, max_vars=5)) + def test_get_modality(self, adata: AnnData): + modality_to_get = self._subset_modalities(adata, 1)[0] + modality_retrieved = get_modality(adata=adata, modality=modality_to_get) + + if modality_to_get == "X": + assert_array_equal(adata.X, modality_retrieved) + elif modality_to_get in adata.layers: + assert_array_equal(adata.layers[modality_to_get], modality_retrieved) + else: + assert_array_equal(adata.obsm[modality_to_get], modality_retrieved) + + @given(adata=get_adata(max_obs=5, max_vars=5)) + def test_modality_equals_none(self, adata: AnnData): + modality_retrieved = get_modality(adata=adata, modality=None) + + assert_array_equal(adata.X, modality_retrieved) + + +class TestGetSize(TestBase): + @given(adata=get_adata(max_obs=5, max_vars=5)) + def test_get_size(self, adata: AnnData): + modality = self._subset_modalities(adata, n_modalities=1)[0] + + np.testing.assert_allclose( + sum(get_modality(adata=adata, modality=modality), axis=1), + get_size(adata=adata, modality=modality), + ) + + @given(adata=get_adata(max_obs=5, max_vars=5)) + def test_modality_set_to_none(self, adata: AnnData): + np.testing.assert_allclose( + sum(adata.X, axis=1), + get_size(adata=adata, modality=None), + ) + + +class TestMakeDense(TestBase): + @given( + adata=get_adata(max_obs=5, max_vars=5, sparse_entries=True), + inplace=st.booleans(), + n_modalities=st.integers(min_value=0), + ) + def test_make_dense(self, adata: AnnData, inplace: bool, n_modalities: int): + modalities_to_densify = self._subset_modalities(adata, n_modalities) + + returned_adata = make_dense( + adata=adata, 
modalities=modalities_to_densify, inplace=inplace + ) + + if inplace: + assert returned_adata is None + assert np.all( + [ + not issparse(get_modality(adata=adata, modality=modality)) + for modality in modalities_to_densify + ] + ) + else: + assert isinstance(returned_adata, AnnData) + assert np.all( + [ + not issparse(get_modality(adata=returned_adata, modality=modality)) + for modality in modalities_to_densify + ] + ) + assert np.all( + [ + issparse(get_modality(adata=adata, modality=modality)) + for modality in modalities_to_densify + ] + ) + + @given( + adata=get_adata(max_obs=5, max_vars=5, sparse_entries=True), + inplace=st.booleans(), + ) + def test_modalities_passed_as_string(self, adata: AnnData, inplace: bool): + modality_to_densify = self._subset_modalities(adata, n_modalities=1)[0] + + returned_adata = make_dense( + adata=adata, modalities=modality_to_densify, inplace=inplace + ) + + if inplace: + assert returned_adata is None + assert not issparse(get_modality(adata=adata, modality=modality_to_densify)) + else: + assert isinstance(returned_adata, AnnData) + assert not issparse( + get_modality(adata=returned_adata, modality=modality_to_densify) + ) + assert issparse(get_modality(adata=adata, modality=modality_to_densify)) + + +class TestMerge: + def _assert_all_entries_present( + self, returned_adata: AnnData, adata: AnnData, ldata: AnnData + ): + assert set(returned_adata.layers) == set(adata.layers).union(ldata.layers) + assert set(returned_adata.obsm) == set(adata.obsm).union(ldata.obsm) + assert set(returned_adata.varm) == set(adata.varm).union(ldata.varm) + assert set(returned_adata.uns) == set(adata.uns).union(ldata.uns) + assert set(returned_adata.obs.columns) == set(adata.obs.columns).union( + ldata.obs.columns + ) + assert set(returned_adata.var.columns) == set(adata.var.columns).union( + ldata.var.columns + ) + + def _assert_copy_worked(self, returned_adata: AnnData, adata: AnnData, copy: bool): + if copy: + assert isinstance(returned_adata, 
AnnData) + else: + assert returned_adata is None + returned_adata = adata + + return returned_adata + + @given( + adata=get_adata(max_obs=5, max_vars=5), + ldata=get_adata(max_obs=5, max_vars=5), + copy=st.booleans(), + ) + def test_common_var_names(self, adata: AnnData, ldata: AnnData, copy: bool): + adata.uns["a"] = ["a", 0, 3] + ldata.uns["cluster_colors"] = {"cluster_1": "blue", "cluster_2": "red"} + returned_adata = merge(adata=adata, ldata=ldata, copy=copy) + + returned_adata = self._assert_copy_worked(returned_adata, adata, copy) + self._assert_all_entries_present(returned_adata, adata, ldata) + + @given( + adata=get_adata(max_obs=5, max_vars=5, layer_keys=["spliced"]), + ldata=get_adata(max_obs=5, max_vars=5), + copy=st.booleans(), + ) + def test_spliced_in_adata(self, adata: AnnData, ldata: AnnData, copy: bool): + adata.uns["a"] = ["a", 0, 3] + ldata.uns["cluster_colors"] = {"cluster_1": "blue", "cluster_2": "red"} + returned_adata = merge(adata=adata, ldata=ldata, copy=copy) + + returned_adata = self._assert_copy_worked(returned_adata, adata, copy) + self._assert_all_entries_present(returned_adata, adata, ldata) + assert "initial_size" in returned_adata.obs.columns + assert "initial_size_spliced" in returned_adata.obs.columns + + @given( + adata=get_adata(max_obs=5, max_vars=5), + ldata=get_adata(max_obs=5, max_vars=5, layer_keys=["spliced"]), + copy=st.booleans(), + ) + def test_spliced_in_ldata(self, adata: AnnData, ldata: AnnData, copy: bool): + adata.uns["a"] = ["a", 0, 3] + ldata.uns["cluster_colors"] = {"cluster_1": "blue", "cluster_2": "red"} + returned_adata = merge(adata=adata, ldata=ldata, copy=copy) + + returned_adata = self._assert_copy_worked(returned_adata, adata, copy) + self._assert_all_entries_present(returned_adata, adata, ldata) + assert "initial_size" in returned_adata.obs.columns + assert "initial_size_spliced" in returned_adata.obs.columns + + @given( + adata=get_adata(min_obs=3, max_obs=3, max_vars=5), + ldata=get_adata(min_obs=3, 
max_obs=3, max_vars=5), + copy=st.booleans(), + ) + def test_no_common_obs_names(self, adata: AnnData, ldata: AnnData, copy: bool): + adata.uns["a"] = ["a", 0, 3] + ldata.uns["cluster_colors"] = {"cluster_1": "blue", "cluster_2": "red"} + + adata.obs_names = ["sample1_ABCD", "sample2_ABCD", "sample3_DCBA"] + ldata.obs_names = ["_sample1_ABCD", "_sample2_ABCD", "_sample3_DCBA"] + + returned_adata = merge(adata=adata, ldata=ldata, copy=copy, id_length=4) + + returned_adata = self._assert_copy_worked(returned_adata, adata, copy) + self._assert_all_entries_present(returned_adata, adata, ldata) + assert returned_adata.obs_names.isin(["ABCD", "ABCD-1", "DCBA"]).all() + + +class TestMakeSparse(TestBase): + @given( + adata=get_adata(max_obs=5, max_vars=5), + inplace=st.booleans(), + n_modalities=st.integers(min_value=0), + ) + def test_make_sparse(self, adata: AnnData, inplace: bool, n_modalities: int): + modalities_to_make_sparse = self._subset_modalities(adata, n_modalities) + + returned_adata = make_sparse( + adata=adata, modalities=modalities_to_make_sparse, inplace=inplace + ) + + if inplace: + assert returned_adata is None + assert np.all( + [ + issparse(get_modality(adata=adata, modality=modality)) + for modality in modalities_to_make_sparse + if modality != "X" + ] + ) + else: + assert isinstance(returned_adata, AnnData) + assert np.all( + [ + issparse(get_modality(adata=returned_adata, modality=modality)) + for modality in modalities_to_make_sparse + if modality != "X" + ] + ) + assert np.all( + [ + not issparse(get_modality(adata=adata, modality=modality)) + for modality in modalities_to_make_sparse + if modality != "X" + ] + ) + + @given( + adata=get_adata(max_obs=5, max_vars=5), + inplace=st.booleans(), + ) + def test_modalities_passed_as_string(self, adata: AnnData, inplace: bool): + modality_to_make_sparse = self._subset_modalities(adata, n_modalities=1)[0] + + returned_adata = make_sparse( + adata=adata, modalities=modality_to_make_sparse, inplace=inplace + 
) + + if inplace: + assert returned_adata is None + if modality_to_make_sparse != "X": + assert issparse( + get_modality(adata=adata, modality=modality_to_make_sparse) + ) + else: + assert isinstance(returned_adata, AnnData) + if modality_to_make_sparse != "X": + assert issparse( + get_modality(adata=returned_adata, modality=modality_to_make_sparse) + ) + assert not issparse( + get_modality(adata=adata, modality=modality_to_make_sparse) + ) + + +class TestObsDf(TestBase): + @given(data=st.data(), adata=get_adata(max_obs=5, max_vars=5)) + def test_obs_df(self, data, adata: AnnData): + adata.var_names = "var_" + adata.var_names + + modality = self._subset_modalities(adata, n_modalities=1, from_obsm=False)[0] + + var_names = data.draw( + st.lists( + st.sampled_from(adata.var_names.to_list()), + max_size=len(adata.var_names), + unique=True, + ) + ) + + if modality == "X": + df = obs_df(adata=adata, keys=var_names) + else: + df = obs_df(adata=adata, keys=var_names, layer=modality) + + assert isinstance(df, pd.DataFrame) + assert (df.columns == var_names).all() + if len(var_names) == 0: + assert df.shape == (adata.n_obs, 0) + else: + np.testing.assert_equal( + df.values, get_modality(adata[:, var_names], modality=modality) + ) + assert (df.index == adata.obs_names).all() + + @pytest.mark.parametrize( + "var_names", (["var_1", "var_2"], ["var_0", "Var_1", "var_2"]) + ) + def test_warning_for_nonexisting_var_names(self, capfd, var_names): + adata = AnnData(np.eye(len(var_names)), var=pd.DataFrame(index=var_names)) + + df = obs_df(adata=adata, keys=var_names + ["VAR_1", "VAR_2"]) + + actual_warning, _ = capfd.readouterr() + expected_warning = ( + "WARNING: Keys ['VAR_1', 'VAR_2'] were not found in `adata.var_names`.\n" + ) + + assert actual_warning == expected_warning + assert isinstance(df, pd.DataFrame) + assert (df.index == adata.obs_names).all() + + +class TestSetInitialSize(TestBase): + @given( + adata=get_adata(max_obs=5, max_vars=5), 
n_modalities=st.integers(min_value=0) + ) + def test_added_columns(self, adata: AnnData, n_modalities: int): + layers = self._subset_modalities( + adata=adata, n_modalities=n_modalities, from_obsm=False + ) + + set_initial_size(adata=adata, layers=layers) + + if "X" in layers: + assert ( + sum( + adata.obs.columns.isin( + [f"initial_size_{layer}" for layer in layers] + ) + ) + == len(layers) - 1 + ) + else: + assert sum( + adata.obs.columns.isin([f"initial_size_{layer}" for layer in layers]) + ) == len(layers) + + assert "initial_size" in adata.obs.columns + + @given(adata=get_adata(max_obs=5, max_vars=5)) + def test_non_existing_columns_specified(self, adata: AnnData): + layers = "_" + adata.obs.columns + set_initial_size(adata=adata, layers=layers) + + assert "initial_size" in adata.obs.columns + assert len(adata.obs.columns) == 3 + + @given(adata=get_adata(max_obs=5, max_vars=5, layer_keys=["unspliced", "spliced"])) + def test_layers_not_specified(self, adata: AnnData): + set_initial_size(adata=adata) + + assert "initial_size" in adata.obs.columns + assert "initial_size_unspliced" in adata.obs.columns + assert "initial_size_spliced" in adata.obs.columns + assert adata.obs.columns.str.startswith("initial_size").sum() == 3 + + @pytest.mark.parametrize( + "X, layers, initial_size", + [ + ( + np.eye(2), + {"unspliced": np.ones((2, 2)), "spliced": np.array([[1, 2], [3, 3]])}, + { + "X": np.ones(2), + "unspliced": 2 * np.ones(2), + "spliced": np.array([3, 6]), + }, + ) + ], + ) + def test_calculated_initial_size( + self, X: np.ndarray, layers: np.ndarray, initial_size: np.ndarray + ): + adata = AnnData(X=X, layers=layers) + set_initial_size(adata=adata, layers=["unspliced", "spliced"]) + + np.testing.assert_equal(adata.obs["initial_size"], initial_size["X"]) + np.testing.assert_equal( + adata.obs["initial_size_unspliced"], initial_size["unspliced"] + ) + np.testing.assert_equal( + adata.obs["initial_size_spliced"], initial_size["spliced"] + ) + + +class 
TestSetModality(TestBase): + @given(adata=get_adata(max_obs=5, max_vars=5), inplace=st.booleans()) + def test_set_modality(self, adata: AnnData, inplace: bool): + modality_to_set = self._subset_modalities(adata, 1)[0] + + if (modality_to_set == "X") or (modality_to_set in adata.layers): + new_value = np.random.randn(adata.n_obs, adata.n_vars) + else: + new_value = np.random.randn( + adata.n_obs, np.random.randint(low=1, high=10000) + ) + + returned_adata = set_modality( + adata=adata, new_value=new_value, modality=modality_to_set, inplace=inplace + ) + + if inplace: + assert returned_adata is None + if modality_to_set == "X": + assert_array_equal(adata.X, new_value) + elif modality_to_set in adata.layers: + assert_array_equal(adata.layers[modality_to_set], new_value) + else: + assert_array_equal(adata.obsm[modality_to_set], new_value) + else: + assert isinstance(returned_adata, AnnData) + if modality_to_set == "X": + assert_array_equal(returned_adata.X, new_value) + elif modality_to_set in adata.layers: + assert_array_equal(returned_adata.layers[modality_to_set], new_value) + else: + assert_array_equal(returned_adata.obsm[modality_to_set], new_value) + + +class TestShowProportions(TestBase): + @pytest.mark.parametrize( + "layers", + ( + {"unspliced": np.eye(2), "spliced": 2 * np.eye(2)}, + { + "unspliced": np.eye(2), + "spliced": 2 * np.eye(2), + "ambiguous": 3 * np.eye(2), + }, + {"unspliced": np.eye(2), "spliced": 2 * np.eye(2)}, + {"unspliced": np.eye(2), "spliced": 2 * np.eye(2)}, + ), + ) + @pytest.mark.parametrize("use_raw", (True, False)) + def test_layers_not_specified(self, capfd, layers: Dict, use_raw: bool): + adata = AnnData(X=np.eye(2), layers=layers) + + show_proportions(adata=adata, layers=None, use_raw=use_raw) + actual_output, _ = capfd.readouterr() + + if len(layers) == 2: + expected_output = f"Abundance of {[*layers]}: [0.33 0.67]\n" + else: + expected_output = f"Abundance of {[*layers]}: [0.17 0.33 0.5 ]\n" + + assert actual_output == 
expected_output + + @pytest.mark.parametrize( + "layers", + ( + {"unspliced": np.eye(2), "spliced": 2 * np.eye(2)}, + { + "unspliced": np.eye(2), + "spliced": 2 * np.eye(2), + "ambiguous": 3 * np.eye(2), + }, + {"layer_1": np.eye(2), "layer_2": 2 * np.eye(2)}, + ), + ) + @pytest.mark.parametrize("use_raw", (True, False)) + def test_layers_specified(self, capfd, layers: Dict, use_raw: bool): + adata = AnnData(X=np.eye(2), layers=layers) + + show_proportions(adata=adata, layers=layers.keys(), use_raw=use_raw) + actual_output, _ = capfd.readouterr() + + if len(layers) == 2: + expected_output = f"Abundance of {[*layers]}: [0.33 0.67]\n" + else: + expected_output = f"Abundance of {[*layers]}: [0.17 0.33 0.5 ]\n" + + assert actual_output == expected_output + + @pytest.mark.parametrize( + "layers", + ( + {"unspliced": np.eye(2), "spliced": 2 * np.eye(2)}, + { + "unspliced": np.eye(2), + "spliced": 2 * np.eye(2), + "ambiguous": 3 * np.eye(2), + }, + {"layer_1": np.eye(2), "layer_2": 2 * np.eye(2)}, + ), + ) + @pytest.mark.parametrize("use_raw", (True, False)) + def test_passing_nonexisting_layers(self, capfd, layers: Dict, use_raw: bool): + adata = AnnData(X=np.eye(2), layers=layers) + + show_proportions( + adata=adata, layers=[*layers] + ["random_1", "random_2"], use_raw=use_raw + ) + actual_output, _ = capfd.readouterr() + + if len(layers) == 2: + expected_output = f"Abundance of {[*layers]}: [0.33 0.67]\n" + else: + expected_output = f"Abundance of {[*layers]}: [0.17 0.33 0.5 ]\n" + + assert actual_output == expected_output + + @pytest.mark.parametrize( + "layers, obs", + ( + ( + {"unspliced": np.eye(2), "spliced": 2 * np.eye(2)}, + { + "initial_size_unspliced": np.ones(2), + "initial_size_spliced": np.ones(2), + }, + ), + ( + {"unspliced": np.eye(2), "spliced": 2 * np.eye(2)}, + { + "initial_size_unspliced": np.ones(2), + "initial_size_spliced": np.ones(2), + }, + ), + ( + {"unspliced": np.eye(2), "spliced": 2 * np.eye(2)}, + {"initial_size_unspliced": np.ones(2)}, + 
class TestVarDf(TestBase):
    """Tests for `var_df`."""

    @given(data=st.data(), adata=get_adata(max_obs=5, max_vars=5))
    def test_var_df(self, data, adata: AnnData):
        """Compare the returned frame against the transposed modality values."""
        adata.obs_names = "obs_" + adata.obs_names

        (modality,) = self._subset_modalities(
            adata, n_modalities=1, from_obsm=False
        )

        # Draw a (possibly empty) selection of distinct observation names.
        obs_names = data.draw(
            st.lists(
                st.sampled_from(adata.obs_names.to_list()),
                max_size=len(adata.obs_names),
                unique=True,
            )
        )

        # `layer` is only passed when something other than `X` was picked.
        layer_kwargs = {} if modality == "X" else {"layer": modality}
        df = var_df(adata=adata, keys=obs_names, **layer_kwargs)

        assert isinstance(df, pd.DataFrame)
        assert (df.columns == obs_names).all()
        if not obs_names:
            assert df.shape == (adata.n_vars, 0)
        else:
            expected_values = get_modality(adata[obs_names, :], modality=modality).T
            np.testing.assert_equal(df.values, expected_values)
        assert (df.index == adata.var_names).all()

    @pytest.mark.parametrize(
        "obs_names", (["obs_1", "obs_2"], ["obs_0", "Obs_1", "obs_2"])
    )
    def test_warning_for_nonexisting_obs_names(self, capfd, obs_names):
        """Check the message printed for keys missing from `adata.obs_names`."""
        adata = AnnData(np.eye(len(obs_names)), obs=pd.DataFrame(index=obs_names))
        missing_keys = ["OBS_1", "OBS_2"]

        df = var_df(adata=adata, keys=obs_names + missing_keys)

        captured = capfd.readouterr()

        assert captured.out == (
            "WARNING: Keys ['OBS_1', 'OBS_2'] were not found in `adata.obs_names`.\n"
        )
        assert isinstance(df, pd.DataFrame)
        assert (df.index == adata.var_names).all()
# TODO: Add possibility to generate adata object with floats as counts
# TODO: Add possibility to generate different columns with different data types in
# adata.obs and adata.var

# Largest value drawn for any numerical entry of the generated object.
_MAX_COUNT = 100


def _annotation_keys(keys: Optional[List]):
    """Return a strategy for the names of `layers`, `obsm` or `varm` entries."""
    if keys is not None:
        return st.sampled_from(keys)
    # Surrogate code points ("Cs") are excluded, and so is the character "X", so that
    # no randomly generated name can equal "X".
    return st.text(
        st.characters(blacklist_categories=("Cs",), blacklist_characters=("X")),
        min_size=1,
    )


def _column_names(names: Optional[List]):
    """Return a strategy for the column names of `obs` or `var`."""
    if names is not None:
        return st.sampled_from(names)
    return st.text(min_size=1)


def _count_matrices(shape):
    """Return a strategy for integer arrays of `shape` with entries up to `_MAX_COUNT`."""
    return arrays(
        dtype=int,
        elements=st.integers(min_value=0, max_value=_MAX_COUNT),
        shape=shape,
    )


def _count_columns(length: int):
    """Return a strategy for lists of `length` integers up to `_MAX_COUNT`."""
    return st.lists(
        elements=st.integers(min_value=0, max_value=_MAX_COUNT),
        min_size=length,
        max_size=length,
    )


@st.composite
def get_adata(
    draw,
    n_obs: Optional[int] = None,
    n_vars: Optional[int] = None,
    min_obs: Optional[int] = 1,
    max_obs: Optional[int] = 100,
    min_vars: Optional[int] = 1,
    max_vars: Optional[int] = 100,
    layer_keys: Optional[Union[List, str]] = None,
    min_layers: Optional[int] = 2,
    max_layers: int = 2,
    obsm_keys: Optional[Union[List, str]] = None,
    min_obsm: Optional[int] = 2,
    max_obsm: Optional[int] = 2,
    varm_keys: Optional[Union[List, str]] = None,
    min_varm: Optional[int] = 2,
    max_varm: Optional[int] = 2,
    obs_col_names=None,
    min_obs_cols=2,
    max_obs_cols=2,
    var_col_names=None,
    min_var_cols=2,
    max_var_cols=2,
    sparse_entries: bool = False,
) -> AnnData:
    """Generate an AnnData object.

    The largest possible value of a numerical entry is `100`.

    Arguments
    ---------
    n_obs
        Number of observations. If set to `None`, a random integer between `min_obs`
        and `max_obs` will be drawn. Defaults to `None`.
    n_vars
        Number of variables. If set to `None`, a random integer between `min_vars`
        and `max_vars` will be drawn. Defaults to `None`.
    min_obs
        Minimum number of observations. If set to `None`, there is no lower limit.
        Defaults to `1`.
    max_obs
        Maximum number of observations. If set to `None`, there is no upper limit.
        Defaults to `100`.
    min_vars
        Minimum number of variables. If set to `None`, there is no lower limit.
        Defaults to `1`.
    max_vars
        Maximum number of variables. If set to `None`, there is no upper limit.
        Defaults to `100`.
    layer_keys
        Names of layers. If set to `None`, layers will be named at random. Defaults
        to `None`.
    min_layers
        Minimum number of layers. Is set to the number of provided layer names if
        `layer_keys` is not `None`. Defaults to `2`.
    max_layers
        Maximum number of layers. Is set to the number of provided layer
        names if `layer_keys` is not `None`. Defaults to `2`.
    obsm_keys
        Names of multi-dimensional observations annotation. If set to `None`, names
        will be generated at random. Defaults to `None`.
    min_obsm
        Minimum number of multi-dimensional observations annotation. Is set to the
        number of keys if `obsm_keys` is not `None`. Defaults to `2`.
    max_obsm
        Maximum number of multi-dimensional observations annotation. Is set to the
        number of keys if `obsm_keys` is not `None`. Defaults to `2`.
    varm_keys
        Names of multi-dimensional variables annotation. If set to `None`, names
        will be generated at random. Defaults to `None`.
    min_varm
        Minimum number of multi-dimensional variables annotation. Is set to the
        number of keys if `varm_keys` is not `None`. Defaults to `2`.
    max_varm
        Maximum number of multi-dimensional variables annotation. Is set to the
        number of keys if `varm_keys` is not `None`. Defaults to `2`.
    obs_col_names
        Names of columns in `adata.obs`. If set to `None`, columns will be named at
        random. Defaults to `None`.
    min_obs_cols
        Minimum number of columns in `adata.obs`. Is set to the number of provided
        column names if `obs_col_names` is not `None`. Defaults to `2`.
    max_obs_cols
        Maximum number of columns in `adata.obs`. Is set to the number of provided
        column names if `obs_col_names` is not `None`. Defaults to `2`.
    var_col_names
        Names of columns in `adata.var`. If set to `None`, columns will be named at
        random. Defaults to `None`.
    min_var_cols
        Minimum number of columns in `adata.var`. Is set to the number of provided
        column names if `var_col_names` is not `None`. Defaults to `2`.
    max_var_cols
        Maximum number of columns in `adata.var`. Is set to the number of provided
        column names if `var_col_names` is not `None`. Defaults to `2`.
    sparse_entries
        Whether or not to make AnnData entries sparse.

    Returns
    -------
    AnnData
        Generated :class:`~anndata.AnnData` object.
    """

    if n_obs is None:
        n_obs = draw(st.integers(min_value=min_obs, max_value=max_obs))
    if n_vars is None:
        n_vars = draw(st.integers(min_value=min_vars, max_value=max_vars))

    # A single name may be passed as a plain string for every kind of key.
    if isinstance(layer_keys, str):
        layer_keys = [layer_keys]
    if isinstance(obsm_keys, str):
        obsm_keys = [obsm_keys]
    if isinstance(varm_keys, str):
        # Without this, a string would be treated as a sequence of characters.
        varm_keys = [varm_keys]
    if isinstance(obs_col_names, str):
        obs_col_names = [obs_col_names]
    if isinstance(var_col_names, str):
        var_col_names = [var_col_names]

    # Explicit names fix the number of generated entries.
    if layer_keys is not None:
        min_layers = max_layers = len(layer_keys)
    if obsm_keys is not None:
        min_obsm = max_obsm = len(obsm_keys)
    if varm_keys is not None:
        min_varm = max_varm = len(varm_keys)
    if obs_col_names is not None:
        min_obs_cols = max_obs_cols = len(obs_col_names)
    if var_col_names is not None:
        min_var_cols = max_var_cols = len(var_col_names)

    X = draw(_count_matrices((n_obs, n_vars)))

    layers = draw(
        st.dictionaries(
            _annotation_keys(layer_keys),
            _count_matrices((n_obs, n_vars)),
            min_size=min_layers,
            max_size=max_layers,
        )
    )

    # First dimension is fixed to `n_obs`, second one is drawn at random.
    obsm = draw(
        st.dictionaries(
            _annotation_keys(obsm_keys),
            _count_matrices(
                st.tuples(
                    st.integers(min_value=n_obs, max_value=n_obs),
                    st.integers(min_value=min_vars, max_value=max_vars),
                )
            ),
            min_size=min_obsm,
            max_size=max_obsm,
        )
    )

    # First dimension is fixed to `n_vars`, second one is drawn at random.
    varm = draw(
        st.dictionaries(
            _annotation_keys(varm_keys),
            _count_matrices(
                st.tuples(
                    st.integers(min_value=n_vars, max_value=n_vars),
                    st.integers(min_value=min_obs, max_value=max_obs),
                )
            ),
            min_size=min_varm,
            max_size=max_varm,
        )
    )

    obs = draw(
        st.dictionaries(
            _column_names(obs_col_names),
            _count_columns(n_obs),
            min_size=min_obs_cols,
            max_size=max_obs_cols,
        )
    )

    var = draw(
        st.dictionaries(
            _column_names(var_col_names),
            _count_columns(n_vars),
            min_size=min_var_cols,
            max_size=max_var_cols,
        )
    )

    # Make keys for layers and obsm unique. Underscores are appended until the new
    # name clashes with neither mapping, so that no existing layer is overwritten.
    for key in set(layers.keys()).intersection(obsm.keys()):
        new_key = f"{key}_"
        while new_key in layers or new_key in obsm:
            new_key += "_"
        layers[new_key] = layers.pop(key)

    if sparse_entries:
        layers = {key: csr_matrix(val) for key, val in layers.items()}
        obsm = {key: csr_matrix(val) for key, val in obsm.items()}
        varm = {key: csr_matrix(val) for key, val in varm.items()}
        X = csr_matrix(X)

    return AnnData(X=X, layers=layers, obsm=obsm, varm=varm, obs=obs, var=var)
class TestBase:
    """Helpers shared by test classes operating on AnnData objects."""

    def _subset_modalities(
        self,
        adata: AnnData,
        n_modalities: int,
        from_layers: bool = True,
        from_obsm: bool = True,
    ):
        """Randomly pick modalities of an AnnData object.

        `"X"` is always a candidate; layer and `obsm` names are added on request. At
        most `n_modalities` names are returned, fewer if fewer candidates exist.
        """

        layer_names = adata.layers.keys() if from_layers else ()
        obsm_names = adata.obsm.keys() if from_obsm else ()
        candidates = ["X", *layer_names, *obsm_names]

        n_picked = min(len(candidates), n_modalities)
        return random.sample(candidates, n_picked)

    def _subset_columns(
        self,
        adata: AnnData,
        n_cols: int,
        from_obs: bool = True,
        from_var: bool = True,
    ):
        """Randomly pick column names from the `obs` and `var` slots.

        At most `n_cols` names are returned, fewer if fewer columns exist.
        """

        obs_columns = adata.obs.columns if from_obs else ()
        var_columns = adata.var.columns if from_var else ()
        candidates = [*obs_columns, *var_columns]

        n_picked = min(len(candidates), n_cols)
        return random.sample(candidates, n_picked)

    def _convert_to_float(self, adata: AnnData):
        """Cast every entry of `adata.layers` and `adata.obsm` to float, in place."""

        # Layers are converted first, then the `obsm` entries.
        for mapping in (adata.layers, adata.obsm):
            for name in mapping:
                mapping[name] = mapping[name].astype(float)
+ # https://github.com/theislab/scvelo/issues/939 @given( - x=arrays( - float, - shape=st.integers(min_value=1, max_value=100), - elements=st.floats( - min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False - ), + x=st.sampled_from( + [np.array([0]), np.array([1]), np.array([-4.5, 3.7, 1683.37])] ), coef=st.floats( min_value=-1000, max_value=1000, allow_infinity=False, allow_nan=False @@ -32,15 +30,14 @@ def test_perfect_fit(self, x: ndarray, coef: float): assert_almost_equal(lr.coef_, coef) @given( - x=arrays( - float, - shape=st.tuples( - st.integers(min_value=1, max_value=100), - st.integers(min_value=1, max_value=100), - ), - elements=st.floats( - min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False - ), + x=st.sampled_from( + [ + np.array([[0]]), + np.array([[1]]), + np.array([[-4.5, 3.7, 1683.37], [2.0, 14.3, -23.83]]), + np.eye(100), + np.array([[1e-7], [-398581.8223]]), + ] ), coef=arrays( float, @@ -53,6 +50,8 @@ def test_perfect_fit(self, x: ndarray, coef: float): # TODO: Extend test to use `percentile`. Zero columns (after trimming) make the # previous implementation of the unit test fail # TODO: Check why test fails if number of columns is increased to e.g. 1000 (500) + # TODO: Check if arrays strategy can be used instead. See e.g. 
class TestL2Norm:
    """Compare `l2_norm` against `numpy.linalg.norm` for dense and sparse input."""

    @given(
        a=arrays(
            float,
            shape=st.integers(min_value=1, max_value=100),
            # Bounded on both sides, like the 2D tests, so that squaring an entry
            # cannot overflow.
            elements=st.floats(
                min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False
            ),
        ),
        axis=st.integers(min_value=0, max_value=1),
    )
    def test_1d_array(self, a: ndarray, axis: int):
        """For a flat array the result must equal the vector norm for either `axis`."""
        # `np.allclose` only returns a boolean; without `assert` nothing is checked.
        assert np.allclose(np.linalg.norm(a), l2_norm(a, axis=axis))

    @given(
        a=arrays(
            float,
            shape=st.tuples(
                st.integers(min_value=1, max_value=100),
                st.integers(min_value=1, max_value=100),
            ),
            elements=st.floats(
                min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False
            ),
        ),
        axis=st.integers(min_value=0, max_value=1),
    )
    def test_2d_array(self, a: ndarray, axis: int):
        """For a dense matrix the result must equal the norm taken along `axis`."""
        assert np.allclose(np.linalg.norm(a, axis=axis), l2_norm(a, axis=axis))

    @given(
        a=arrays(
            float,
            shape=st.tuples(
                st.integers(min_value=1, max_value=100),
                st.integers(min_value=1, max_value=100),
            ),
            elements=st.floats(
                min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False
            ),
        ),
        axis=st.integers(min_value=0, max_value=1),
    )
    def test_sparse_input(self, a: ndarray, axis: int):
        """A CSR copy of the matrix must give the same norms as the dense matrix."""
        assert np.allclose(
            np.linalg.norm(a, axis=axis), l2_norm(csr_matrix(a), axis=axis)
        )
class TestSplicingDynamics:
    """Tests for the `SplicingDynamics` model."""

    @staticmethod
    def _splicing_ode(y, t, alpha, beta, gamma):
        """Right-hand side of the splicing ODE, in the signature `odeint` expects."""
        dydt = np.zeros(2)
        dydt[0] = alpha - beta * y[0]
        dydt[1] = beta * y[0] - gamma * y[1]

        return dydt

    @given(
        alpha=st.floats(min_value=0, allow_infinity=False),
        beta=st.floats(min_value=0, max_value=1, exclude_min=True),
        gamma=st.floats(min_value=0, max_value=1, exclude_min=True),
        initial_state=st.lists(
            st.floats(min_value=0, allow_infinity=False), min_size=2, max_size=2
        ),
        t=arrays(
            float,
            shape=st.integers(min_value=1, max_value=100),
            elements=st.floats(
                min_value=0, max_value=1e3, allow_infinity=False, allow_nan=False
            ),
        ),
        with_keys=st.booleans(),
    )
    def test_output_form(
        self,
        alpha: float,
        beta: float,
        gamma: float,
        initial_state: List[float],
        t: ndarray,
        with_keys: bool,
    ):
        """Check type and shape of the solution with and without keys."""
        # The test always runs the model with distinct rates.
        if beta == gamma:
            gamma = gamma + 1e-6

        splicing_dynamics = SplicingDynamics(
            alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state
        )
        solution = splicing_dynamics.get_solution(t=t, with_keys=with_keys)

        if with_keys:
            assert isinstance(solution, dict)
            assert len(solution) == 2
            assert list(solution.keys()) == ["u", "s"]
            assert all(len(var) == len(t) for var in solution.values())
        else:
            assert isinstance(solution, ndarray)
            assert solution.shape == (len(t), 2)

    # TODO: Check how / if hypothesis can be used instead.
    @pytest.mark.parametrize(
        "alpha, beta, gamma, initial_state",
        [
            (5, 0.5, 0.4, [0, 1]),
        ],
    )
    def test_solution(self, alpha, beta, gamma, initial_state):
        """Compare the model's solution against numerical integration."""
        t = np.linspace(0, 20, 10000)
        splicing_dynamics = SplicingDynamics(
            alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state
        )
        exact_solution = splicing_dynamics.get_solution(t=t)

        numerical_solution = odeint(
            self._splicing_ode,
            np.array(initial_state),
            t,
            args=(alpha, beta, gamma),
        )

        assert np.allclose(numerical_solution, exact_solution)

    @pytest.mark.parametrize(
        "alpha, beta, gamma, initial_state",
        [
            (5, 0.5, 0.4, [0, 1]),
        ],
    )
    def test_2d_time(
        self,
        alpha: float,
        beta: float,
        gamma: float,
        initial_state: List[float],
    ):
        """Check the solution when time is a matrix with one column per trajectory."""
        t = np.linspace(0, 20, 10000)
        t = np.vstack([t, t, t]).T

        splicing_dynamics = SplicingDynamics(
            alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state
        )
        exact_solution = splicing_dynamics.get_solution(t=t)

        assert exact_solution.shape == (*t.shape, 2)

        # Integrate every time column separately and stack along the column axis.
        numerical_solution = np.stack(
            [
                odeint(
                    self._splicing_ode,
                    np.array(initial_state),
                    t[:, col_id],
                    args=(alpha, beta, gamma),
                )
                for col_id in range(t.shape[1])
            ],
            axis=1,
        )

        assert np.allclose(numerical_solution, exact_solution)

    @given(
        alpha=st.floats(),
        beta=st.floats(),
        gamma=st.floats(),
        initial_state=arrays(
            float,
            shape=st.integers(min_value=2, max_value=2),
            elements=st.floats(
                min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False
            ),
        ),
    )
    def test_intitial_state_1d(self, alpha, beta, gamma, initial_state):
        """Check that a flat initial state is stored and can be reassigned."""
        dm = SplicingDynamics(
            alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state
        )

        np.testing.assert_array_equal(dm.initial_state, initial_state)

        dm.initial_state = np.array([0, 0])
        np.testing.assert_array_equal(dm.initial_state, np.array([0, 0]))

    @given(
        alpha=st.floats(),
        beta=st.floats(),
        gamma=st.floats(),
        initial_state=arrays(
            float,
            shape=st.tuples(
                st.integers(min_value=1, max_value=100),
                st.integers(min_value=2, max_value=2),
            ),
            elements=st.floats(
                min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False
            ),
        ),
    )
    def test_intitial_state_2d(self, alpha, beta, gamma, initial_state):
        """Check that a 2D initial state is stored and can be reassigned."""
        dm = SplicingDynamics(
            alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state
        )

        np.testing.assert_array_equal(dm.initial_state, initial_state)

        dm.initial_state = np.zeros(2)
        np.testing.assert_array_equal(dm.initial_state, np.zeros(2))

    @given(
        alpha=st.floats(allow_infinity=False, allow_nan=False),
        beta=st.floats(min_value=1e-10, allow_infinity=False, allow_nan=False),
        gamma=st.floats(min_value=1e-10, allow_infinity=False, allow_nan=False),
        initial_state=arrays(
            float,
            shape=st.integers(min_value=2, max_value=2),
            elements=st.floats(
                min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False
            ),
        ),
        with_keys=st.booleans(),
        stacked=st.booleans(),
    )
    def test_steady_state_1d(
        self,
        alpha: float,
        beta: float,
        gamma: float,
        initial_state: ndarray,
        with_keys: bool,
        stacked: bool,
    ):
        """Check container type and values of the steady states."""
        dm = SplicingDynamics(
            alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state
        )
        steady_states = dm.get_steady_states(stacked=stacked, with_keys=with_keys)

        # The container depends on the flags; the expected values do not.
        if with_keys:
            assert isinstance(steady_states, dict)
            assert [*steady_states] == ["u", "s"]
            u_steady, s_steady = steady_states["u"], steady_states["s"]
        elif not stacked:
            assert isinstance(steady_states, tuple)
            assert len(steady_states) == 2
            u_steady, s_steady = steady_states[0], steady_states[1]
        else:
            assert isinstance(steady_states, np.ndarray)
            assert steady_states.shape == (2,)
            u_steady, s_steady = steady_states[0], steady_states[1]

        assert u_steady == alpha / beta
        assert s_steady == alpha / gamma

    @given(
        alpha=st.floats(),
        beta=st.floats(max_value=0, allow_infinity=False, allow_nan=False),
        gamma=st.floats(max_value=0, allow_infinity=False, allow_nan=False),
        initial_state=arrays(
            float,
            shape=st.integers(min_value=2, max_value=2),
            elements=st.floats(
                min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False
            ),
        ),
        with_keys=st.booleans(),
        stacked=st.booleans(),
    )
    def test_steady_state_not_defined(
        self,
        alpha: float,
        beta: float,
        gamma: float,
        initial_state: ndarray,
        with_keys: bool,
        stacked: bool,
    ):
        """Non-positive `beta` and `gamma` must raise a `ValueError`."""
        dm = SplicingDynamics(
            alpha=alpha, beta=beta, gamma=gamma, initial_state=initial_state
        )

        with pytest.raises(
            ValueError, match=("Both `beta` and `gamma` need to be strictly positive.")
        ):
            _ = dm.get_steady_states(stacked=stacked, with_keys=with_keys)
class TestSimulation:
    """Tests for `scvelo.datasets.simulation`."""

    # Columns expected in `adata.var` of a simulated dataset.
    _VAR_COLUMNS = ["true_t_", "true_alpha", "true_beta", "true_gamma", "true_scaling"]

    def _assert_constant_rate_output(
        self,
        adata,
        n_obs: int,
        n_vars: int,
        t_max: float,
        alpha: float,
        beta: float,
        gamma: float,
    ):
        """Assertions shared by the tests that simulate with scalar rates."""
        assert isinstance(adata, AnnData)
        assert adata.shape == (n_obs, n_vars)
        assert len(adata.layers) == 2
        assert set(adata.layers) == {"unspliced", "spliced"}

        assert len(adata.obs.columns) == 1
        assert adata.obs.columns.isin(["true_t"]).all()
        assert adata.obs["true_t"].max() == np.round(t_max, 2)

        assert len(adata.var.columns) == 5
        assert adata.var.columns.isin(self._VAR_COLUMNS).all()
        assert (adata.var["true_alpha"] == alpha).all()
        assert (adata.var["true_beta"] == beta).all()
        assert (adata.var["true_gamma"] == gamma).all()
        assert (adata.var["true_scaling"] == 1).all()

    @given(
        n_obs=st.integers(min_value=5, max_value=300),
        n_vars=st.integers(min_value=5, max_value=300),
        t_max=st.floats(
            min_value=1, max_value=50, allow_nan=False, allow_infinity=False
        ),
        alpha=st.floats(
            min_value=0, max_value=10, allow_nan=False, allow_infinity=False
        ),
        beta=st.floats(
            min_value=0.1, max_value=10, allow_nan=False, allow_infinity=False
        ),
        gamma=st.floats(
            min_value=0.1, max_value=10, allow_nan=False, allow_infinity=False
        ),
        noise_level=st.floats(
            min_value=0, max_value=5, allow_nan=False, allow_infinity=False
        ),
    )
    def test_normal_noise(
        self,
        n_obs: int,
        n_vars: int,
        t_max: float,
        alpha: float,
        beta: float,
        gamma: float,
        noise_level: float,
    ):
        """Simulate with the default noise model and check the returned annotations."""
        # The test always simulates with distinct rates.
        if beta == gamma:
            beta += 1e-3
        adata = simulation(
            n_obs=n_obs,
            n_vars=n_vars,
            t_max=t_max,
            alpha=alpha,
            beta=beta,
            gamma=gamma,
            noise_level=noise_level,
        )

        self._assert_constant_rate_output(
            adata, n_obs, n_vars, t_max, alpha, beta, gamma
        )

    def test_time_dependent_parameters(self):
        """Simulate with array-valued rates and check the stored parameters."""
        adata = simulation(
            n_obs=5,
            alpha=np.array([5, 4, 0, 0, 0]),
            beta=np.array([0.5, 0.3, 0.6, 0.4, 0.7]),
            gamma=np.array([0.25, 0.4, 0.5, 0.2, 0.4]),
        )

        assert isinstance(adata, AnnData)
        assert adata.shape == (5, 4)

        assert len(adata.var.columns) == 5
        # Unlike the scalar-rate tests, the column order is checked here as well.
        assert (adata.var.columns == self._VAR_COLUMNS).all()
        assert adata.var["true_alpha"].isna().sum() == 4
        assert adata.var["true_beta"].isna().sum() == 4
        assert adata.var["true_gamma"].isna().sum() == 4

    @pytest.mark.parametrize("noise_model", ("gillespie", "normal"))
    @pytest.mark.parametrize(
        "switches",
        ([0.25, 0.5, 0.75, 1], [0.3, 0.4, 0.5], [0.01, 0.2, 0.4, 0.3, 0.61, 0.7]),
    )
    def test_switch(self, noise_model, switches: List[float]):
        """Check that the stored switching times round up to the requested ones."""

        def ceil(x, precision=0):
            """Round `x` up to `precision` decimal places."""
            return np.true_divide(np.ceil(x * 10**precision), 10**precision)

        adata = simulation(t_max=1, switches=switches, noise_model=noise_model)

        assert isinstance(adata, AnnData)
        np.testing.assert_equal(
            ceil(adata.var["true_t_"].values, precision=2), switches
        )

    @pytest.mark.parametrize("n_obs", (5, 10, 100))
    @pytest.mark.parametrize("n_vars", (5, 10, 100))
    @pytest.mark.parametrize("t_max", (1, 10))
    @pytest.mark.parametrize("alpha", (5, 7))
    @pytest.mark.parametrize("beta", (0.5, 0.3))
    @pytest.mark.parametrize("gamma", (0.4, 0.2))
    def test_gillespie(
        self,
        n_obs: int,
        n_vars: int,
        t_max: float,
        alpha: float,
        beta: float,
        gamma: float,
    ):
        """Simulate with the Gillespie noise model and check for integer counts."""
        adata = simulation(
            n_obs=n_obs,
            n_vars=n_vars,
            t_max=t_max,
            alpha=alpha,
            beta=beta,
            gamma=gamma,
            noise_model="gillespie",
        )

        self._assert_constant_rate_output(
            adata, n_obs, n_vars, t_max, alpha, beta, gamma
        )

        # All simulated values must be whole numbers.
        np.testing.assert_equal(adata.X % 1, 0)
        np.testing.assert_equal(adata.layers["unspliced"] % 1, 0)
        np.testing.assert_equal(adata.layers["spliced"] % 1, 0)
List + +import pytest + +import numpy as np +from scipy.sparse import csr_matrix, issparse + +from anndata import AnnData + +from scvelo.preprocessing.moments import ( + get_moments, + magic_impute, + moments, + second_order_moments, + second_order_moments_u, +) + + +class TestGetMoments: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_neighbor_graph_not_present(self, adata, dataset: str, n_obs: int): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + del adata.uns["neighbors"] + + with pytest.raises( + ValueError, + match=( + "You need to run `pp.neighbors` first to compute a neighborhood graph." + ), + ): + _ = get_moments(adata=adata) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("layer", [None, "unspliced", "spliced"]) + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + @pytest.mark.parametrize("dense", [True, False]) + def test_first_moments( + self, adata, dataset: str, n_obs: int, layer: bool, mode: str, dense: bool + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + if dense: + if layer is None: + adata.X = adata.X.A + else: + adata.layers[layer] = adata.layers[layer].A + + first_order_moment = get_moments(adata=adata, layer=layer, mode=mode) + assert isinstance(first_order_moment, np.ndarray) + + ground_truth = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer={layer}-mode={mode}_first_moment.npy" + ) + ) + np.testing.assert_almost_equal(first_order_moment, ground_truth) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("layer", [None, "unspliced", "spliced"]) + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + @pytest.mark.parametrize("dense", 
[True, False]) + def test_second_moments( + self, adata, dataset: str, n_obs: int, layer: bool, mode: str, dense: bool + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + if dense: + if layer is None: + adata.X = adata.X.A + else: + adata.layers[layer] = adata.layers[layer].A + + second_order_moment = get_moments( + adata=adata, layer=layer, mode=mode, second_order=True, centered=False + ) + assert isinstance(second_order_moment, np.ndarray) + + ground_truth = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer={layer}-mode={mode}_second_moment.npy" + ) + ) + np.testing.assert_almost_equal(second_order_moment, ground_truth) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("layer", [None, "unspliced", "spliced"]) + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + @pytest.mark.parametrize("dense", [True, False]) + def test_passing_array_for_layer( + self, adata, dataset: str, n_obs: int, layer: bool, mode: str, dense: bool + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + if dense: + if layer is None: + adata.X = adata.X.A + else: + adata.layers[layer] = adata.layers[layer].A + + if layer is None: + first_order_moment = get_moments(adata=adata, layer=adata.X, mode=mode) + else: + first_order_moment = get_moments( + adata=adata, layer=adata.layers[layer], mode=mode + ) + + assert isinstance(first_order_moment, np.ndarray) + + ground_truth = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer={layer}-mode={mode}_first_moment.npy" + ) + ) + np.testing.assert_almost_equal(first_order_moment, ground_truth) + + @pytest.mark.parametrize("sparse", [True, False]) + def test_analytic_example(self, sparse: bool): + adata = AnnData( + X=np.array([[1, 2, 0], [2, 3, 1], [1, 0.5, 2]]), + obsp={ + 
"connectivities": csr_matrix( + np.array([[0, 0.5, 0.1], [0.5, 0, 0], [0.5, 0, 0]]) + ) + }, + uns={"neighbors": []}, + ) + if sparse: + adata.X = csr_matrix(adata.X) + + first_order_moment = get_moments(adata=adata) + first_order_moment_ground_truth = np.array( + [[4 / 3, 5.5 / 3, 1], [1.5, 2.5, 0.5], [1, 1.25, 1]] + ) + np.testing.assert_almost_equal( + first_order_moment, first_order_moment_ground_truth + ) + + second_order_moment_uncentered = get_moments( + adata=adata, second_order=True, centered=False + ) + second_order_moment_uncentered_ground_truth = np.array( + [[2, 13.25 / 3, 5 / 3], [2.5, 6.5, 0.5], [1, 2.125, 2]] + ) + np.testing.assert_almost_equal( + second_order_moment_uncentered, + second_order_moment_uncentered_ground_truth, + decimal=5, + ) + + second_order_moment_centered = get_moments(adata=adata, second_order=True) + np.testing.assert_almost_equal( + second_order_moment_centered, + second_order_moment_uncentered - first_order_moment_ground_truth**2, + ) + + +class TestMagicImpute: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_output(self, adata, capfd, dataset: str, n_obs: int): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + del adata.layers["Mu"] + del adata.layers["Ms"] + + magic_impute(adata=adata) + + ground_truth_unspliced = np.load( + file=( + f"tests/_data/test_moments/magic_impute/dataset={dataset}-n_obs={n_obs}" + f"-layer=unspliced_magic_impute.npy" + ) + ) + np.testing.assert_almost_equal( + adata.layers["Mu"], ground_truth_unspliced, decimal=5 + ) + + ground_truth_spliced = np.load( + file=( + f"tests/_data/test_moments/magic_impute/dataset={dataset}-n_obs={n_obs}" + f"-layer=spliced_magic_impute.npy" + ) + ) + np.testing.assert_almost_equal( + adata.layers["Ms"], ground_truth_spliced, decimal=5 + ) + + expected_log = ( + "To be used carefully. 
Magic has not yet been tested for this " + "application.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + +class TestMoments: + def _compare_adatas(self, adata_1, adata_2): + # Check `.layers` + assert set(adata_1.layers) == set(adata_2.layers).union(["Mu", "Ms"]) + for layer in set(adata_1.layers).difference(["Mu", "Ms"]): + assert (adata_1.layers[layer] != adata_2.layers[layer]).getnnz() == 0 + + # Check `.obsm` is unchanged + assert set(adata_1.obsm) == set(["X_pca"]) + np.testing.assert_equal(adata_1.obsm["X_pca"], adata_2.obsm["X_pca"]) + + # Check `.obsp` is unchanged + assert set(adata_1.obsp) == set(["distances", "connectivities"]) + assert issparse(adata_1.obsp["connectivities"]) + np.testing.assert_almost_equal( + adata_1.obsp["connectivities"].A, + adata_2.obsp["connectivities"].A, + decimal=4, + ) + assert issparse(adata_1.obsp["distances"]) + np.testing.assert_almost_equal( + adata_1.obsp["distances"].A, adata_2.obsp["distances"].A, decimal=4 + ) + + # Check `.uns` is unchanged + assert set(adata_1.uns["pca"]) == set(["params", "variance", "variance_ratio"]) + assert adata_1.uns["pca"]["params"] == adata_2.uns["pca"]["params"] + np.testing.assert_equal( + adata_1.uns["pca"]["variance"], adata_2.uns["pca"]["variance"] + ) + np.testing.assert_equal( + adata_1.uns["pca"]["variance_ratio"], + adata_2.uns["pca"]["variance_ratio"], + ) + + assert set(adata_1.uns["neighbors"]) == set( + ["connectivities_key", "distances_key", "indices", "params"] + ) + assert ( + adata_1.uns["neighbors"]["connectivities_key"] + == adata_2.uns["neighbors"]["connectivities_key"] + ) + assert ( + adata_1.uns["neighbors"]["distances_key"] + == adata_2.uns["neighbors"]["distances_key"] + ) + np.testing.assert_equal( + adata_1.uns["neighbors"]["indices"], + adata_2.uns["neighbors"]["indices"], + ) + assert adata_1.uns["neighbors"]["params"] == adata_2.uns["neighbors"]["params"] + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) 
+ @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize( + "layers_to_remove", [["unspliced"], ["spliced"], ["unspliced", "spliced"]] + ) + def test_skip_moment_calculation( + self, adata, capfd, dataset: str, n_obs: int, layers_to_remove: List[str] + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + for layer in layers_to_remove: + del adata.layers[layer] + + moments(data=adata) + + expected_log = ( + "WARNING: Skipping moments, because un/spliced counts were not found.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + @pytest.mark.parametrize("copy", [True, False]) + def test_moment_calculation( + self, adata, dataset: str, n_obs: int, mode: str, copy: bool + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + original_adata = adata.copy() + del adata.layers["Mu"] + del adata.layers["Ms"] + + returned_adata = moments(data=adata, mode=mode, copy=copy) + + if copy: + assert isinstance(returned_adata, AnnData) + else: + assert returned_adata is None + returned_adata = adata.copy() + + self._compare_adatas(returned_adata, original_adata) + + # Check calculated moments + ground_truth_unspliced = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer=unspliced-mode={mode}_first_moment.npy" + ) + ) + np.testing.assert_almost_equal( + returned_adata.layers["Mu"], ground_truth_unspliced + ) + + ground_truth_spliced = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer=spliced-mode={mode}_first_moment.npy" + ) + ) + np.testing.assert_almost_equal( + returned_adata.layers["Ms"], ground_truth_spliced + ) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + 
@pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + @pytest.mark.parametrize("copy", [True, False]) + def test_log(self, adata, capfd, dataset: str, n_obs: int, mode: str, copy: bool): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + del adata.layers["Mu"] + del adata.layers["Ms"] + + _ = moments(data=adata, mode=mode, copy=copy) + + expected_log = f"computing moments based on {mode}\n finished (" + + actual_log, _ = capfd.readouterr() + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n" + " 'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)\n" + ) + assert actual_log == expected_log + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("copy", [True, False]) + def test_neighbors_and_moments_calculation( + self, adata, capfd, dataset: str, n_obs: int, copy: bool + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + adata.uns["neighbors"]["params"]["n_pcs"] = None + original_adata = adata.copy() + del adata.layers["Mu"] + del adata.layers["Ms"] + del adata.uns["neighbors"] + adata.obsp = {} + + returned_adata = moments(data=adata, copy=copy) + if copy: + assert isinstance(returned_adata, AnnData) + else: + assert returned_adata is None + returned_adata = adata.copy() + + self._compare_adatas(returned_adata, original_adata) + + np.testing.assert_almost_equal( + returned_adata.layers["Mu"], original_adata.layers["Mu"] + ) + np.testing.assert_almost_equal( + returned_adata.layers["Ms"], original_adata.layers["Ms"] + ) + + expected_log = "computing neighbors\n finished (" + + actual_log, _ = capfd.readouterr() + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = 
actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n" + " 'distances' and 'connectivities', weighted adjacency matrices " + "(adata.obsp)\n" + "computing moments based on connectivities\n finished (" + ) + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n" + " 'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)\n" + ) + assert actual_log == expected_log + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_raw_input(self, adata, dataset: str, n_obs: int): + adata = adata(dataset=dataset, n_obs=n_obs, raw=True, preprocessed=False) + + moments(data=adata) + + ground_truth_unspliced = np.load( + file=( + f"tests/_data/test_moments/moments/dataset={dataset}-n_obs={n_obs}" + f"first_moment_unspliced.npy" + ) + ) + np.testing.assert_almost_equal(adata.layers["Mu"], ground_truth_unspliced) + + ground_truth_spliced = np.load( + file=( + f"tests/_data/test_moments/moments/dataset={dataset}-n_obs={n_obs}" + f"first_moment_spliced.npy" + ) + ) + np.testing.assert_almost_equal(adata.layers["Ms"], ground_truth_spliced) + + +class TestSecondOrderMoments: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_neighbor_graph_not_present(self, adata, dataset: str, n_obs: int): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + del adata.uns["neighbors"] + + with pytest.raises( + ValueError, + match=( + "You need to run `pp.neighbors` first to compute a neighborhood graph." 
+ ), + ): + _ = second_order_moments(adata=adata) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_output(self, adata, dataset: str, n_obs: int): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + second_order_moment_spliced, second_order_moment_mixed = second_order_moments( + adata=adata + ) + assert isinstance(second_order_moment_spliced, np.ndarray) + assert isinstance(second_order_moment_mixed, np.ndarray) + + ground_truth_spliced = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer=spliced-mode=connectivities_second_moment.npy" + ) + ) + np.testing.assert_almost_equal( + second_order_moment_spliced, ground_truth_spliced + ) + + ground_truth_mixed = np.load( + file=( + f"tests/_data/test_moments/second_order_moments/dataset={dataset}" + f"-n_obs={n_obs}-mode=connectivities_second_moment_mixed.npy" + ) + ) + np.testing.assert_almost_equal(second_order_moment_mixed, ground_truth_mixed) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_adjusted(self, adata, dataset: str, n_obs: int): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + adata.layers["Mu"] = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer=unspliced-mode=connectivities_first_moment.npy" + ) + ) + adata.layers["Ms"] = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer=spliced-mode=connectivities_first_moment.npy" + ) + ) + + second_order_moment_spliced, second_order_moment_mixed = second_order_moments( + adata=adata, adjusted=True + ) + assert isinstance(second_order_moment_spliced, np.ndarray) + assert isinstance(second_order_moment_mixed, np.ndarray) + + second_order_spliced = np.load( + file=( + 
f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer=spliced-mode=connectivities_second_moment.npy" + ) + ) + np.testing.assert_almost_equal( + second_order_moment_spliced, 2 * second_order_spliced - adata.layers["Ms"] + ) + + second_order_mixed = np.load( + file=( + f"tests/_data/test_moments/second_order_moments/dataset={dataset}" + f"-n_obs={n_obs}-mode=connectivities_second_moment_mixed.npy" + ) + ) + np.testing.assert_almost_equal( + second_order_moment_mixed, 2 * second_order_mixed - adata.layers["Mu"] + ) + + +class TestSecondOrderMomentsU: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_neighbor_graph_not_present(self, adata, dataset: str, n_obs: int): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + del adata.uns["neighbors"] + + with pytest.raises( + ValueError, + match=( + "You need to run `pp.neighbors` first to compute a neighborhood graph." 
+ ), + ): + _ = second_order_moments_u(adata=adata) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_output(self, adata, dataset: str, n_obs: int): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + second_order_moment = second_order_moments_u(adata=adata) + assert isinstance(second_order_moment, np.ndarray) + + ground_truth = np.load( + file=( + f"tests/_data/test_moments/get_moments/dataset={dataset}-n_obs={n_obs}" + f"-layer=unspliced-mode=connectivities_second_moment.npy" + ) + ) + np.testing.assert_almost_equal(second_order_moment, ground_truth) diff --git a/tests/preprocessing/test_neighbors.py b/tests/preprocessing/test_neighbors.py new file mode 100644 index 00000000..4003d113 --- /dev/null +++ b/tests/preprocessing/test_neighbors.py @@ -0,0 +1,2108 @@ +from typing import Callable, Dict, Optional + +import hypothesis.strategies as st +import pytest +from hypothesis import given + +import numpy as np +import pandas as pd +from scipy.sparse import csc_matrix, csr_matrix, issparse, load_npz, spmatrix +from sklearn.neighbors import NearestNeighbors + +from anndata import AnnData + +from scvelo.preprocessing.neighbors import ( + _get_hnsw_neighbors, + _get_rep, + _get_scanpy_neighbors, + _get_sklearn_neighbors, + _set_pca, + compute_connectivities_umap, + get_connectivities, + get_csr_from_indices, + get_duplicate_cells, + get_n_neighs, + get_neighs, + neighbors, + neighbors_to_be_recomputed, + remove_duplicate_cells, + select_connectivities, + select_distances, + set_diagonal, + verify_neighbors, +) +from tests.core import get_adata + + +class TestComputeConnectivitiesUmap: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_real_data(self, adata, dataset, n_obs): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + knn_indices = 
adata.uns["neighbors"]["indices"] + + knn_distances = [] + for row_distance, row_index in zip(adata.obsp["distances"], knn_indices): + knn_distances.append(row_distance.A[0, row_index]) + knn_distances = np.array(knn_distances) + + distance_matrix, connectivity_matrix = compute_connectivities_umap( + knn_indices=knn_indices, + knn_dists=knn_distances, + n_obs=n_obs, + n_neighbors=adata.uns["neighbors"]["params"]["n_neighbors"], + ) + + assert isinstance(distance_matrix, csr_matrix) + np.testing.assert_almost_equal( + distance_matrix.A, adata.obsp["distances"].A, decimal=4 + ) + + assert isinstance(connectivity_matrix, csr_matrix) + np.testing.assert_almost_equal( + connectivity_matrix.A, adata.obsp["connectivities"].A, decimal=4 + ) + + +class TestGetConnectivities: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("raw", [True, False]) + @pytest.mark.parametrize("uns", [{}, {"random": 0}]) + @pytest.mark.parametrize("n_neighbors", [None, 15, 30]) + @pytest.mark.parametrize("recurse_neighbors", [True, False]) + def test_neighbors_not_present( + self, + adata, + dataset: str, + n_obs: int, + raw: bool, + uns: Dict, + n_neighbors: Optional[int], + recurse_neighbors: bool, + ): + if raw: + adata = adata(dataset=dataset, n_obs=n_obs, raw=True, preprocessed=False) + else: + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + adata.uns = uns + + returned_val = get_connectivities( + adata=adata, n_neighbors=n_neighbors, recurse_neighbors=recurse_neighbors + ) + assert returned_val is None + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_neighbors", [5, 15, 29]) + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + def test_connectivities_with_trimmed_neighbors( + self, adata, dataset: str, n_obs: int, n_neighbors: Optional[int], mode: str + ): + 
adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + connectivities = get_connectivities( + adata=adata, mode=mode, n_neighbors=n_neighbors + ) + + assert issparse(connectivities) + assert (connectivities.getnnz(axis=1) == (n_neighbors + 1)).all() + assert (connectivities.data == 1 / (n_neighbors + 1)).all() + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_neighbors", [None, 30, 45]) + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + def test_connectivities_with_original_neighbors( + self, adata, dataset: str, n_obs: int, n_neighbors: Optional[int], mode: str + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + connectivities = get_connectivities( + adata=adata, mode=mode, n_neighbors=n_neighbors + ) + + assert issparse(connectivities) + assert ( + connectivities.getnnz(axis=1) == adata.obsp[mode].getnnz(axis=1) + 1 + ).all() + + for row in connectivities: + np.testing.assert_equal(row.data, 1 / row.getnnz()) + + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + @pytest.mark.parametrize( + "adjacency_matrix, recursed_neighbors_matrix", + [ + ( + np.array([[1, 0, 1], [0, 1, 1], [0, 1, 1]]), + csr_matrix( + np.array([[0.4, 0.2, 0.4], [0, 0.5, 0.5], [0, 0.5, 0.5]]), + ).astype(np.float32), + ), + ( + np.array([[1, 0, 1, 0], [1, 1, 0, 0], [0, 1, 1, 0], [0, 1, 0, 1]]), + csr_matrix( + np.array( + [ + [0.4, 0.2, 0.4, 0], + [0.4, 0.4, 0.2, 0], + [0.2, 0.4, 0.4, 0], + [0.2, 0.4, 0, 0.4], + ] + ) + ).astype(np.float32), + ), + ], + ) + def test_recursed_neighbors( + self, + mode: str, + adjacency_matrix: np.ndarray, + recursed_neighbors_matrix: spmatrix, + ): + adata = AnnData( + np.eye(*adjacency_matrix.shape), + obsp={mode: csr_matrix(adjacency_matrix)}, + uns={ + "neighbors": {"params": {"n_neighbors": adjacency_matrix[0, :].sum()}} + }, + ) + + connectivities = get_connectivities( 
+ adata=adata, + mode=mode, + recurse_neighbors=True, + ) + + assert issparse(connectivities) + assert (connectivities != recursed_neighbors_matrix).getnnz() == 0 + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_neighbors", [None, 15]) + @pytest.mark.parametrize("mode", ["connectivities", "distances"]) + def test_recursed_neighbors_real_data( + self, adata, dataset: str, n_obs: int, n_neighbors: Optional[int], mode: str + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + connectivities = get_connectivities( + adata=adata, mode=mode, n_neighbors=n_neighbors + ) + ground_truth = load_npz( + "tests/_data/test_neighbors/get_connectivities/" + f"dataset={dataset}-n_obs={n_obs}-n_neighbors={n_neighbors}-mode={mode}.npz" + ) + + assert issparse(connectivities) + np.testing.assert_almost_equal(connectivities.A, ground_truth.A) + + +class TestGetCsrFromIndices: + @pytest.mark.parametrize( + "knn_indices, knn_dists, n_obs, n_neighbors, ground_truth", + ( + [ + ( + np.array([[0, 1, 2], [1, 3, 0], [2, 1, 3], [3, 0, 1]]), + np.array( + [[0, 0.1, 0.2], [0, 0.5, 1.7], [0, 0.01, 0.02], [0, 0.5, 1]] + ), + 4, + 3, + csr_matrix( + [ + [0.0, 0.1, 0.2, 0.0], + [1.7, 0.0, 0.0, 0.5], + [0.0, 0.01, 0.0, 0.02], + [0.5, 1.0, 0.0, 0.0], + ] + ), + ), + ( + np.array([[0, 1, 2], [1, 3, 0], [2, 1, 3], [3, 0, 1]]), + np.array( + [[0, 0.1, 0.2], [0, 0.5, 1.7], [0, 0.01, 0.3], [0, 0.5, 1]] + ), + 4, + 2, + csr_matrix( + [ + [0.0, 0.1, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.5], + [0.0, 0.01, 0.0, 0.0], + [0.5, 0.0, 0.0, 0.0], + ] + ), + ), + ( + np.array([[0, 1, -1], [1, -1, 0], [2, 1, 3], [3, 0, 1]]), + np.array( + [[0, 0.1, 0.2], [0, 0.5, 1.7], [0, 0.01, 0.3], [0, 0.5, 1]] + ), + 4, + 3, + csr_matrix( + [ + [0.0, 0.1, 0.0, 0.0], + [1.7, 0.0, 0.0, 0.0], + [0.0, 0.01, 0.0, 0.3], + [0.5, 1.0, 0.0, 0.0], + ] + ), + ), + ] + ), + ) + def test_manual_data( + self, + knn_indices: 
np.ndarray, + knn_dists: np.ndarray, + n_obs: int, + n_neighbors: int, + ground_truth: spmatrix, + ): + returned_matrix = get_csr_from_indices( + knn_indices=knn_indices, + knn_dists=knn_dists, + n_obs=n_obs, + n_neighbors=n_neighbors, + ) + + assert isinstance(returned_matrix, csr_matrix) + assert (returned_matrix != ground_truth).getnnz() == 0 + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_real_data(self, adata, dataset, n_obs): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + knn_indices = adata.uns["neighbors"]["indices"] + + knn_distances = [] + for row_distance, row_index in zip(adata.obsp["distances"], knn_indices): + knn_distances.append(row_distance.A[0, row_index]) + knn_distances = np.array(knn_distances) + + returned_matrix = get_csr_from_indices( + knn_indices=knn_indices, + knn_dists=knn_distances, + n_obs=n_obs, + n_neighbors=adata.uns["neighbors"]["params"]["n_neighbors"], + ) + + assert isinstance(returned_matrix, csr_matrix) + assert (returned_matrix != adata.obsp["distances"]).getnnz() == 0 + + +class TestGetDuplicateCells: + @pytest.mark.parametrize( + "X, true_duplicate_row_idx", + ( + (np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0]]), np.array([2])), + (np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0]]), np.array([2, 3])), + ( + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + [1.3, 0.2, -0.7], + ] + ), + np.array([3]), + ), + ), + ) + @pytest.mark.parametrize("sparse_format", (None, csr_matrix, csc_matrix)) + def test_array( + self, + X: np.ndarray, + true_duplicate_row_idx: np.ndarray, + sparse_format: Optional[Callable], + ): + if sparse_format: + X = sparse_format(X) + returned_duplicate_idx = get_duplicate_cells(data=X) + + np.testing.assert_almost_equal(returned_duplicate_idx, true_duplicate_row_idx) + + @pytest.mark.parametrize( + "X, X_pca, true_duplicate_row_idx", + ( + ( + np.array([[1, 0, 0], [0, 
1, 0], [1, 0, 0]]), + np.array([[1, 0], [2, 7], [1, 0]]), + np.array([2]), + ), + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0]]), + np.array([[1, 0], [2, 7], [0, 0]]), + np.array([]), + ), + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0]]), + np.array([[1, 0], [0, 1], [1, 0], [0, 1]]), + np.array([2, 3]), + ), + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0]]), + np.array([[1, 0], [0, 1], [1, 0], [1, 1]]), + np.array([2]), + ), + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0]]), + np.array([[1, 0], [0, 1], [1, 1], [0, 1]]), + np.array([3]), + ), + ( + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + [1.3, 0.2, -0.7], + ] + ), + np.array([[0], [1], [0.1], [0]]), + np.array([3]), + ), + ), + ) + @pytest.mark.parametrize("sparse_format", (None, csr_matrix, csc_matrix)) + def test_anndata( + self, + X: np.ndarray, + X_pca: np.ndarray, + true_duplicate_row_idx: np.ndarray, + sparse_format: Optional[Callable], + ): + if sparse_format: + X = sparse_format(X) + + adata = AnnData(X=X, obsm={"X_pca": X_pca}) + returned_duplicate_idx = get_duplicate_cells(data=adata) + + np.testing.assert_almost_equal(returned_duplicate_idx, true_duplicate_row_idx) + + +class TestGetHnswNeighbors: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [None, 15]) + @pytest.mark.parametrize("n_neighbors", [15, 30]) + def test_neighbors_with_X_pca( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + n_neighbors: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + neighbors = _get_hnsw_neighbors( + adata=adata, + use_rep="X_pca", + n_pcs=n_pcs, + n_neighbors=n_neighbors, + num_threads=-1, + ) + if n_pcs is None: + n_pcs = adata.obsm["X_pca"].shape[1] + + ground_truth_distances = load_npz( + file=( + f"tests/_data/test_neighbors/_get_hnsw_neighbors/dataset={dataset}-" + 
f"n_obs={n_obs}-rep='X_pca'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_distances.npz" + ), + ) + ground_truth_connectivities = load_npz( + file=( + f"tests/_data/test_neighbors/_get_hnsw_neighbors/dataset={dataset}-" + f"n_obs={n_obs}-rep='X_pca'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_connectivites.npz" + ), + ) + + assert hasattr(neighbors, "distances") + assert issparse(neighbors.distances) + assert (neighbors.distances.getnnz(axis=1) == n_neighbors - 1).all() + np.testing.assert_almost_equal( + neighbors.distances.A, ground_truth_distances.A, decimal=4 + ) + + assert hasattr(neighbors, "connectivities") + assert issparse(neighbors.connectivities) + assert (neighbors.connectivities.getnnz(axis=1) >= n_neighbors - 1).all() + assert (neighbors.connectivities != neighbors.connectivities.T).getnnz() == 0 + np.testing.assert_almost_equal( + neighbors.connectivities.A, ground_truth_connectivities.A, decimal=4 + ) + + assert hasattr(neighbors, "knn_indices") + assert neighbors.knn_indices.shape == (adata.n_obs, n_neighbors) + np.testing.assert_equal(neighbors.knn_indices[:, 0], np.arange(adata.n_obs)) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [15, 30]) + @pytest.mark.parametrize("n_neighbors", [15, 30]) + def test_neighbors_with_X( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + n_neighbors: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + neighbors = _get_hnsw_neighbors( + adata=adata, + use_rep="X", + n_pcs=n_pcs, + n_neighbors=n_neighbors, + num_threads=-1, + ) + + ground_truth_distances = load_npz( + file=( + f"tests/_data/test_neighbors/_get_hnsw_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_distances.npz" + ), + ) + ground_truth_connectivities = load_npz( + file=( + 
f"tests/_data/test_neighbors/_get_hnsw_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_connectivites.npz" + ), + ) + + assert hasattr(neighbors, "distances") + assert issparse(neighbors.distances) + assert (neighbors.distances.getnnz(axis=1) == n_neighbors - 1).all() + np.testing.assert_almost_equal( + neighbors.distances.A, ground_truth_distances.A, decimal=4 + ) + + assert hasattr(neighbors, "connectivities") + assert issparse(neighbors.connectivities) + assert (neighbors.connectivities.getnnz(axis=1) >= n_neighbors - 1).all() + assert (neighbors.connectivities != neighbors.connectivities.T).getnnz() == 0 + np.testing.assert_almost_equal( + neighbors.connectivities.A, ground_truth_connectivities.A, decimal=4 + ) + + assert hasattr(neighbors, "knn_indices") + assert neighbors.knn_indices.shape == (adata.n_obs, n_neighbors) + np.testing.assert_equal(neighbors.knn_indices[:, 0], np.arange(adata.n_obs)) + + +class TestGetNeighs: + @pytest.mark.parametrize( + "adata, ground_truth", + ( + ( + AnnData( + np.eye(5), + obsp={"distances": np.eye(5), "connectivities": np.eye(5)}, + uns={"neighbors": {"distances": 2}}, + ), + np.eye(5), + ), + ( + AnnData( + np.eye(5), + obsp={ + "distances": csr_matrix(np.eye(5)), + "connectivities": np.eye(5), + }, + uns={"neighbors": {"distances": 2}}, + ), + csr_matrix(np.eye(5)), + ), + ( + AnnData( + np.eye(5), + obsp={ + "connectivities": np.eye(5), + }, + uns={"neighbors": {"distances": csr_matrix(np.eye(5))}}, + ), + csr_matrix(np.eye(5)), + ), + ( + AnnData( + np.eye(5), + uns={"neighbors": {"distances": csr_matrix(np.eye(5))}}, + ), + csr_matrix(np.eye(5)), + ), + ), + ) + def test_with_default_values(self, adata: AnnData, ground_truth): + returned_value = get_neighs(adata=adata) + + if issparse(ground_truth): + assert (returned_value != ground_truth).sum() == 0 + else: + np.testing.assert_almost_equal(returned_value, ground_truth) + + @pytest.mark.parametrize( + "adata, mode", 
+ ( + (AnnData(np.eye(5), obsp={"distances": np.eye(5)}), "distances"), + ( + AnnData( + np.eye(5), + obsp={"distances": np.eye(5)}, + uns={"neigbors": {"distances": 2}}, + ), + "distances", + ), + ( + AnnData( + np.eye(5), + obsp={"distances": np.eye(5), "random_obsp": np.ones(shape=(5, 5))}, + ), + "random_obsp", + ), + ( + AnnData( + np.eye(5), + obsp={ + "distances": csr_matrix(np.eye(5)), + "random_obsp": csr_matrix(np.triu(np.ones(shape=(5, 5)), k=0)), + }, + ), + "random_obsp", + ), + ( + AnnData( + np.eye(5), + obsp={ + "distances": csr_matrix(np.eye(5)), + "random_obsp": csr_matrix(np.triu(np.ones(shape=(5, 5)), k=0)), + }, + ), + "distances", + ), + ( + AnnData( + np.eye(5), + obsp={ + "distances": csr_matrix(np.eye(5)), + "random_obsp": csc_matrix(np.triu(np.ones(shape=(5, 5)), k=0)), + }, + ), + "random_obsp", + ), + ), + ) + def test_get_from_obsp(self, adata: AnnData, mode: str): + returned_value = get_neighs(adata=adata, mode=mode) + + if issparse(returned_value): + assert (returned_value != adata.obsp[mode]).sum() == 0 + else: + np.testing.assert_almost_equal(returned_value, adata.obsp[mode]) + + @pytest.mark.parametrize( + "adata, mode", + ( + ( + AnnData(np.eye(5), uns={"neighbors": {"distances": np.eye(5)}}), + "distances", + ), + ( + AnnData( + np.eye(5), uns={"neighbors": {"distances": csr_matrix(np.eye(5))}} + ), + "distances", + ), + ( + AnnData( + np.eye(5), + obsp={"distances": csr_matrix(np.eye(5))}, + uns={"neighbors": {"n_neighbors": 1}}, + ), + "n_neighbors", + ), + ( + AnnData( + np.eye(5), + obsp={"distances": csr_matrix(np.eye(5))}, + uns={"neighbors": {"n_neighbors": 1, "random_entry": np.eye(2)}}, + ), + "n_neighbors", + ), + ), + ) + def test_get_from_uns(self, adata: AnnData, mode: str): + returned_value = get_neighs(adata=adata, mode=mode) + + if issparse(returned_value): + assert (returned_value != adata.uns["neighbors"][mode]).sum() == 0 + else: + np.testing.assert_almost_equal(returned_value, adata.uns["neighbors"][mode]) + + 
def test_selected_mode_not_available(self): + adata = AnnData(np.eye(2)) + + with pytest.raises(ValueError, match=r"The selected mode is not valid."): + _ = get_neighs(adata=adata, mode="distances") + + +class TestGetNNeighs: + @pytest.mark.parametrize( + "adata, expected_return_value", + ( + (AnnData(np.eye(2)), 0), + (AnnData(np.eye(2), uns={"neighbors": {}}), 0), + (AnnData(np.eye(2), uns={"neighbors": {"random_key": 0}}), 0), + (AnnData(np.eye(2), uns={"neighbors": {"params": {}}}), 0), + (AnnData(np.eye(2), uns={"neighbors": {"params": {"random_key": 2}}}), 0), + (AnnData(np.eye(2), uns={"neighbors": {"params": {"n_neighbors": 5}}}), 5), + ), + ) + def test_get_n_neighs(self, adata: AnnData, expected_return_value: int): + n_neigbors = get_n_neighs(adata=adata) + + assert n_neigbors == expected_return_value + + +class TestGetRep: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize( + "use_rep, expected_rep", + [(None, "X_pca"), ("X", "X"), ("pca", "X_pca"), ("X_pca", "X_pca")], + ) + @pytest.mark.parametrize("n_pcs", [None, 10, 30, 100]) + def test_pca_not_yet_calculated( + self, + adata, + dataset: str, + n_obs: int, + use_rep: Optional[str], + expected_rep: Optional[str], + n_pcs: Optional[int], + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + returned_rep = _get_rep(adata=adata, use_rep=use_rep, n_pcs=n_pcs) + assert returned_rep == expected_rep + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [None, 10, 30, 100]) + @pytest.mark.parametrize("n_vars", [5, 10, 49, 50]) + def test_small_n_vars( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + n_vars: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + adata = adata[:, adata.var_names[:n_vars]] + + returned_rep = 
_get_rep(adata=adata, use_rep=None, n_pcs=n_pcs) + if n_vars < 50: + assert returned_rep == "X" + else: + assert returned_rep == "X_pca" + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_zero_n_pcs( + self, + adata, + dataset: str, + n_obs: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + returned_rep = _get_rep(adata=adata, use_rep=None, n_pcs=0) + assert returned_rep == "X" + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [5, 10, 30]) + def test_X_and_n_pcs_specified( + self, + adata, + capfd, + dataset: str, + n_obs: int, + n_pcs: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + returned_rep = _get_rep(adata=adata, use_rep="X", n_pcs=n_pcs) + assert returned_rep == "X" + + expected_log = ( + "WARNING: Unexpected pair of parameters: `use_rep='X'` but " + f"`n_pcs={n_pcs}`. This will only consider the frist {n_pcs} variables " + f"when calculating the neighbor graph. 
To use all of `X`, pass " + "`n_pcs=None`.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + +class TestGetScanpyNeighbors: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [None, 15]) + @pytest.mark.parametrize("n_neighbors", [15, 30]) + def test_neighbors_with_X_pca( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + n_neighbors: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + neighbors = _get_scanpy_neighbors( + adata=adata, use_rep="X_pca", n_pcs=n_pcs, n_neighbors=n_neighbors + ) + if n_pcs is None: + n_pcs = adata.obsm["X_pca"].shape[1] + + ground_truth_distances = load_npz( + file=( + f"tests/_data/test_neighbors/_get_scanpy_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X_pca'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_distances.npz" + ), + ) + ground_truth_connectivities = load_npz( + file=( + f"tests/_data/test_neighbors/_get_scanpy_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X_pca'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_connectivites.npz" + ), + ) + + assert hasattr(neighbors, "distances") + assert issparse(neighbors.distances) + assert (neighbors.distances.getnnz(axis=1) == n_neighbors - 1).all() + np.testing.assert_almost_equal( + neighbors.distances.A, ground_truth_distances.A, decimal=4 + ) + + assert hasattr(neighbors, "connectivities") + assert issparse(neighbors.connectivities) + assert (neighbors.connectivities.getnnz(axis=1) >= n_neighbors - 1).all() + assert (neighbors.connectivities != neighbors.connectivities.T).getnnz() == 0 + np.testing.assert_almost_equal( + neighbors.connectivities.A, ground_truth_connectivities.A, decimal=4 + ) + + assert hasattr(neighbors, "knn_indices") + assert neighbors.knn_indices.shape == (adata.n_obs, n_neighbors) + np.testing.assert_equal(neighbors.knn_indices[:, 0], np.arange(adata.n_obs)) + + 
@pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [15, 30]) + @pytest.mark.parametrize("n_neighbors", [15, 30]) + def test_neighbors_with_X( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + n_neighbors: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + neighbors = _get_scanpy_neighbors( + adata=adata, use_rep="X", n_pcs=n_pcs, n_neighbors=n_neighbors + ) + + ground_truth_distances = load_npz( + file=( + f"tests/_data/test_neighbors/_get_scanpy_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_distances.npz" + ), + ) + ground_truth_connectivities = load_npz( + file=( + f"tests/_data/test_neighbors/_get_scanpy_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_connectivites.npz" + ), + ) + + assert hasattr(neighbors, "distances") + assert issparse(neighbors.distances) + assert (neighbors.distances.getnnz(axis=1) == n_neighbors - 1).all() + np.testing.assert_almost_equal( + neighbors.distances.A, ground_truth_distances.A, decimal=4 + ) + + assert hasattr(neighbors, "connectivities") + assert issparse(neighbors.connectivities) + assert (neighbors.connectivities.getnnz(axis=1) >= n_neighbors - 1).all() + assert (neighbors.connectivities != neighbors.connectivities.T).getnnz() == 0 + np.testing.assert_almost_equal( + neighbors.connectivities.A, ground_truth_connectivities.A, decimal=4 + ) + + assert hasattr(neighbors, "knn_indices") + assert neighbors.knn_indices.shape == (adata.n_obs, n_neighbors) + np.testing.assert_equal(neighbors.knn_indices[:, 0], np.arange(adata.n_obs)) + + +class TestGetSklearnNeighbors: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [None, 15]) + @pytest.mark.parametrize("n_neighbors", 
[15, 30]) + def test_neighbors_with_X_pca( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + n_neighbors: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + neighbors = _get_sklearn_neighbors( + adata=adata, use_rep="X_pca", n_pcs=n_pcs, n_neighbors=n_neighbors + ) + if n_pcs is None: + n_pcs = adata.obsm["X_pca"].shape[1] + + ground_truth_distances = load_npz( + file=( + f"tests/_data/test_neighbors/_get_sklearn_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X_pca'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_distances.npz" + ), + ) + ground_truth_connectivities = load_npz( + file=( + f"tests/_data/test_neighbors/_get_sklearn_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X_pca'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_connectivites.npz" + ), + ) + + assert isinstance(neighbors, NearestNeighbors) + + assert hasattr(neighbors, "distances") + assert issparse(neighbors.distances) + assert (neighbors.distances.getnnz(axis=1) == n_neighbors - 1).all() + np.testing.assert_almost_equal( + neighbors.distances.A, ground_truth_distances.A, decimal=4 + ) + + assert hasattr(neighbors, "connectivities") + assert issparse(neighbors.connectivities) + assert (neighbors.connectivities.getnnz(axis=1) >= n_neighbors - 1).all() + assert (neighbors.connectivities != neighbors.connectivities.T).getnnz() == 0 + np.testing.assert_almost_equal( + neighbors.connectivities.A, ground_truth_connectivities.A, decimal=4 + ) + + assert hasattr(neighbors, "knn_indices") + assert neighbors.knn_indices.shape == (adata.n_obs, n_neighbors) + np.testing.assert_equal(neighbors.knn_indices[:, 0], np.arange(adata.n_obs)) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [15, 30]) + @pytest.mark.parametrize("n_neighbors", [15, 30]) + def test_neighbors_with_X( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: 
Optional[int], + n_neighbors: int, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + neighbors = _get_sklearn_neighbors( + adata=adata, use_rep="X", n_pcs=n_pcs, n_neighbors=n_neighbors + ) + + ground_truth_distances = load_npz( + file=( + f"tests/_data/test_neighbors/_get_sklearn_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_distances.npz" + ), + ) + ground_truth_connectivities = load_npz( + file=( + f"tests/_data/test_neighbors/_get_sklearn_neighbors/dataset={dataset}" + f"-n_obs={n_obs}-rep='X'-n_pcs={n_pcs}-n_neighbors={n_neighbors}" + "_connectivites.npz" + ), + ) + + assert isinstance(neighbors, NearestNeighbors) + + assert hasattr(neighbors, "distances") + assert issparse(neighbors.distances) + assert (neighbors.distances.getnnz(axis=1) == n_neighbors - 1).all() + np.testing.assert_almost_equal( + neighbors.distances.A, ground_truth_distances.A, decimal=4 + ) + + assert hasattr(neighbors, "connectivities") + assert issparse(neighbors.connectivities) + assert (neighbors.connectivities.getnnz(axis=1) >= n_neighbors - 1).all() + assert (neighbors.connectivities != neighbors.connectivities.T).getnnz() == 0 + np.testing.assert_almost_equal( + neighbors.connectivities.A, ground_truth_connectivities.A, decimal=4 + ) + + assert hasattr(neighbors, "knn_indices") + assert neighbors.knn_indices.shape == (adata.n_obs, n_neighbors) + np.testing.assert_equal(neighbors.knn_indices[:, 0], np.arange(adata.n_obs)) + + +class TestNeighbors: + @given( + adata=get_adata(max_obs=5, max_vars=5), + method=st.sampled_from(["random", "Scanpy", "UMAP"]), + ) + def test_provide_not_supported_method(self, adata: AnnData, method: str): + expected_value_error = ( + f"Provided `method={method}`. Admissible values are `'umap'`, `'sklearn'`, " + "`'hnsw'`, `'gauss'`, and `'rapids'`." 
+ ) + with pytest.raises(ValueError, match=rf"{expected_value_error}"): + neighbors(adata=adata, use_rep="random", method=method) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [15, 30]) + @pytest.mark.parametrize("method", ["hnsw", "sklearn", "umap"]) + @pytest.mark.parametrize("n_neighbors", [15, 30]) + @pytest.mark.parametrize("copy", [True, False]) + def test_output( + self, + adata, + capfd, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + method: str, + n_neighbors: int, + copy: bool, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + del adata.layers["Mu"] + del adata.layers["Ms"] + adata.obsm = {} + adata.obsp = {} + adata.uns = {} + adata.varm = {} + + returned_adata = neighbors( + adata=adata, method=method, n_pcs=n_pcs, n_neighbors=n_neighbors, copy=copy + ) + + # Check returned value + if copy: + assert isinstance(returned_adata, AnnData) + else: + assert returned_adata is None + returned_adata = adata.copy() + + assert returned_adata.obs_names.equals(adata.obs_names) + assert returned_adata.var_names.equals(adata.var_names) + + # Check PCA + assert set(returned_adata.obsm) == set(["X_pca"]) + assert returned_adata.obsm["X_pca"].shape == (adata.n_obs, n_pcs) + + assert "pca" in returned_adata.uns + assert set(returned_adata.uns["pca"]) == set( + ["params", "variance", "variance_ratio"] + ) + assert isinstance(returned_adata.uns["pca"]["params"], Dict) + assert set(returned_adata.uns["pca"]["params"]) == set( + ["use_highly_variable", "zero_center"] + ) + if "highly_variable" not in adata.var: + assert returned_adata.uns["pca"]["params"]["use_highly_variable"] is False + else: + assert returned_adata.uns["pca"]["params"]["use_highly_variable"] is True + + # Check data related to neighbor graph + assert set(returned_adata.obsp) == set(["distances", "connectivities"]) + assert 
issparse(returned_adata.obsp["connectivities"]) + assert issparse(returned_adata.obsp["distances"]) + np.testing.assert_equal( + returned_adata.obsp["distances"].getnnz(axis=1), n_neighbors - 1 + ) + + assert "neighbors" in returned_adata.uns + assert returned_adata.uns["neighbors"]["connectivities_key"] == "connectivities" + assert returned_adata.uns["neighbors"]["distances_key"] == "distances" + assert "indices" in returned_adata.uns["neighbors"] + assert returned_adata.uns["neighbors"]["indices"].shape == ( + returned_adata.n_obs, + n_neighbors, + ) + np.testing.assert_equal( + returned_adata.uns["neighbors"]["indices"][:, 0], + np.arange(returned_adata.n_obs), + ) + assert set(returned_adata.uns["neighbors"]["params"]) == set( + ["n_neighbors", "method", "metric", "n_pcs", "use_rep"] + ) + assert returned_adata.uns["neighbors"]["params"]["n_neighbors"] == n_neighbors + assert returned_adata.uns["neighbors"]["params"]["method"] == method + assert returned_adata.uns["neighbors"]["params"]["metric"] == "euclidean" + assert returned_adata.uns["neighbors"]["params"]["n_pcs"] == n_pcs + assert returned_adata.uns["neighbors"]["params"]["use_rep"] == "X_pca" + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_log(self, adata, capfd, dataset, n_obs): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + del adata.layers["Mu"] + del adata.layers["Ms"] + adata.obsm = {} + adata.obsp = {} + adata.uns = {} + adata.varm = {} + + neighbors(adata=adata) + + expected_log = "computing neighbors\n finished (" + + actual_log, _ = capfd.readouterr() + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n" + " 'distances' and 'connectivities', weighted adjacency matrices " + "(adata.obsp)\n" + ) + assert actual_log == expected_log + + # FIXME: See 
https://github.com/theislab/scvelo/issues/922 + """ + # TODO: Make test more sophisticated to test multiple data matrices + # TODO: Use additional representations besides `X` + def test_duplicate_cells(self, capfd): + adata = AnnData( + X=np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + [1.3, 0.2, -0.7], + ] + ), + layers={ + "unspliced": np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + [1.3, 0.2, -0.7], + ] + ), + "spliced": np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + [1.3, 0.2, -0.7], + ] + ), + }, + ) + + neighbors(adata=adata, use_rep="X_pca") + + expected_log = ( + "WARNING: You seem to have 1 duplicate cells in your " + "data. Consider removing these via pp.remove_duplicate_cells.\n" + "computing neighbors\n" + " finished (" + ) + + actual_log, _ = capfd.readouterr() + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n" + " 'distances' and 'connectivities', weighted adjacency matrices " + "(adata.obsp)\n" + ) + assert actual_log == expected_log + """ + + +class TestNeighborsToBeRecomputed: + @given( + adata=get_adata(max_obs=5, max_vars=5), + n_neighbors=st.integers(), + uns=st.sampled_from([{}, {"neighbors": []}, {"neighbors": {"param": []}}]), + ) + def test_incomplete_uns(self, adata: AnnData, n_neighbors: int, uns: Dict): + adata.uns = uns + assert neighbors_to_be_recomputed(adata=adata, n_neighbors=n_neighbors) + + @given( + adata=get_adata(max_obs=5, max_vars=5), + n_neighbors_original=st.integers(), + n_additional_neighbors=st.integers(min_value=1), + ) + def test_more_neighbors_than_originally_used( + self, adata: AnnData, n_neighbors_original: int, n_additional_neighbors: int + ): + adata.uns = {"neighbors": {"params": {"n_neighbors": n_neighbors_original}}} + + assert neighbors_to_be_recomputed( + adata=adata, n_neighbors=n_neighbors_original + 
n_additional_neighbors + ) + + def test_unexpected_distance_matrix(self): + distance_matrix = np.eye(11) + distance_matrix[0, :] = 1 + adata = AnnData(np.eye(11), obsp={"distances": csr_matrix(distance_matrix)}) + + assert neighbors_to_be_recomputed(adata=adata) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_with_real_data(self, adata, dataset, n_obs): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + assert not neighbors_to_be_recomputed(adata=adata) + + +class TestRemoveDuplicateCells: + @pytest.mark.parametrize( + "X, X_pca, X_without_duplicates, X_pca_without_duplicates, n_duplicates", + ( + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0]]), + np.array([[1, 0], [2, 7], [1, 0]]), + np.array([[1, 0, 0], [0, 1, 0]]), + np.array([[1, 0], [2, 7]]), + 1, + ), + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0]]), + np.array([[1, 0], [2, 7], [0, 0]]), + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0]]), + np.array([[1, 0], [2, 7], [0, 0]]), + 0, + ), + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0]]), + np.array([[1, 0], [0, 1], [1, 0], [0, 1]]), + np.array([[1, 0, 0], [0, 1, 0]]), + np.array([[1, 0], [0, 1]]), + 2, + ), + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0]]), + np.array([[1, 0], [0, 1], [1, 0], [1, 1]]), + np.array([[1, 0, 0], [0, 1, 0], [0, 1, 0]]), + np.array([[1, 0], [0, 1], [1, 1]]), + 1, + ), + ( + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0], [0, 1, 0]]), + np.array([[1, 0], [0, 1], [1, 1], [0, 1]]), + np.array([[1, 0, 0], [0, 1, 0], [1, 0, 0]]), + np.array([[1, 0], [0, 1], [1, 1]]), + 1, + ), + ( + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + [1.3, 0.2, -0.7], + ] + ), + np.array([[0], [1], [0.1], [0]]), + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + ] + ), + np.array([[0], [1], [0.1]]), + 1, + ), + ), + ) + @pytest.mark.parametrize("sparse_format", (None, csr_matrix, 
csc_matrix)) + def test_with_pca_present( + self, + capfd, + X, + X_pca, + X_without_duplicates, + X_pca_without_duplicates, + n_duplicates, + sparse_format, + ): + if sparse_format: + X = sparse_format(X) + adata = AnnData(X=X, obsm={"X_pca": X_pca}) + remove_duplicate_cells(adata=adata) + + if sparse_format: + assert issparse(adata.X) + np.testing.assert_almost_equal(adata.X.A, X_without_duplicates) + else: + np.testing.assert_almost_equal(adata.X, X_without_duplicates) + np.testing.assert_almost_equal(adata.obsm["X_pca"], X_pca_without_duplicates) + + if n_duplicates > 0: + expected_log = f"Removed {n_duplicates} duplicate cells.\n" + else: + expected_log = "" + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + # FIXME: See https://github.com/theislab/scvelo/issues/922 + """ + @pytest.mark.parametrize( + "X, X_without_duplicates, n_duplicates", + ( + ( + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + [1.3, 0.2, -0.7], + ] + ), + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + ] + ), + 1, + ), + ( + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + ] + ), + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + ] + ), + 0, + ), + ), + ) + @pytest.mark.parametrize("sparse_format", (None, csr_matrix, csc_matrix)) + def test_without_pca_present( + self, capfd, X, X_without_duplicates, n_duplicates, sparse_format + ): + if sparse_format: + X = sparse_format(X) + adata = AnnData(X=X) + remove_duplicate_cells(adata=adata) + + assert "X_pca" in adata.obsm + assert "pca" in adata.uns + assert "PCs" in adata.varm + + if sparse_format: + assert issparse(adata.X) + np.testing.assert_almost_equal(adata.X.A, X_without_duplicates) + else: + np.testing.assert_almost_equal(adata.X, X_without_duplicates) + + if n_duplicates > 0: + expected_log = f"Removed {n_duplicates} duplicate cells.\n" + else: + expected_log = "" + actual_log, _ = capfd.readouterr() 
+ assert actual_log == expected_log + """ + + # FIXME: See https://github.com/theislab/scvelo/issues/922 + """ + @pytest.mark.parametrize( + "X, X_without_duplicates, n_duplicates", + ( + ( + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + [1.3, 0.2, -0.7], + ] + ), + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + ] + ), + 1, + ), + ( + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + ] + ), + np.array( + [ + [1.3, 0.2, -0.7], + [0.5, 1, -10], + [1.31, 0.21, -0.71], + ] + ), + 0, + ), + ), + ) + @pytest.mark.parametrize("sparse_format", (None, csr_matrix, csc_matrix)) + def test_neighbors_recalculated( + self, capfd, X, X_without_duplicates, n_duplicates, sparse_format + ): + if sparse_format: + X = sparse_format(X) + adata = AnnData( + X=X, + uns={"neighbors": {}}, + obsp={ + "distances": np.eye(X.shape[0]), + "connectivities": np.eye(X.shape[0]), + }, + ) + remove_duplicate_cells(adata=adata) + actual_log, _ = capfd.readouterr() + + assert "X_pca" in adata.obsm + assert "pca" in adata.uns + assert "PCs" in adata.varm + + if sparse_format: + assert issparse(adata.X) + np.testing.assert_almost_equal(adata.X.A, X_without_duplicates) + else: + np.testing.assert_almost_equal(adata.X, X_without_duplicates) + + if n_duplicates > 0: + assert issparse(adata.obsp["distances"]) + assert issparse(adata.obsp["connectivities"]) + assert adata.uns["neighbors"]["connectivities_key"] == "connectivities" + assert adata.uns["neighbors"]["distances_key"] == "distances" + assert isinstance(adata.uns["neighbors"]["indices"], np.ndarray) + assert adata.uns["neighbors"]["params"] == { + "n_neighbors": 30, + "method": "umap", + "metric": "euclidean", + "n_pcs": None, + "use_rep": "X", + } + expected_log = ( + f"Removed {n_duplicates} duplicate cells.\n" + "computing neighbors\n" + " finished (" + ) + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + 
") --> added \n" + " 'distances' and 'connectivities', weighted adjacency matrices " + "(adata.obsp)\n" + ) + assert actual_log == expected_log + else: + np.testing.assert_almost_equal(adata.obsp["distances"], np.eye(adata.n_obs)) + np.testing.assert_almost_equal( + adata.obsp["connectivities"], np.eye(adata.n_obs) + ) + assert adata.uns["neighbors"] == {} + expected_log = "" + assert actual_log == expected_log + """ + + +class TestSelectConnectivities: + @pytest.mark.parametrize( + "connectivities, ground_truth_result", + ( + ( + csr_matrix( + np.array([[0, 1, 2, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + csr_matrix( + np.array([[0, 0, 0, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + csr_matrix( + np.array([[0, 0, 2, 3], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + ), + ( + csr_matrix(np.array([[0, 1, 2], [2, 0, 1], [1, 1, 0], [1, 0, 1]])), + csr_matrix(np.array([[0, 1, 2], [2, 0, 1], [1, 1, 0], [1, 0, 1]])), + ), + ), + ) + def test_default(self, connectivities, ground_truth_result): + adjusted_connectivities = select_connectivities(connectivities=connectivities) + + assert issparse(adjusted_connectivities) + assert (ground_truth_result != adjusted_connectivities).getnnz() == 0 + + @pytest.mark.parametrize( + "connectivities, n_neighbors, ground_truth_result", + ( + ( + csr_matrix( + np.array([[0, 1, 2, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + 0, + csr_matrix(np.zeros((4, 4))), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + 1, + csr_matrix( + np.array([[0, 0, 0, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + 2, + csr_matrix( + np.array([[0, 0, 0, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [2, 0, 0, 1], [1, 1, 0, 0], [1, 
0, 0, 1]]) + ), + 1, + csr_matrix( + np.array([[0, 0, 0, 3], [2, 0, 0, 0], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [2, 0, 0, 1], [1, 2, 0, 0], [1, 0, 2, 0]]) + ), + 1, + csr_matrix( + np.array([[0, 0, 0, 3], [2, 0, 0, 0], [0, 2, 0, 0], [0, 0, 2, 0]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + 2, + csr_matrix( + np.array([[0, 0, 2, 3], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + ), + ( + csr_matrix(np.array([[0, 1, 2], [2, 0, 1], [1, 1, 0], [1, 1, 1]])), + 1, + csr_matrix(np.array([[0, 0, 2], [2, 0, 0], [0, 1, 0], [0, 0, 1]])), + ), + ), + ) + def test_n_neighbors(self, connectivities, n_neighbors, ground_truth_result): + adjusted_connectivities = select_connectivities( + connectivities=connectivities, + n_neighbors=n_neighbors, + ) + + assert issparse(adjusted_connectivities) + assert (adjusted_connectivities.getnnz(axis=1) <= n_neighbors).all() + assert (adjusted_connectivities != ground_truth_result).getnnz() == 0 + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_neighbors", [0, 1, 5, 10, 30, None]) + def test_real_data(self, adata, dataset, n_obs, n_neighbors): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + adjusted_connectivities = select_connectivities( + adata.obsp["connectivities"], + n_neighbors=n_neighbors, + ) + + assert issparse(adjusted_connectivities) + if ( + n_neighbors is not None + and n_neighbors <= adata.obsp["connectivities"].getnnz(axis=1).min() + ): + assert (adjusted_connectivities.getnnz(axis=1) == n_neighbors).all() + assert all( + [ + all( + adjusted_row.data + >= np.sort(original_row.data)[-n_neighbors - 1] + ) + for adjusted_row, original_row in zip( + adjusted_connectivities, adata.obsp["connectivities"] + ) + ] + ) + else: + n_neighbors = adata.obsp["connectivities"].getnnz(axis=1).min() + assert 
(adjusted_connectivities.getnnz(axis=1) == n_neighbors).all() + assert all( + [ + all(adjusted_row.data >= np.sort(original_row.data)[-n_neighbors]) + for adjusted_row, original_row in zip( + adjusted_connectivities, adata.obsp["connectivities"] + ) + ] + ) + + +class TestSelectDistances: + @pytest.mark.parametrize( + "distances, ground_truth_result", + ( + ( + csr_matrix( + np.array([[0, 1, 2, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + csr_matrix( + np.array([[0, 1, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + csr_matrix( + np.array([[0, 1, 2, 0], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + ), + ( + csr_matrix(np.array([[0, 1, 2], [2, 0, 1], [1, 1, 0], [1, 0, 1]])), + csr_matrix(np.array([[0, 1, 2], [2, 0, 1], [1, 1, 0], [1, 0, 1]])), + ), + ), + ) + def test_default(self, distances, ground_truth_result): + adjusted_distances = select_distances(dist=distances) + + assert issparse(adjusted_distances) + assert (ground_truth_result != adjusted_distances).getnnz() == 0 + + @pytest.mark.parametrize( + "distances, n_neighbors, ground_truth_result", + ( + ( + csr_matrix( + np.array([[0, 1, 2, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + 0, + csr_matrix(np.zeros((4, 4))), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + 1, + csr_matrix( + np.array([[0, 1, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + 2, + csr_matrix( + np.array([[0, 1, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 0, 1]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + 1, + csr_matrix( + np.array([[0, 1, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 0]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [2, 0, 0, 1], [1, 2, 0, 0], [1, 0, 2, 0]]) + ), + 1, + 
csr_matrix( + np.array([[0, 1, 0, 0], [0, 0, 0, 1], [1, 0, 0, 0], [1, 0, 0, 0]]) + ), + ), + ( + csr_matrix( + np.array([[0, 1, 2, 3], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + 2, + csr_matrix( + np.array([[0, 1, 2, 0], [2, 0, 0, 1], [1, 1, 0, 0], [1, 0, 0, 1]]) + ), + ), + ( + csr_matrix(np.array([[0, 1, 2], [2, 0, 1], [1, 1, 0], [1, 1, 1]])), + 1, + csr_matrix(np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0]])), + ), + ), + ) + def test_n_neighbors(self, distances, n_neighbors, ground_truth_result): + adjusted_distances = select_distances(dist=distances, n_neighbors=n_neighbors) + + assert issparse(adjusted_distances) + assert (adjusted_distances.getnnz(axis=1) <= n_neighbors).all() + assert (adjusted_distances != ground_truth_result).getnnz() == 0 + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_neighbors", [0, 1, 5, 10, 30, None]) + def test_real_data(self, adata, dataset, n_obs, n_neighbors): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + adjusted_distances = select_distances( + adata.obsp["distances"], + n_neighbors=n_neighbors, + ) + + assert issparse(adjusted_distances) + if ( + n_neighbors is not None + and n_neighbors <= adata.obsp["distances"].getnnz(axis=1).min() + ): + assert (adjusted_distances.getnnz(axis=1) == n_neighbors).all() + assert all( + [ + all(adjusted_row.data <= np.sort(original_row.data)[n_neighbors]) + for adjusted_row, original_row in zip( + adjusted_distances, adata.obsp["distances"] + ) + ] + ) + else: + n_neighbors = adata.obsp["distances"].getnnz(axis=1).min() + assert (adjusted_distances.getnnz(axis=1) == n_neighbors).all() + assert all( + [ + all( + adjusted_row.data <= np.sort(original_row.data)[n_neighbors - 1] + ) + for adjusted_row, original_row in zip( + adjusted_distances, adata.obsp["distances"] + ) + ] + ) + + +class TestSetDiagonal: + @pytest.mark.parametrize( + 
"knn_distances", + [ + np.array([[0, 1, 2, 3], [0, 2, 3, 1], [0, 3, 4, 2]]), + np.array( + [ + [0, 0.21, 2.4, 0.4], + [0, 0.327, 0.3, 0.22], + [0, 0.3, 0.5, 1.7], + ] + ), + ], + ) + @pytest.mark.parametrize( + "knn_indices", + [ + np.array([[0, 1, 2, 4], [1, 4, 5, 2], [2, 7, 3, 1]]), + np.array([[0, 2, 1, 4], [1, 4, 2, 3], [2, 3, 4, 1]]), + ], + ) + @pytest.mark.parametrize("remove_diag", [True, False]) + def test_remove_diag(self, knn_distances, knn_indices, remove_diag): + knn_distances_, knn_indices_ = set_diagonal( + knn_distances=knn_distances, + knn_indices=knn_indices, + remove_diag=remove_diag, + ) + + if remove_diag: + assert knn_distances_.shape == (3, 3) + np.testing.assert_equal(knn_distances_, knn_distances[:, 1:]) + np.testing.assert_equal(knn_indices_, knn_indices[:, 1:]) + assert knn_indices_.shape == (3, 3) + else: + np.testing.assert_equal(knn_distances_, knn_distances) + np.testing.assert_equal(knn_indices_, knn_indices) + + @pytest.mark.parametrize( + "knn_distances", + [ + np.array([[1, 2, 3], [2, 3, 1], [3, 4, 2]]), + np.array( + [ + [0.21, 2.4, 0.4], + [0.327, 0.3, 0.22], + [0.3, 0.5, 1.7], + ] + ), + ], + ) + @pytest.mark.parametrize( + "knn_indices", + [ + np.array([[1, 2, 4], [4, 5, 2], [7, 3, 1]]), + np.array([[2, 1, 4], [4, 2, 3], [3, 4, 1]]), + ], + ) + @pytest.mark.parametrize("remove_diag", [True, False]) + def test_set_diagonal(self, knn_distances, knn_indices, remove_diag): + knn_distances_, knn_indices_ = set_diagonal( + knn_distances=knn_distances, + knn_indices=knn_indices, + remove_diag=remove_diag, + ) + + assert knn_distances_.shape == (3, 4) + np.testing.assert_equal(knn_distances_[:, 0], np.zeros(3)) + np.testing.assert_equal(knn_distances_[:, 1:], knn_distances) + + assert knn_indices_.shape == (3, 4) + np.testing.assert_equal(knn_indices_[:, 0], np.arange(3)) + np.testing.assert_equal(knn_indices_[:, 1:], knn_indices) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", 
[50, 100]) + @pytest.mark.parametrize("remove_diag", [True, False]) + def test_real_data(self, adata, dataset, n_obs, remove_diag): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + n_neighbors = adata.uns["neighbors"]["params"]["n_neighbors"] + + knn_distances = adata.obsp["distances"][ + np.repeat(np.arange(adata.n_obs), n_neighbors).reshape(adata.n_obs, -1), + adata.uns["neighbors"]["indices"], + ].A + knn_indices = adata.uns["neighbors"]["indices"] + + knn_distances_, knn_indices_ = set_diagonal( + knn_distances=knn_distances, + knn_indices=knn_indices, + remove_diag=remove_diag, + ) + + if remove_diag: + np.testing.assert_equal(knn_distances_, knn_distances[:, 1:]) + np.testing.assert_equal(knn_indices_, knn_indices[:, 1:]) + else: + np.testing.assert_equal(knn_distances_, knn_distances) + np.testing.assert_equal(knn_indices_, knn_indices) + + +class TestSetPCA: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [None, 5, 10, 30]) + @pytest.mark.parametrize("use_highly_variable", [True, False]) + def test_pca_not_yet_calculated( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + use_highly_variable: bool, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + original_adata = adata.copy() + + del adata.layers["Mu"] + del adata.layers["Ms"] + adata.obsm = {} + adata.obsp = {} + adata.uns = {} + adata.varm = {} + cleaned_adata = adata.copy() + + returned_val = _set_pca( + adata=adata, n_pcs=n_pcs, use_highly_variable=use_highly_variable + ) + + assert returned_val is None + + assert adata.obs_names.identical(cleaned_adata.obs_names) + assert adata.var_names.identical(cleaned_adata.var_names) + assert set(adata.layers) == set(cleaned_adata.layers) + assert (adata.obs.columns == cleaned_adata.obs.columns).all() + pd.testing.assert_frame_equal(adata.obs, cleaned_adata.obs) + 
pd.testing.assert_frame_equal(adata.var, cleaned_adata.var) + np.testing.assert_almost_equal(adata.X.A, cleaned_adata.X.A) + for layer in adata.layers: + np.testing.assert_almost_equal( + adata.layers[layer].A, + cleaned_adata.layers[layer].A, + ) + + if n_pcs is None: + n_pcs = 30 + + assert set(adata.obsm) == set(["X_pca"]) + assert adata.obsm["X_pca"].shape == (adata.n_obs, n_pcs) + np.testing.assert_almost_equal( + adata.obsm["X_pca"][:, :n_pcs], + original_adata.obsm["X_pca"][:, :n_pcs], + decimal=2, + ) + + assert "pca" in adata.uns + assert set(adata.uns["pca"]) == set(["params", "variance", "variance_ratio"]) + assert isinstance(adata.uns["pca"]["params"], Dict) + assert set(adata.uns["pca"]["params"]) == set( + ["use_highly_variable", "zero_center"] + ) + if "highly_variable" not in adata.var: + assert adata.uns["pca"]["params"]["use_highly_variable"] is False + else: + assert ( + adata.uns["pca"]["params"]["use_highly_variable"] is use_highly_variable + ) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [1, 5, 9]) + @pytest.mark.parametrize("use_highly_variable", [True, False]) + @pytest.mark.parametrize("pass_n_pcs", [True, False]) + def test_small_n_pcs( + self, + adata, + capfd, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + use_highly_variable: bool, + pass_n_pcs: bool, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + adata.obsm["X_pca"] = adata.obsm["X_pca"][:, :n_pcs] + + if pass_n_pcs: + returned_val = _set_pca( + adata=adata, n_pcs=n_pcs, use_highly_variable=use_highly_variable + ) + else: + returned_val = _set_pca( + adata=adata, n_pcs=None, use_highly_variable=use_highly_variable + ) + + assert returned_val is None + + if pass_n_pcs: + expected_log = "" + else: + expected_log = ( + f"WARNING: Neighbors are computed on {n_pcs} principal components " + "only.\n" + ) + actual_log, _ = capfd.readouterr() 
+ assert actual_log == expected_log + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [None, 50]) + @pytest.mark.parametrize("use_highly_variable", [True, False]) + def test_n_pcs_exceed_n_vars( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + use_highly_variable: bool, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + del adata.layers["Mu"] + del adata.layers["Ms"] + adata.obsm = {} + adata.obsp = {} + adata.uns = {} + adata.varm = {} + adata = adata[:, adata.var_names[:15]] + + returned_val = _set_pca( + adata=adata, n_pcs=n_pcs, use_highly_variable=use_highly_variable + ) + assert returned_val is None + + assert set(adata.obsm) == set(["X_pca"]) + assert adata.obsm["X_pca"].shape == (adata.n_obs, adata.n_vars - 1) + + assert "pca" in adata.uns + assert set(adata.uns["pca"]) == set(["params", "variance", "variance_ratio"]) + assert isinstance(adata.uns["pca"]["params"], Dict) + assert set(adata.uns["pca"]["params"]) == set( + ["use_highly_variable", "zero_center"] + ) + if "highly_variable" not in adata.var: + assert adata.uns["pca"]["params"]["use_highly_variable"] is False + else: + assert ( + adata.uns["pca"]["params"]["use_highly_variable"] is use_highly_variable + ) + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("n_pcs", [None, 50]) + @pytest.mark.parametrize("use_highly_variable", [True, False]) + def test_n_pcs_exceed_n_obs( + self, + adata, + dataset: str, + n_obs: int, + n_pcs: Optional[int], + use_highly_variable: bool, + ): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + del adata.layers["Mu"] + del adata.layers["Ms"] + adata.obsm = {} + adata.obsp = {} + adata.uns = {} + adata.varm = {} + adata = adata[adata.obs_names[:15], :] + + returned_val = _set_pca( + 
adata=adata, n_pcs=n_pcs, use_highly_variable=use_highly_variable + ) + + assert returned_val is None + + assert set(adata.obsm) == set(["X_pca"]) + assert adata.obsm["X_pca"].shape == (adata.n_obs, adata.n_obs - 1) + + assert "pca" in adata.uns + assert set(adata.uns["pca"]) == set(["params", "variance", "variance_ratio"]) + assert isinstance(adata.uns["pca"]["params"], Dict) + assert set(adata.uns["pca"]["params"]) == set( + ["use_highly_variable", "zero_center"] + ) + if "highly_variable" not in adata.var: + assert adata.uns["pca"]["params"]["use_highly_variable"] is False + else: + assert ( + adata.uns["pca"]["params"]["use_highly_variable"] is use_highly_variable + ) + + +class TestVerifyNeighbors: + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_invalid_graph(self, capfd, adata, dataset, n_obs): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + adata.obsp["distances"][0, :] = 0 + adata.obsp["distances"].eliminate_zeros() + + returned_val = verify_neighbors(adata=adata) + assert returned_val is None + + actual_warning, _ = capfd.readouterr() + expected_warning = ( + "WARNING: The neighbor graph has an unexpected format " + "(e.g. computed outside scvelo) \n" + "or is corrupted (e.g. due to subsetting). 
" + "Consider recomputing with `pp.neighbors`.\n" + ) + assert actual_warning == expected_warning + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + @pytest.mark.parametrize("raw", [True, False]) + @pytest.mark.parametrize( + "uns", [{}, {"neighbors": []}, {"neighbors": {"param": []}}, {"random": 0}] + ) + def test_neighbors_or_params_not_present( + self, capfd, adata, dataset, n_obs, raw, uns + ): + if raw: + adata = adata(dataset=dataset, n_obs=n_obs, raw=True, preprocessed=False) + else: + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + adata.uns = uns + + returned_val = verify_neighbors(adata=adata) + assert returned_val is None + + actual_warning, _ = capfd.readouterr() + expected_warning = ( + "WARNING: The neighbor graph has an unexpected format " + "(e.g. computed outside scvelo) \n" + "or is corrupted (e.g. due to subsetting). " + "Consider recomputing with `pp.neighbors`.\n" + ) + assert actual_warning == expected_warning + + @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"]) + @pytest.mark.parametrize("n_obs", [50, 100]) + def test_valid_graph(self, capfd, adata, dataset, n_obs): + adata = adata(dataset=dataset, n_obs=n_obs, raw=False, preprocessed=True) + + returned_val = verify_neighbors(adata=adata) + assert returned_val is None + + actual_warning, _ = capfd.readouterr() + expected_warning = "" + assert actual_warning == expected_warning diff --git a/tests/preprocessing/test_utils.py b/tests/preprocessing/test_utils.py new file mode 100644 index 00000000..8bb733e7 --- /dev/null +++ b/tests/preprocessing/test_utils.py @@ -0,0 +1,2427 @@ +from typing import Callable, Dict, List, Optional, Union + +import pytest +from hypothesis import given +from hypothesis import strategies as st +from hypothesis.extra.numpy import arrays + +import numpy as np +import pandas as pd +from scipy.sparse import csc_matrix, csr_matrix, issparse, spmatrix + +from 
anndata import AnnData + +from scvelo.preprocessing.utils import ( + _filter, + check_if_valid_dtype, + counts_per_cell_quantile, + csr_vcorrcoef, + filter_and_normalize, + filter_genes, + filter_genes_dispersion, + get_mean_var, + log1p, + materialize_as_ndarray, + normalize_per_cell, + not_yet_normalized, + recipe_velocity, +) +from tests.core import get_adata + + +class TestCheckIfValidDtype: + @given(adata=get_adata(max_obs=5, max_vars=5)) + def test_check_x(self, adata: AnnData): + if "X" in adata.layers: + adata.layers["_X"] = adata.layers.pop("X") + # anndata stores array as float in `AnnData.X` + adata.X = adata.X.astype(int) + + check_if_valid_dtype(adata, layer="X") + + assert adata.X.dtype == "float32" + for layer in adata.layers: + assert np.issubdtype(adata.layers[layer].dtype, np.integer) + + @given(data=st.data(), adata=get_adata(max_obs=5, max_vars=5)) + def test_check_layers(self, data, adata: AnnData): + if "X" in adata.layers: + adata.layers["_X"] = adata.layers.pop("X") + layer_to_convert = data.draw(st.sampled_from([*adata.layers])) + # anndata stores array as float in `AnnData.X` + adata.X = adata.X.astype(int) + + check_if_valid_dtype(adata, layer=layer_to_convert) + + assert np.issubdtype(adata.X.dtype, np.integer) + for layer in adata.layers: + if layer == layer_to_convert: + assert adata.layers[layer].dtype == "float32" + else: + assert np.issubdtype(adata.layers[layer].dtype, np.integer) + + +class TestCountsPerCellQuantile: + @given( + data=st.data(), + X=arrays( + float, + shape=st.tuples( + st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=100), + ), + elements=st.floats( + min_value=0, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ), + max_proportion_per_cell=st.floats( + min_value=0, max_value=1, allow_infinity=False, allow_nan=False + ), + provide_counts_per_cell=st.booleans(), + sparse=st.sampled_from([False, csr_matrix, csc_matrix]), + ) + def test_with_random_input( + self, + data, + X: 
np.ndarray, + max_proportion_per_cell, + provide_counts_per_cell, + sparse: Union[bool, Callable], + ): + n_obs = X.shape[0] + if provide_counts_per_cell: + counts_per_cell = data.draw( + arrays( + int, + shape=st.integers(min_value=X.shape[0], max_value=X.shape[0]), + elements=st.integers(min_value=0, max_value=1e3), + ) + ) + else: + counts_per_cell = None + if sparse: + X = sparse(X) + counts = counts_per_cell_quantile( + X=X, + max_proportion_per_cell=max_proportion_per_cell, + counts_per_cell=counts_per_cell, + ) + + if sparse: + assert issparse(X) + else: + assert isinstance(X, np.ndarray) + assert counts.shape == (n_obs,) + + @pytest.mark.parametrize( + "X", + ( + np.ones(shape=(3, 100)), + np.hstack((np.ones((5, 100)), np.zeros((5, 5)))), + np.array([[1, 4, 95], [4, 7, 89]]), + ), + ) + @pytest.mark.parametrize( + "max_proportion_per_cell", (0.0, 0.005, 0.1, 0.3, 0.5, 0.9, 1.0) + ) + @pytest.mark.parametrize("sparse", (False, csr_matrix, csc_matrix)) + def test_max_proportion_per_cell(self, X, max_proportion_per_cell, sparse): + ground_truth = X[:, (X <= max_proportion_per_cell * 100).all(axis=0)].sum( + axis=1 + ) + + if sparse: + X = sparse(X) + + counts = counts_per_cell_quantile( + X=X, max_proportion_per_cell=max_proportion_per_cell + ) + + np.testing.assert_almost_equal(counts, ground_truth) + + @given( + X=arrays( + float, + shape=st.tuples( + st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=100), + ), + elements=st.floats( + min_value=0, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ), + ) + @pytest.mark.parametrize("sparse", (False, csr_matrix, csc_matrix)) + def test_include_all_variables(self, X: np.ndarray, sparse: Union[bool, Callable]): + counts_with_all_vars = X.sum(axis=1) + + if sparse: + X = sparse(X) + counts = counts_per_cell_quantile(X=X, max_proportion_per_cell=1) + + np.testing.assert_almost_equal(counts_with_all_vars, counts) + + @given( + X=arrays( + float, + shape=st.tuples( + 
st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=100), + ), + elements=st.floats( + min_value=0, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ), + ) + @pytest.mark.parametrize("sparse", (False, csr_matrix, csc_matrix)) + def test_exclude_all_variables(self, X: np.ndarray, sparse: Union[bool, Callable]): + if sparse: + X = sparse(X) + counts = counts_per_cell_quantile(X=X, max_proportion_per_cell=0) + + np.testing.assert_almost_equal(np.zeros(X.shape[0]), counts) + + @given( + X=arrays( + float, + shape=st.tuples( + st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=100), + ), + elements=st.floats( + min_value=0, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ), + max_proportion_per_cell=st.floats(min_value=0, max_value=1), + sparse=st.sampled_from([False, csr_matrix, csc_matrix]), + ) + def test_counts_per_cell_specified( + self, + X: np.ndarray, + max_proportion_per_cell: float, + sparse: Union[bool, Callable], + ): + ground_truth = X[:, (X <= max_proportion_per_cell * 100).all(axis=0)].sum( + axis=1 + ) + + if sparse: + X = sparse(X) + counts = counts_per_cell_quantile( + X=X, + max_proportion_per_cell=max_proportion_per_cell, + counts_per_cell=100 * np.ones(X.shape[0]), + ) + + np.testing.assert_almost_equal(ground_truth, counts) + + +class TestCsrVcorrcoef: + @pytest.mark.parametrize( + "X", + ( + np.zeros(3), + np.array([1, 0, -4]), + np.array([-0.3, 0.5, 0.93]), + np.zeros(shape=(3, 3)), + np.eye(3), + np.array([[1, 2, 3], [1, -1, 1]]), + np.array([[0.1, -0.3, 7.5], [8.3, 0.4, -0.9]]), + ), + ) + @pytest.mark.parametrize( + "y", + ( + np.zeros(3), + np.ones(3), + np.array([1, 0, 0]), + np.array([1, 2, 3]), + np.array([-0.24, 0.7, 0.4]), + ), + ) + def test_dense_arrays(self, X: np.ndarray, y: np.ndarray): + pearsonr = csr_vcorrcoef(X=X, y=y) + + if X.ndim == 1: + np.testing.assert_almost_equal(np.corrcoef(X, y)[0, 1], pearsonr) + else: + assert all( + 
np.allclose(np.corrcoef(sample, y)[0, 1], corr, equal_nan=True) + for corr, sample in zip(pearsonr, X) + ) + + @pytest.mark.parametrize( + "X", + ( + csr_matrix(np.zeros(3)), + csr_matrix(np.array([1, 0, -4])), + csr_matrix(np.array([-0.3, 0.5, 0.93])), + csr_matrix(np.zeros(shape=(3, 3))), + csr_matrix(np.eye(3)), + csr_matrix(np.array([[1, 2, 3], [1, -1, 1]])), + csr_matrix(np.array([[0.1, -0.3, 7.5], [8.3, 0.4, -0.9]])), + ), + ) + @pytest.mark.parametrize( + "y", + ( + np.zeros(3), + np.ones(3), + np.array([1, 0, 0]), + np.array([1, 2, 3]), + np.array([-0.24, 0.7, 0.4]), + ), + ) + def test_sparse_arrays(self, X: spmatrix, y: np.ndarray): + pearsonr = csr_vcorrcoef(X=X, y=y) + + X_dense = X.A.squeeze() + + if X_dense.ndim == 1: + np.testing.assert_almost_equal(np.corrcoef(X_dense, y)[0, 1], pearsonr) + else: + assert all( + np.allclose(np.corrcoef(sample, y)[0, 1], corr, equal_nan=True) + for corr, sample in zip(pearsonr, X_dense) + ) + + +class TestGetMeanVar: + @given( + X=arrays( + float, + shape=st.tuples( + st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=100), + ), + elements=st.floats( + min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ), + ) + @pytest.mark.parametrize("sparse", (True, False)) + @pytest.mark.parametrize("flat_array", (True, False)) + def test_mean_var_w_random_arrays(self, X, sparse, flat_array): + if flat_array: + X = X.flatten() + numpy_mean = X.mean(axis=0) + numpy_var = np.nan_to_num(X.var(axis=0, ddof=1)) + + if sparse and not flat_array: + X = csr_matrix(X) + mean, var = get_mean_var(X=X) + + if flat_array: + assert np.isscalar(mean) + assert np.isscalar(var) + else: + assert mean.shape == (X.shape[1],) + assert var.shape == (X.shape[1],) + np.testing.assert_almost_equal(numpy_mean, mean) + np.testing.assert_almost_equal(numpy_var, var) + + @pytest.mark.parametrize( + "X, analytic_mean, analytic_variance", + ( + (np.array([1, 2, 3, np.nan]), 2, 1), + (np.array([1, 2, 2, 
np.nan, np.inf]), 5 / 3, 1 / 3), + (np.array([1, 2, 2, 2, np.nan, np.inf, -np.inf]), 7 / 4, 0.25), + ( + np.array([[1, 2, 3, np.nan], [1, 2, np.inf, 3]]), + np.array([1, 2, 3, 3]), + np.array([0, 0, 0, 0]), + ), + ( + np.array([[1, 2, 3, np.nan], [1, 2, np.inf, 3], [2, -np.inf, 1, 3]]), + np.array([4 / 3, 2, 2, 3]), + np.array([1 / 3, 0, 2, 0]), + ), + ), + ) + def test_with_nan_and_inf_entries( + self, X: np.ndarray, analytic_mean: np.ndarray, analytic_variance: np.ndarray + ): + nan_or_inf_entries = np.isnan(X) | np.isinf(X) | np.isneginf(X) + mean, var = get_mean_var(X=X) + + assert (X[nan_or_inf_entries] == 0).all() + + np.testing.assert_almost_equal(mean, analytic_mean) + np.testing.assert_almost_equal(var, analytic_variance) + + @pytest.mark.parametrize( + "X, analytic_mean, analytic_variance", + ( + ( + np.array([[1, 2, 3, np.nan], [1, 2, np.inf, 3]]), + np.array([1, 2, 3, 3]), + np.array([0, 0, 0, 0]), + ), + ( + np.array([[1, 2, 3, np.nan], [1, 2, np.inf, 3], [2, -np.inf, 1, 3]]), + np.array([4 / 3, 2, 2, 3]), + np.array([1 / 3, 0, 2, 0]), + ), + ), + ) + @pytest.mark.parametrize("sparse_format", (csr_matrix, csc_matrix)) + def test_sparse_with_nan_and_inf_entries( + self, + X: spmatrix, + analytic_mean: np.ndarray, + analytic_variance: np.ndarray, + sparse_format: Callable, + ): + X = sparse_format(X) + nan_or_inf_entries = np.isnan(X.data) | np.isinf(X.data) | np.isneginf(X.data) + mean, var = get_mean_var(X=X) + + assert (X.data[nan_or_inf_entries] == 0).all() + + np.testing.assert_almost_equal(mean, analytic_mean) + np.testing.assert_almost_equal(var, analytic_variance) + + @pytest.mark.parametrize( + "X", + ( + np.arange(0, 101), + np.array([np.arange(0, 101), np.arange(0, 101)]), + np.array([np.arange(0, 101), np.arange(0, 101)[::-1]]), + ), + ) + @pytest.mark.parametrize( + "percentile, lower_percentile, upper_percentile", + ((10, 10, 100), (60, 0, 60), ([10, 90], 10, 90), (np.array([10, 90]), 10, 90)), + ) + def test_percentile_dense_input( + self, 
X, percentile, lower_percentile, upper_percentile + ): + clipped_array = X.copy() + clipped_array[clipped_array <= lower_percentile] = lower_percentile + clipped_array[clipped_array >= upper_percentile] = upper_percentile + + numpy_mean = clipped_array.mean(axis=0) + numpy_var = np.var(clipped_array, axis=0, ddof=1) + + mean, var = get_mean_var(X=X, perc=percentile) + + np.testing.assert_almost_equal(mean, numpy_mean) + np.testing.assert_almost_equal(var, numpy_var) + + @pytest.mark.parametrize( + "X", + ( + csr_matrix(np.array([np.arange(0, 101), np.arange(0, 101)])), + csr_matrix(np.array([np.arange(0, 101), np.arange(0, 101)][::-1])), + ), + ) + @pytest.mark.parametrize( + "percentile, lower_percentile, upper_percentile", + ( + (10, 10.9, 100), + (60, 1, 60.4), + ([10, 90], 10.9, 90.1), + (np.array([10, 90]), 10.9, 90.1), + ), + ) + def test_percentile_sparse_input( + self, X, percentile, lower_percentile, upper_percentile + ): + clipped_array = X.A.copy().astype(float) + clipped_array[ + (clipped_array <= lower_percentile) & (clipped_array != 0) + ] = lower_percentile + clipped_array[ + (clipped_array >= upper_percentile) & (clipped_array != 0) + ] = upper_percentile + + numpy_mean = clipped_array.mean(axis=0) + numpy_var = np.var(clipped_array, axis=0, ddof=1) + + mean, var = get_mean_var(X=X.copy(), perc=percentile) + + np.testing.assert_almost_equal(mean, numpy_mean) + np.testing.assert_almost_equal(var, numpy_var) + + @pytest.mark.parametrize( + "X, analytic_mean, analytic_var", + ( + (np.array([1, 0, 3]), 2, 2), + ( + np.array([[1, 2, 3], [0, 5, 0], [0, -1, 1]]), + np.array([1, 2, 2]), + np.array([0, 9, 2]), + ), + ( + np.array([[1, 2, 3], [np.nan, 5, np.inf], [0, -1, 1]]), + np.array([1, 2, 2]), + np.array([0, 9, 2]), + ), + ( + csr_matrix(np.array([[1, 2, 3], [0, 5, 0], [-np.inf, -1, 1]])), + np.array([1, 2, 2]), + np.array([0, 9, 2]), + ), + ), + ) + def test_ignore_zeros(self, X, analytic_mean, analytic_var): + mean, var = get_mean_var(X=X, 
ignore_zeros=True) + + np.testing.assert_almost_equal(mean, analytic_mean) + np.testing.assert_almost_equal(var, analytic_var) + + +class TestFilter: + @given( + X=arrays( + float, + shape=st.tuples( + st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=100), + ), + elements=st.floats( + min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ), + min_max_counts=st.lists( + st.floats(allow_infinity=True, allow_nan=False), + min_size=2, + max_size=2, + unique=True, + ), + ) + def test_filter_based_on_counts(self, X, min_max_counts): + min_counts = min(min_max_counts) + max_counts = max(min_max_counts) + + filter_mask, counts_per_var = _filter( + X=X, min_counts=min_counts, max_counts=max_counts + ) + + assert filter_mask.dtype == np.dtype("bool") + assert filter_mask.shape == (X.shape[1],) + assert isinstance(counts_per_var, np.ndarray) + assert counts_per_var.shape == (X.shape[1],) + assert counts_per_var.dtype == X.dtype + + @given( + X=arrays( + float, + shape=st.tuples( + st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=100), + ), + elements=st.floats( + min_value=-1e3, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ), + min_max_cells=st.lists( + st.floats(allow_infinity=True, allow_nan=False), + min_size=2, + max_size=2, + unique=True, + ), + ) + def test_filter_based_on_cells(self, X, min_max_cells): + min_cells = min(min_max_cells) + max_cells = max(min_max_cells) + + filter_mask, counts_per_var = _filter( + X=X, min_cells=min_cells, max_cells=max_cells + ) + + assert filter_mask.dtype == np.dtype("bool") + assert filter_mask.shape == (X.shape[1],) + assert isinstance(counts_per_var, np.ndarray) + assert counts_per_var.shape == (X.shape[1],) + assert np.issubdtype(counts_per_var.dtype, np.integer) + + @pytest.mark.parametrize( + "min_counts, filtered_out_low", + ( + (None, np.ones(6, dtype=bool)), + (0, np.ones(6, dtype=bool)), + (1, np.ones(6, dtype=bool)), + (2, 
np.array([True, True, True, False, True, True])), + (3, np.array([True, True, True, False, False, True])), + (4, np.array([True, True, True, False, False, False])), + ), + ) + @pytest.mark.parametrize( + "max_counts, filtered_out_upper", + ( + (None, np.ones(6, dtype=bool)), + (9, np.array([False, False, False, True, True, True])), + (10, np.ones(6, dtype=bool)), + (11, np.ones(6, dtype=bool)), + ), + ) + def test_filter_mask( + self, min_counts, filtered_out_low, max_counts, filtered_out_upper + ): + filter_mask, _ = _filter( + X=np.diag([10, 10, 10, 1, 2, 3]), + min_counts=min_counts, + max_counts=max_counts, + ) + + np.testing.assert_equal(filter_mask, filtered_out_low & filtered_out_upper) + + +class TestFilterAndNormalize: + @pytest.mark.parametrize("sparse_X", (False, csr_matrix, csc_matrix)) + @pytest.mark.parametrize("sparse_layers", (False, csr_matrix, csc_matrix)) + @pytest.mark.parametrize("log", (True, False)) + def test_X_looks_processed( + self, capfd, sparse_X: bool, sparse_layers: bool, log: bool + ): + original_X = sparse_X(np.eye(10)) if sparse_X else np.eye(10) + if sparse_layers: + layers_value = sparse_layers(np.triu(np.ones(10), k=0)) + else: + layers_value = np.triu(np.ones(10), k=0) + adata = AnnData( + X=original_X.copy(), + layers={"unspliced": layers_value, "spliced": layers_value}, + ) + + filter_and_normalize(adata, log=log) + + assert type(adata.X) is type(original_X) + if issparse(original_X): + assert (adata.X != original_X).sum() == 0 + else: + assert (adata.X == original_X).all() + + expected_log = "Normalized count data: X, spliced, unspliced.\n" + if log: + expected_log += ( + "WARNING: Did not modify X as it looks preprocessed already.\n" + ) + + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize("sparse_X", (False, csr_matrix, csc_matrix)) + @pytest.mark.parametrize("sparse_layers", (False, csr_matrix, csc_matrix)) + @pytest.mark.parametrize("log", (True, False)) + def 
test_X_log_advised(self, capfd, sparse_X: bool, sparse_layers: bool, log: bool): + original_X = sparse_X(np.eye(10)) if sparse_X else np.eye(10) + if sparse_layers: + layers_value = sparse_layers(np.eye(10)) + else: + layers_value = np.eye(10) + adata = AnnData( + X=original_X.copy(), + layers={"unspliced": layers_value, "spliced": layers_value}, + ) + + filter_and_normalize(adata, log=log) + + assert type(adata.X) is type(original_X) + if not log and issparse(original_X): + assert (adata.X != original_X).sum() == 0 + elif not log: + assert (adata.X == original_X).all() + + expected_log = "Normalized count data: X, spliced, unspliced.\n" + if log: + expected_log += "Logarithmized X.\n" + else: + expected_log += ( + "WARNING: Consider logarithmizing X with `scv.pp.log1p` for better " + "results.\n" + ) + + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + def test_pancreas_50obs(self, capfd, pancreas_50obs: AnnData): + filter_and_normalize( + pancreas_50obs, + min_shared_counts=20, + counts_per_cell_after=1, + n_top_genes=5, + ) + + assert pancreas_50obs.shape == (50, 5) + assert pancreas_50obs.var_names.equals( + pd.Index(["Ppy", "Isl1", "Ppp1r1a", "Lrpprc", "Nnat"]) + ) + expected_log = ( + "Filtered out 27530 genes that are detected 20 counts (shared).\n" + "Normalized count data: X, spliced, unspliced.\n" + "Extracted 5 highly variable genes.\n" + "Logarithmized X.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + def test_pancreas_100obs(self, capfd, pancreas_100obs: AnnData): + filter_and_normalize( + pancreas_100obs, + min_shared_counts=20, + counts_per_cell_after=1, + n_top_genes=5, + ) + + assert pancreas_100obs.shape == (100, 5) + assert pancreas_100obs.var_names.equals( + pd.Index(["Ppy", "Gch1", "Ppp1r1a", "Sst", "Maged2"]) + ) + expected_log = ( + "Filtered out 27029 genes that are detected 20 counts (shared).\n" + "Normalized count data: X, spliced, unspliced.\n" + "Extracted 5 highly variable 
genes.\n" + "Logarithmized X.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + def test_counts_after_normalization_pancreas_50obs(self, pancreas_50obs: AnnData): + filter_and_normalize( + pancreas_50obs, + min_shared_counts=20, + counts_per_cell_after=1, + log=False, + use_initial_size=False, + ) + np.testing.assert_almost_equal(pancreas_50obs.X.sum(axis=1), 1) + np.testing.assert_almost_equal( + pancreas_50obs.layers["unspliced"].sum(axis=1), 1, decimal=6 + ) + np.testing.assert_almost_equal(pancreas_50obs.layers["spliced"].sum(axis=1), 1) + + @pytest.mark.parametrize("log", (True, False)) + def test_without_spliced(self, capfd, log: bool): + adata = AnnData(np.triu(np.ones(10), k=0)) + filter_and_normalize(adata, counts_per_cell_after=1, log=log) + + expected_log = ( + "WARNING: Could not find spliced / unspliced counts.\n" + "Normalized count data: X.\n" + ) + if log: + expected_log += "Logarithmized X.\n" + else: + expected_log += ( + "WARNING: Consider logarithmizing X with `scv.pp.log1p` for better " + "results.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize( + "layers", + ( + {}, + {"unspliced": np.eye(10)}, + {"unspliced": np.eye(10), "random_layer": np.eye(10)}, + {"spliced": np.eye(10)}, + ), + ) + def test_unspliced_spliced_not_found(self, capfd, layers: Dict): + adata = AnnData(np.eye(10), layers=layers) + filter_and_normalize(adata) + + expected_log_start = "WARNING: Could not find spliced / unspliced counts.\n" + actual_log, _ = capfd.readouterr() + assert actual_log.startswith(expected_log_start) + + @pytest.mark.parametrize("layers_to_normalize", (None, "all")) + def test_enforce_normalization(self, capfd, layers_to_normalize: Optional[str]): + adata = AnnData( + 0.1 * np.eye(10), + layers={ + "unspliced": 0.1 * np.eye(10), + "spliced": 0.1 * np.eye(10), + "random_layer": 0.1 * np.eye(10), + }, + ) + filter_and_normalize(adata, 
layers_normalize=layers_to_normalize) + + if layers_to_normalize is not None: + expected_log = ( + "Normalized count data: X, unspliced, spliced, random_layer.\n" + ) + else: + expected_log = ( + "WARNING: Did not normalize X as it looks processed already. To " + "enforce normalization, set `enforce=True`.\n" + "WARNING: Did not normalize spliced as it looks processed already. To " + "enforce normalization, set `enforce=True`.\n" + "WARNING: Did not normalize unspliced as it looks processed already. " + "To enforce normalization, set `enforce=True`.\n" + ) + expected_log += "Logarithmized X.\n" + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + +class TestFilterGenes: + def get_vars_to_keep(self, data, adata: AnnData, as_string: bool): + """Draw subset from variable names.""" + + if as_string: + return data.draw(st.sampled_from(adata.var_names.to_list())) + else: + return data.draw( + st.lists( + st.sampled_from(adata.var_names.to_list()), + min_size=1, + max_size=len(adata.var_names), + unique=True, + ) + ) + + @given( + data=st.data(), + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["unspliced", "spliced"], + ), + copy=st.booleans(), + min_counts=st.integers(min_value=0, max_value=100), + max_counts=st.integers(min_value=0, max_value=100), + min_counts_u=st.integers(min_value=0, max_value=100), + max_counts_u=st.integers(min_value=0, max_value=100), + min_cells=st.integers(min_value=0, max_value=100), + max_cells=st.integers(min_value=0, max_value=100), + min_cells_u=st.integers(min_value=0, max_value=100), + max_cells_u=st.integers(min_value=0, max_value=100), + pass_var_to_keep_as_string=st.booleans(), + ) + def test_retain_genes_and_copy( + self, + data, + adata: AnnData, + copy: bool, + min_counts: int, + max_counts: int, + min_counts_u: int, + max_counts_u: int, + min_cells: int, + max_cells: int, + min_cells_u: int, + max_cells_u: int, + pass_var_to_keep_as_string: bool, + ): + vars_to_keep = self.get_vars_to_keep( + 
data=data, + adata=adata, + as_string=pass_var_to_keep_as_string, + ) + + returned_adata = filter_genes( + adata, + retain_genes=vars_to_keep, + copy=copy, + min_counts=min_counts, + max_counts=max_counts, + min_counts_u=min_counts_u, + max_counts_u=max_counts_u, + min_cells=min_cells, + max_cells=max_cells, + min_cells_u=min_cells_u, + max_cells_u=max_cells_u, + ) + + if copy: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + if isinstance(vars_to_keep, str): + assert pd.Index([vars_to_keep]).isin(adata.var_names).all() + else: + assert pd.Index(vars_to_keep).isin(adata.var_names).all() + + @given( + data=st.data(), + adata=get_adata( + max_obs=5, + max_vars=5, + layer_keys=["unspliced", "spliced"], + ), + copy=st.booleans(), + min_shared_counts=st.integers(min_value=0, max_value=100), + min_shared_cells=st.integers(min_value=0, max_value=100), + pass_var_to_keep_as_string=st.booleans(), + ) + def test_retain_genes_and_copy_w_shared_counts( + self, + data, + adata: AnnData, + copy: bool, + min_shared_counts: int, + min_shared_cells: int, + pass_var_to_keep_as_string: bool, + ): + vars_to_keep = self.get_vars_to_keep( + data=data, + adata=adata, + as_string=pass_var_to_keep_as_string, + ) + + returned_adata = filter_genes( + adata, + retain_genes=vars_to_keep, + copy=copy, + min_shared_counts=min_shared_counts, + min_shared_cells=min_shared_cells, + ) + + if copy: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + if isinstance(vars_to_keep, str): + assert pd.Index([vars_to_keep]).isin(adata.var_names).all() + else: + assert pd.Index(vars_to_keep).isin(adata.var_names).all() + + @pytest.mark.parametrize( + "X, unspliced, spliced", + ( + ( + np.diag([10, 10, 10, 1, 2, 3]), + np.diag([10, 10, 10, 1, 2, 3]), + np.diag([10, 10, 10, 1, 2, 3]), + ), + ( + csr_matrix(np.diag([10, 10, 10, 1, 2, 3])), + csr_matrix(np.diag([10, 10, 10, 1, 2, 3])), + 
csr_matrix(np.diag([10, 10, 10, 1, 2, 3])), + ), + ), + ) + @pytest.mark.parametrize( + "min_counts, filtered_out_low_spliced", + ( + (None, np.ones(6, dtype=bool)), + (1, np.ones(6, dtype=bool)), + (2, np.array([True, True, True, False, True, True])), + (3, np.array([True, True, True, False, False, True])), + (4, np.array([True, True, True, False, False, False])), + ), + ) + @pytest.mark.parametrize( + "max_counts, filtered_out_upper_spliced", + ( + (None, np.ones(6, dtype=bool)), + (9, np.array([False, False, False, True, True, True])), + (10, np.ones(6, dtype=bool)), + (11, np.ones(6, dtype=bool)), + ), + ) + @pytest.mark.parametrize( + "min_counts_u, filtered_out_low_unspliced", + ( + (None, np.ones(6, dtype=bool)), + (1, np.ones(6, dtype=bool)), + (2, np.array([True, True, True, False, True, True])), + (3, np.array([True, True, True, False, False, True])), + (4, np.array([True, True, True, False, False, False])), + ), + ) + @pytest.mark.parametrize( + "max_counts_u, filtered_out_upper_unspliced", + ( + (None, np.ones(6, dtype=bool)), + (9, np.array([False, False, False, True, True, True])), + (10, np.ones(6, dtype=bool)), + (11, np.ones(6, dtype=bool)), + ), + ) + def test_min_max_counts_filter( + self, + X: Union[np.ndarray, spmatrix], + unspliced: Union[np.ndarray, spmatrix], + spliced: Union[np.ndarray, spmatrix], + min_counts: Optional[int], + max_counts: Optional[int], + min_counts_u: Optional[int], + max_counts_u: Optional[int], + filtered_out_low_spliced: bool, + filtered_out_upper_spliced: bool, + filtered_out_low_unspliced: bool, + filtered_out_upper_unspliced: bool, + ): + var_names = pd.Index([f"var_{var_id}" for var_id in range(X.shape[1])]) + adata = AnnData( + X=X, + layers={"unspliced": unspliced, "spliced": spliced}, + var=pd.DataFrame(index=var_names), + ) + + filter_genes( + adata, + min_counts=min_counts, + max_counts=max_counts, + min_counts_u=min_counts_u, + max_counts_u=max_counts_u, + ) + assert pd.Index.equals( + adata.var_names, + 
var_names[ + filtered_out_low_spliced + & filtered_out_upper_spliced + & filtered_out_low_unspliced + & filtered_out_upper_unspliced + ], + ) + + @pytest.mark.parametrize( + "X, unspliced, spliced", + ( + ( + np.triu(np.ones(6), k=0), + np.triu(np.ones(6), k=0), + np.triu(np.ones(6), k=0), + ), + ( + csr_matrix(np.triu(np.ones(6), k=0)), + csr_matrix(np.triu(np.ones(6), k=0)), + csr_matrix(np.triu(np.ones(6), k=0)), + ), + ), + ) + @pytest.mark.parametrize( + "min_cells, filtered_out_low_spliced", + ( + (None, np.ones(6, dtype=bool)), + (1, np.ones(6, dtype=bool)), + (2, np.array([False, True, True, True, True, True])), + (3, np.array([False, False, True, True, True, True])), + (4, np.array([False, False, False, True, True, True])), + ), + ) + @pytest.mark.parametrize( + "max_cells, filtered_out_upper_spliced", + ( + (None, np.ones(6, dtype=bool)), + (5, np.array([True, True, True, True, True, False])), + (6, np.ones(6, dtype=bool)), + (7, np.ones(6, dtype=bool)), + ), + ) + @pytest.mark.parametrize( + "min_cells_u, filtered_out_low_unspliced", + ( + (None, np.ones(6, dtype=bool)), + (1, np.ones(6, dtype=bool)), + (2, np.array([False, True, True, True, True, True])), + (3, np.array([False, False, True, True, True, True])), + (4, np.array([False, False, False, True, True, True])), + ), + ) + @pytest.mark.parametrize( + "max_cells_u, filtered_out_upper_unspliced", + ( + (None, np.ones(6, dtype=bool)), + (5, np.array([True, True, True, True, True, False])), + (6, np.ones(6, dtype=bool)), + (7, np.ones(6, dtype=bool)), + ), + ) + def test_min_max_cells_filter( + self, + X: Union[np.ndarray, spmatrix], + unspliced: Union[np.ndarray, spmatrix], + spliced: Union[np.ndarray, spmatrix], + min_cells, + max_cells, + min_cells_u, + max_cells_u, + filtered_out_low_spliced, + filtered_out_upper_spliced, + filtered_out_low_unspliced, + filtered_out_upper_unspliced, + ): + var_names = pd.Index([f"var_{var_id}" for var_id in range(X.shape[1])]) + adata = AnnData( + X=X, + 
layers={"unspliced": unspliced, "spliced": spliced}, + var=pd.DataFrame(index=var_names), + ) + + filter_genes( + adata, + min_cells=min_cells, + max_counts=max_cells, + min_cells_u=min_cells_u, + max_cells_u=max_cells_u, + ) + + assert pd.Index.equals( + adata.var_names, + var_names[ + filtered_out_low_spliced + & filtered_out_upper_spliced + & filtered_out_low_unspliced + & filtered_out_upper_unspliced + ], + ) + + @pytest.mark.parametrize( + "X, unspliced, spliced", + ( + ( + np.triu(np.ones(6), k=0), + np.triu(np.ones(6), k=0) + + np.diag([1, 2, -1, 1, 4, 2]) + + np.diag([0, 1, 0, 2, 1], k=-1), + np.triu(np.ones(6), k=0), + ), + ( + csr_matrix(np.triu(np.ones(6), k=0)), + csr_matrix( + np.triu(np.ones(6), k=0) + + np.diag([1, 2, -1, 1, 4, 2]) + + np.diag([0, 1, 0, 2, 1], k=-1) + ), + csr_matrix(np.triu(np.ones(6), k=0)), + ), + ), + ) + @pytest.mark.parametrize( + "min_shared_counts, filter_mask_shared_counts", + ( + (None, np.ones(6, dtype=bool)), + (0, np.ones(6, dtype=bool)), + (3, np.array([True, True, True, True, True, True])), + (4, np.array([False, True, True, True, True, True])), + (6, np.array([False, True, False, True, True, True])), + ), + ) + @pytest.mark.parametrize( + "min_shared_cells, filter_mask_shared_cells", + ( + (None, np.ones(6, dtype=bool)), + (1, np.ones(6, dtype=bool)), + (2, np.array([False, True, True, True, True, True])), + (4, np.array([False, False, False, True, True, True])), + ), + ) + def test_shared_counts_cells_filter( + self, + X: Union[np.ndarray, spmatrix], + unspliced: Union[np.ndarray, spmatrix], + spliced: Union[np.ndarray, spmatrix], + min_shared_counts, + filter_mask_shared_counts, + min_shared_cells, + filter_mask_shared_cells, + ): + var_names = pd.Index([f"var_{var_id}" for var_id in range(X.shape[1])]) + adata = AnnData( + X=X, + layers={"unspliced": unspliced, "spliced": spliced}, + var=pd.DataFrame(index=var_names), + ) + + filter_genes( + adata, + min_shared_counts=min_shared_counts, + 
min_shared_cells=min_shared_cells, + ) + + assert pd.Index.equals( + adata.var_names, + var_names[filter_mask_shared_counts & filter_mask_shared_cells], + ) + + @pytest.mark.parametrize( + "min_counts, min_counts_u, n_vars_filtered_out_spliced, " + "n_vars_filtered_out_unspliced", + ((0, 1, 0, 0), (2, 0, 1, 0), (0, 3, 0, 2), (4, 3, 3, 0)), + ) + def test_min_counts_logging( + self, + capfd, + min_counts: int, + min_counts_u: int, + n_vars_filtered_out_spliced: int, + n_vars_filtered_out_unspliced: int, + ): + expected_log = "" + if n_vars_filtered_out_spliced > 0: + expected_log += ( + f"Filtered out {n_vars_filtered_out_spliced} genes that are detected " + f"{min_counts} counts (spliced).\n" + ) + if n_vars_filtered_out_unspliced > 0: + expected_log += ( + f"Filtered out {n_vars_filtered_out_unspliced} genes that are detected " + f"{min_counts_u} counts (unspliced).\n" + ) + + adata = AnnData( + X=np.triu(np.ones(6), k=0), + layers={ + "unspliced": np.triu(np.ones(6), k=0), + "spliced": np.triu(np.ones(6), k=0), + }, + ) + + filter_genes(adata, min_counts=min_counts, min_counts_u=min_counts_u) + actual_log, _ = capfd.readouterr() + + assert actual_log == expected_log + + @pytest.mark.parametrize( + "max_counts, max_counts_u, n_vars_filtered_out_spliced, " + "n_vars_filtered_out_unspliced", + ((6, 7, 0, 0), (5, 6, 1, 0), (6, 4, 0, 2), (4, 3, 2, 1)), + ) + def test_max_counts_logging( + self, + capfd, + max_counts: int, + max_counts_u: int, + n_vars_filtered_out_spliced: int, + n_vars_filtered_out_unspliced: int, + ): + expected_log = "" + if n_vars_filtered_out_spliced > 0: + expected_log += ( + f"Filtered out {n_vars_filtered_out_spliced} genes that are detected " + f"{max_counts} counts (spliced).\n" + ) + if n_vars_filtered_out_unspliced > 0: + expected_log += ( + f"Filtered out {n_vars_filtered_out_unspliced} genes that are detected " + f"{max_counts_u} counts (unspliced).\n" + ) + + adata = AnnData( + X=np.triu(np.ones(6), k=0), + layers={ + "unspliced": 
np.triu(np.ones(6), k=0), + "spliced": np.triu(np.ones(6), k=0), + }, + ) + + filter_genes(adata, max_counts=max_counts, max_counts_u=max_counts_u) + actual_log, _ = capfd.readouterr() + + assert actual_log == expected_log + + @pytest.mark.parametrize( + "min_cells, min_cells_u, n_vars_filtered_out_spliced, " + "n_vars_filtered_out_unspliced", + ((0, 1, 0, 0), (2, 0, 1, 0), (0, 3, 0, 2), (4, 3, 3, 0)), + ) + def test_min_cells_logging( + self, + capfd, + min_cells: int, + min_cells_u: int, + n_vars_filtered_out_spliced: int, + n_vars_filtered_out_unspliced: int, + ): + expected_log = "" + if n_vars_filtered_out_spliced > 0: + expected_log += ( + f"Filtered out {n_vars_filtered_out_spliced} genes that are detected " + f"in less than {min_cells} cells (spliced).\n" + ) + if n_vars_filtered_out_unspliced > 0: + expected_log += ( + f"Filtered out {n_vars_filtered_out_unspliced} genes that are detected " + f"in less than {min_cells_u} cells (unspliced).\n" + ) + + adata = AnnData( + X=np.triu(np.ones(6), k=0), + layers={ + "unspliced": np.triu(np.ones(6), k=0), + "spliced": np.triu(np.ones(6), k=0), + }, + ) + + filter_genes(adata, min_cells=min_cells, min_cells_u=min_cells_u) + actual_log, _ = capfd.readouterr() + + assert actual_log == expected_log + + @pytest.mark.parametrize( + "max_cells, max_cells_u, n_vars_filtered_out_spliced, " + "n_vars_filtered_out_unspliced", + ((6, 7, 0, 0), (5, 6, 1, 0), (6, 4, 0, 2), (4, 3, 2, 1)), + ) + def test_max_cells_logging( + self, + capfd, + max_cells: int, + max_cells_u: int, + n_vars_filtered_out_spliced: int, + n_vars_filtered_out_unspliced: int, + ): + expected_log = "" + if n_vars_filtered_out_spliced > 0: + expected_log += ( + f"Filtered out {n_vars_filtered_out_spliced} genes that are detected " + f"in more than {max_cells} cells (spliced).\n" + ) + if n_vars_filtered_out_unspliced > 0: + expected_log += ( + f"Filtered out {n_vars_filtered_out_unspliced} genes that are detected " + f"in more than {max_cells_u} cells 
(unspliced).\n" + ) + + adata = AnnData( + X=np.triu(np.ones(6), k=0), + layers={ + "unspliced": np.triu(np.ones(6), k=0), + "spliced": np.triu(np.ones(6), k=0), + }, + ) + + filter_genes(adata, max_cells=max_cells, max_cells_u=max_cells_u) + actual_log, _ = capfd.readouterr() + + assert actual_log == expected_log + + +class TestFilterGenesDispersion: + @given( + adata=get_adata( + max_obs=5, + max_vars=5, + ), + subset=st.booleans(), + copy=st.booleans(), + ) + def test_subsetting_and_copy(self, adata: AnnData, subset: bool, copy: bool): + original_n_obs = adata.n_obs + + returned_adata = filter_genes_dispersion(data=adata, subset=subset, copy=copy) + + if copy: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + if subset: + assert original_n_obs <= adata.n_obs + else: + assert original_n_obs == adata.n_obs + + def test_wrong_flavor(self): + with pytest.raises( + ValueError, match=r'`flavor` needs to be "seurat" or "cell_ranger"' + ): + filter_genes_dispersion(data=AnnData(np.eye(2)), flavor="random_flavor") + + @pytest.mark.parametrize( + "flavor, n_top_genes, vars_after_filter", + ( + ("svr", 5, pd.Index(["Ppy", "Ppp1r1a", "Gcg", "Nnat", "Ins2"])), + ( + "svr", + 10, + pd.Index( + [ + "Ppy", + "Ppp1r1a", + "Lrpprc", + "Ttr", + "Rbp4", + "Gcg", + "Chgb", + "Nnat", + "Iapp", + "Ins2", + ] + ), + ), + ("seurat", 5, pd.Index(["Ppy", "Ppp1r1a", "Nnat", "Cyr61", "Tyms"])), + ( + "seurat", + 10, + pd.Index( + [ + "Dtymk", + "Col18a1", + "Ppy", + "Ppp1r1a", + "Lrpprc", + "Gcg", + "Nnat", + "Cyr61", + "Tyms", + "Hmgb2", + ] + ), + ), + ("cell_ranger", 5, pd.Index(["Ppy", "Ppp1r1a", "Rbp4", "Nnat", "Ins2"])), + ( + "cell_ranger", + 10, + pd.Index( + [ + "Ppy", + "Cdc14b", + "Ghr", + "Ppp1r1a", + "Ttr", + "Rbp4", + "Gcg", + "Nnat", + "Spp1", + "Ins2", + ] + ), + ), + ), + ) + def test_n_top_genes_pancreas_50obs( + self, + capfd, + pancreas_50obs: AnnData, + flavor: str, + n_top_genes: int, + vars_after_filter: 
pd.Index, + ): + adata = filter_genes(pancreas_50obs, min_shared_counts=20, copy=True) + filter_genes_dispersion(adata, flavor=flavor, n_top_genes=n_top_genes) + + assert adata.shape == (50, n_top_genes) + assert adata.var_names.equals(vars_after_filter) + if flavor == "svr": + assert adata.var.columns.equals(pd.Index(["highly_variable"])) + elif flavor == "seurat": + assert adata.var.columns.equals( + pd.Index( + ["means", "dispersions", "dispersions_norm", "highly_variable"] + ) + ) + + expected_log = ( + "Filtered out 27530 genes that are detected 20 counts (shared).\n" + f"Extracted {n_top_genes} highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize( + "flavor, n_top_genes, vars_after_filter", + ( + ("svr", 5, pd.Index(["Ppy", "Sst", "Gcg", "Ghrl", "Ins2"])), + ( + "svr", + 10, + pd.Index( + [ + "Ppy", + "Ppp1r1a", + "Sst", + "Gcg", + "Chgb", + "Nnat", + "Ghrl", + "Iapp", + "Ins2", + "Cck", + ] + ), + ), + ("seurat", 5, pd.Index(["Cdk1", "Top2a", "Ppy", "Ppp1r1a", "Stmn1"])), + ( + "seurat", + 10, + pd.Index( + [ + "Cdk1", + "Top2a", + "Ppy", + "Ppp1r1a", + "Sst", + "Papss2", + "Igfbpl1", + "Stmn1", + "Tyms", + "Sytl4", + ] + ), + ), + ("cell_ranger", 5, pd.Index(["Ppy", "Sst", "Gcg", "Ghrl", "Iapp"])), + ( + "cell_ranger", + 10, + pd.Index( + [ + "Cdk1", + "Ppy", + "Pyy", + "Ppp1r1a", + "Sst", + "Gcg", + "Igfbpl1", + "Ghrl", + "Iapp", + "Sytl4", + ] + ), + ), + ), + ) + def test_n_top_genes_pancreas_100obs( + self, + capfd, + pancreas_100obs: AnnData, + flavor: str, + n_top_genes: int, + vars_after_filter: pd.Index, + ): + adata = filter_genes(pancreas_100obs, min_shared_counts=20, copy=True) + filter_genes_dispersion(adata, flavor=flavor, n_top_genes=n_top_genes) + + assert adata.shape == (100, n_top_genes) + assert adata.var_names.equals(vars_after_filter) + if flavor == "svr": + assert adata.var.columns.equals(pd.Index(["highly_variable"])) + elif flavor == "seurat": + assert 
adata.var.columns.equals( + pd.Index( + ["means", "dispersions", "dispersions_norm", "highly_variable"] + ) + ) + + expected_log = ( + "Filtered out 27029 genes that are detected 20 counts (shared).\n" + f"Extracted {n_top_genes} highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize( + "flavor, n_top_genes, vars_after_filter", + ( + ("svr", 5, pd.Index(["Cxcl14", "Atp9b", "Nlgn1", "Sirt2", "Slc17a7"])), + ( + "svr", + 10, + pd.Index( + [ + "Adarb2", + "Cxcl14", + "Atp9b", + "Gad2", + "Nlgn1", + "Dab1", + "Tmsb10", + "Sirt2", + "Slc17a7", + "Spock3", + ] + ), + ), + ("seurat", 5, pd.Index(["Adarb2", "Cxcl14", "Atp9b", "Nlgn1", "Sirt2"])), + ( + "seurat", + 10, + pd.Index( + [ + "Syt1", + "Prkca", + "Adarb2", + "Cxcl14", + "Atp9b", + "Apba1", + "Nlgn1", + "Igfbpl1", + "Sirt2", + "Slc17a7", + ] + ), + ), + ( + "cell_ranger", + 5, + pd.Index(["Gria1", "Adarb2", "Atp9b", "Nlgn1", "Sirt2"]), + ), + ( + "cell_ranger", + 10, + pd.Index( + [ + "Gria1", + "Adarb2", + "Cxcl14", + "Atp9b", + "Rtn3", + "Nlgn1", + "Tmsb10", + "Sirt2", + "Spock3", + "Rps25", + ] + ), + ), + ), + ) + def test_n_top_genes_dentategyrus_50obs( + self, + capfd, + dentategyrus_50obs: AnnData, + flavor: str, + n_top_genes: int, + vars_after_filter: pd.Index, + ): + adata = filter_genes(dentategyrus_50obs, min_shared_counts=20, copy=True) + filter_genes_dispersion(adata, flavor=flavor, n_top_genes=n_top_genes) + + assert adata.shape == (50, n_top_genes) + assert adata.var_names.equals(vars_after_filter) + if flavor == "svr": + assert adata.var.columns.equals(pd.Index(["highly_variable"])) + elif flavor == "seurat": + assert adata.var.columns.equals( + pd.Index( + ["means", "dispersions", "dispersions_norm", "highly_variable"] + ) + ) + + expected_log = ( + "Filtered out 13719 genes that are detected 20 counts (shared).\n" + f"Extracted {n_top_genes} highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert 
actual_log == expected_log + + @pytest.mark.parametrize( + "flavor, n_top_genes, vars_after_filter", + ( + ("svr", 5, pd.Index(["Cxcl14", "Atp9b", "Cst3", "Npy", "Sirt2"])), + ( + "svr", + 10, + pd.Index( + [ + "Cxcl14", + "Atp9b", + "Gad2", + "Slc1a2", + "Cst3", + "Npy", + "Sirt2", + "Hs3st4", + "Spock3", + "Sgcz", + ] + ), + ), + ("seurat", 5, pd.Index(["Adarb2", "Cxcl14", "Atp9b", "Npy", "Cpne4"])), + ( + "seurat", + 10, + pd.Index( + [ + "Adarb2", + "Cxcl14", + "Atp9b", + "Gad2", + "Slc1a2", + "Nlgn1", + "Npy", + "Sirt2", + "Hs3st4", + "Cpne4", + ] + ), + ), + ( + "cell_ranger", + 5, + pd.Index(["Adarb2", "Atp9b", "Gad2", "Sirt2", "Spock3"]), + ), + ( + "cell_ranger", + 10, + pd.Index( + [ + "Adarb2", + "Cxcl14", + "Atp9b", + "Gad2", + "Cst3", + "Nlgn1", + "Tmsb10", + "Sirt2", + "Hs3st4", + "Spock3", + ] + ), + ), + ), + ) + def test_n_top_genes_dentategyrus_100obs( + self, + capfd, + dentategyrus_100obs: AnnData, + flavor: str, + n_top_genes: int, + vars_after_filter: pd.Index, + ): + adata = filter_genes(dentategyrus_100obs, min_shared_counts=20, copy=True) + filter_genes_dispersion(adata, flavor=flavor, n_top_genes=n_top_genes) + + assert adata.shape == (100, n_top_genes) + assert adata.var_names.equals(vars_after_filter) + if flavor == "svr": + assert adata.var.columns.equals(pd.Index(["highly_variable"])) + elif flavor == "seurat": + assert adata.var.columns.equals( + pd.Index( + ["means", "dispersions", "dispersions_norm", "highly_variable"] + ) + ) + + expected_log = ( + "Filtered out 13615 genes that are detected 20 counts (shared).\n" + f"Extracted {n_top_genes} highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize( + "flavor, min_disp, max_disp, min_mean, max_mean, vars_after_filter", + ( + ("seurat", None, None, None, None, pd.Index([])), + ( + "seurat", + 0.0125, + np.inf, + 0.004, + 3, + pd.Index(["Pyy", "Gcg", "Gnas", "Iapp", "Rps9"]), + ), + ("cell_ranger", None, None, 
None, None, pd.Index([])), + ( + "cell_ranger", + 0.0125, + np.inf, + 0.0055, + 3, + pd.Index(["Pyy", "Malat1", "Iapp", "Rpl13a"]), + ), + ), + ) + def test_min_max_disp_min_max_mean_pancreas_50obs( + self, + capfd, + pancreas_50obs: AnnData, + min_disp: float, + max_disp: float, + min_mean: float, + max_mean: float, + flavor: str, + vars_after_filter: pd.Index, + ): + adata = filter_genes(pancreas_50obs, min_shared_counts=20, copy=True) + normalize_per_cell(adata, counts_per_cell_after=1) + log1p(adata) + + filter_genes_dispersion( + adata, + flavor=flavor, + min_disp=min_disp, + max_disp=max_disp, + min_mean=min_mean, + max_mean=max_mean, + ) + + assert adata.shape == (50, len(vars_after_filter)) + assert adata.var_names.equals(vars_after_filter) + if flavor == "svr": + assert adata.var.columns.equals(pd.Index(["highly_variable"])) + elif flavor == "seurat": + assert adata.var.columns.equals( + pd.Index( + ["means", "dispersions", "dispersions_norm", "highly_variable"] + ) + ) + + expected_log = ( + "Filtered out 27530 genes that are detected 20 counts (shared).\n" + "Normalized count data: X, spliced, unspliced.\n" + f"Extracted {len(vars_after_filter)} highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize( + "flavor, min_disp, max_disp, min_mean, max_mean, vars_after_filter", + ( + ("seurat", None, None, None, None, pd.Index([])), + ( + "seurat", + 0.0125, + np.inf, + 0.005, + 3, + pd.Index(["Pyy", "Malat1", "Iapp", "Rpl13a"]), + ), + ("cell_ranger", None, None, None, None, pd.Index([])), + ( + "cell_ranger", + 0.0125, + np.inf, + 0.0055, + 3, + pd.Index(["Pyy", "Malat1", "Rpl13a"]), + ), + ), + ) + def test_min_max_disp_min_max_mean_pancreas_100obs( + self, + capfd, + pancreas_100obs: AnnData, + min_disp: float, + max_disp: float, + min_mean: float, + max_mean: float, + flavor: str, + vars_after_filter: pd.Index, + ): + adata = filter_genes(pancreas_100obs, min_shared_counts=20, 
copy=True) + normalize_per_cell(adata, counts_per_cell_after=1) + log1p(adata) + + filter_genes_dispersion( + adata, + flavor=flavor, + min_disp=min_disp, + max_disp=max_disp, + min_mean=min_mean, + max_mean=max_mean, + ) + + assert adata.shape == (100, len(vars_after_filter)) + assert adata.var_names.equals(vars_after_filter) + if flavor == "svr": + assert adata.var.columns.equals(pd.Index(["highly_variable"])) + elif flavor == "seurat": + assert adata.var.columns.equals( + pd.Index( + ["means", "dispersions", "dispersions_norm", "highly_variable"] + ) + ) + + expected_log = ( + "Filtered out 27029 genes that are detected 20 counts (shared).\n" + "Normalized count data: X, spliced, unspliced.\n" + f"Extracted {len(vars_after_filter)} highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize( + "flavor, min_disp, max_disp, min_mean, max_mean, vars_after_filter", + ( + ("seurat", None, None, None, None, pd.Index([])), + ("seurat", 0.0125, np.inf, 0.005, 3, pd.Index(["Ubb", "Fth1"])), + ("cell_ranger", None, None, None, None, pd.Index([])), + ( + "cell_ranger", + 0.0125, + np.inf, + 0.005, + 3, + pd.Index(["Ubb", "Rpl3", "Fth1"]), + ), + ), + ) + def test_min_max_disp_min_max_mean_dentategyrus_50obs( + self, + capfd, + dentategyrus_50obs: AnnData, + flavor: str, + min_disp: float, + max_disp: float, + min_mean: float, + max_mean: float, + vars_after_filter: pd.Index, + ): + adata = filter_genes(dentategyrus_50obs, min_shared_counts=20, copy=True) + normalize_per_cell(adata, counts_per_cell_after=1) + log1p(adata) + + filter_genes_dispersion( + adata, + flavor=flavor, + min_disp=min_disp, + max_disp=max_disp, + min_mean=min_mean, + max_mean=max_mean, + ) + + assert adata.shape == (50, len(vars_after_filter)) + assert adata.var_names.equals(vars_after_filter) + if flavor == "svr": + assert adata.var.columns.equals(pd.Index(["highly_variable"])) + elif flavor == "seurat": + assert 
adata.var.columns.equals( + pd.Index( + ["means", "dispersions", "dispersions_norm", "highly_variable"] + ) + ) + + expected_log = ( + "Filtered out 13719 genes that are detected 20 counts (shared).\n" + "Normalized count data: X, spliced, unspliced.\n" + f"Extracted {len(vars_after_filter)} highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize( + "flavor, min_disp, max_disp, min_mean, max_mean, vars_after_filter", + ( + ("seurat", None, None, None, None, pd.Index([])), + ("seurat", 0.0125, np.inf, 0.005, 3, pd.Index(["Ubb", "Rpl3", "Fth1"])), + ("cell_ranger", None, None, None, None, pd.Index([])), + ("cell_ranger", 0.0125, np.inf, 0.005, 3, pd.Index(["Ubb", "Fth1"])), + ), + ) + def test_min_max_disp_min_max_mean_dentategyrus_100obs( + self, + capfd, + dentategyrus_100obs: AnnData, + flavor: str, + min_disp: float, + max_disp: float, + min_mean: float, + max_mean: float, + vars_after_filter: pd.Index, + ): + adata = filter_genes(dentategyrus_100obs, min_shared_counts=20, copy=True) + normalize_per_cell(adata, counts_per_cell_after=1) + log1p(adata) + + filter_genes_dispersion( + adata, + flavor=flavor, + min_disp=min_disp, + max_disp=max_disp, + min_mean=min_mean, + max_mean=max_mean, + ) + + assert adata.shape == (100, len(vars_after_filter)) + assert adata.var_names.equals(vars_after_filter) + if flavor == "svr": + assert adata.var.columns.equals(pd.Index(["highly_variable"])) + elif flavor == "seurat": + assert adata.var.columns.equals( + pd.Index( + ["means", "dispersions", "dispersions_norm", "highly_variable"] + ) + ) + + expected_log = ( + "Filtered out 13615 genes that are detected 20 counts (shared).\n" + "Normalized count data: X, spliced, unspliced.\n" + f"Extracted {len(vars_after_filter)} highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + @pytest.mark.parametrize("retain_genes", ("Tram1", ["Tram1", "Ankrd44", 
"Cryba2"])) + def test_retain_genes( + self, capfd, pancreas_50obs: AnnData, retain_genes: Union[str, List[str]] + ): + adata = filter_genes(pancreas_50obs, min_shared_counts=20, copy=True) + normalize_per_cell(adata, counts_per_cell_after=1) + log1p(adata) + + filter_genes_dispersion( + adata, flavor="seurat", retain_genes=retain_genes, n_top_genes=2 + ) + + if isinstance(retain_genes, str): + retain_genes = [retain_genes] + + assert adata.shape == (50, len(retain_genes + ["Ppy", "Lrpprc"])) + assert adata.var_names.equals(pd.Index(retain_genes + ["Ppy", "Lrpprc"])) + + expected_log = ( + "Filtered out 27530 genes that are detected 20 counts (shared).\n" + "Normalized count data: X, spliced, unspliced.\n" + f"Extracted {len(retain_genes + ['Ppy', 'Lrpprc'])} highly variable " + "genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + def test_more_n_top_genes_than_vars(self, capfd, pancreas_50obs: AnnData): + filter_genes_dispersion(pancreas_50obs, n_top_genes=100000) + + expected_log = ( + "Skip filtering by dispersion since number of variables are less than " + "`n_top_genes`.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + def test_passing_n_top_genes_and_mean_disp(self, capfd, pancreas_50obs: AnnData): + filter_genes_dispersion(pancreas_50obs, n_top_genes=100, min_mean=0, max_mean=1) + + expected_log = ( + "If you pass `n_top_genes`, all cutoffs are ignored.\n" + "Extracted 100 highly variable genes.\n" + ) + actual_log, _ = capfd.readouterr() + assert actual_log == expected_log + + +class TestLog1p: + @given(adata=get_adata(max_obs=5, max_vars=5), copy=st.booleans()) + def test_dense_adata(self, adata: AnnData, copy: bool): + original_X = adata.X.copy() + returned_adata = log1p(data=adata, copy=copy) + + if copy: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + np.testing.assert_almost_equal(adata.X, np.log1p(original_X)) + + 
@given( + adata=get_adata(max_obs=5, max_vars=5, sparse_entries=True), copy=st.booleans() + ) + def test_sparse_adata(self, adata: AnnData, copy: bool): + original_X = adata.X.copy() + returned_adata = log1p(data=adata, copy=copy) + + if copy: + assert isinstance(returned_adata, AnnData) + adata = returned_adata + else: + assert returned_adata is None + + np.testing.assert_almost_equal(adata.X.data, np.log1p(original_X.data)) + + @given( + data=arrays( + float, + shape=st.tuples( + st.integers(min_value=1, max_value=100), + st.integers(min_value=1, max_value=100), + ), + elements=st.floats( + min_value=0, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ), + copy=st.booleans(), + ) + def test_array(self, data: np.ndarray, copy: bool): + original_array = data.copy() + returned_data = log1p(data=data, copy=copy) + + if copy: + assert isinstance(returned_data, np.ndarray) + data = returned_data + else: + assert returned_data is None + + np.testing.assert_almost_equal(data, np.log1p(original_array)) + + +class TestMaterializeAsNdarray: + @pytest.mark.parametrize( + "key", (np.array([0, 1, 2]), np.eye(2), np.ones(shape=(2, 3, 4))) + ) + def test_array(self, key): + arr = materialize_as_ndarray(key) + + assert isinstance(arr, np.ndarray) + + @pytest.mark.parametrize( + "key", + ( + [np.array([0, 1, 2])], + [np.eye(2), np.ones(shape=(2, 3, 4))], + (np.eye(2), np.ones(shape=(2, 3, 4))), + [0, 1, 2], + (0, 1, 2), + [["a", "b"], [1.3, 2.7], np.zeros(shape=(2, 3))], + (["a", "b"], [1.3, 2.7], np.zeros(shape=(2, 3))), + (("a", "b"), [1.3, 2.7], np.zeros(shape=(2, 3))), + ), + ) + def test_list(self, key): + arr = materialize_as_ndarray(key) + + assert all(isinstance(entry, np.ndarray) for entry in arr) + + +class TestNormalizePerCell: + @given(data=st.data(), adata=get_adata(max_obs=5, max_vars=5)) + def test_target_sum_dense(self, data, adata): + counts_per_cell_after = data.draw( + arrays( + float, + shape=st.integers(min_value=adata.n_obs, max_value=adata.n_obs), 
+ elements=st.floats( + min_value=1e-3, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ) + ) + zero_obs = (adata.X == 0).all(axis=1) + + normalize_per_cell(adata, counts_per_cell_after=counts_per_cell_after) + + assert adata.X[zero_obs, :].sum() == 0 + if np.any(~zero_obs): + np.testing.assert_almost_equal( + adata.X[~zero_obs, :].sum(axis=1), + counts_per_cell_after[~zero_obs], + decimal=4, + ) + + @given(data=st.data(), adata=get_adata(max_obs=5, max_vars=5, sparse_entries=True)) + def test_target_sum_sparse(self, data, adata): + counts_per_cell_after = data.draw( + arrays( + float, + shape=st.integers(min_value=adata.n_obs, max_value=adata.n_obs), + elements=st.floats( + min_value=1e-3, max_value=1e3, allow_infinity=False, allow_nan=False + ), + ) + ) + zero_obs = adata.X.getnnz(axis=1) == 0 + + normalize_per_cell(adata, counts_per_cell_after=counts_per_cell_after) + + assert adata.X[zero_obs, :].sum() == 0 + if np.any(~zero_obs): + np.testing.assert_almost_equal( + adata.X[~zero_obs, :].sum(axis=1).A1, + counts_per_cell_after[~zero_obs], + decimal=4, + ) + + @pytest.mark.parametrize( + "X, add_column", + ( + (np.eye(3), False), + (np.ones((3, 10000)), True), + (1e-2 * np.ones((3, 10000)), False), + ), + ) + def test_adding_gene_count_corr(self, X, add_column): + adata = AnnData(X=X) + normalize_per_cell(adata) + + if add_column: + assert "gene_count_corr" in adata.var.columns + else: + assert "gene_count_corr" not in adata.var.columns + + @pytest.mark.parametrize( + "X", (np.eye(3), csr_matrix(np.eye(3)), csc_matrix(np.eye(3))) + ) + @pytest.mark.parametrize( + "obs, counts_per_cell, normed_counts", + ( + (None, None, np.eye(3)), + (None, "cell_size", np.eye(3)), + (pd.DataFrame({"cell_size": 2 * np.ones(3)}), "cell_size", np.eye(3)), + ( + pd.DataFrame( + {"cell_size": 2 * np.ones(3), "initial_size": np.array([2, 1, 2])} + ), + "initial_size", + np.diag([1, 2, 1]), + ), + ), + ) + def test_counts_per_cell_size(self, X, obs, counts_per_cell, 
normed_counts): + adata = AnnData(X=X, obs=obs) + + normalize_per_cell(adata, counts_per_cell=counts_per_cell) + if issparse(adata.X): + assert issparse(adata.X) + np.testing.assert_almost_equal(adata.X.A, normed_counts) + else: + np.testing.assert_almost_equal(adata.X, normed_counts) + + @pytest.mark.parametrize( + "X", (np.eye(3), csr_matrix(np.eye(3)), csc_matrix(np.eye(3))) + ) + @pytest.mark.parametrize( + "obs, use_initial_size, normed_counts", + ( + (None, True, np.eye(3)), + (None, False, np.eye(3)), + ( + pd.DataFrame({"initial_size": np.array([2, 1, 2])}), + True, + np.diag([1, 2, 1]), + ), + (pd.DataFrame({"initial_size": np.array([2, 1, 2])}), False, np.eye(3)), + ), + ) + def test_use_initial_size(self, X, obs, use_initial_size, normed_counts): + adata = AnnData(X=X, obs=obs) + + normalize_per_cell(adata, use_initial_size=use_initial_size) + if issparse(adata.X): + assert issparse(adata.X) + np.testing.assert_almost_equal(adata.X.A, normed_counts) + else: + np.testing.assert_almost_equal(adata.X, normed_counts) + + @pytest.mark.parametrize( + "X", (np.eye(2), csr_matrix(np.eye(2)), csc_matrix(np.eye(2))) + ) + @pytest.mark.parametrize( + "layers", + ( + {"unspliced": np.triu(np.ones((2, 2)), k=0)}, + { + "unspliced": csr_matrix(np.triu(np.ones((2, 2)), k=0)), + "spliced": csc_matrix(np.triu(np.ones((2, 2)), k=0)), + }, + { + "random_name": csr_matrix(np.triu(np.ones((2, 2)), k=0)), + "spliced": csr_matrix(np.triu(np.ones((2, 2)), k=0)), + }, + ), + ) + @pytest.mark.parametrize( + "layers_to_normalize", + ( + None, + "all", + "unspliced", + "random_name", + ["spliced"], + ["unspliced", "random_name"], + ["random_name", "spliced"], + ["random_name", "non_existing_layer"], + ), + ) + def test_layers(self, X, layers, layers_to_normalize): + adata = AnnData(X=X, layers=layers) + + normalize_per_cell( + data=adata, + layers=layers_to_normalize, + counts_per_cell_after=np.array([1, 0.5]), + ) + + if issparse(X): + assert issparse(adata.X) + 
np.testing.assert_almost_equal(adata.X.A, np.diag([1, 0.5])) + else: + np.testing.assert_almost_equal(adata.X, np.diag([1, 0.5])) + + if layers_to_normalize is None: + layers_to_normalize = ["unspliced", "spliced"] + elif layers_to_normalize == "all": + layers_to_normalize = [*adata.layers] + elif isinstance(layers_to_normalize, str): + layers_to_normalize = [layers_to_normalize] + + normalized_layer = np.array([[0.5, 0.5], [0, 0.5]]) + for layer in adata.layers: + if layer in layers_to_normalize: + if issparse(layers[layer]): + assert issparse(adata.layers[layer]) + np.testing.assert_almost_equal( + adata.layers[layer].A, normalized_layer + ) + else: + np.testing.assert_almost_equal( + adata.layers[layer], normalized_layer + ) + else: + if issparse(layers[layer]): + assert issparse(adata.layers[layer]) + np.testing.assert_almost_equal( + adata.layers[layer].A, layers[layer].A + ) + else: + np.testing.assert_almost_equal(adata.layers[layer], layers[layer]) + + @pytest.mark.parametrize( + "X, max_proportion_per_cell, normalization_constant", + ( + ( + np.array([[1, 4, 95], [4, 7, 89]]), + 0.9, + np.array([5 / 8, 11 / 8])[:, None], + ), + (np.array([[1, 4, 95], [4, 7, 89]]), 1, 1), + ), + ) + def test_max_proportion_per_cell( + self, X, max_proportion_per_cell, normalization_constant + ): + adata = AnnData(X) + normalize_per_cell(adata, max_proportion_per_cell=max_proportion_per_cell) + + np.testing.assert_almost_equal(adata.X, X / normalization_constant, decimal=6) + + @pytest.mark.parametrize( + "X, X_normalized", ((np.eye(3), False), (0.01 * np.eye(3), True)) + ) + def test_logging(self, capfd, X, X_normalized): + if X_normalized: + expected_log = ( + "WARNING: Did not normalize X as it looks processed already. 
To " + "enforce normalization, set `enforce=True`.\n" + ) + else: + expected_log = "Normalized count data: X.\n" + + adata = AnnData(X=X) + normalize_per_cell(adata) + + actual_log, _ = capfd.readouterr() + + assert actual_log == expected_log + + +class TestNotYetNormalized: + @pytest.mark.parametrize( + "X, normalized", + ( + ( + (np.eye(3), True), + (1.001 * np.eye(3), True), + (csr_matrix(np.eye(3)), True), + (1.01 * np.eye(3), False), + (0.1 * np.eye(3), False), + (csr_matrix(0.1 * np.eye(3)), False), + ) + ), + ) + def test_not_yet_normalized(self, X: Union[np.ndarray, spmatrix], normalized: bool): + normalize_check = not_yet_normalized(X) + + assert normalized == normalize_check + + +class TestRecipeVelocity: + def test_pancreas50obs(self, capfd, pancreas_50obs): + recipe_velocity(pancreas_50obs) + + assert pancreas_50obs.shape == (50, 3571) + assert pancreas_50obs.obs.columns.equals( + pd.Index( + [ + "initial_size_unspliced", + "initial_size_spliced", + "initial_size", + "n_counts", + ] + ) + ) + assert pancreas_50obs.var.columns.equals(pd.Index(["gene_count_corr"])) + assert [*pancreas_50obs.uns] == ["pca", "neighbors"] + assert [*pancreas_50obs.obsm] == ["X_pca"] + assert [*pancreas_50obs.varm] == ["PCs"] + assert [*pancreas_50obs.layers] == ["spliced", "unspliced", "Ms", "Mu"] + assert [*pancreas_50obs.obsp] == ["distances", "connectivities"] + + expected_log = ( + "Filtered out 19269 genes that are detected 3 counts (spliced).\n" + "Filtered out 5158 genes that are detected 3 counts (unspliced).\n" + "Normalized count data: X, spliced, unspliced.\n" + "Logarithmized X.\n" + "computing neighbors\n" + " finished (" + ) + + actual_log, _ = capfd.readouterr() + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n" + " 'distances' and 'connectivities', weighted adjacency matrices " + "(adata.obsp)\n" + "computing moments based on 
connectivities\n" + " finished (" + ) + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n 'Ms' and 'Mu', moments of un/spliced abundances " + "(adata.layers)\n" + ) + assert actual_log.startswith(expected_log) + + def test_dentategyrus50obs(self, capfd, dentategyrus_50obs): + recipe_velocity(dentategyrus_50obs) + + assert dentategyrus_50obs.shape == (50, 1150) + assert dentategyrus_50obs.obs.columns.equals( + pd.Index( + [ + "initial_size_unspliced", + "initial_size_spliced", + "initial_size", + "n_counts", + ] + ) + ) + assert dentategyrus_50obs.var.columns.equals(pd.Index([])) + assert [*dentategyrus_50obs.uns] == ["pca", "neighbors"] + assert [*dentategyrus_50obs.obsm] == ["X_pca"] + assert [*dentategyrus_50obs.varm] == ["PCs"] + assert [*dentategyrus_50obs.layers] == [ + "ambiguous", + "spliced", + "unspliced", + "Ms", + "Mu", + ] + assert [*dentategyrus_50obs.obsp] == ["distances", "connectivities"] + + expected_log = ( + "Filtered out 7068 genes that are detected 3 counts (spliced).\n" + "Filtered out 5695 genes that are detected 3 counts (unspliced).\n" + "Normalized count data: X, spliced, unspliced.\n" + "Logarithmized X.\n" + "computing neighbors\n" + " finished (" + ) + + actual_log, _ = capfd.readouterr() + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n" + " 'distances' and 'connectivities', weighted adjacency matrices " + "(adata.obsp)\n" + "computing moments based on connectivities\n" + " finished (" + ) + assert actual_log.startswith(expected_log) + + # `[7:]` removes execution time + actual_log = actual_log.split(expected_log)[1][7:] + expected_log = ( + ") --> added \n 'Ms' and 'Mu', moments of un/spliced abundances " + "(adata.layers)\n" + ) + assert actual_log.startswith(expected_log)