Fix neighbors calculation (#1233)

* Update Scanpy neighbor computation Remove redundant argument. * Update neighbor unit tests Update according to latest API changes in scvelo and scanpy. * Add deprecation warnings Add warning that neighbors and PCA will not be calculated automatically in the future. * Update ground truth data * Update unit tests to latest changes * Update assertion accuracy
theislab · Apr 12, 2024 · 6a0b63d · 6a0b63d
1 parent b2f31b3
commit 6a0b63d
Show file tree

Hide file tree

Showing 69 changed files with 72 additions and 132 deletions.
diff --git a/scvelo/preprocessing/moments.py b/scvelo/preprocessing/moments.py
@@ -1,3 +1,5 @@
+import warnings
+
 import numpy as np
 from scipy.sparse import csr_matrix, issparse
 
@@ -60,6 +62,12 @@ def moments(
         normalize_per_cell(adata)
 
     if n_neighbors is not None and n_neighbors > get_n_neighs(adata):
+        warnings.warn(
+            "Automatic neighbor calculation is deprecated since scvelo==0.4.0 and will be removed in a future version "
+            "of scVelo. Please compute neighbors first with Scanpy.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         neighbors(
             adata,
             n_neighbors=n_neighbors,

diff --git a/scvelo/preprocessing/neighbors.py b/scvelo/preprocessing/neighbors.py
@@ -37,7 +37,7 @@ def _get_scanpy_neighbors(adata: AnnData, **kwargs):
     with warnings.catch_warnings():  # ignore numba warning (umap/issues/252)
         warnings.simplefilter("ignore")
         neighbors = Neighbors(adata)
-        neighbors.compute_neighbors(write_knn_indices=True, **kwargs)
+        neighbors.compute_neighbors(**kwargs)
     logg.switch_verbosity("on", module="scanpy")
 
     return neighbors
@@ -124,6 +124,12 @@ def _set_pca(adata, n_pcs: Optional[int], use_highly_variable: bool):
         or n_pcs is not None
         and n_pcs > adata.obsm["X_pca"].shape[1]
     ):
+        warnings.warn(
+            "Automatic computation of PCA is deprecated since scvelo==0.4.0 and will be removed in a future version "
+            "of scVelo. Please compute PCA with Scanpy first.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
         if use_highly_variable and "highly_variable" in adata.var.keys():
             n_vars = np.sum(adata.var["highly_variable"])
         else:
@@ -213,6 +219,12 @@ def neighbors(
     distances : `.obsp`
         Sparse matrix of distances for each pair of neighbors.
     """
+    warnings.warn(
+        "`neighbors` is deprecated since scvelo==0.4.0 and will be removed in a future version "
+        "of scVelo. Please compute neighbors with Scanpy.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
     adata = adata.copy() if copy else adata
 
     use_rep = _get_rep(adata=adata, use_rep=use_rep, n_pcs=n_pcs)

diff --git a/tests/_data/dentategyrus_100obs_preprocessed.h5ad b/tests/_data/dentategyrus_100obs_preprocessed.h5ad
diff --git a/tests/_data/dentategyrus_50obs_preprocessed.h5ad b/tests/_data/dentategyrus_50obs_preprocessed.h5ad
diff --git a/tests/_data/pancreas_100obs_preprocessed.h5ad b/tests/_data/pancreas_100obs_preprocessed.h5ad
diff --git a/tests/_data/pancreas_50obs_preprocessed.h5ad b/tests/_data/pancreas_50obs_preprocessed.h5ad
diff --git a/...et_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_first_moment.npy b/...et_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_first_moment.npy
diff --git a/...t_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_second_moment.npy b/...t_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=connectivities_second_moment.npy
diff --git a/...nts/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_first_moment.npy b/...nts/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_first_moment.npy
diff --git a/...ts/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_second_moment.npy b/...ts/get_moments/dataset=dentategyrus-n_obs=100-layer=None-mode=distances_second_moment.npy
diff --git a/...moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy b/...moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy
diff --git a/...oments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy b/...oments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy
diff --git a/.../get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_first_moment.npy b/.../get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_first_moment.npy
diff --git a/...get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_second_moment.npy b/...get_moments/dataset=dentategyrus-n_obs=100-layer=spliced-mode=distances_second_moment.npy
diff --git a/...ments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy b/...ments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy
diff --git a/...ents/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy b/...ents/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy
diff --git a/...et_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_first_moment.npy b/...et_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_first_moment.npy
diff --git a/...t_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_second_moment.npy b/...t_moments/dataset=dentategyrus-n_obs=100-layer=unspliced-mode=distances_second_moment.npy
diff --git a/...get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_first_moment.npy b/...get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_first_moment.npy
diff --git a/...et_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_second_moment.npy b/...et_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=connectivities_second_moment.npy
diff --git a/...ents/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_first_moment.npy b/...ents/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_first_moment.npy
diff --git a/...nts/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_second_moment.npy b/...nts/get_moments/dataset=dentategyrus-n_obs=50-layer=None-mode=distances_second_moment.npy
diff --git a/..._moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy b/..._moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy
diff --git a/...moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy b/...moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy
diff --git a/...s/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_first_moment.npy b/...s/get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_first_moment.npy
diff --git a/.../get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_second_moment.npy b/.../get_moments/dataset=dentategyrus-n_obs=50-layer=spliced-mode=distances_second_moment.npy
diff --git a/...oments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy b/...oments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy
diff --git a/...ments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy b/...ments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy
diff --git a/...get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_first_moment.npy b/...get_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_first_moment.npy
diff --git a/...et_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_second_moment.npy b/...et_moments/dataset=dentategyrus-n_obs=50-layer=unspliced-mode=distances_second_moment.npy
diff --git a/...ts/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_first_moment.npy b/...ts/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_first_moment.npy
diff --git a/...s/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_second_moment.npy b/...s/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=connectivities_second_moment.npy
diff --git a/...moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_first_moment.npy b/...moments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_first_moment.npy
diff --git a/...oments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_second_moment.npy b/...oments/get_moments/dataset=pancreas-n_obs=100-layer=None-mode=distances_second_moment.npy
diff --git a/...get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy b/...get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_first_moment.npy
diff --git a/...et_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy b/...et_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=connectivities_second_moment.npy
diff --git a/...ents/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_first_moment.npy b/...ents/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_first_moment.npy
diff --git a/...nts/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_second_moment.npy b/...nts/get_moments/dataset=pancreas-n_obs=100-layer=spliced-mode=distances_second_moment.npy
diff --git a/...t_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy b/...t_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_first_moment.npy
diff --git a/..._moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy b/..._moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=connectivities_second_moment.npy
diff --git a/...ts/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_first_moment.npy b/...ts/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_first_moment.npy
diff --git a/...s/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_second_moment.npy b/...s/get_moments/dataset=pancreas-n_obs=100-layer=unspliced-mode=distances_second_moment.npy
diff --git a/...nts/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_first_moment.npy b/...nts/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_first_moment.npy
diff --git a/...ts/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_second_moment.npy b/...ts/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=connectivities_second_moment.npy
diff --git a/..._moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_first_moment.npy b/..._moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_first_moment.npy
diff --git a/...moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_second_moment.npy b/...moments/get_moments/dataset=pancreas-n_obs=50-layer=None-mode=distances_second_moment.npy
diff --git a/.../get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy b/.../get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_first_moment.npy
diff --git a/...get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy b/...get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=connectivities_second_moment.npy
diff --git a/...ments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_first_moment.npy b/...ments/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_first_moment.npy
diff --git a/...ents/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_second_moment.npy b/...ents/get_moments/dataset=pancreas-n_obs=50-layer=spliced-mode=distances_second_moment.npy
diff --git a/...et_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy b/...et_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_first_moment.npy
diff --git a/...t_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy b/...t_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=connectivities_second_moment.npy
diff --git a/...nts/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_first_moment.npy b/...nts/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_first_moment.npy
diff --git a/...ts/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_second_moment.npy b/...ts/get_moments/dataset=pancreas-n_obs=50-layer=unspliced-mode=distances_second_moment.npy
diff --git a/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_spliced.npy b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_spliced.npy
diff --git a/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_unspliced.npy b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=100first_moment_unspliced.npy
diff --git a/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_spliced.npy b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_spliced.npy
diff --git a/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_unspliced.npy b/tests/_data/test_moments/moments/dataset=dentategyrus-n_obs=50first_moment_unspliced.npy
diff --git a/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_spliced.npy b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_spliced.npy
diff --git a/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_unspliced.npy b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=100first_moment_unspliced.npy
diff --git a/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_spliced.npy b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_spliced.npy
diff --git a/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_unspliced.npy b/tests/_data/test_moments/moments/dataset=pancreas-n_obs=50first_moment_unspliced.npy
diff --git a/..._order_moments/dataset=dentategyrus-n_obs=100-mode=connectivities_second_moment_mixed.npy b/..._order_moments/dataset=dentategyrus-n_obs=100-mode=connectivities_second_moment_mixed.npy
diff --git a/...d_order_moments/dataset=dentategyrus-n_obs=50-mode=connectivities_second_moment_mixed.npy b/...d_order_moments/dataset=dentategyrus-n_obs=50-mode=connectivities_second_moment_mixed.npy
diff --git a/...cond_order_moments/dataset=pancreas-n_obs=100-mode=connectivities_second_moment_mixed.npy b/...cond_order_moments/dataset=pancreas-n_obs=100-mode=connectivities_second_moment_mixed.npy
diff --git a/...econd_order_moments/dataset=pancreas-n_obs=50-mode=connectivities_second_moment_mixed.npy b/...econd_order_moments/dataset=pancreas-n_obs=50-mode=connectivities_second_moment_mixed.npy
diff --git a/tests/preprocessing/test_moments.py b/tests/preprocessing/test_moments.py
@@ -55,7 +55,7 @@ def test_first_moments(
                 f"-layer={layer}-mode={mode}_first_moment.npy"
             )
         )
-        np.testing.assert_almost_equal(first_order_moment, ground_truth)
+        np.testing.assert_almost_equal(first_order_moment, ground_truth, decimal=3)
 
     @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"])
     @pytest.mark.parametrize("n_obs", [50, 100])
@@ -84,7 +84,7 @@ def test_second_moments(
                 f"-layer={layer}-mode={mode}_second_moment.npy"
             )
         )
-        np.testing.assert_almost_equal(second_order_moment, ground_truth)
+        np.testing.assert_almost_equal(second_order_moment, ground_truth, decimal=2)
 
     @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"])
     @pytest.mark.parametrize("n_obs", [50, 100])
@@ -117,7 +117,7 @@ def test_passing_array_for_layer(
                 f"-layer={layer}-mode={mode}_first_moment.npy"
             )
         )
-        np.testing.assert_almost_equal(first_order_moment, ground_truth)
+        np.testing.assert_almost_equal(first_order_moment, ground_truth, decimal=3)
 
     @pytest.mark.parametrize("sparse", [True, False])
     def test_analytic_example(self, sparse: bool):
@@ -187,7 +187,7 @@ def _compare_adatas(self, adata_1, adata_2):
         )
         assert issparse(adata_1.obsp["distances"])
         np.testing.assert_almost_equal(
-            adata_1.obsp["distances"].A, adata_2.obsp["distances"].A, decimal=4
+            adata_1.obsp["distances"].A, adata_2.obsp["distances"].A, decimal=3
         )
 
         # Check `.uns` is unchanged
@@ -204,7 +204,6 @@ def _compare_adatas(self, adata_1, adata_2):
         assert set(adata_1.uns["neighbors"]) == {
             "connectivities_key",
             "distances_key",
-            "indices",
             "params",
         }
         assert (
@@ -215,10 +214,6 @@ def _compare_adatas(self, adata_1, adata_2):
             adata_1.uns["neighbors"]["distances_key"]
             == adata_2.uns["neighbors"]["distances_key"]
         )
-        np.testing.assert_equal(
-            adata_1.uns["neighbors"]["indices"],
-            adata_2.uns["neighbors"]["indices"],
-        )
         assert adata_1.uns["neighbors"]["params"] == adata_2.uns["neighbors"]["params"]
 
     @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"])
@@ -271,7 +266,7 @@ def test_moment_calculation(
             )
         )
         np.testing.assert_almost_equal(
-            returned_adata.layers["Mu"], ground_truth_unspliced
+            returned_adata.layers["Mu"], ground_truth_unspliced, decimal=4
         )
 
         ground_truth_spliced = np.load(
@@ -281,7 +276,7 @@ def test_moment_calculation(
             )
         )
         np.testing.assert_almost_equal(
-            returned_adata.layers["Ms"], ground_truth_spliced
+            returned_adata.layers["Ms"], ground_truth_spliced, decimal=4
         )
 
     @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"])
@@ -332,10 +327,10 @@ def test_neighbors_and_moments_calculation(
         self._compare_adatas(returned_adata, original_adata)
 
         np.testing.assert_almost_equal(
-            returned_adata.layers["Mu"], original_adata.layers["Mu"]
+            returned_adata.layers["Mu"], original_adata.layers["Mu"], decimal=4
         )
         np.testing.assert_almost_equal(
-            returned_adata.layers["Ms"], original_adata.layers["Ms"]
+            returned_adata.layers["Ms"], original_adata.layers["Ms"], decimal=4
         )
 
         expected_log = "computing neighbors\n    finished ("
@@ -374,15 +369,19 @@ def test_raw_input(self, adata, dataset: str, n_obs: int):
                 f"first_moment_unspliced.npy"
             )
         )
-        np.testing.assert_almost_equal(adata.layers["Mu"], ground_truth_unspliced)
+        np.testing.assert_almost_equal(
+            adata.layers["Mu"], ground_truth_unspliced, decimal=4
+        )
 
         ground_truth_spliced = np.load(
             file=(
                 f"tests/_data/test_moments/moments/dataset={dataset}-n_obs={n_obs}"
                 f"first_moment_spliced.npy"
             )
         )
-        np.testing.assert_almost_equal(adata.layers["Ms"], ground_truth_spliced)
+        np.testing.assert_almost_equal(
+            adata.layers["Ms"], ground_truth_spliced, decimal=4
+        )
 
 
 class TestSecondOrderMoments:
@@ -418,7 +417,7 @@ def test_output(self, adata, dataset: str, n_obs: int):
             )
         )
         np.testing.assert_almost_equal(
-            second_order_moment_spliced, ground_truth_spliced
+            second_order_moment_spliced, ground_truth_spliced, decimal=2
         )
 
         ground_truth_mixed = np.load(
@@ -427,7 +426,9 @@ def test_output(self, adata, dataset: str, n_obs: int):
                 f"-n_obs={n_obs}-mode=connectivities_second_moment_mixed.npy"
             )
         )
-        np.testing.assert_almost_equal(second_order_moment_mixed, ground_truth_mixed)
+        np.testing.assert_almost_equal(
+            second_order_moment_mixed, ground_truth_mixed, decimal=3
+        )
 
     @pytest.mark.parametrize("dataset", ["pancreas", "dentategyrus"])
     @pytest.mark.parametrize("n_obs", [50, 100])
@@ -459,7 +460,9 @@ def test_adjusted(self, adata, dataset: str, n_obs: int):
             )
         )
         np.testing.assert_almost_equal(
-            second_order_moment_spliced, 2 * second_order_spliced - adata.layers["Ms"]
+            second_order_moment_spliced,
+            2 * second_order_spliced - adata.layers["Ms"],
+            decimal=2,
         )
 
         second_order_mixed = np.load(
@@ -469,7 +472,9 @@ def test_adjusted(self, adata, dataset: str, n_obs: int):
             )
         )
         np.testing.assert_almost_equal(
-            second_order_moment_mixed, 2 * second_order_mixed - adata.layers["Mu"]
+            second_order_moment_mixed,
+            2 * second_order_mixed - adata.layers["Mu"],
+            decimal=2,
         )
 
 
@@ -502,4 +507,4 @@ def test_output(self, adata, dataset: str, n_obs: int):
                 f"-layer=unspliced-mode=connectivities_second_moment.npy"
             )
         )
-        np.testing.assert_almost_equal(second_order_moment, ground_truth)
+        np.testing.assert_almost_equal(second_order_moment, ground_truth, decimal=2)