From afe7fa99d24421b9f3815a273ad857b27e98bd3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Thu, 3 Oct 2019 12:54:16 +0200
Subject: [PATCH] Added unittests for annotator
---
scanpy/tests/test_annotator.py | 158 +++++++++++++++++++++++++++++++++
setup.py | 10 ++-
2 files changed, 167 insertions(+), 1 deletion(-)
create mode 100644 scanpy/tests/test_annotator.py
diff --git a/scanpy/tests/test_annotator.py b/scanpy/tests/test_annotator.py
new file mode 100644
index 0000000000..2a514bb1ab
--- /dev/null
+++ b/scanpy/tests/test_annotator.py
@@ -0,0 +1,158 @@
+import unittest
+from importlib.util import find_spec
+
+import pandas as pd
+import numpy as np
+from scanpy import AnnData
+from scanpy.external.tl import annotator
+
+import pytest
+
+
+@pytest.mark.skipif(
+    find_spec('pointannotator') is None, reason="point-annotator not installed"
+)
+class AnnotatorTests(unittest.TestCase):
+    """Tests for ``annotator`` on a small synthetic dataset.
+
+    The whole class is skipped when ``pointannotator`` cannot be found.
+    """
+
+    def setUp(self):
+        """Build the marker table and an 8 cells x 8 genes expression matrix.
+
+        Rows 0-3 express the "Type 1" genes (row 3 also has 0.1 for gene 212)
+        and rows 4-7 express only the "Type 2" genes.
+        """
+        self.markers = pd.DataFrame(
+            [
+                ["Type 1", "111"],
+                ["Type 1", "112"],
+                ["Type 1", "113"],
+                ["Type 1", "114"],
+                ["Type 2", "211"],
+                ["Type 2", "212"],
+                ["Type 2", "213"],
+                ["Type 2", "214"],
+            ],
+            columns=["Cell Type", "Gene"],
+        )
+
+        genes = ["111", "112", "113", "114", "211", "212", "213", "214"]
+        self.data = pd.DataFrame(
+            np.array(
+                [
+                    [1, 1, 1, 1.1, 0, 0, 0, 0],
+                    [1, 0.8, 0.9, 1, 0, 0, 0, 0],
+                    [0.7, 1.1, 1, 1.2, 0, 0, 0, 0],
+                    [0.8, 0.7, 1.1, 1, 0, 0.1, 0, 0],
+                    [0, 0, 0, 0, 1.05, 1.05, 1.1, 1],
+                    [0, 0, 0, 0, 1.1, 1.0, 1.05, 1.1],
+                    [0, 0, 0, 0, 1.05, 0.9, 1.1, 1.1],
+                    [0, 0, 0, 0, 0.9, 0.9, 1.2, 1],
+                ]
+            ),
+            columns=genes,
+        )
+
+        # transform data to AnnData
+        self.anndata = AnnData(self.data.values, var=self.data.columns.values)
+
+    def basic_check(self, annotations, n_types=2, check_range=True):
+        """Assert the invariants shared by the annotation results.
+
+        The result must be an ``AnnData`` with one row per input cell and
+        ``n_types`` columns. With ``check_range`` set, the scores must also
+        lie in [0, 1] and have a positive sum (NaN values are ignored).
+        """
+        self.assertIsInstance(annotations, AnnData)
+        self.assertEqual(len(annotations), len(self.anndata))
+        self.assertTupleEqual(annotations.shape, (8, n_types))
+        if check_range:
+            self.assertGreater(np.nansum(annotations.X), 0)
+            self.assertLessEqual(np.nanmax(annotations.X), 1)
+            self.assertGreaterEqual(np.nanmin(annotations.X), 0)
+
+    def test_annotator(self):
+        """Annotate with ``normalize=False`` and run the basic checks."""
+        annotations = annotator(
+            self.anndata, self.markers, normalize=False, num_genes=15
+        )
+
+        self.basic_check(annotations)
+
+    def test_remove_empty_column(self):
+        """
+        Type 3 column must be removed here, since this cell type does not
+        belong to any cell.
+        """
+        additional_markers = pd.DataFrame(
+            [["Type 3", "311"], ["Type 3", "312"], ["Type 3", "313"]],
+            columns=["Cell Type", "Gene"],
+        )
+        # DataFrame.append was removed in pandas 2.0; concat is equivalent
+        markers = pd.concat([self.markers, additional_markers])
+
+        annotations = annotator(self.anndata, markers, num_genes=20)
+
+        self.basic_check(annotations)
+
+        # with return_nonzero_annotations=False the Type 3 column is kept
+        annotations = annotator(
+            self.anndata,
+            markers,
+            num_genes=20,
+            return_nonzero_annotations=False,
+        )
+        self.basic_check(annotations, n_types=3)
+
+    def test_sf(self):
+        """
+        Test annotations with hypergeom.sf
+        """
+        annotations = annotator(
+            self.anndata, self.markers, num_genes=15, p_value_fun="hypergeom"
+        )
+
+        self.basic_check(annotations)
+
+    def test_scoring(self):
+        """Exercise the annotator with four ``scoring`` options."""
+        # scoring SCORING_EXP_RATIO
+        annotations = annotator(
+            self.anndata, self.markers, num_genes=15, scoring="exp_ratio"
+        )
+
+        self.basic_check(annotations)
+
+        # scoring SCORING_MARKERS_SUM
+        annotations = annotator(
+            self.anndata,
+            self.markers,
+            num_genes=15,
+            scoring="sum_of_expressed_markers",
+        )
+
+        # scores here are marker sums (4.1 for cell 0): skip the [0, 1] check
+        self.basic_check(annotations, check_range=False)
+
+        # based on provided data it should match
+        # the third row is skipped, since it is special
+        self.assertAlmostEqual(
+            annotations.X[0, 0], self.data.iloc[0].sum(), places=6
+        )
+        self.assertAlmostEqual(
+            annotations.X[5, 1], self.data.iloc[5].sum(), places=6
+        )
+
+        # scoring SCORING_LOG_FDR
+        annotations = annotator(
+            self.anndata, self.markers, num_genes=15, scoring="log_fdr"
+        )
+        self.basic_check(annotations, check_range=False)
+
+        # scoring SCORING_LOG_PVALUE
+        annotations = annotator(
+            self.anndata, self.markers, num_genes=15, scoring="log_p_value"
+        )
+        self.basic_check(annotations, check_range=False)
diff --git a/setup.py b/setup.py
index cd6363c97f..56eb277fa8 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,15 @@
'sphinx_autodoc_typehints',
'scanpydoc',
],
- test=['pytest>=4.4', 'dask[array]', 'fsspec', 'zappy', 'zarr', 'black'],
+ test=[
+ 'pytest>=4.4',
+ 'dask[array]',
+ 'fsspec',
+ 'zappy',
+ 'zarr',
+ 'black',
+ 'point-annotator',
+ ],
),
packages=find_packages(),
entry_points=dict(console_scripts=['scanpy=scanpy.cli:console_main']),