From afe7fa99d24421b9f3815a273ad857b27e98bd3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Primo=C5=BE=20Godec?= Date: Thu, 3 Oct 2019 12:54:16 +0200 Subject: [PATCH] Added unittests for annotator --- scanpy/tests/test_annotator.py | 158 +++++++++++++++++++++++++++++++++ setup.py | 10 ++- 2 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 scanpy/tests/test_annotator.py diff --git a/scanpy/tests/test_annotator.py b/scanpy/tests/test_annotator.py new file mode 100644 index 0000000000..2a514bb1ab --- /dev/null +++ b/scanpy/tests/test_annotator.py @@ -0,0 +1,158 @@ +import unittest +from importlib.util import find_spec + +import pandas as pd +import numpy as np +from scanpy import AnnData +from scanpy.external.tl import annotator + +import pytest + + +@pytest.mark.skipif( + find_spec('pointannotator') is None, reason="point-annotator not installed" +) +class AnnotatorTests(unittest.TestCase): + def setUp(self): + self.markers = pd.DataFrame( + [ + ["Type 1", "111"], + ["Type 1", "112"], + ["Type 1", "113"], + ["Type 1", "114"], + ["Type 2", "211"], + ["Type 2", "212"], + ["Type 2", "213"], + ["Type 2", "214"], + ], + columns=["Cell Type", "Gene"], + ) + + genes = ["111", "112", "113", "114", "211", "212", "213", "214"] + self.data = pd.DataFrame( + np.array( + [ + [1, 1, 1, 1.1, 0, 0, 0, 0], + [1, 0.8, 0.9, 1, 0, 0, 0, 0], + [0.7, 1.1, 1, 1.2, 0, 0, 0, 0], + [0.8, 0.7, 1.1, 1, 0, 0.1, 0, 0], + [0, 0, 0, 0, 1.05, 1.05, 1.1, 1], + [0, 0, 0, 0, 1.1, 1.0, 1.05, 1.1], + [0, 0, 0, 0, 1.05, 0.9, 1.1, 1.1], + [0, 0, 0, 0, 0.9, 0.9, 1.2, 1], + ] + ), + columns=genes, + ) + + # transform data to AnnData + self.anndata = AnnData(self.data.values, var=self.data.columns.values) + + def basic_check(self, annotations): + self.assertEqual(type(annotations), AnnData) + self.assertEqual(len(annotations), len(self.anndata)) + self.assertTupleEqual( + annotations.shape, (8, 2) + ) # two types in the data + self.assertGreater(np.nansum(annotations.X), 0) + self.assertLessEqual(np.nanmax(annotations.X), 1) + self.assertGreaterEqual(np.nanmin(annotations.X), 0) + + def test_annotator(self): + annotations = annotator( + self.anndata, self.markers, normalize=False, num_genes=15 + ) + + self.basic_check(annotations) + + def test_remove_empty_column(self): + """ + Type 3 column must be removed here, since this cell type does not + belong to any cell. + """ + additinal_markers = pd.DataFrame( + [["Type 3", "311"], ["Type 3", "312"], ["Type 3", "313"]], + columns=["Cell Type", "Gene"], + ) + markers = self.markers.append(additinal_markers) + + annotations = annotator(self.anndata, markers, num_genes=20) + + self.basic_check(annotations) + + annotations = annotator( + self.anndata, + markers, + num_genes=20, + return_nonzero_annotations=False, + ) + self.assertEqual(len(annotations), len(self.anndata)) + self.assertTupleEqual( + annotations.shape, (8, 3) + ) # two types in the data + self.assertGreater(np.nansum(annotations.X), 0) + self.assertLessEqual(np.nanmax(annotations.X), 1) + self.assertGreaterEqual(np.nanmin(annotations.X), 0) + + def test_sf(self): + """ + Test annotations with hypergeom.sf + """ + annotations = annotator( + self.anndata, self.markers, num_genes=15, p_value_fun="hypergeom" + ) + + self.basic_check(annotations) + + def test_scoring(self): + # scoring SCORING_EXP_RATIO + annotations = annotator( + self.anndata, self.markers, num_genes=15, scoring="exp_ratio" + ) + + self.basic_check(annotations) + + # scoring SCORING_MARKERS_SUM + annotations = annotator( + self.anndata, + self.markers, + num_genes=15, + scoring="sum_of_expressed_markers", + ) + + self.assertEqual(type(annotations), AnnData) + self.assertEqual(len(annotations), len(self.anndata)) + self.assertTupleEqual( + annotations.shape, (8, 2) + ) # two types in the data + + # based on provided data it should match + # the third row is skipped, since it is special + self.assertAlmostEqual( + annotations.X[0, 0], self.data.iloc[0].sum(), places=6 + ) + self.assertAlmostEqual( + annotations.X[5, 1], self.data.iloc[5].sum(), places=6 + ) + + # scoring SCORING_LOG_FDR + annotations = annotator( + self.anndata, self.markers, num_genes=15, scoring="log_fdr" + ) + + self.assertEqual(type(annotations), AnnData) + self.assertEqual(len(annotations), len(self.anndata)) + self.assertTupleEqual( + annotations.shape, (8, 2) + ) # two types in the data + + # scoring SCORING_LOG_PVALUE + annotations = annotator( + self.anndata, self.markers, num_genes=15, scoring="log_p_value" + ) + + self.assertEqual(type(annotations), AnnData) + self.assertEqual(len(annotations), len(self.anndata)) + self.assertTupleEqual( + annotations.shape, (8, 2) + ) # two types in the data diff --git a/setup.py b/setup.py index cd6363c97f..56eb277fa8 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,15 @@ 'sphinx_autodoc_typehints', 'scanpydoc', ], - test=['pytest>=4.4', 'dask[array]', 'fsspec', 'zappy', 'zarr', 'black'], + test=[ + 'pytest>=4.4', + 'dask[array]', + 'fsspec', + 'zappy', + 'zarr', + 'black', + 'point-annotator', + ], ), packages=find_packages(), entry_points=dict(console_scripts=['scanpy=scanpy.cli:console_main']),