scverse · BorisMuzellec · Feb 21, 2023 · Feb 20, 2023 · Feb 21, 2023 · Feb 21, 2023
@@ -73,16 +73,18 @@ def load_example_data(
     if dataset == "synthetic":
         path_to_data = datasets_path / "synthetic"
         if Path(path_to_data).is_dir():
-            path_to_data_counts = path_to_data / "test_counts.csv"
-            path_to_data_clinical = path_to_data / "test_clinical.csv"
+            # Cast the Paths to strings so that the types are consistent with the url
+            # case (which does not handle Paths); otherwise mypy throws an error.
+            path_to_data_counts = str(path_to_data / "test_counts.csv")
+            path_to_data_clinical = str(path_to_data / "test_clinical.csv")
         else:
             # if the path does not exist (as is the case in RDT) load it from github
             url_to_data = (
                 "https://raw.githubusercontent.com/owkin/"
                 "PyDESeq2/main/datasets/synthetic/"
             )
-            path_to_data_counts = Path(url_to_data + "/test_counts.csv")
-            path_to_data_clinical = Path(url_to_data + "/test_clinical.csv")
+            path_to_data_counts = url_to_data + "/test_counts.csv"
+            path_to_data_clinical = url_to_data + "/test_clinical.csv"
 
         if modality == "raw_counts":
             df = pd.read_csv(

@@ -1,6 +1,10 @@
+import pathlib
+from unittest import mock
+
 import numpy as np
 import pytest
 
+from pydeseq2.utils import load_example_data
 from pydeseq2.utils import nb_nll
 
 
@@ -27,3 +31,25 @@ def test_nb_nll_moments(mu, alpha):
     assert np.abs(diff) < 0.2 * deviation
     error_var = np.abs(sample.var() - var_th) / var_th
     assert error_var < 1 / np.sqrt(n_montecarlo)
+
+
+# Test data loading from outside the package (e.g. on RTD)
+@pytest.mark.parametrize("modality", ["raw_counts", "clinical"])
+@pytest.mark.parametrize("mocked_dir_flag", [True, False])
+@mock.patch("pathlib.Path.is_dir")
+def test_rtd_example_data_loading(mocked_function, modality, mocked_dir_flag):
+    """
+    Test that load_example_data still works when run from a place where the ``datasets``
+    directory is not accessible, e.g. when the documentation is built on readthedocs.
+    """
+
+    # Mock the output of is_dir(): False emulates not having access to the
+    # ``datasets`` directory, True exercises the local-files branch.
+    pathlib.Path.is_dir.return_value = mocked_dir_flag
+
+    # Try loading data.
+    load_example_data(
+        modality=modality,
+        dataset="synthetic",
+        debug=False,
+    )