From f2123ecbfd41acf19dd421cca994b645dabfddeb Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Tue, 24 Jan 2023 17:31:49 +0100
Subject: [PATCH 1/2] fix(DeseqStats): testing that dds has a "replaced"
 attribute should only be done when refit_cooks is true

---
 pydeseq2/DeseqStats.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/pydeseq2/DeseqStats.py b/pydeseq2/DeseqStats.py
index d44ca3e0..943e6129 100644
--- a/pydeseq2/DeseqStats.py
+++ b/pydeseq2/DeseqStats.py
@@ -193,14 +193,15 @@ def __init__(
 
         # If the `refit_cooks` attribute of the dds object is True, check that outliers
         # were actually refitted.
-        try:
-            dds.replaced
-        except AttributeError:
-            raise AttributeError(
-                "dds has 'refit_cooks' set to True but Cooks outliers have not been "
-                "refitted. Please run 'dds.refit()' first or set 'dds.refit_cooks' "
-                "to False."
-            )
+        if self.dds.refit_cooks:
+            try:
+                dds.replaced
+            except AttributeError:
+                raise AttributeError(
+                    "dds has 'refit_cooks' set to True but Cooks outliers have not been "
+                    "refitted. Please run 'dds.refit()' first or set 'dds.refit_cooks' "
+                    "to False."
+                )
 
     def summary(self):
         """Run the statistical analysis.

From df7568ff87aa87f61645ab8d1c7861daa290a567 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Fri, 27 Jan 2023 11:10:56 +0100
Subject: [PATCH 2/2] ci: add test to check that the workflow runs bug-free
 when refit_cooks=False

---
 tests/test_pydeseq2.py | 46 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/tests/test_pydeseq2.py b/tests/test_pydeseq2.py
index 30598f86..9aaa3346 100644
--- a/tests/test_pydeseq2.py
+++ b/tests/test_pydeseq2.py
@@ -54,6 +54,52 @@ def test_deseq(tol=0.02):
     assert (abs(r_res.padj - res_df.padj) / r_res.padj).max() < tol
 
 
+def test_deseq_no_refit_cooks(tol=0.02):
+    """Check the workflow with ``refit_cooks=False`` against reference R results.
+
+    LFCs, p-values and adjusted p-values must match within relative error ``tol``.
+    Mainly a smoke test: the synthetic dataset is expected to contain no outliers.
+    """
+
+    test_path = str(Path(os.path.realpath(tests.__file__)).parent.resolve())
+
+    counts_df = load_example_data(
+        modality="raw_counts",
+        dataset="synthetic",
+        debug=False,
+    )
+
+    clinical_df = load_example_data(
+        modality="clinical",
+        dataset="synthetic",
+        debug=False,
+    )
+
+    r_res = pd.read_csv(
+        os.path.join(test_path, "data/single_factor/r_test_res.csv"), index_col=0
+    )
+
+    dds = DeseqDataSet(
+        counts_df, clinical_df, design_factors="condition", refit_cooks=False
+    )
+    dds.deseq2()
+
+    res = DeseqStats(dds)
+    res.summary()
+    res_df = res.results_df
+
+    # Raw and adjusted p-values must be NaN for exactly the same entries as in R.
+    assert (res_df.pvalue.isna() == r_res.pvalue.isna()).all()
+    assert (res_df.padj.isna() == r_res.padj.isna()).all()
+
+    # LFCs, p-values and adjusted p-values must agree with R within relative error tol.
+    assert (
+        abs(r_res.log2FoldChange - res_df.log2FoldChange) / abs(r_res.log2FoldChange)
+    ).max() < tol
+    assert (abs(r_res.pvalue - res_df.pvalue) / r_res.pvalue).max() < tol
+    assert (abs(r_res.padj - res_df.padj) / r_res.padj).max() < tol
+
 def test_lfc_shrinkage(tol=0.02):
     """Test that the outputs of the lfc_shrink function match those of the original
     R package (starting from the same inputs), up to a tolerance in relative error.