From f2123ecbfd41acf19dd421cca994b645dabfddeb Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Tue, 24 Jan 2023 17:31:49 +0100 Subject: [PATCH 1/2] fix(DeseqStats): testing that dds has a "replaced" attribute should only be done when refit_cooks is true --- pydeseq2/DeseqStats.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pydeseq2/DeseqStats.py b/pydeseq2/DeseqStats.py index d44ca3e0..943e6129 100644 --- a/pydeseq2/DeseqStats.py +++ b/pydeseq2/DeseqStats.py @@ -193,14 +193,15 @@ def __init__( # If the `refit_cooks` attribute of the dds object is True, check that outliers # were actually refitted. - try: - dds.replaced - except AttributeError: - raise AttributeError( - "dds has 'refit_cooks' set to True but Cooks outliers have not been " - "refitted. Please run 'dds.refit()' first or set 'dds.refit_cooks' " - "to False." - ) + if self.dds.refit_cooks: + try: + dds.replaced + except AttributeError: + raise AttributeError( + "dds has 'refit_cooks' set to True but Cooks outliers have not been " + "refitted. Please run 'dds.refit()' first or set 'dds.refit_cooks' " + "to False." + ) def summary(self): """Run the statistical analysis. From df7568ff87aa87f61645ab8d1c7861daa290a567 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Fri, 27 Jan 2023 11:10:56 +0100 Subject: [PATCH 2/2] ci: add test to check that the workflow runs bug-free when refit_cooks=False --- tests/test_pydeseq2.py | 46 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/test_pydeseq2.py b/tests/test_pydeseq2.py index 30598f86..9aaa3346 100644 --- a/tests/test_pydeseq2.py +++ b/tests/test_pydeseq2.py @@ -54,6 +54,52 @@ def test_deseq(tol=0.02): assert (abs(r_res.padj - res_df.padj) / r_res.padj).max() < tol +def test_deseq_no_refit_cooks(tol=0.02): + """Test that the outputs of the DESeq2 function *without cooks refit* + match those of the original R package, up to a tolerance in relative error. + Note: this is just to check that the workflow runs bug-free, as we expect no outliers + in the synthetic dataset. + """ + + test_path = str(Path(os.path.realpath(tests.__file__)).parent.resolve()) + + counts_df = load_example_data( + modality="raw_counts", + dataset="synthetic", + debug=False, + ) + + clinical_df = load_example_data( + modality="clinical", + dataset="synthetic", + debug=False, + ) + + r_res = pd.read_csv( + os.path.join(test_path, "data/single_factor/r_test_res.csv"), index_col=0 + ) + + dds = DeseqDataSet( + counts_df, clinical_df, design_factors="condition", refit_cooks=False + ) + dds.deseq2() + + res = DeseqStats(dds) + res.summary() + res_df = res.results_df + + # check that the same p-values are NaN + assert (res_df.pvalue.isna() == r_res.pvalue.isna()).all() + assert (res_df.padj.isna() == r_res.padj.isna()).all() + + # Check that the same LFC, p-values and adjusted p-values are found (up to tol) + assert ( + abs(r_res.log2FoldChange - res_df.log2FoldChange) / abs(r_res.log2FoldChange) + ).max() < tol + assert (abs(r_res.pvalue - res_df.pvalue) / r_res.pvalue).max() < tol + assert (abs(r_res.padj - res_df.padj) / r_res.padj).max() < tol + + def test_lfc_shrinkage(tol=0.02): """Test that the outputs of the lfc_shrink function match those of the original R package (starting from the same inputs), up to a tolerance in relative error.