From 1055aea07687c1e617a5587834abe39598196aba Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Fri, 2 Feb 2024 16:19:02 +0100 Subject: [PATCH 01/16] feat: implementing local fit for trend curve (WIP) --- pydeseq2/dds.py | 229 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 159 insertions(+), 70 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 665ef5b9..d4b30e0f 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -15,6 +15,8 @@ from scipy.stats import f # type: ignore from scipy.stats import trim_mean # type: ignore +from skmisc.loess import loess + from pydeseq2.default_inference import DefaultInference from pydeseq2.inference import Inference from pydeseq2.preprocessing import deseq2_norm_fit @@ -71,6 +73,11 @@ class DeseqDataSet(ad.AnnData): specifying the factor of interest and the reference (control) level against which we're testing, e.g. ``["condition", "A"]``. (default: ``None``). + trend_fit_type : str + Either "parametric" or "local" for the type of fitting of dispersions trend + curve.If "parametric" is selected but the fitting fails, it will switch to + "local". (default: ``"parametric"``). + min_mu : float Threshold for mean estimates. (default: ``0.5``). @@ -181,6 +188,7 @@ def __init__( design_factors: Union[str, List[str]] = "condition", continuous_factors: Optional[List[str]] = None, ref_level: Optional[List[str]] = None, + trend_fit_type: Literal["parametric", "local"] = "parametric", min_mu: float = 0.5, min_disp: float = 1e-8, max_disp: float = 10.0, @@ -266,6 +274,7 @@ def __init__( # Check that the design matrix has full rank self._check_full_rank_design() + self.trend_fit_type = trend_fit_type self.min_mu = min_mu self.min_disp = min_disp self.max_disp = np.maximum(max_disp, self.n_obs) @@ -568,71 +577,25 @@ def fit_genewise_dispersions(self) -> None: def fit_dispersion_trend(self) -> None: r"""Fit the dispersion trend coefficients. - - :math:`f(\mu) = \alpha_1/\mu + a_0`. + TODO """ - # Check that genewise dispersions are available. If not, compute them. - if "genewise_dispersions" not in self.varm: - self.fit_genewise_dispersions() - - if not self.quiet: - print("Fitting dispersion trend curve...", file=sys.stderr) - start = time.time() - self.varm["_normed_means"] = self.layers["normed_counts"].mean(0) - - # Exclude all-zero counts - targets = pd.Series( - self[:, self.non_zero_genes].varm["genewise_dispersions"].copy(), - index=self.non_zero_genes, - ) - covariates = pd.Series( - 1 / self[:, self.non_zero_genes].varm["_normed_means"], - index=self.non_zero_genes, - ) - - for gene in self.non_zero_genes: - if ( - np.isinf(covariates.loc[gene]).any() - or np.isnan(covariates.loc[gene]).any() - ): - targets.drop(labels=[gene], inplace=True) - covariates.drop(labels=[gene], inplace=True) - - # Initialize coefficients - old_coeffs = pd.Series([0.1, 0.1]) - coeffs = pd.Series([1.0, 1.0]) - - while (np.log(np.abs(coeffs / old_coeffs)) ** 2).sum() >= 1e-6: - old_coeffs = coeffs - coeffs, predictions = self.inference.dispersion_trend_gamma_glm( - covariates, targets - ) - # Filter out genes that are too far away from the curve before refitting - pred_ratios = ( - self[:, covariates.index].varm["genewise_dispersions"] / predictions - ) - targets.drop( - targets[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index, - inplace=True, - ) - covariates.drop( - covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index, - inplace=True, - ) + if self.trend_fit_type == "parametric": + self._fit_parametric_trend() - end = time.time() - - if not self.quiet: - print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) - - self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"]) + if (self.uns["trend_coeffs"] <= 0).any(): + warnings.warn( + f"self.trend_fit_type={self.trend_fit_type}, but the dispersion trend was" + f" not well captured by the function: y = a / x + b. Switchiing to local " + f"regression.", + UserWarning, + stacklevel=2, + ) + self.trend_fit_type = "local" + del self.uns["trend_coeffs"] - self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN) - self.varm["fitted_dispersions"][self.varm["non_zero"]] = dispersion_trend( - self.varm["_normed_means"][self.varm["non_zero"]], - self.uns["trend_coeffs"], - ) + if self.trend_fit_type == "local": + self._fit_local_trend() def fit_dispersion_prior(self) -> None: """Fit dispersion variance priors and standard deviation of log-residuals. @@ -721,9 +684,11 @@ def fit_MAP_dispersions(self) -> None: # Filter outlier genes for which we won't apply shrinkage self.varm["dispersions"] = self.varm["MAP_dispersions"].copy() - self.varm["_outlier_genes"] = np.log(self.varm["genewise_dispersions"]) > np.log( - self.varm["fitted_dispersions"] - ) + 2 * np.sqrt(self.uns["_squared_logres"]) + self.varm["_outlier_genes"] = np.log( + self.varm["genewise_dispersions"] + ) > np.log(self.varm["fitted_dispersions"]) + 2 * np.sqrt( + self.uns["_squared_logres"] + ) self.varm["dispersions"][self.varm["_outlier_genes"]] = self.varm[ "genewise_dispersions" ][self.varm["_outlier_genes"]] @@ -859,6 +824,115 @@ def _fit_MoM_dispersions(self) -> None: alpha_hat, self.min_disp, self.max_disp ) + def _fit_parametric_trend(self) -> None: + r"""Fit the dispersion trend coefficients. + + :math:`f(\mu) = \alpha_1/\mu + a_0`. + """ + # Check that genewise dispersions are available. If not, compute them. + if "genewise_dispersions" not in self.varm: + self.fit_genewise_dispersions() + + if not self.quiet: + print("Fitting dispersion trend curve...", file=sys.stderr) + start = time.time() + self.varm["_normed_means"] = self.layers["normed_counts"].mean(0) + + # Exclude all-zero counts + targets = pd.Series( + self[:, self.non_zero_genes].varm["genewise_dispersions"].copy(), + index=self.non_zero_genes, + ) + covariates = pd.Series( + 1 / self[:, self.non_zero_genes].varm["_normed_means"], + index=self.non_zero_genes, + ) + + for gene in self.non_zero_genes: + if ( + np.isinf(covariates.loc[gene]).any() + or np.isnan(covariates.loc[gene]).any() + ): + targets.drop(labels=[gene], inplace=True) + covariates.drop(labels=[gene], inplace=True) + + # Initialize coefficients + old_coeffs = pd.Series([0.1, 0.1]) + coeffs = pd.Series([1.0, 1.0]) + + while (np.log(np.abs(coeffs / old_coeffs)) ** 2).sum() >= 1e-6: + old_coeffs = coeffs + coeffs, predictions = self.inference.dispersion_trend_gamma_glm( + covariates, targets + ) + # Filter out genes that are too far away from the curve before refitting + pred_ratios = ( + self[:, covariates.index].varm["genewise_dispersions"] / predictions + ) + + targets.drop( + targets[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index, + inplace=True, + ) + covariates.drop( + covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index, + inplace=True, + ) + + end = time.time() + + if not self.quiet: + print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) + + self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"]) + + self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN) + self.varm["fitted_dispersions"][self.varm["non_zero"]] = dispersion_trend( + self.varm["_normed_means"][self.varm["non_zero"]], + self.uns["trend_coeffs"], + ) + + def _fit_local_trend(self) -> None: + r"""Fit the dispersion trend coefficients. + + TODO + """ + # Check that genewise dispersions are available. If not, compute them. + if "genewise_dispersions" not in self.varm: + self.fit_genewise_dispersions() + + if not self.quiet: + print("Fitting dispersion trend curve...", file=sys.stderr) + start = time.time() + self.varm["_normed_means"] = self.layers["normed_counts"].mean(0) + + genes_to_fit = self.varm["non_zero"] & ( + self.varm["genewise_dispersions"] >= 10 * self.min_disp + ) + + if len(genes_to_fit) == 0: + raise ValueError("No genes to fit: all dispersions are below 10 * min_disp") + + lo = loess( + x=np.log(self.varm["_normed_means"][genes_to_fit]), + y=np.log(self.varm["genewise_dispersions"][genes_to_fit]), + weights=self.varm["_normed_means"][genes_to_fit], + surface="direct", # to allow extrapolation + ) + + lo.fit() + + end = time.time() + + if not self.quiet: + print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) + + self.uns["disp_function"] = lambda x: np.exp(lo.predict(np.log(x)).values) + self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN) + self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.uns[ + "disp_function" + ](self.varm["_normed_means"][self.varm["non_zero"]]) + def plot_dispersions( self, log: bool = True, save_path: Optional[str] = None, **kwargs ) -> None: @@ -1001,12 +1075,25 @@ def _refit_without_outliers( # Compute trend dispersions. # Note: the trend curve is not refitted. - sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"] sub_dds.varm["_normed_means"] = sub_dds.layers["normed_counts"].mean(0) - sub_dds.varm["fitted_dispersions"] = dispersion_trend( - sub_dds.varm["_normed_means"], - sub_dds.uns["trend_coeffs"], - ) + + if self.trend_fit_type == "parametric": + sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"] + sub_dds.varm["fitted_dispersions"] = dispersion_trend( + sub_dds.varm["_normed_means"], + sub_dds.uns["trend_coeffs"], + ) + elif self.trend_fit_type == "local": + sub_dds.uns["disp_function"] = self.uns["disp_function"] + sub_dds.varm["fitted_dispersions"] = self.uns["disp_function"]( + sub_dds.varm["_normed_means"][sub_dds.varm["non_zero"]] + ) + + else: + raise AttributeError( + f"Found trend_fit_type '{self.trend_fit_type}'. Expected 'parametric' or " + "'local'." + ) # Estimate MAP dispersions. # Note: the prior variance is not recomputed. @@ -1111,7 +1198,9 @@ def objective(p): ) < 1e-4: break elif i == niter - 1: - print("Iterative size factor fitting did not converge.", file=sys.stderr) + print( + "Iterative size factor fitting did not converge.", file=sys.stderr + ) # Restore the design matrix and free buffer self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy() From 718d7fddfea0062398dc10df2f190ea599a88f10 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 2 Feb 2024 15:21:37 +0000 Subject: [PATCH 02/16] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pydeseq2/dds.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index d4b30e0f..f0547291 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -14,7 +14,6 @@ from scipy.special import polygamma # type: ignore from scipy.stats import f # type: ignore from scipy.stats import trim_mean # type: ignore - from skmisc.loess import loess from pydeseq2.default_inference import DefaultInference @@ -579,7 +578,6 @@ def fit_dispersion_trend(self) -> None: r"""Fit the dispersion trend coefficients. TODO """ - if self.trend_fit_type == "parametric": self._fit_parametric_trend() @@ -684,11 +682,9 @@ def fit_MAP_dispersions(self) -> None: # Filter outlier genes for which we won't apply shrinkage self.varm["dispersions"] = self.varm["MAP_dispersions"].copy() - self.varm["_outlier_genes"] = np.log( - self.varm["genewise_dispersions"] - ) > np.log(self.varm["fitted_dispersions"]) + 2 * np.sqrt( - self.uns["_squared_logres"] - ) + self.varm["_outlier_genes"] = np.log(self.varm["genewise_dispersions"]) > np.log( + self.varm["fitted_dispersions"] + ) + 2 * np.sqrt(self.uns["_squared_logres"]) self.varm["dispersions"][self.varm["_outlier_genes"]] = self.varm[ "genewise_dispersions" ][self.varm["_outlier_genes"]] @@ -1198,9 +1194,7 @@ def objective(p): ) < 1e-4: break elif i == niter - 1: - print( - "Iterative size factor fitting did not converge.", file=sys.stderr - ) + print("Iterative size factor fitting did not converge.", file=sys.stderr) # Restore the design matrix and free buffer self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy() From 3e6692db78ea586d37937de1c52537338ee3174a Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 11:57:23 +0100 Subject: [PATCH 03/16] chore: add scikit-misc dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5bb3269c..25611c17 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ "numpy>=1.23.0", "pandas>=1.4.0", "scikit-learn>=1.1.0", - "scipy>=1.8.0", + "scikit-misc>=0.3.1" "scipy>=1.8.0", "statsmodels", "matplotlib>=3.6.2", # not sure why sphinx_gallery does not work without it ], # external packages as dependencies From 9a4671217dee6e59688def10cb9244e988b23d58 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 15:04:33 +0100 Subject: [PATCH 04/16] feat: implement mean trend curve --- pydeseq2/dds.py | 76 +++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index f0547291..08784317 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -73,8 +73,8 @@ class DeseqDataSet(ad.AnnData): we're testing, e.g. ``["condition", "A"]``. (default: ``None``). trend_fit_type : str - Either "parametric" or "local" for the type of fitting of dispersions trend - curve.If "parametric" is selected but the fitting fails, it will switch to + Either "parametric", "local" or "mean", for the type of fitting of dispersions + trend curve.If "parametric" is selected but the fitting fails, it will switch to "local". (default: ``"parametric"``). min_mu : float @@ -187,7 +187,7 @@ def __init__( design_factors: Union[str, List[str]] = "condition", continuous_factors: Optional[List[str]] = None, ref_level: Optional[List[str]] = None, - trend_fit_type: Literal["parametric", "local"] = "parametric", + trend_fit_type: Literal["parametric", "local", "mean"] = "parametric", min_mu: float = 0.5, min_disp: float = 1e-8, max_disp: float = 10.0, @@ -575,17 +575,19 @@ def fit_genewise_dispersions(self) -> None: self.varm["_genewise_converged"][self.varm["non_zero"]] = l_bfgs_b_converged_ def fit_dispersion_trend(self) -> None: - r"""Fit the dispersion trend coefficients. - TODO + r"""Fit the dispersion trend curve. + + Three methods are available, depending on the ``trend_fit_type`` attribute: + "parametric", "local" and "mean". """ if self.trend_fit_type == "parametric": self._fit_parametric_trend() if (self.uns["trend_coeffs"] <= 0).any(): warnings.warn( - f"self.trend_fit_type={self.trend_fit_type}, but the dispersion trend was" - f" not well captured by the function: y = a / x + b. Switchiing to local " - f"regression.", + f"self.trend_fit_type={self.trend_fit_type}, but the " + f"dispersion trend was not well captured by the function: " + f"y = a / x + b. Switchiing to local regression.", UserWarning, stacklevel=2, ) @@ -593,7 +595,14 @@ def fit_dispersion_trend(self) -> None: del self.uns["trend_coeffs"] if self.trend_fit_type == "local": - self._fit_local_trend() + try: + self._fit_local_trend() + except ValueError: + print("Local trend, fit did not converge, switching to mean fit.") + self.trend_fit_type = "mean" + + if self.trend_fit_type == "mean": + self._fit_mean_trend() def fit_dispersion_prior(self) -> None: """Fit dispersion variance priors and standard deviation of log-residuals. @@ -882,17 +891,17 @@ def _fit_parametric_trend(self) -> None: self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"]) - self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN) - self.varm["fitted_dispersions"][self.varm["non_zero"]] = dispersion_trend( - self.varm["_normed_means"][self.varm["non_zero"]], - self.uns["trend_coeffs"], + self.uns["disp_function"] = lambda x: dispersion_trend( + x, self.uns["trend_coeffs"] ) - def _fit_local_trend(self) -> None: - r"""Fit the dispersion trend coefficients. + self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN) + self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.uns[ + "disp_function" + ](self.varm["_normed_means"][self.varm["non_zero"]]) - TODO - """ + def _fit_local_trend(self) -> None: + r"""Fit the dispersion trend curve using local regression.""" # Check that genewise dispersions are available. If not, compute them. if "genewise_dispersions" not in self.varm: self.fit_genewise_dispersions() @@ -929,6 +938,18 @@ def _fit_local_trend(self) -> None: "disp_function" ](self.varm["_normed_means"][self.varm["non_zero"]]) + def _fit_mean_trend(self): + """Fit the dispersion trend curve using the mean of gene-wise dispersions.""" + mean_disp = trim_mean( + self.varm["genewise_dispersions"][ + self.varm["genewise_dispersions"] > 10 * self.min_disp + ], + proportiontocut=0.001, + ) + + self.uns["disp_function"] = lambda x: mean_disp + self.varm["fitted_dispersions"] = np.full(self.n_vars, mean_disp) + def plot_dispersions( self, log: bool = True, save_path: Optional[str] = None, **kwargs ) -> None: @@ -1073,23 +1094,10 @@ def _refit_without_outliers( # Note: the trend curve is not refitted. sub_dds.varm["_normed_means"] = sub_dds.layers["normed_counts"].mean(0) - if self.trend_fit_type == "parametric": - sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"] - sub_dds.varm["fitted_dispersions"] = dispersion_trend( - sub_dds.varm["_normed_means"], - sub_dds.uns["trend_coeffs"], - ) - elif self.trend_fit_type == "local": - sub_dds.uns["disp_function"] = self.uns["disp_function"] - sub_dds.varm["fitted_dispersions"] = self.uns["disp_function"]( - sub_dds.varm["_normed_means"][sub_dds.varm["non_zero"]] - ) - - else: - raise AttributeError( - f"Found trend_fit_type '{self.trend_fit_type}'. Expected 'parametric' or " - "'local'." - ) + sub_dds.uns["disp_function"] = self.uns["disp_function"] + sub_dds.varm["fitted_dispersions"] = self.uns["disp_function"]( + sub_dds.varm["_normed_means"][sub_dds.varm["non_zero"]] + ) # Estimate MAP dispersions. # Note: the prior variance is not recomputed. From 5b9c18cc805cc2bec73668943c94701897740e8c Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 15:14:19 +0100 Subject: [PATCH 05/16] fix: typo in setup.py --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 25611c17..cc41f679 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,8 @@ "numpy>=1.23.0", "pandas>=1.4.0", "scikit-learn>=1.1.0", - "scikit-misc>=0.3.1" "scipy>=1.8.0", + "scikit-misc>=0.3.1", + "scipy>=1.8.0", "statsmodels", "matplotlib>=3.6.2", # not sure why sphinx_gallery does not work without it ], # external packages as dependencies From a262e4f8cd6cbac83fed0242a9a65f282ef85cf1 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 15:21:38 +0100 Subject: [PATCH 06/16] build!: discontinue python 3.8 --- .github/workflows/pr_validation.yml | 2 -- setup.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/pr_validation.yml b/.github/workflows/pr_validation.yml index 4c14dfcb..4c55eebc 100644 --- a/.github/workflows/pr_validation.yml +++ b/.github/workflows/pr_validation.yml @@ -17,8 +17,6 @@ jobs: strategy: matrix: include: - - os: ubuntu-latest - python: "3.8" - os: ubuntu-latest python: "3.9" - os: ubuntu-latest diff --git a/setup.py b/setup.py index cc41f679..1870950e 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( name="pydeseq2", version=about["__version__"], - python_requires=">=3.8.0", + python_requires=">=3.9.0", license="MIT", description="A python implementation of DESeq2.", long_description=readme, From c86ced3ef4a83a847add59b5da530f795bbf6833 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 15:43:13 +0100 Subject: [PATCH 07/16] feat: raise NotImplementedError when fit type is not recognized --- pydeseq2/dds.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 08784317..0a7c38f7 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -604,6 +604,12 @@ def fit_dispersion_trend(self) -> None: if self.trend_fit_type == "mean": self._fit_mean_trend() + else: + raise NotImplementedError( + f"Unknown trend_fit_type: {self.trend_fit_type}. " + "Expected 'parametric', 'local' or 'mean'." + ) + def fit_dispersion_prior(self) -> None: """Fit dispersion variance priors and standard deviation of log-residuals. From ef783df53479f81e90ef548fc4f50585b5d8e1ca Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 15:46:53 +0100 Subject: [PATCH 08/16] feat: raise NotImplementedError when fit type is not recognized --- pydeseq2/dds.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 0a7c38f7..4e8918d9 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -597,14 +597,14 @@ def fit_dispersion_trend(self) -> None: if self.trend_fit_type == "local": try: self._fit_local_trend() - except ValueError: - print("Local trend, fit did not converge, switching to mean fit.") + except (ValueError, RuntimeError): + print("Local trend fit did not converge, switching to mean fit.") self.trend_fit_type = "mean" if self.trend_fit_type == "mean": self._fit_mean_trend() - else: + if self.trend_fit_type not in ["parametric", "local", "mean"]: raise NotImplementedError( f"Unknown trend_fit_type: {self.trend_fit_type}. " "Expected 'parametric', 'local' or 'mean'." From 2a0dcc376924f177afd676e95ab5935e21bfb5a9 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 17:23:56 +0100 Subject: [PATCH 09/16] feat: implement trick to avoid segmentation faults when running loess --- pydeseq2/dds.py | 32 ++++++++++++++++++++++++-------- pydeseq2/utils.py | 16 ++++++++++++++++ 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 4e8918d9..933783a6 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -1,3 +1,4 @@ +import multiprocessing as mp import sys import time import warnings @@ -14,7 +15,6 @@ from scipy.special import polygamma # type: ignore from scipy.stats import f # type: ignore from scipy.stats import trim_mean # type: ignore -from skmisc.loess import loess from pydeseq2.default_inference import DefaultInference from pydeseq2.inference import Inference @@ -22,6 +22,7 @@ from pydeseq2.preprocessing import deseq2_norm_transform from pydeseq2.utils import build_design_matrix from pydeseq2.utils import dispersion_trend +from pydeseq2.utils import local_trend_fit from pydeseq2.utils import make_scatter from pydeseq2.utils import mean_absolute_deviation from pydeseq2.utils import n_or_more_replicates @@ -909,6 +910,12 @@ def _fit_parametric_trend(self) -> None: def _fit_local_trend(self) -> None: r"""Fit the dispersion trend curve using local regression.""" # Check that genewise dispersions are available. If not, compute them. + warnings.warn( + "Running local trend fit will make the DeseqDataSet object unpicklable.", + UserWarning, + stacklevel=2, + ) + if "genewise_dispersions" not in self.varm: self.fit_genewise_dispersions() @@ -924,14 +931,23 @@ def _fit_local_trend(self) -> None: if len(genes_to_fit) == 0: raise ValueError("No genes to fit: all dispersions are below 10 * min_disp") - lo = loess( - x=np.log(self.varm["_normed_means"][genes_to_fit]), - y=np.log(self.varm["genewise_dispersions"][genes_to_fit]), - weights=self.varm["_normed_means"][genes_to_fit], - surface="direct", # to allow extrapolation - ) + means = self.varm["_normed_means"][genes_to_fit] + dispersions = self.varm["genewise_dispersions"][genes_to_fit] + + # Run local trend fit in a separate process to avoid segmentation faults + q = mp.Manager().Queue() + p = mp.Process(target=local_trend_fit, args=(means, dispersions, q)) + p.start() + p.join() + + if p.exitcode != 0: + raise RuntimeError + else: + # The code rand without fault, so we can now run it locally. + # Returning the loess object in the first step would be more straightforward, + # But it is unfortunately unpicklable. - lo.fit() + lo = local_trend_fit(means, dispersions) end = time.time() diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py index 0f04e0f5..3a46d682 100644 --- a/pydeseq2/utils.py +++ b/pydeseq2/utils.py @@ -1,6 +1,7 @@ import multiprocessing import warnings from math import floor +from multiprocessing import Queue from pathlib import Path from typing import List from typing import Literal @@ -18,6 +19,7 @@ from scipy.special import polygamma # type: ignore from scipy.stats import norm # type: ignore from sklearn.linear_model import LinearRegression # type: ignore +from skmisc.loess import loess import pydeseq2 from pydeseq2.grid_search import grid_fit_alpha @@ -724,6 +726,20 @@ def dloss(log_alpha: float) -> float: ) +def local_trend_fit(means, dispersions, q: Optional[Queue] = None): + """Run a wrapper for local trend fit to catch segfaults from scikit-misc.""" + lo = loess( + x=np.log(means), + y=np.log(dispersions), + weights=means, + surface="direct", # to allow extrapolation + ) + + lo.fit() + + return lo + + def trimmed_mean(x, trim: float = 0.1, **kwargs) -> Union[float, np.ndarray]: """Return trimmed mean. From b19f63973e989c548bc311e784d5e52f2c84611c Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 17:38:29 +0100 Subject: [PATCH 10/16] fix: catch PerfectSeperationWarnings from statsmodels --- pydeseq2/dds.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 933783a6..223668b8 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -15,6 +15,7 @@ from scipy.special import polygamma # type: ignore from scipy.stats import f # type: ignore from scipy.stats import trim_mean # type: ignore +from statsmodels.tools.sm_exceptions import PerfectSeparationWarning from pydeseq2.default_inference import DefaultInference from pydeseq2.inference import Inference @@ -582,7 +583,15 @@ def fit_dispersion_trend(self) -> None: "parametric", "local" and "mean". """ if self.trend_fit_type == "parametric": - self._fit_parametric_trend() + try: + self._fit_parametric_trend() + except PerfectSeparationWarning: + warnings.warn( + "Perfect separation detected, switching to local regression.", + UserWarning, + stacklevel=2, + ) + self.trend_fit_type = "local" if (self.uns["trend_coeffs"] <= 0).any(): warnings.warn( From 196141afad22ab243440a0d55ae2ec74e4c6c519 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 17:48:42 +0100 Subject: [PATCH 11/16] fix: catch PerfectSeperationWarnings from statsmodels --- pydeseq2/dds.py | 3 +-- pydeseq2/default_inference.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 223668b8..f63502d2 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -15,7 +15,6 @@ from scipy.special import polygamma # type: ignore from scipy.stats import f # type: ignore from scipy.stats import trim_mean # type: ignore -from statsmodels.tools.sm_exceptions import PerfectSeparationWarning from pydeseq2.default_inference import DefaultInference from pydeseq2.inference import Inference @@ -585,7 +584,7 @@ def fit_dispersion_trend(self) -> None: if self.trend_fit_type == "parametric": try: self._fit_parametric_trend() - except PerfectSeparationWarning: + except RuntimeError: warnings.warn( "Perfect separation detected, switching to local regression.", UserWarning, diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py index 29ebb65c..ef5be46d 100644 --- a/pydeseq2/default_inference.py +++ b/pydeseq2/default_inference.py @@ -10,6 +10,7 @@ from joblib import delayed from joblib import parallel_backend from statsmodels.tools.sm_exceptions import DomainWarning # type: ignore +from statsmodels.tools.sm_exceptions import PerfectSeparationWarning from pydeseq2 import inference from pydeseq2 import utils @@ -210,12 +211,17 @@ def dispersion_trend_gamma_glm( # noqa: D102 covariates_w_intercept = sm.add_constant(covariates) targets_fit = targets.values covariates_fit = covariates_w_intercept.values - glm_gamma = sm.GLM( - targets_fit, - covariates_fit, - family=sm.families.Gamma(link=sm.families.links.identity()), - ) - res = glm_gamma.fit() + try: + glm_gamma = sm.GLM( + targets_fit, + covariates_fit, + family=sm.families.Gamma(link=sm.families.links.identity()), + ) + res = glm_gamma.fit() + except PerfectSeparationWarning: + raise RuntimeError( + "Perfect separation detected in dispersion trend model." + ) from None coeffs = res.params return (coeffs, covariates_fit @ coeffs) From b802c52187de96e9303404df5edbeb060bfa7388 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Mon, 12 Feb 2024 17:55:28 +0100 Subject: [PATCH 12/16] fix: attempt to ignore PerfectSeparationWarning altogether --- pydeseq2/default_inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py index ef5be46d..8aa6fb5f 100644 --- a/pydeseq2/default_inference.py +++ b/pydeseq2/default_inference.py @@ -17,6 +17,7 @@ # Ignore DomainWarning raised by statsmodels when fitting a Gamma GLM with identity link. warnings.simplefilter("ignore", DomainWarning) +warnings.simplefilter("ignore", PerfectSeparationWarning) class DefaultInference(inference.Inference): From 71977ffce57367250b41b325d5815da2c09060d8 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Tue, 13 Feb 2024 16:08:51 +0100 Subject: [PATCH 13/16] refactor: remove statsmodels with custom gamma GLM implementation for parametric trend curve in default inference model --- pydeseq2/default_inference.py | 51 +++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py index 8aa6fb5f..5fff7462 100644 --- a/pydeseq2/default_inference.py +++ b/pydeseq2/default_inference.py @@ -1,24 +1,17 @@ -import warnings from typing import Literal from typing import Optional from typing import Tuple import numpy as np import pandas as pd -import statsmodels.api as sm # type: ignore from joblib import Parallel # type: ignore from joblib import delayed from joblib import parallel_backend -from statsmodels.tools.sm_exceptions import DomainWarning # type: ignore -from statsmodels.tools.sm_exceptions import PerfectSeparationWarning +from scipy.optimize import minimize # type: ignore from pydeseq2 import inference from pydeseq2 import utils -# Ignore DomainWarning raised by statsmodels when fitting a Gamma GLM with identity link. -warnings.simplefilter("ignore", DomainWarning) -warnings.simplefilter("ignore", PerfectSeparationWarning) - class DefaultInference(inference.Inference): """Default DESeq2-related inference methods, using scipy/sklearn/numpy. @@ -209,22 +202,34 @@ def wald_test( # noqa: D102 def dispersion_trend_gamma_glm( # noqa: D102 self, covariates: pd.Series, targets: pd.Series ) -> Tuple[np.ndarray, np.ndarray]: - covariates_w_intercept = sm.add_constant(covariates) - targets_fit = targets.values + covariates_w_intercept = covariates.to_frame() + covariates_w_intercept["intercept"] = 1 covariates_fit = covariates_w_intercept.values - try: - glm_gamma = sm.GLM( - targets_fit, - covariates_fit, - family=sm.families.Gamma(link=sm.families.links.identity()), - ) - res = glm_gamma.fit() - except PerfectSeparationWarning: - raise RuntimeError( - "Perfect separation detected in dispersion trend model." - ) from None - coeffs = res.params - return (coeffs, covariates_fit @ coeffs) + targets_fit = targets.values + + def loss(coeffs): + mu = covariates_fit @ coeffs + return (targets_fit / mu + np.log(mu)).mean() + + def grad(coeffs): + mu = covariates_fit @ coeffs + return -( + ((targets_fit / mu - 1)[:, None] * covariates_fit) / mu[:, None] + ).mean(0) + + res = minimize( + loss, + x0=np.array([1.0, 1.0]), + jac=grad, + method="L-BFGS-B", + bounds=[(0, np.inf)], + ) + + if not res.success: + raise ValueError("Gamma GLM optimization failed.") + + coeffs = res.x + return coeffs, covariates_fit @ coeffs def lfc_shrink_nbinom_glm( # noqa: D102 self, From f36c1b93fe4d3fb14070a7949523a3ffb6d934a5 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Tue, 13 Feb 2024 17:02:48 +0100 Subject: [PATCH 14/16] fix: use consistent ordering for trend curve coefficients --- pydeseq2/default_inference.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py index 5fff7462..8a3985be 100644 --- a/pydeseq2/default_inference.py +++ b/pydeseq2/default_inference.py @@ -203,7 +203,7 @@ def dispersion_trend_gamma_glm( # noqa: D102 self, covariates: pd.Series, targets: pd.Series ) -> Tuple[np.ndarray, np.ndarray]: covariates_w_intercept = covariates.to_frame() - covariates_w_intercept["intercept"] = 1 + covariates_w_intercept.insert(0, "intercept", 1) covariates_fit = covariates_w_intercept.values targets_fit = targets.values From c151c10e6c6d8790bf0b848231c0f4b293f75f68 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Tue, 13 Feb 2024 17:10:15 +0100 Subject: [PATCH 15/16] feat: catch gamma GLM failure and switch to mean fit --- pydeseq2/dds.py | 35 ++++++++++++++++++++--------------- pydeseq2/default_inference.py | 2 +- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index f63502d2..a98a29f2 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -352,7 +352,7 @@ def vst_fit( if use_design: # Check that the dispersion trend curve was fitted. If not, fit it. # This will call previous functions in a cascade. - if "trend_coeffs" not in self.uns: + if "disp_function" not in self.uns: self.fit_dispersion_trend() else: # Reduce the design matrix to an intercept and reconstruct at the end @@ -586,13 +586,14 @@ def fit_dispersion_trend(self) -> None: self._fit_parametric_trend() except RuntimeError: warnings.warn( - "Perfect separation detected, switching to local regression.", + "The dispersion trend curve fitting did not converge. " + "Switching to a mean-based dispersion trend.", UserWarning, stacklevel=2, ) self.trend_fit_type = "local" - if (self.uns["trend_coeffs"] <= 0).any(): + if (self.uns["trend_coeffs"] == 0).any(): warnings.warn( f"self.trend_fit_type={self.trend_fit_type}, but the " f"dispersion trend was not well captured by the function: " @@ -619,6 +620,15 @@ def fit_dispersion_trend(self) -> None: "Expected 'parametric', 'local' or 'mean'." ) + def disp_function(self, x): + """Return the dispersion trend function at x.""" + if self.uns["disp_function_type"] == "parametric": + return dispersion_trend(x, self.uns["trend_coeffs"]) + elif self.uns["disp_function_type"] == "local": + return np.exp(self.uns["loess"].predict(np.log(x)).values) + elif self.uns["disp_function_type"] == "mean": + return self.uns["mean_disp"] + def fit_dispersion_prior(self) -> None: """Fit dispersion variance priors and standard deviation of log-residuals. @@ -906,14 +916,10 @@ def _fit_parametric_trend(self) -> None: self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"]) - self.uns["disp_function"] = lambda x: dispersion_trend( - x, self.uns["trend_coeffs"] - ) - self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN) - self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.uns[ - "disp_function" - ](self.varm["_normed_means"][self.varm["non_zero"]]) + self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.disp_function( + self.varm["_normed_means"][self.varm["non_zero"]] + ) def _fit_local_trend(self) -> None: r"""Fit the dispersion trend curve using local regression.""" @@ -962,11 +968,11 @@ def _fit_local_trend(self) -> None: if not self.quiet: print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) - self.uns["disp_function"] = lambda x: np.exp(lo.predict(np.log(x)).values) + self.uns["loess"] = lo self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN) - self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.uns[ - "disp_function" - ](self.varm["_normed_means"][self.varm["non_zero"]]) + self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.disp_function( + self.varm["_normed_means"][self.varm["non_zero"]] + ) def _fit_mean_trend(self): """Fit the dispersion trend curve using the mean of gene-wise dispersions.""" @@ -977,7 +983,6 @@ def _fit_mean_trend(self): proportiontocut=0.001, ) - self.uns["disp_function"] = lambda x: mean_disp self.varm["fitted_dispersions"] = np.full(self.n_vars, mean_disp) def plot_dispersions( diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py index 8a3985be..975375cd 100644 --- a/pydeseq2/default_inference.py +++ b/pydeseq2/default_inference.py @@ -226,7 +226,7 @@ def grad(coeffs): ) if not res.success: - raise ValueError("Gamma GLM optimization failed.") + raise RuntimeError("Gamma GLM optimization failed.") coeffs = res.x return coeffs, covariates_fit @ coeffs From 0d912b9d1fb63a7cec7590298169f2b3ca2e8535 Mon Sep 17 00:00:00 2001 From: Boris MUZELLEC Date: Fri, 16 Feb 2024 10:44:46 +0100 Subject: [PATCH 16/16] chore: catch up code state from statsmodels PR --- pydeseq2/dds.py | 86 +++++++++++++++++++++++----------------- tests/test_edge_cases.py | 2 +- 2 files changed, 50 insertions(+), 38 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index a98a29f2..879101ba 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -73,7 +73,7 @@ class DeseqDataSet(ad.AnnData): specifying the factor of interest and the reference (control) level against which we're testing, e.g. ``["condition", "A"]``. (default: ``None``). - trend_fit_type : str + disp_function_type : str Either "parametric", "local" or "mean", for the type of fitting of dispersions trend curve.If "parametric" is selected but the fitting fails, it will switch to "local". (default: ``"parametric"``). @@ -188,7 +188,7 @@ def __init__( design_factors: Union[str, List[str]] = "condition", continuous_factors: Optional[List[str]] = None, ref_level: Optional[List[str]] = None, - trend_fit_type: Literal["parametric", "local", "mean"] = "parametric", + disp_function_type: Literal["parametric", "local", "mean"] = "parametric", min_mu: float = 0.5, min_disp: float = 1e-8, max_disp: float = 10.0, @@ -274,7 +274,7 @@ def __init__( # Check that the design matrix has full rank self._check_full_rank_design() - self.trend_fit_type = trend_fit_type + self.disp_function_type = disp_function_type self.min_mu = min_mu self.min_disp = min_disp self.max_disp = np.maximum(max_disp, self.n_obs) @@ -486,7 +486,7 @@ def fit_size_factors( warnings.warn( "Every gene contains at least one zero, " "cannot compute log geometric means. Switching to iterative mode.", - RuntimeWarning, + UserWarning, stacklevel=2, ) self._fit_iterate_size_factors() @@ -578,10 +578,10 @@ def fit_genewise_dispersions(self) -> None: def fit_dispersion_trend(self) -> None: r"""Fit the dispersion trend curve. - Three methods are available, depending on the ``trend_fit_type`` attribute: + Three methods are available, depending on the ``disp_function_type`` attribute: "parametric", "local" and "mean". """ - if self.trend_fit_type == "parametric": + if self.disp_function_type == "parametric": try: self._fit_parametric_trend() except RuntimeError: @@ -591,42 +591,42 @@ def fit_dispersion_trend(self) -> None: UserWarning, stacklevel=2, ) - self.trend_fit_type = "local" + self.disp_function_type = "local" if (self.uns["trend_coeffs"] == 0).any(): warnings.warn( - f"self.trend_fit_type={self.trend_fit_type}, but the " + f"self.disp_function_type={self.disp_function_type}, but the " f"dispersion trend was not well captured by the function: " - f"y = a / x + b. Switchiing to local regression.", + f"y = a / x + b. Switching to local regression.", UserWarning, stacklevel=2, ) - self.trend_fit_type = "local" + self.disp_function_type = "local" del self.uns["trend_coeffs"] - if self.trend_fit_type == "local": + if self.disp_function_type == "local": try: self._fit_local_trend() except (ValueError, RuntimeError): print("Local trend fit did not converge, switching to mean fit.") - self.trend_fit_type = "mean" + self.disp_function_type = "mean" - if self.trend_fit_type == "mean": + if self.disp_function_type == "mean": self._fit_mean_trend() - if self.trend_fit_type not in ["parametric", "local", "mean"]: + if self.disp_function_type not in ["parametric", "local", "mean"]: raise NotImplementedError( - f"Unknown trend_fit_type: {self.trend_fit_type}. " + f"Unknown disp_function_type: {self.disp_function_type}. " "Expected 'parametric', 'local' or 'mean'." ) def disp_function(self, x): """Return the dispersion trend function at x.""" - if self.uns["disp_function_type"] == "parametric": + if self.disp_function_type == "parametric": return dispersion_trend(x, self.uns["trend_coeffs"]) - elif self.uns["disp_function_type"] == "local": + elif self.disp_function_type == "local": return np.exp(self.uns["loess"].predict(np.log(x)).values) - elif self.uns["disp_function_type"] == "mean": + elif self.disp_function_type == "mean": return self.uns["mean_disp"] def fit_dispersion_prior(self) -> None: @@ -890,24 +890,29 @@ def _fit_parametric_trend(self) -> None: old_coeffs = pd.Series([0.1, 0.1]) coeffs = pd.Series([1.0, 1.0]) - while (np.log(np.abs(coeffs / old_coeffs)) ** 2).sum() >= 1e-6: - old_coeffs = coeffs - coeffs, predictions = self.inference.dispersion_trend_gamma_glm( - covariates, targets - ) - # Filter out genes that are too far away from the curve before refitting - pred_ratios = ( - self[:, covariates.index].varm["genewise_dispersions"] / predictions - ) + try: + while (coeffs > 0).all() and ( + np.log(np.abs(coeffs / old_coeffs)) ** 2 + ).sum() >= 1e-6: + old_coeffs = coeffs + coeffs, predictions = self.inference.dispersion_trend_gamma_glm( + covariates, targets + ) + # Filter out genes that are too far away from the curve before refitting + pred_ratios = ( + self[:, covariates.index].varm["genewise_dispersions"] / predictions + ) - targets.drop( - targets[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index, - inplace=True, - ) - covariates.drop( - covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index, - inplace=True, - ) + targets.drop( + targets[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index, + inplace=True, + ) + covariates.drop( + covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index, + inplace=True, + ) + except RuntimeError as e: + raise e end = time.time() @@ -1114,6 +1119,7 @@ def _refit_without_outliers( min_replicates=self.min_replicates, beta_tol=self.beta_tol, inference=self.inference, + disp_function_type=self.disp_function_type, ) # Use the same size factors @@ -1129,8 +1135,7 @@ def _refit_without_outliers( # Note: the trend curve is not refitted. sub_dds.varm["_normed_means"] = sub_dds.layers["normed_counts"].mean(0) - sub_dds.uns["disp_function"] = self.uns["disp_function"] - sub_dds.varm["fitted_dispersions"] = self.uns["disp_function"]( + sub_dds.varm["fitted_dispersions"] = self.disp_function( sub_dds.varm["_normed_means"][sub_dds.varm["non_zero"]] ) @@ -1215,6 +1220,13 @@ def objective(p): & self.varm["non_zero"] ] + if len(use_for_mean_genes) == 0: + print( + "No genes have a dispersion above 10 * min_disp in " + "_fit_iterate_size_factors." + ) + break + mean_disp = trimmed_mean( self[:, use_for_mean_genes].varm["genewise_dispersions"], trim=0.001 ) diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py index 5a2b8a71..b93a853e 100644 --- a/tests/test_edge_cases.py +++ b/tests/test_edge_cases.py @@ -468,7 +468,7 @@ def test_zero_inflated(): counts_df.iloc[idx, :] = 0 dds = DeseqDataSet(counts=counts_df, metadata=metadata) - with pytest.warns(RuntimeWarning): + with pytest.warns(UserWarning): dds.deseq2()