From 1055aea07687c1e617a5587834abe39598196aba Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Fri, 2 Feb 2024 16:19:02 +0100
Subject: [PATCH 01/16] feat: implementing local fit for trend curve (WIP)

---
 pydeseq2/dds.py | 229 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 159 insertions(+), 70 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 665ef5b9..d4b30e0f 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -15,6 +15,8 @@
 from scipy.stats import f  # type: ignore
 from scipy.stats import trim_mean  # type: ignore
 
+from skmisc.loess import loess
+
 from pydeseq2.default_inference import DefaultInference
 from pydeseq2.inference import Inference
 from pydeseq2.preprocessing import deseq2_norm_fit
@@ -71,6 +73,11 @@ class DeseqDataSet(ad.AnnData):
         specifying the factor of interest and the reference (control) level against which
         we're testing, e.g. ``["condition", "A"]``. (default: ``None``).
 
+    trend_fit_type : str
+        Either "parametric" or "local" for the type of fitting of dispersions trend
+        curve.If "parametric" is selected but the fitting fails, it will switch to
+        "local". (default: ``"parametric"``).
+
     min_mu : float
         Threshold for mean estimates. (default: ``0.5``).
 
@@ -181,6 +188,7 @@ def __init__(
         design_factors: Union[str, List[str]] = "condition",
         continuous_factors: Optional[List[str]] = None,
         ref_level: Optional[List[str]] = None,
+        trend_fit_type: Literal["parametric", "local"] = "parametric",
         min_mu: float = 0.5,
         min_disp: float = 1e-8,
         max_disp: float = 10.0,
@@ -266,6 +274,7 @@ def __init__(
         # Check that the design matrix has full rank
         self._check_full_rank_design()
 
+        self.trend_fit_type = trend_fit_type
         self.min_mu = min_mu
         self.min_disp = min_disp
         self.max_disp = np.maximum(max_disp, self.n_obs)
@@ -568,71 +577,25 @@ def fit_genewise_dispersions(self) -> None:
 
     def fit_dispersion_trend(self) -> None:
         r"""Fit the dispersion trend coefficients.
-
-        :math:`f(\mu) = \alpha_1/\mu + a_0`.
+        TODO
         """
-        # Check that genewise dispersions are available. If not, compute them.
-        if "genewise_dispersions" not in self.varm:
-            self.fit_genewise_dispersions()
-
-        if not self.quiet:
-            print("Fitting dispersion trend curve...", file=sys.stderr)
-        start = time.time()
-        self.varm["_normed_means"] = self.layers["normed_counts"].mean(0)
-
-        # Exclude all-zero counts
-        targets = pd.Series(
-            self[:, self.non_zero_genes].varm["genewise_dispersions"].copy(),
-            index=self.non_zero_genes,
-        )
-        covariates = pd.Series(
-            1 / self[:, self.non_zero_genes].varm["_normed_means"],
-            index=self.non_zero_genes,
-        )
-
-        for gene in self.non_zero_genes:
-            if (
-                np.isinf(covariates.loc[gene]).any()
-                or np.isnan(covariates.loc[gene]).any()
-            ):
-                targets.drop(labels=[gene], inplace=True)
-                covariates.drop(labels=[gene], inplace=True)
-
-        # Initialize coefficients
-        old_coeffs = pd.Series([0.1, 0.1])
-        coeffs = pd.Series([1.0, 1.0])
-
-        while (np.log(np.abs(coeffs / old_coeffs)) ** 2).sum() >= 1e-6:
-            old_coeffs = coeffs
-            coeffs, predictions = self.inference.dispersion_trend_gamma_glm(
-                covariates, targets
-            )
-            # Filter out genes that are too far away from the curve before refitting
-            pred_ratios = (
-                self[:, covariates.index].varm["genewise_dispersions"] / predictions
-            )
 
-            targets.drop(
-                targets[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
-                inplace=True,
-            )
-            covariates.drop(
-                covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
-                inplace=True,
-            )
+        if self.trend_fit_type == "parametric":
+            self._fit_parametric_trend()
 
-        end = time.time()
-
-        if not self.quiet:
-            print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)
-
-        self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"])
+            if (self.uns["trend_coeffs"] <= 0).any():
+                warnings.warn(
+                    f"self.trend_fit_type={self.trend_fit_type}, but the dispersion trend was"
+                    f" not well captured by the function: y = a / x + b. Switchiing to local "
+                    f"regression.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                self.trend_fit_type = "local"
+                del self.uns["trend_coeffs"]
 
-        self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
-        self.varm["fitted_dispersions"][self.varm["non_zero"]] = dispersion_trend(
-            self.varm["_normed_means"][self.varm["non_zero"]],
-            self.uns["trend_coeffs"],
-        )
+        if self.trend_fit_type == "local":
+            self._fit_local_trend()
 
     def fit_dispersion_prior(self) -> None:
         """Fit dispersion variance priors and standard deviation of log-residuals.
@@ -721,9 +684,11 @@ def fit_MAP_dispersions(self) -> None:
 
         # Filter outlier genes for which we won't apply shrinkage
         self.varm["dispersions"] = self.varm["MAP_dispersions"].copy()
-        self.varm["_outlier_genes"] = np.log(self.varm["genewise_dispersions"]) > np.log(
-            self.varm["fitted_dispersions"]
-        ) + 2 * np.sqrt(self.uns["_squared_logres"])
+        self.varm["_outlier_genes"] = np.log(
+            self.varm["genewise_dispersions"]
+        ) > np.log(self.varm["fitted_dispersions"]) + 2 * np.sqrt(
+            self.uns["_squared_logres"]
+        )
         self.varm["dispersions"][self.varm["_outlier_genes"]] = self.varm[
             "genewise_dispersions"
         ][self.varm["_outlier_genes"]]
@@ -859,6 +824,115 @@ def _fit_MoM_dispersions(self) -> None:
             alpha_hat, self.min_disp, self.max_disp
         )
 
+    def _fit_parametric_trend(self) -> None:
+        r"""Fit the dispersion trend coefficients.
+
+        :math:`f(\mu) = \alpha_1/\mu + a_0`.
+        """
+        # Check that genewise dispersions are available. If not, compute them.
+        if "genewise_dispersions" not in self.varm:
+            self.fit_genewise_dispersions()
+
+        if not self.quiet:
+            print("Fitting dispersion trend curve...", file=sys.stderr)
+        start = time.time()
+        self.varm["_normed_means"] = self.layers["normed_counts"].mean(0)
+
+        # Exclude all-zero counts
+        targets = pd.Series(
+            self[:, self.non_zero_genes].varm["genewise_dispersions"].copy(),
+            index=self.non_zero_genes,
+        )
+        covariates = pd.Series(
+            1 / self[:, self.non_zero_genes].varm["_normed_means"],
+            index=self.non_zero_genes,
+        )
+
+        for gene in self.non_zero_genes:
+            if (
+                np.isinf(covariates.loc[gene]).any()
+                or np.isnan(covariates.loc[gene]).any()
+            ):
+                targets.drop(labels=[gene], inplace=True)
+                covariates.drop(labels=[gene], inplace=True)
+
+        # Initialize coefficients
+        old_coeffs = pd.Series([0.1, 0.1])
+        coeffs = pd.Series([1.0, 1.0])
+
+        while (np.log(np.abs(coeffs / old_coeffs)) ** 2).sum() >= 1e-6:
+            old_coeffs = coeffs
+            coeffs, predictions = self.inference.dispersion_trend_gamma_glm(
+                covariates, targets
+            )
+            # Filter out genes that are too far away from the curve before refitting
+            pred_ratios = (
+                self[:, covariates.index].varm["genewise_dispersions"] / predictions
+            )
+
+            targets.drop(
+                targets[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
+                inplace=True,
+            )
+            covariates.drop(
+                covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
+                inplace=True,
+            )
+
+        end = time.time()
+
+        if not self.quiet:
+            print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)
+
+        self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"])
+
+        self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
+        self.varm["fitted_dispersions"][self.varm["non_zero"]] = dispersion_trend(
+            self.varm["_normed_means"][self.varm["non_zero"]],
+            self.uns["trend_coeffs"],
+        )
+
+    def _fit_local_trend(self) -> None:
+        r"""Fit the dispersion trend coefficients.
+
+        TODO
+        """
+        # Check that genewise dispersions are available. If not, compute them.
+        if "genewise_dispersions" not in self.varm:
+            self.fit_genewise_dispersions()
+
+        if not self.quiet:
+            print("Fitting dispersion trend curve...", file=sys.stderr)
+        start = time.time()
+        self.varm["_normed_means"] = self.layers["normed_counts"].mean(0)
+
+        genes_to_fit = self.varm["non_zero"] & (
+            self.varm["genewise_dispersions"] >= 10 * self.min_disp
+        )
+
+        if len(genes_to_fit) == 0:
+            raise ValueError("No genes to fit: all dispersions are below 10 * min_disp")
+
+        lo = loess(
+            x=np.log(self.varm["_normed_means"][genes_to_fit]),
+            y=np.log(self.varm["genewise_dispersions"][genes_to_fit]),
+            weights=self.varm["_normed_means"][genes_to_fit],
+            surface="direct",  # to allow extrapolation
+        )
+
+        lo.fit()
+
+        end = time.time()
+
+        if not self.quiet:
+            print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)
+
+        self.uns["disp_function"] = lambda x: np.exp(lo.predict(np.log(x)).values)
+        self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
+        self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.uns[
+            "disp_function"
+        ](self.varm["_normed_means"][self.varm["non_zero"]])
+
     def plot_dispersions(
         self, log: bool = True, save_path: Optional[str] = None, **kwargs
     ) -> None:
@@ -1001,12 +1075,25 @@ def _refit_without_outliers(
 
         # Compute trend dispersions.
         # Note: the trend curve is not refitted.
-        sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"]
         sub_dds.varm["_normed_means"] = sub_dds.layers["normed_counts"].mean(0)
-        sub_dds.varm["fitted_dispersions"] = dispersion_trend(
-            sub_dds.varm["_normed_means"],
-            sub_dds.uns["trend_coeffs"],
-        )
+
+        if self.trend_fit_type == "parametric":
+            sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"]
+            sub_dds.varm["fitted_dispersions"] = dispersion_trend(
+                sub_dds.varm["_normed_means"],
+                sub_dds.uns["trend_coeffs"],
+            )
+        elif self.trend_fit_type == "local":
+            sub_dds.uns["disp_function"] = self.uns["disp_function"]
+            sub_dds.varm["fitted_dispersions"] = self.uns["disp_function"](
+                sub_dds.varm["_normed_means"][sub_dds.varm["non_zero"]]
+            )
+
+        else:
+            raise AttributeError(
+                f"Found trend_fit_type '{self.trend_fit_type}'. Expected 'parametric' or "
+                "'local'."
+            )
 
         # Estimate MAP dispersions.
         # Note: the prior variance is not recomputed.
@@ -1111,7 +1198,9 @@ def objective(p):
             ) < 1e-4:
                 break
             elif i == niter - 1:
-                print("Iterative size factor fitting did not converge.", file=sys.stderr)
+                print(
+                    "Iterative size factor fitting did not converge.", file=sys.stderr
+                )
 
         # Restore the design matrix and free buffer
         self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy()

From 718d7fddfea0062398dc10df2f190ea599a88f10 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 2 Feb 2024 15:21:37 +0000
Subject: [PATCH 02/16] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pydeseq2/dds.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index d4b30e0f..f0547291 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -14,7 +14,6 @@
 from scipy.special import polygamma  # type: ignore
 from scipy.stats import f  # type: ignore
 from scipy.stats import trim_mean  # type: ignore
-
 from skmisc.loess import loess
 
 from pydeseq2.default_inference import DefaultInference
@@ -579,7 +578,6 @@ def fit_dispersion_trend(self) -> None:
         r"""Fit the dispersion trend coefficients.
         TODO
         """
-
         if self.trend_fit_type == "parametric":
             self._fit_parametric_trend()
 
@@ -684,11 +682,9 @@ def fit_MAP_dispersions(self) -> None:
 
         # Filter outlier genes for which we won't apply shrinkage
         self.varm["dispersions"] = self.varm["MAP_dispersions"].copy()
-        self.varm["_outlier_genes"] = np.log(
-            self.varm["genewise_dispersions"]
-        ) > np.log(self.varm["fitted_dispersions"]) + 2 * np.sqrt(
-            self.uns["_squared_logres"]
-        )
+        self.varm["_outlier_genes"] = np.log(self.varm["genewise_dispersions"]) > np.log(
+            self.varm["fitted_dispersions"]
+        ) + 2 * np.sqrt(self.uns["_squared_logres"])
         self.varm["dispersions"][self.varm["_outlier_genes"]] = self.varm[
             "genewise_dispersions"
         ][self.varm["_outlier_genes"]]
@@ -1198,9 +1194,7 @@ def objective(p):
             ) < 1e-4:
                 break
             elif i == niter - 1:
-                print(
-                    "Iterative size factor fitting did not converge.", file=sys.stderr
-                )
+                print("Iterative size factor fitting did not converge.", file=sys.stderr)
 
         # Restore the design matrix and free buffer
         self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy()

From 3e6692db78ea586d37937de1c52537338ee3174a Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 11:57:23 +0100
Subject: [PATCH 03/16] chore: add scikit-misc dependency

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 5bb3269c..25611c17 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,7 @@
         "numpy>=1.23.0",
         "pandas>=1.4.0",
         "scikit-learn>=1.1.0",
-        "scipy>=1.8.0",
+        "scikit-misc>=0.3.1" "scipy>=1.8.0",
         "statsmodels",
         "matplotlib>=3.6.2",  # not sure why sphinx_gallery does not work without it
     ],  # external packages as dependencies

From 9a4671217dee6e59688def10cb9244e988b23d58 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 15:04:33 +0100
Subject: [PATCH 04/16] feat: implement mean trend curve

---
 pydeseq2/dds.py | 76 +++++++++++++++++++++++++++----------------------
 1 file changed, 42 insertions(+), 34 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index f0547291..08784317 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -73,8 +73,8 @@ class DeseqDataSet(ad.AnnData):
         we're testing, e.g. ``["condition", "A"]``. (default: ``None``).
 
     trend_fit_type : str
-        Either "parametric" or "local" for the type of fitting of dispersions trend
-        curve.If "parametric" is selected but the fitting fails, it will switch to
+        Either "parametric", "local" or "mean", for the type of fitting of dispersions
+        trend curve.If "parametric" is selected but the fitting fails, it will switch to
         "local". (default: ``"parametric"``).
 
     min_mu : float
@@ -187,7 +187,7 @@ def __init__(
         design_factors: Union[str, List[str]] = "condition",
         continuous_factors: Optional[List[str]] = None,
         ref_level: Optional[List[str]] = None,
-        trend_fit_type: Literal["parametric", "local"] = "parametric",
+        trend_fit_type: Literal["parametric", "local", "mean"] = "parametric",
         min_mu: float = 0.5,
         min_disp: float = 1e-8,
         max_disp: float = 10.0,
@@ -575,17 +575,19 @@ def fit_genewise_dispersions(self) -> None:
         self.varm["_genewise_converged"][self.varm["non_zero"]] = l_bfgs_b_converged_
 
     def fit_dispersion_trend(self) -> None:
-        r"""Fit the dispersion trend coefficients.
-        TODO
+        r"""Fit the dispersion trend curve.
+
+        Three methods are available, depending on the ``trend_fit_type`` attribute:
+        "parametric", "local" and "mean".
         """
         if self.trend_fit_type == "parametric":
             self._fit_parametric_trend()
 
             if (self.uns["trend_coeffs"] <= 0).any():
                 warnings.warn(
-                    f"self.trend_fit_type={self.trend_fit_type}, but the dispersion trend was"
-                    f" not well captured by the function: y = a / x + b. Switchiing to local "
-                    f"regression.",
+                    f"self.trend_fit_type={self.trend_fit_type}, but the "
+                    f"dispersion trend was not well captured by the function: "
+                    f"y = a / x + b. Switchiing to local regression.",
                     UserWarning,
                     stacklevel=2,
                 )
@@ -593,7 +595,14 @@ def fit_dispersion_trend(self) -> None:
                 del self.uns["trend_coeffs"]
 
         if self.trend_fit_type == "local":
-            self._fit_local_trend()
+            try:
+                self._fit_local_trend()
+            except ValueError:
+                print("Local trend, fit did not converge, switching to mean fit.")
+                self.trend_fit_type = "mean"
+
+        if self.trend_fit_type == "mean":
+            self._fit_mean_trend()
 
     def fit_dispersion_prior(self) -> None:
         """Fit dispersion variance priors and standard deviation of log-residuals.
@@ -882,17 +891,17 @@ def _fit_parametric_trend(self) -> None:
 
         self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"])
 
-        self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
-        self.varm["fitted_dispersions"][self.varm["non_zero"]] = dispersion_trend(
-            self.varm["_normed_means"][self.varm["non_zero"]],
-            self.uns["trend_coeffs"],
+        self.uns["disp_function"] = lambda x: dispersion_trend(
+            x, self.uns["trend_coeffs"]
         )
 
-    def _fit_local_trend(self) -> None:
-        r"""Fit the dispersion trend coefficients.
+        self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
+        self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.uns[
+            "disp_function"
+        ](self.varm["_normed_means"][self.varm["non_zero"]])
 
-        TODO
-        """
+    def _fit_local_trend(self) -> None:
+        r"""Fit the dispersion trend curve using local regression."""
         # Check that genewise dispersions are available. If not, compute them.
         if "genewise_dispersions" not in self.varm:
             self.fit_genewise_dispersions()
@@ -929,6 +938,18 @@ def _fit_local_trend(self) -> None:
             "disp_function"
         ](self.varm["_normed_means"][self.varm["non_zero"]])
 
+    def _fit_mean_trend(self):
+        """Fit the dispersion trend curve using the mean of gene-wise dispersions."""
+        mean_disp = trim_mean(
+            self.varm["genewise_dispersions"][
+                self.varm["genewise_dispersions"] > 10 * self.min_disp
+            ],
+            proportiontocut=0.001,
+        )
+
+        self.uns["disp_function"] = lambda x: mean_disp
+        self.varm["fitted_dispersions"] = np.full(self.n_vars, mean_disp)
+
     def plot_dispersions(
         self, log: bool = True, save_path: Optional[str] = None, **kwargs
     ) -> None:
@@ -1073,23 +1094,10 @@ def _refit_without_outliers(
         # Note: the trend curve is not refitted.
         sub_dds.varm["_normed_means"] = sub_dds.layers["normed_counts"].mean(0)
 
-        if self.trend_fit_type == "parametric":
-            sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"]
-            sub_dds.varm["fitted_dispersions"] = dispersion_trend(
-                sub_dds.varm["_normed_means"],
-                sub_dds.uns["trend_coeffs"],
-            )
-        elif self.trend_fit_type == "local":
-            sub_dds.uns["disp_function"] = self.uns["disp_function"]
-            sub_dds.varm["fitted_dispersions"] = self.uns["disp_function"](
-                sub_dds.varm["_normed_means"][sub_dds.varm["non_zero"]]
-            )
-
-        else:
-            raise AttributeError(
-                f"Found trend_fit_type '{self.trend_fit_type}'. Expected 'parametric' or "
-                "'local'."
-            )
+        sub_dds.uns["disp_function"] = self.uns["disp_function"]
+        sub_dds.varm["fitted_dispersions"] = self.uns["disp_function"](
+            sub_dds.varm["_normed_means"][sub_dds.varm["non_zero"]]
+        )
 
         # Estimate MAP dispersions.
         # Note: the prior variance is not recomputed.

From 5b9c18cc805cc2bec73668943c94701897740e8c Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 15:14:19 +0100
Subject: [PATCH 05/16] fix: typo in setup.py

---
 setup.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 25611c17..cc41f679 100644
--- a/setup.py
+++ b/setup.py
@@ -31,7 +31,8 @@
         "numpy>=1.23.0",
         "pandas>=1.4.0",
         "scikit-learn>=1.1.0",
-        "scikit-misc>=0.3.1" "scipy>=1.8.0",
+        "scikit-misc>=0.3.1",
+        "scipy>=1.8.0",
         "statsmodels",
         "matplotlib>=3.6.2",  # not sure why sphinx_gallery does not work without it
     ],  # external packages as dependencies

From a262e4f8cd6cbac83fed0242a9a65f282ef85cf1 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 15:21:38 +0100
Subject: [PATCH 06/16] build!: discontinue python 3.8

---
 .github/workflows/pr_validation.yml | 2 --
 setup.py                            | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/pr_validation.yml b/.github/workflows/pr_validation.yml
index 4c14dfcb..4c55eebc 100644
--- a/.github/workflows/pr_validation.yml
+++ b/.github/workflows/pr_validation.yml
@@ -17,8 +17,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - os: ubuntu-latest
-            python: "3.8"
           - os: ubuntu-latest
             python: "3.9"
           - os: ubuntu-latest
diff --git a/setup.py b/setup.py
index cc41f679..1870950e 100644
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
 setup(
     name="pydeseq2",
     version=about["__version__"],
-    python_requires=">=3.8.0",
+    python_requires=">=3.9.0",
     license="MIT",
     description="A python implementation of DESeq2.",
     long_description=readme,

From c86ced3ef4a83a847add59b5da530f795bbf6833 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 15:43:13 +0100
Subject: [PATCH 07/16] feat: raise NotImplementedError when fit type is not
 recognized

---
 pydeseq2/dds.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 08784317..0a7c38f7 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -604,6 +604,12 @@ def fit_dispersion_trend(self) -> None:
         if self.trend_fit_type == "mean":
             self._fit_mean_trend()
 
+        else:
+            raise NotImplementedError(
+                f"Unknown trend_fit_type: {self.trend_fit_type}. "
+                "Expected 'parametric', 'local' or 'mean'."
+            )
+
     def fit_dispersion_prior(self) -> None:
         """Fit dispersion variance priors and standard deviation of log-residuals.
 

From ef783df53479f81e90ef548fc4f50585b5d8e1ca Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 15:46:53 +0100
Subject: [PATCH 08/16] feat: raise NotImplementedError when fit type is not
 recognized

---
 pydeseq2/dds.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 0a7c38f7..4e8918d9 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -597,14 +597,14 @@ def fit_dispersion_trend(self) -> None:
         if self.trend_fit_type == "local":
             try:
                 self._fit_local_trend()
-            except ValueError:
-                print("Local trend, fit did not converge, switching to mean fit.")
+            except (ValueError, RuntimeError):
+                print("Local trend fit did not converge, switching to mean fit.")
                 self.trend_fit_type = "mean"
 
         if self.trend_fit_type == "mean":
             self._fit_mean_trend()
 
-        else:
+        if self.trend_fit_type not in ["parametric", "local", "mean"]:
             raise NotImplementedError(
                 f"Unknown trend_fit_type: {self.trend_fit_type}. "
                 "Expected 'parametric', 'local' or 'mean'."

From 2a0dcc376924f177afd676e95ab5935e21bfb5a9 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 17:23:56 +0100
Subject: [PATCH 09/16] feat: implement trick to avoid segmentation faults when
 running loess

---
 pydeseq2/dds.py   | 32 ++++++++++++++++++++++++--------
 pydeseq2/utils.py | 16 ++++++++++++++++
 2 files changed, 40 insertions(+), 8 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 4e8918d9..933783a6 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -1,3 +1,4 @@
+import multiprocessing as mp
 import sys
 import time
 import warnings
@@ -14,7 +15,6 @@
 from scipy.special import polygamma  # type: ignore
 from scipy.stats import f  # type: ignore
 from scipy.stats import trim_mean  # type: ignore
-from skmisc.loess import loess
 
 from pydeseq2.default_inference import DefaultInference
 from pydeseq2.inference import Inference
@@ -22,6 +22,7 @@
 from pydeseq2.preprocessing import deseq2_norm_transform
 from pydeseq2.utils import build_design_matrix
 from pydeseq2.utils import dispersion_trend
+from pydeseq2.utils import local_trend_fit
 from pydeseq2.utils import make_scatter
 from pydeseq2.utils import mean_absolute_deviation
 from pydeseq2.utils import n_or_more_replicates
@@ -909,6 +910,12 @@ def _fit_parametric_trend(self) -> None:
     def _fit_local_trend(self) -> None:
         r"""Fit the dispersion trend curve using local regression."""
         # Check that genewise dispersions are available. If not, compute them.
+        warnings.warn(
+            "Running local trend fit will make the DeseqDataSet object unpicklable.",
+            UserWarning,
+            stacklevel=2,
+        )
+
         if "genewise_dispersions" not in self.varm:
             self.fit_genewise_dispersions()
 
@@ -924,14 +931,23 @@ def _fit_local_trend(self) -> None:
         if len(genes_to_fit) == 0:
             raise ValueError("No genes to fit: all dispersions are below 10 * min_disp")
 
-        lo = loess(
-            x=np.log(self.varm["_normed_means"][genes_to_fit]),
-            y=np.log(self.varm["genewise_dispersions"][genes_to_fit]),
-            weights=self.varm["_normed_means"][genes_to_fit],
-            surface="direct",  # to allow extrapolation
-        )
+        means = self.varm["_normed_means"][genes_to_fit]
+        dispersions = self.varm["genewise_dispersions"][genes_to_fit]
+
+        # Run local trend fit in a separate process to avoid segmentation faults
+        q = mp.Manager().Queue()
+        p = mp.Process(target=local_trend_fit, args=(means, dispersions, q))
+        p.start()
+        p.join()
+
+        if p.exitcode != 0:
+            raise RuntimeError
+        else:
+            # The code rand without fault, so we can now run it locally.
+            # Returning the loess object in the first step would be more straightforward,
+            # But it is unfortunately unpicklable.
 
-        lo.fit()
+            lo = local_trend_fit(means, dispersions)
 
         end = time.time()
 
diff --git a/pydeseq2/utils.py b/pydeseq2/utils.py
index 0f04e0f5..3a46d682 100644
--- a/pydeseq2/utils.py
+++ b/pydeseq2/utils.py
@@ -1,6 +1,7 @@
 import multiprocessing
 import warnings
 from math import floor
+from multiprocessing import Queue
 from pathlib import Path
 from typing import List
 from typing import Literal
@@ -18,6 +19,7 @@
 from scipy.special import polygamma  # type: ignore
 from scipy.stats import norm  # type: ignore
 from sklearn.linear_model import LinearRegression  # type: ignore
+from skmisc.loess import loess
 
 import pydeseq2
 from pydeseq2.grid_search import grid_fit_alpha
@@ -724,6 +726,20 @@ def dloss(log_alpha: float) -> float:
         )
 
 
+def local_trend_fit(means, dispersions, q: Optional[Queue] = None):
+    """Run a wrapper for local trend fit to catch segfaults from scikit-misc."""
+    lo = loess(
+        x=np.log(means),
+        y=np.log(dispersions),
+        weights=means,
+        surface="direct",  # to allow extrapolation
+    )
+
+    lo.fit()
+
+    return lo
+
+
 def trimmed_mean(x, trim: float = 0.1, **kwargs) -> Union[float, np.ndarray]:
     """Return trimmed mean.
 

From b19f63973e989c548bc311e784d5e52f2c84611c Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 17:38:29 +0100
Subject: [PATCH 10/16] fix: catch PerfectSeperationWarnings from statsmodels

---
 pydeseq2/dds.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 933783a6..223668b8 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -15,6 +15,7 @@
 from scipy.special import polygamma  # type: ignore
 from scipy.stats import f  # type: ignore
 from scipy.stats import trim_mean  # type: ignore
+from statsmodels.tools.sm_exceptions import PerfectSeparationWarning
 
 from pydeseq2.default_inference import DefaultInference
 from pydeseq2.inference import Inference
@@ -582,7 +583,15 @@ def fit_dispersion_trend(self) -> None:
         "parametric", "local" and "mean".
         """
         if self.trend_fit_type == "parametric":
-            self._fit_parametric_trend()
+            try:
+                self._fit_parametric_trend()
+            except PerfectSeparationWarning:
+                warnings.warn(
+                    "Perfect separation detected, switching to local regression.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                self.trend_fit_type = "local"
 
             if (self.uns["trend_coeffs"] <= 0).any():
                 warnings.warn(

From 196141afad22ab243440a0d55ae2ec74e4c6c519 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 17:48:42 +0100
Subject: [PATCH 11/16] fix: catch PerfectSeperationWarnings from statsmodels

---
 pydeseq2/dds.py               |  3 +--
 pydeseq2/default_inference.py | 18 ++++++++++++------
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index 223668b8..f63502d2 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -15,7 +15,6 @@
 from scipy.special import polygamma  # type: ignore
 from scipy.stats import f  # type: ignore
 from scipy.stats import trim_mean  # type: ignore
-from statsmodels.tools.sm_exceptions import PerfectSeparationWarning
 
 from pydeseq2.default_inference import DefaultInference
 from pydeseq2.inference import Inference
@@ -585,7 +584,7 @@ def fit_dispersion_trend(self) -> None:
         if self.trend_fit_type == "parametric":
             try:
                 self._fit_parametric_trend()
-            except PerfectSeparationWarning:
+            except RuntimeError:
                 warnings.warn(
                     "Perfect separation detected, switching to local regression.",
                     UserWarning,
diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py
index 29ebb65c..ef5be46d 100644
--- a/pydeseq2/default_inference.py
+++ b/pydeseq2/default_inference.py
@@ -10,6 +10,7 @@
 from joblib import delayed
 from joblib import parallel_backend
 from statsmodels.tools.sm_exceptions import DomainWarning  # type: ignore
+from statsmodels.tools.sm_exceptions import PerfectSeparationWarning
 
 from pydeseq2 import inference
 from pydeseq2 import utils
@@ -210,12 +211,17 @@ def dispersion_trend_gamma_glm(  # noqa: D102
         covariates_w_intercept = sm.add_constant(covariates)
         targets_fit = targets.values
         covariates_fit = covariates_w_intercept.values
-        glm_gamma = sm.GLM(
-            targets_fit,
-            covariates_fit,
-            family=sm.families.Gamma(link=sm.families.links.identity()),
-        )
-        res = glm_gamma.fit()
+        try:
+            glm_gamma = sm.GLM(
+                targets_fit,
+                covariates_fit,
+                family=sm.families.Gamma(link=sm.families.links.identity()),
+            )
+            res = glm_gamma.fit()
+        except PerfectSeparationWarning:
+            raise RuntimeError(
+                "Perfect separation detected in dispersion trend model."
+            ) from None
         coeffs = res.params
         return (coeffs, covariates_fit @ coeffs)
 

From b802c52187de96e9303404df5edbeb060bfa7388 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Mon, 12 Feb 2024 17:55:28 +0100
Subject: [PATCH 12/16] fix: attempt to ignore PerfectSeparationWarning
 altogether

---
 pydeseq2/default_inference.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py
index ef5be46d..8aa6fb5f 100644
--- a/pydeseq2/default_inference.py
+++ b/pydeseq2/default_inference.py
@@ -17,6 +17,7 @@
 
 # Ignore DomainWarning raised by statsmodels when fitting a Gamma GLM with identity link.
 warnings.simplefilter("ignore", DomainWarning)
+warnings.simplefilter("ignore", PerfectSeparationWarning)
 
 
 class DefaultInference(inference.Inference):

From 71977ffce57367250b41b325d5815da2c09060d8 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Tue, 13 Feb 2024 16:08:51 +0100
Subject: [PATCH 13/16] refactor: remove statsmodels with custom gamma GLM
 implementation for parametric trend curve in default inference model

---
 pydeseq2/default_inference.py | 51 +++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 23 deletions(-)

diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py
index 8aa6fb5f..5fff7462 100644
--- a/pydeseq2/default_inference.py
+++ b/pydeseq2/default_inference.py
@@ -1,24 +1,17 @@
-import warnings
 from typing import Literal
 from typing import Optional
 from typing import Tuple
 
 import numpy as np
 import pandas as pd
-import statsmodels.api as sm  # type: ignore
 from joblib import Parallel  # type: ignore
 from joblib import delayed
 from joblib import parallel_backend
-from statsmodels.tools.sm_exceptions import DomainWarning  # type: ignore
-from statsmodels.tools.sm_exceptions import PerfectSeparationWarning
+from scipy.optimize import minimize  # type: ignore
 
 from pydeseq2 import inference
 from pydeseq2 import utils
 
-# Ignore DomainWarning raised by statsmodels when fitting a Gamma GLM with identity link.
-warnings.simplefilter("ignore", DomainWarning)
-warnings.simplefilter("ignore", PerfectSeparationWarning)
-
 
 class DefaultInference(inference.Inference):
     """Default DESeq2-related inference methods, using scipy/sklearn/numpy.
@@ -209,22 +202,34 @@ def wald_test(  # noqa: D102
     def dispersion_trend_gamma_glm(  # noqa: D102
         self, covariates: pd.Series, targets: pd.Series
     ) -> Tuple[np.ndarray, np.ndarray]:
-        covariates_w_intercept = sm.add_constant(covariates)
-        targets_fit = targets.values
+        covariates_w_intercept = covariates.to_frame()
+        covariates_w_intercept["intercept"] = 1
         covariates_fit = covariates_w_intercept.values
-        try:
-            glm_gamma = sm.GLM(
-                targets_fit,
-                covariates_fit,
-                family=sm.families.Gamma(link=sm.families.links.identity()),
-            )
-            res = glm_gamma.fit()
-        except PerfectSeparationWarning:
-            raise RuntimeError(
-                "Perfect separation detected in dispersion trend model."
-            ) from None
-        coeffs = res.params
-        return (coeffs, covariates_fit @ coeffs)
+        targets_fit = targets.values
+
+        def loss(coeffs):
+            mu = covariates_fit @ coeffs
+            return (targets_fit / mu + np.log(mu)).mean()
+
+        def grad(coeffs):
+            mu = covariates_fit @ coeffs
+            return -(
+                ((targets_fit / mu - 1)[:, None] * covariates_fit) / mu[:, None]
+            ).mean(0)
+
+        res = minimize(
+            loss,
+            x0=np.array([1.0, 1.0]),
+            jac=grad,
+            method="L-BFGS-B",
+            bounds=[(0, np.inf)],
+        )
+
+        if not res.success:
+            raise ValueError("Gamma GLM optimization failed.")
+
+        coeffs = res.x
+        return coeffs, covariates_fit @ coeffs
 
     def lfc_shrink_nbinom_glm(  # noqa: D102
         self,

From f36c1b93fe4d3fb14070a7949523a3ffb6d934a5 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Tue, 13 Feb 2024 17:02:48 +0100
Subject: [PATCH 14/16] fix: use consistent ordering for trend curve
 coefficients

---
 pydeseq2/default_inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py
index 5fff7462..8a3985be 100644
--- a/pydeseq2/default_inference.py
+++ b/pydeseq2/default_inference.py
@@ -203,7 +203,7 @@ def dispersion_trend_gamma_glm(  # noqa: D102
         self, covariates: pd.Series, targets: pd.Series
     ) -> Tuple[np.ndarray, np.ndarray]:
         covariates_w_intercept = covariates.to_frame()
-        covariates_w_intercept["intercept"] = 1
+        covariates_w_intercept.insert(0, "intercept", 1)
         covariates_fit = covariates_w_intercept.values
         targets_fit = targets.values
 

From c151c10e6c6d8790bf0b848231c0f4b293f75f68 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Tue, 13 Feb 2024 17:10:15 +0100
Subject: [PATCH 15/16] feat: catch gamma GLM failure and switch to mean fit

---
 pydeseq2/dds.py               | 35 ++++++++++++++++++++---------------
 pydeseq2/default_inference.py |  2 +-
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index f63502d2..a98a29f2 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -352,7 +352,7 @@ def vst_fit(
         if use_design:
             # Check that the dispersion trend curve was fitted. If not, fit it.
             # This will call previous functions in a cascade.
-            if "trend_coeffs" not in self.uns:
+            if "disp_function" not in self.uns:
                 self.fit_dispersion_trend()
         else:
             # Reduce the design matrix to an intercept and reconstruct at the end
@@ -586,13 +586,14 @@ def fit_dispersion_trend(self) -> None:
                 self._fit_parametric_trend()
             except RuntimeError:
                 warnings.warn(
-                    "Perfect separation detected, switching to local regression.",
+                    "The dispersion trend curve fitting did not converge. "
+                    "Switching to a mean-based dispersion trend.",
                     UserWarning,
                     stacklevel=2,
                 )
                 self.trend_fit_type = "local"
 
-            if (self.uns["trend_coeffs"] <= 0).any():
+            if (self.uns["trend_coeffs"] == 0).any():
                 warnings.warn(
                     f"self.trend_fit_type={self.trend_fit_type}, but the "
                     f"dispersion trend was not well captured by the function: "
@@ -619,6 +620,15 @@ def fit_dispersion_trend(self) -> None:
                 "Expected 'parametric', 'local' or 'mean'."
             )
 
+    def disp_function(self, x):
+        """Return the dispersion trend function at x."""
+        if self.uns["disp_function_type"] == "parametric":
+            return dispersion_trend(x, self.uns["trend_coeffs"])
+        elif self.uns["disp_function_type"] == "local":
+            return np.exp(self.uns["loess"].predict(np.log(x)).values)
+        elif self.uns["disp_function_type"] == "mean":
+            return self.uns["mean_disp"]
+
     def fit_dispersion_prior(self) -> None:
         """Fit dispersion variance priors and standard deviation of log-residuals.
 
@@ -906,14 +916,10 @@ def _fit_parametric_trend(self) -> None:
 
         self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"])
 
-        self.uns["disp_function"] = lambda x: dispersion_trend(
-            x, self.uns["trend_coeffs"]
-        )
-
         self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
-        self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.uns[
-            "disp_function"
-        ](self.varm["_normed_means"][self.varm["non_zero"]])
+        self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.disp_function(
+            self.varm["_normed_means"][self.varm["non_zero"]]
+        )
 
     def _fit_local_trend(self) -> None:
         r"""Fit the dispersion trend curve using local regression."""
@@ -962,11 +968,11 @@ def _fit_local_trend(self) -> None:
         if not self.quiet:
             print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)
 
-        self.uns["disp_function"] = lambda x: np.exp(lo.predict(np.log(x)).values)
+        self.uns["loess"] = lo
         self.varm["fitted_dispersions"] = np.full(self.n_vars, np.NaN)
-        self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.uns[
-            "disp_function"
-        ](self.varm["_normed_means"][self.varm["non_zero"]])
+        self.varm["fitted_dispersions"][self.varm["non_zero"]] = self.disp_function(
+            self.varm["_normed_means"][self.varm["non_zero"]]
+        )
 
     def _fit_mean_trend(self):
         """Fit the dispersion trend curve using the mean of gene-wise dispersions."""
@@ -977,7 +983,6 @@ def _fit_mean_trend(self):
             proportiontocut=0.001,
         )
 
-        self.uns["disp_function"] = lambda x: mean_disp
         self.varm["fitted_dispersions"] = np.full(self.n_vars, mean_disp)
 
     def plot_dispersions(
diff --git a/pydeseq2/default_inference.py b/pydeseq2/default_inference.py
index 8a3985be..975375cd 100644
--- a/pydeseq2/default_inference.py
+++ b/pydeseq2/default_inference.py
@@ -226,7 +226,7 @@ def grad(coeffs):
         )
 
         if not res.success:
-            raise ValueError("Gamma GLM optimization failed.")
+            raise RuntimeError("Gamma GLM optimization failed.")
 
         coeffs = res.x
         return coeffs, covariates_fit @ coeffs

From 0d912b9d1fb63a7cec7590298169f2b3ca2e8535 Mon Sep 17 00:00:00 2001
From: Boris MUZELLEC <boris.muzellec@owkin.com>
Date: Fri, 16 Feb 2024 10:44:46 +0100
Subject: [PATCH 16/16] chore: catch up code state from statsmodels PR

---
 pydeseq2/dds.py          | 86 +++++++++++++++++++++++-----------------
 tests/test_edge_cases.py |  2 +-
 2 files changed, 50 insertions(+), 38 deletions(-)

diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py
index a98a29f2..879101ba 100644
--- a/pydeseq2/dds.py
+++ b/pydeseq2/dds.py
@@ -73,7 +73,7 @@ class DeseqDataSet(ad.AnnData):
         specifying the factor of interest and the reference (control) level against which
         we're testing, e.g. ``["condition", "A"]``. (default: ``None``).
 
-    trend_fit_type : str
+    disp_function_type : str
         Either "parametric", "local" or "mean", for the type of fitting of dispersions
         trend curve.If "parametric" is selected but the fitting fails, it will switch to
         "local". (default: ``"parametric"``).
@@ -188,7 +188,7 @@ def __init__(
         design_factors: Union[str, List[str]] = "condition",
         continuous_factors: Optional[List[str]] = None,
         ref_level: Optional[List[str]] = None,
-        trend_fit_type: Literal["parametric", "local", "mean"] = "parametric",
+        disp_function_type: Literal["parametric", "local", "mean"] = "parametric",
         min_mu: float = 0.5,
         min_disp: float = 1e-8,
         max_disp: float = 10.0,
@@ -274,7 +274,7 @@ def __init__(
         # Check that the design matrix has full rank
         self._check_full_rank_design()
 
-        self.trend_fit_type = trend_fit_type
+        self.disp_function_type = disp_function_type
         self.min_mu = min_mu
         self.min_disp = min_disp
         self.max_disp = np.maximum(max_disp, self.n_obs)
@@ -486,7 +486,7 @@ def fit_size_factors(
             warnings.warn(
                 "Every gene contains at least one zero, "
                 "cannot compute log geometric means. Switching to iterative mode.",
-                RuntimeWarning,
+                UserWarning,
                 stacklevel=2,
             )
             self._fit_iterate_size_factors()
@@ -578,10 +578,10 @@ def fit_genewise_dispersions(self) -> None:
     def fit_dispersion_trend(self) -> None:
         r"""Fit the dispersion trend curve.
 
-        Three methods are available, depending on the ``trend_fit_type`` attribute:
+        Three methods are available, depending on the ``disp_function_type`` attribute:
         "parametric", "local" and "mean".
         """
-        if self.trend_fit_type == "parametric":
+        if self.disp_function_type == "parametric":
             try:
                 self._fit_parametric_trend()
             except RuntimeError:
@@ -591,42 +591,42 @@ def fit_dispersion_trend(self) -> None:
                     UserWarning,
                     stacklevel=2,
                 )
-                self.trend_fit_type = "local"
+                self.disp_function_type = "local"
 
             if (self.uns["trend_coeffs"] == 0).any():
                 warnings.warn(
-                    f"self.trend_fit_type={self.trend_fit_type}, but the "
+                    f"self.disp_function_type={self.disp_function_type}, but the "
                     f"dispersion trend was not well captured by the function: "
-                    f"y = a / x + b. Switchiing to local regression.",
+                    f"y = a / x + b. Switching to local regression.",
                     UserWarning,
                     stacklevel=2,
                 )
-                self.trend_fit_type = "local"
+                self.disp_function_type = "local"
                 del self.uns["trend_coeffs"]
 
-        if self.trend_fit_type == "local":
+        if self.disp_function_type == "local":
             try:
                 self._fit_local_trend()
             except (ValueError, RuntimeError):
                 print("Local trend fit did not converge, switching to mean fit.")
-                self.trend_fit_type = "mean"
+                self.disp_function_type = "mean"
 
-        if self.trend_fit_type == "mean":
+        if self.disp_function_type == "mean":
             self._fit_mean_trend()
 
-        if self.trend_fit_type not in ["parametric", "local", "mean"]:
+        if self.disp_function_type not in ["parametric", "local", "mean"]:
             raise NotImplementedError(
-                f"Unknown trend_fit_type: {self.trend_fit_type}. "
+                f"Unknown disp_function_type: {self.disp_function_type}. "
                 "Expected 'parametric', 'local' or 'mean'."
             )
 
     def disp_function(self, x):
         """Return the dispersion trend function at x."""
-        if self.uns["disp_function_type"] == "parametric":
+        if self.disp_function_type == "parametric":
             return dispersion_trend(x, self.uns["trend_coeffs"])
-        elif self.uns["disp_function_type"] == "local":
+        elif self.disp_function_type == "local":
             return np.exp(self.uns["loess"].predict(np.log(x)).values)
-        elif self.uns["disp_function_type"] == "mean":
+        elif self.disp_function_type == "mean":
             return self.uns["mean_disp"]
 
     def fit_dispersion_prior(self) -> None:
@@ -890,24 +890,29 @@ def _fit_parametric_trend(self) -> None:
         old_coeffs = pd.Series([0.1, 0.1])
         coeffs = pd.Series([1.0, 1.0])
 
-        while (np.log(np.abs(coeffs / old_coeffs)) ** 2).sum() >= 1e-6:
-            old_coeffs = coeffs
-            coeffs, predictions = self.inference.dispersion_trend_gamma_glm(
-                covariates, targets
-            )
-            # Filter out genes that are too far away from the curve before refitting
-            pred_ratios = (
-                self[:, covariates.index].varm["genewise_dispersions"] / predictions
-            )
+        try:
+            while (coeffs > 0).all() and (
+                np.log(np.abs(coeffs / old_coeffs)) ** 2
+            ).sum() >= 1e-6:
+                old_coeffs = coeffs
+                coeffs, predictions = self.inference.dispersion_trend_gamma_glm(
+                    covariates, targets
+                )
+                # Filter out genes that are too far away from the curve before refitting
+                pred_ratios = (
+                    self[:, covariates.index].varm["genewise_dispersions"] / predictions
+                )
 
-            targets.drop(
-                targets[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
-                inplace=True,
-            )
-            covariates.drop(
-                covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
-                inplace=True,
-            )
+                targets.drop(
+                    targets[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
+                    inplace=True,
+                )
+                covariates.drop(
+                    covariates[(pred_ratios < 1e-4) | (pred_ratios >= 15)].index,
+                    inplace=True,
+                )
+        except RuntimeError as e:
+            raise e
 
         end = time.time()
 
@@ -1114,6 +1119,7 @@ def _refit_without_outliers(
             min_replicates=self.min_replicates,
             beta_tol=self.beta_tol,
             inference=self.inference,
+            disp_function_type=self.disp_function_type,
         )
 
         # Use the same size factors
@@ -1129,8 +1135,7 @@ def _refit_without_outliers(
         # Note: the trend curve is not refitted.
         sub_dds.varm["_normed_means"] = sub_dds.layers["normed_counts"].mean(0)
 
-        sub_dds.uns["disp_function"] = self.uns["disp_function"]
-        sub_dds.varm["fitted_dispersions"] = self.uns["disp_function"](
+        sub_dds.varm["fitted_dispersions"] = self.disp_function(
             sub_dds.varm["_normed_means"][sub_dds.varm["non_zero"]]
         )
 
@@ -1215,6 +1220,13 @@ def objective(p):
                 & self.varm["non_zero"]
             ]
 
+            if len(use_for_mean_genes) == 0:
+                print(
+                    "No genes have a dispersion above 10 * min_disp in "
+                    "_fit_iterate_size_factors."
+                )
+                break
+
             mean_disp = trimmed_mean(
                 self[:, use_for_mean_genes].varm["genewise_dispersions"], trim=0.001
             )
diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py
index 5a2b8a71..b93a853e 100644
--- a/tests/test_edge_cases.py
+++ b/tests/test_edge_cases.py
@@ -468,7 +468,7 @@ def test_zero_inflated():
     counts_df.iloc[idx, :] = 0
 
     dds = DeseqDataSet(counts=counts_df, metadata=metadata)
-    with pytest.warns(RuntimeWarning):
+    with pytest.warns(UserWarning):
         dds.deseq2()