From 8a18459969effd332dfb4f0f5cbfa67219fdb796 Mon Sep 17 00:00:00 2001 From: Noam <22600644+noamteyssier@users.noreply.github.com> Date: Fri, 30 Jun 2023 17:21:17 -0700 Subject: [PATCH 1/5] added quiet self attribute + check if quiet before print updates --- pydeseq2/dds.py | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 5819555d..d415ca43 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -152,6 +152,9 @@ class DeseqDataSet(ad.AnnData): new_all_zeroes_genes : pandas.Index Genes which have only zero counts after outlier replacement. + quiet : bool + Suppress deseq2 status updates during fit. + References ---------- .. bibliography:: @@ -175,6 +178,7 @@ def __init__( n_cpus: Optional[int] = None, batch_size: int = 128, joblib_verbosity: int = 0, + quiet: bool = False, ) -> None: # Initialize the AnnData part @@ -231,6 +235,7 @@ def __init__( self.n_processes = get_num_processes(n_cpus) self.batch_size = batch_size self.joblib_verbosity = joblib_verbosity + self.quiet = quiet def vst( self, @@ -341,7 +346,8 @@ def fit_size_factors( fit_type : str The normalization method to use (default: ``"ratio"``). """ - print("Fitting size factors...") + if not self.quiet: + print("Fitting size factors...") start = time.time() if fit_type == "iterative": self._fit_iterate_size_factors() @@ -358,7 +364,9 @@ def fit_size_factors( else: self.layers["normed_counts"], self.obsm["size_factors"] = deseq2_norm(self.X) end = time.time() - print(f"... done in {end - start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end - start:.2f} seconds.\n") def fit_genewise_dispersions(self) -> None: """Fit gene-wise dispersion estimates. @@ -431,7 +439,8 @@ def fit_genewise_dispersions(self) -> None: self.layers["_mu_hat"] = np.full((self.n_obs, self.n_vars), np.NaN) self.layers["_mu_hat"][:, self.varm["non_zero"]] = mu_hat_.T - print("Fitting dispersions...") + if not self.quiet: + print("Fitting dispersions...") start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -451,7 +460,9 @@ def fit_genewise_dispersions(self) -> None: for i in self.non_zero_idx ) end = time.time() - print(f"... done in {end - start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end - start:.2f} seconds.\n") dispersions_, l_bfgs_b_converged_ = zip(*res) @@ -473,7 +484,8 @@ def fit_dispersion_trend(self) -> None: if "genewise_dispersions" not in self.varm: self.fit_genewise_dispersions() - print("Fitting dispersion trend curve...") + if not self.quiet: + print("Fitting dispersion trend curve...") start = time.time() self.varm["_normed_means"] = self.layers["normed_counts"].mean(0) @@ -528,7 +540,9 @@ def fit_dispersion_trend(self) -> None: ) end = time.time() - print(f"... done in {end - start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end - start:.2f} seconds.\n") self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"]) @@ -584,7 +598,8 @@ def fit_MAP_dispersions(self) -> None: # Convert design matrix to numpy for speed design_matrix = self.obsm["design_matrix"].values - print("Fitting MAP dispersions...") + if not self.quiet: + print("Fitting MAP dispersions...") start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -606,7 +621,9 @@ def fit_MAP_dispersions(self) -> None: for i in self.non_zero_idx ) end = time.time() - print(f"... done in {end-start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end-start:.2f} seconds.\n") dispersions_, l_bfgs_b_converged_ = zip(*res) @@ -641,7 +658,8 @@ def fit_LFC(self) -> None: # Convert design matrix to numpy for speed design_matrix = self.obsm["design_matrix"].values - print("Fitting LFCs...") + if not self.quiet: + print("Fitting LFCs...") start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -660,7 +678,9 @@ def fit_LFC(self) -> None: for i in self.non_zero_idx ) end = time.time() - print(f"... done in {end-start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end-start:.2f} seconds.\n") MLE_lfcs_, mu_, hat_diagonals_, converged_ = zip(*res) mu_ = np.array(mu_).T @@ -736,7 +756,8 @@ def refit(self) -> None: """ # Replace outlier counts self._replace_outliers() - print(f"Refitting {sum(self.varm['replaced']) } outliers.\n") + if not self.quiet: + print(f"Refitting {sum(self.varm['replaced']) } outliers.\n") if sum(self.varm["replaced"]) > 0: # Refit dispersions and LFCs for genes that had outliers replaced From 45618cc33ad80cfc37f22ce9810e4864724d3cc3 Mon Sep 17 00:00:00 2001 From: Noam <22600644+noamteyssier@users.noreply.github.com> Date: Fri, 30 Jun 2023 17:22:32 -0700 Subject: [PATCH 2/5] black formatting --- pydeseq2/dds.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index d415ca43..b99614e1 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -180,7 +180,6 @@ def __init__( joblib_verbosity: int = 0, quiet: bool = False, ) -> None: - # Initialize the AnnData part if adata is not None: if counts is not None: From 9784ec08e9fe5da5f2d153ba733c62927be1cb58 Mon Sep 17 00:00:00 2001 From: Noam <22600644+noamteyssier@users.noreply.github.com> Date: Fri, 30 Jun 2023 17:25:07 -0700 Subject: [PATCH 3/5] black formatting --- pydeseq2/dds.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index b99614e1..8977635b 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -1,3 +1,4 @@ +import sys import time import warnings from typing import List @@ -346,7 +347,7 @@ def fit_size_factors( The normalization method to use (default: ``"ratio"``). """ if not self.quiet: - print("Fitting size factors...") + print("Fitting size factors...", file=sys.stderr) start = time.time() if fit_type == "iterative": self._fit_iterate_size_factors() @@ -365,7 +366,7 @@ def fit_size_factors( end = time.time() if not self.quiet: - print(f"... done in {end - start:.2f} seconds.\n") + print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) def fit_genewise_dispersions(self) -> None: """Fit gene-wise dispersion estimates. @@ -439,7 +440,7 @@ def fit_genewise_dispersions(self) -> None: self.layers["_mu_hat"][:, self.varm["non_zero"]] = mu_hat_.T if not self.quiet: - print("Fitting dispersions...") + print("Fitting dispersions...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -461,7 +462,7 @@ def fit_genewise_dispersions(self) -> None: end = time.time() if not self.quiet: - print(f"... done in {end - start:.2f} seconds.\n") + print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) dispersions_, l_bfgs_b_converged_ = zip(*res) @@ -484,7 +485,7 @@ def fit_dispersion_trend(self) -> None: self.fit_genewise_dispersions() if not self.quiet: - print("Fitting dispersion trend curve...") + print("Fitting dispersion trend curve...", file=sys.stderr) start = time.time() self.varm["_normed_means"] = self.layers["normed_counts"].mean(0) @@ -541,7 +542,7 @@ def fit_dispersion_trend(self) -> None: end = time.time() if not self.quiet: - print(f"... done in {end - start:.2f} seconds.\n") + print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"]) @@ -598,7 +599,7 @@ def fit_MAP_dispersions(self) -> None: design_matrix = self.obsm["design_matrix"].values if not self.quiet: - print("Fitting MAP dispersions...") + print("Fitting MAP dispersions...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -622,7 +623,7 @@ def fit_MAP_dispersions(self) -> None: end = time.time() if not self.quiet: - print(f"... done in {end-start:.2f} seconds.\n") + print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr) dispersions_, l_bfgs_b_converged_ = zip(*res) @@ -658,7 +659,7 @@ def fit_LFC(self) -> None: design_matrix = self.obsm["design_matrix"].values if not self.quiet: - print("Fitting LFCs...") + print("Fitting LFCs...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -679,7 +680,7 @@ def fit_LFC(self) -> None: end = time.time() if not self.quiet: - print(f"... done in {end-start:.2f} seconds.\n") + print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr) MLE_lfcs_, mu_, hat_diagonals_, converged_ = zip(*res) mu_ = np.array(mu_).T @@ -756,7 +757,9 @@ def refit(self) -> None: # Replace outlier counts self._replace_outliers() if not self.quiet: - print(f"Refitting {sum(self.varm['replaced']) } outliers.\n") + print( + f"Refitting {sum(self.varm['replaced']) } outliers.\n", file=sys.stderr + ) if sum(self.varm["replaced"]) > 0: # Refit dispersions and LFCs for genes that had outliers replaced @@ -1040,7 +1043,7 @@ def objective(p): self.obsm["size_factors"] = np.exp(res.x - np.mean(res.x)) if not res.success: - print("A size factor fitting iteration failed.") + print("A size factor fitting iteration failed.", file=sys.stderr) break if (i > 1) and np.sum( @@ -1048,7 +1051,7 @@ def objective(p): ) < 1e-4: break elif i == niter - 1: - print("Iterative size factor fitting did not converge.") + print("Iterative size factor fitting did not converge.", file=sys.stderr) # Restore the design matrix and free buffer self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy() From 93f589d8d8cd0095de4fa471b9e1eda0ae2ad664 Mon Sep 17 00:00:00 2001 From: Noam <22600644+noamteyssier@users.noreply.github.com> Date: Fri, 30 Jun 2023 17:29:59 -0700 Subject: [PATCH 4/5] include quiet in parameter docs --- pydeseq2/dds.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 8977635b..0dbaf83c 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -111,6 +111,9 @@ class DeseqDataSet(ad.AnnData): The verbosity level for joblib tasks. The higher the value, the more updates are reported. (default: ``0``). + quiet : bool + Suppress deseq2 status updates during fit. + Attributes ---------- X From 1d7840dd5da750296d01ee06154df6fb759673c2 Mon Sep 17 00:00:00 2001 From: Noam <22600644+noamteyssier@users.noreply.github.com> Date: Thu, 13 Jul 2023 08:12:44 -0700 Subject: [PATCH 5/5] added self.quiet attribute to DeseqStats class as well --- pydeseq2/ds.py | 53 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py index 3cff5f13..f29b8928 100644 --- a/pydeseq2/ds.py +++ b/pydeseq2/ds.py @@ -1,3 +1,4 @@ +import sys import time from typing import List from typing import Optional @@ -67,6 +68,9 @@ class DeseqStats: The verbosity level for joblib tasks. The higher the value, the more updates are reported. (default: ``0``). + quiet : bool + Suppress deseq2 status updates during fit. + Attributes ---------- base_mean : pandas.Series @@ -108,6 +112,9 @@ class DeseqStats: n_processes : int Number of threads to use for multiprocessing. + quiet : bool + Suppress deseq2 status updates during fit. + References ---------- .. bibliography:: @@ -125,6 +132,7 @@ def __init__( prior_LFC_var: Optional[np.ndarray] = None, batch_size: int = 128, joblib_verbosity: int = 0, + quiet: bool = False, ) -> None: assert ( "LFC" in dds.varm @@ -154,6 +162,7 @@ def __init__( self.n_processes = get_num_processes(n_cpus) self.batch_size = batch_size self.joblib_verbosity = joblib_verbosity + self.quiet = quiet # If the `refit_cooks` attribute of the dds object is True, check that outliers # were actually refitted. @@ -196,10 +205,12 @@ def summary(self) -> None: self.results_df["pvalue"] = self.p_values self.results_df["padj"] = self.padj - print( - f"Log2 fold change & Wald test p-value: " - f"{self.contrast[0]} {self.contrast[1]} vs {self.contrast[2]}" - ) + if not self.quiet: + print( + f"Log2 fold change & Wald test p-value: " + f"{self.contrast[0]} {self.contrast[1]} vs {self.contrast[2]}", + file=sys.stderr, + ) display(self.results_df) def run_wald_test(self) -> None: @@ -213,11 +224,13 @@ def run_wald_test(self) -> None: # Raise a warning if LFCs are shrunk. if self.shrunk_LFCs: - print( - "Note: running Wald test on shrunk LFCs. " - "Some sequencing datasets show better performance with the testing " - "separated from the use of the LFC prior." - ) + if not self.quiet: + print( + "Note: running Wald test on shrunk LFCs. " + "Some sequencing datasets show better performance with the testing " + "separated from the use of the LFC prior.", + file=sys.stderr, + ) mu = ( np.exp(self.design_matrix @ self.LFC.T) @@ -234,7 +247,8 @@ def run_wald_test(self) -> None: design_matrix = self.design_matrix.values LFCs = self.LFC.values - print("Running Wald tests...") + if not self.quiet: + print("Running Wald tests...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -253,7 +267,8 @@ def run_wald_test(self) -> None: for i in range(num_genes) ) end = time.time() - print(f"... done in {end-start:.2f} seconds.\n") + if not self.quiet: + print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr) pvals, stats, se = zip(*res) @@ -326,7 +341,8 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None: design_matrix = self.design_matrix.values - print("Fitting MAP LFCs...") + if not self.quiet: + print("Fitting MAP LFCs...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -347,7 +363,8 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None: for i in self.dds.non_zero_idx ) end = time.time() - print(f"... done in {end-start:.2f} seconds.\n") + if not self.quiet: + print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr) lfcs, inv_hessians, l_bfgs_b_converged_ = zip(*res) @@ -388,10 +405,12 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None: split_coeff = coeff.split("_") # coeffs are of the form "factor_A_vs_B", hence "factor" is split_coeff[0], # "A" is split_coeff[1] and "B" split_coeff[3] - print( - f"Shrunk Log2 fold change & Wald test p-value: " - f"{split_coeff[0]} {split_coeff[1]} vs {split_coeff[3]}" - ) + if not self.quiet: + print( + f"Shrunk Log2 fold change & Wald test p-value: " + f"{split_coeff[0]} {split_coeff[1]} vs {split_coeff[3]}", + file=sys.stderr, + ) display(self.results_df)