diff --git a/pydeseq2/dds.py b/pydeseq2/dds.py index 5819555d..0dbaf83c 100644 --- a/pydeseq2/dds.py +++ b/pydeseq2/dds.py @@ -1,3 +1,4 @@ +import sys import time import warnings from typing import List @@ -110,6 +111,9 @@ class DeseqDataSet(ad.AnnData): The verbosity level for joblib tasks. The higher the value, the more updates are reported. (default: ``0``). + quiet : bool + Suppress deseq2 status updates during fit. + Attributes ---------- X @@ -152,6 +156,9 @@ class DeseqDataSet(ad.AnnData): new_all_zeroes_genes : pandas.Index Genes which have only zero counts after outlier replacement. + quiet : bool + Suppress deseq2 status updates during fit. + References ---------- .. bibliography:: @@ -175,8 +182,8 @@ def __init__( n_cpus: Optional[int] = None, batch_size: int = 128, joblib_verbosity: int = 0, + quiet: bool = False, ) -> None: - # Initialize the AnnData part if adata is not None: if counts is not None: @@ -231,6 +238,7 @@ def __init__( self.n_processes = get_num_processes(n_cpus) self.batch_size = batch_size self.joblib_verbosity = joblib_verbosity + self.quiet = quiet def vst( self, @@ -341,7 +349,8 @@ def fit_size_factors( fit_type : str The normalization method to use (default: ``"ratio"``). """ - print("Fitting size factors...") + if not self.quiet: + print("Fitting size factors...", file=sys.stderr) start = time.time() if fit_type == "iterative": self._fit_iterate_size_factors() @@ -358,7 +367,9 @@ def fit_size_factors( else: self.layers["normed_counts"], self.obsm["size_factors"] = deseq2_norm(self.X) end = time.time() - print(f"... done in {end - start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) def fit_genewise_dispersions(self) -> None: """Fit gene-wise dispersion estimates. @@ -431,7 +442,8 @@ def fit_genewise_dispersions(self) -> None: self.layers["_mu_hat"] = np.full((self.n_obs, self.n_vars), np.NaN) self.layers["_mu_hat"][:, self.varm["non_zero"]] = mu_hat_.T - print("Fitting dispersions...") + if not self.quiet: + print("Fitting dispersions...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -451,7 +463,9 @@ def fit_genewise_dispersions(self) -> None: for i in self.non_zero_idx ) end = time.time() - print(f"... done in {end - start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) dispersions_, l_bfgs_b_converged_ = zip(*res) @@ -473,7 +487,8 @@ def fit_dispersion_trend(self) -> None: if "genewise_dispersions" not in self.varm: self.fit_genewise_dispersions() - print("Fitting dispersion trend curve...") + if not self.quiet: + print("Fitting dispersion trend curve...", file=sys.stderr) start = time.time() self.varm["_normed_means"] = self.layers["normed_counts"].mean(0) @@ -528,7 +543,9 @@ def fit_dispersion_trend(self) -> None: ) end = time.time() - print(f"... done in {end - start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr) self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"]) @@ -584,7 +601,8 @@ def fit_MAP_dispersions(self) -> None: # Convert design matrix to numpy for speed design_matrix = self.obsm["design_matrix"].values - print("Fitting MAP dispersions...") + if not self.quiet: + print("Fitting MAP dispersions...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -606,7 +624,9 @@ def fit_MAP_dispersions(self) -> None: for i in self.non_zero_idx ) end = time.time() - print(f"... done in {end-start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr) dispersions_, l_bfgs_b_converged_ = zip(*res) @@ -641,7 +661,8 @@ def fit_LFC(self) -> None: # Convert design matrix to numpy for speed design_matrix = self.obsm["design_matrix"].values - print("Fitting LFCs...") + if not self.quiet: + print("Fitting LFCs...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -660,7 +681,9 @@ def fit_LFC(self) -> None: for i in self.non_zero_idx ) end = time.time() - print(f"... done in {end-start:.2f} seconds.\n") + + if not self.quiet: + print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr) MLE_lfcs_, mu_, hat_diagonals_, converged_ = zip(*res) mu_ = np.array(mu_).T @@ -736,7 +759,10 @@ def refit(self) -> None: """ # Replace outlier counts self._replace_outliers() - print(f"Refitting {sum(self.varm['replaced']) } outliers.\n") + if not self.quiet: + print( + f"Refitting {sum(self.varm['replaced']) } outliers.\n", file=sys.stderr + ) if sum(self.varm["replaced"]) > 0: # Refit dispersions and LFCs for genes that had outliers replaced @@ -1020,7 +1046,7 @@ def objective(p): self.obsm["size_factors"] = np.exp(res.x - np.mean(res.x)) if not res.success: - print("A size factor fitting iteration failed.") + print("A size factor fitting iteration failed.", file=sys.stderr) break if (i > 1) and np.sum( @@ -1028,7 +1054,7 @@ def objective(p): ) < 1e-4: break elif i == niter - 1: - print("Iterative size factor fitting did not converge.") + print("Iterative size factor fitting did not converge.", file=sys.stderr) # Restore the design matrix and free buffer self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy() diff --git a/pydeseq2/ds.py b/pydeseq2/ds.py index 3cff5f13..f29b8928 100644 --- a/pydeseq2/ds.py +++ b/pydeseq2/ds.py @@ -1,3 +1,4 @@ +import sys import time from typing import List from typing import Optional @@ -67,6 +68,9 @@ class DeseqStats: The verbosity level for joblib tasks. The higher the value, the more updates are reported. (default: ``0``). + quiet : bool + Suppress deseq2 status updates during fit. + Attributes ---------- base_mean : pandas.Series @@ -108,6 +112,9 @@ class DeseqStats: n_processes : int Number of threads to use for multiprocessing. + quiet : bool + Suppress deseq2 status updates during fit. + References ---------- .. bibliography:: @@ -125,6 +132,7 @@ def __init__( prior_LFC_var: Optional[np.ndarray] = None, batch_size: int = 128, joblib_verbosity: int = 0, + quiet: bool = False, ) -> None: assert ( "LFC" in dds.varm @@ -154,6 +162,7 @@ def __init__( self.n_processes = get_num_processes(n_cpus) self.batch_size = batch_size self.joblib_verbosity = joblib_verbosity + self.quiet = quiet # If the `refit_cooks` attribute of the dds object is True, check that outliers # were actually refitted. @@ -196,10 +205,12 @@ def summary(self) -> None: self.results_df["pvalue"] = self.p_values self.results_df["padj"] = self.padj - print( - f"Log2 fold change & Wald test p-value: " - f"{self.contrast[0]} {self.contrast[1]} vs {self.contrast[2]}" - ) + if not self.quiet: + print( + f"Log2 fold change & Wald test p-value: " + f"{self.contrast[0]} {self.contrast[1]} vs {self.contrast[2]}", + file=sys.stderr, + ) display(self.results_df) def run_wald_test(self) -> None: @@ -213,11 +224,13 @@ def run_wald_test(self) -> None: # Raise a warning if LFCs are shrunk. if self.shrunk_LFCs: - print( - "Note: running Wald test on shrunk LFCs. " - "Some sequencing datasets show better performance with the testing " - "separated from the use of the LFC prior." - ) + if not self.quiet: + print( + "Note: running Wald test on shrunk LFCs. " + "Some sequencing datasets show better performance with the testing " + "separated from the use of the LFC prior.", + file=sys.stderr, + ) mu = ( np.exp(self.design_matrix @ self.LFC.T) @@ -234,7 +247,8 @@ def run_wald_test(self) -> None: design_matrix = self.design_matrix.values LFCs = self.LFC.values - print("Running Wald tests...") + if not self.quiet: + print("Running Wald tests...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -253,7 +267,8 @@ def run_wald_test(self) -> None: for i in range(num_genes) ) end = time.time() - print(f"... done in {end-start:.2f} seconds.\n") + if not self.quiet: + print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr) pvals, stats, se = zip(*res) @@ -326,7 +341,8 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None: design_matrix = self.design_matrix.values - print("Fitting MAP LFCs...") + if not self.quiet: + print("Fitting MAP LFCs...", file=sys.stderr) start = time.time() with parallel_backend("loky", inner_max_num_threads=1): res = Parallel( @@ -347,7 +363,8 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None: for i in self.dds.non_zero_idx ) end = time.time() - print(f"... done in {end-start:.2f} seconds.\n") + if not self.quiet: + print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr) lfcs, inv_hessians, l_bfgs_b_converged_ = zip(*res) @@ -388,10 +405,12 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None: split_coeff = coeff.split("_") # coeffs are of the form "factor_A_vs_B", hence "factor" is split_coeff[0], # "A" is split_coeff[1] and "B" split_coeff[3] - print( - f"Shrunk Log2 fold change & Wald test p-value: " - f"{split_coeff[0]} {split_coeff[1]} vs {split_coeff[3]}" - ) + if not self.quiet: + print( + f"Shrunk Log2 fold change & Wald test p-value: " + f"{split_coeff[0]} {split_coeff[1]} vs {split_coeff[3]}", + file=sys.stderr, + ) display(self.results_df)