Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 40 additions & 14 deletions pydeseq2/dds.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import time
import warnings
from typing import List
Expand Down Expand Up @@ -110,6 +111,9 @@ class DeseqDataSet(ad.AnnData):
The verbosity level for joblib tasks. The higher the value, the more updates
are reported. (default: ``0``).

quiet : bool
    Suppress deseq2 status updates during fit. (default: ``False``).

Attributes
----------
X
Expand Down Expand Up @@ -152,6 +156,9 @@ class DeseqDataSet(ad.AnnData):
new_all_zeroes_genes : pandas.Index
Genes which have only zero counts after outlier replacement.

quiet : bool
    Suppress deseq2 status updates during fit. (default: ``False``).

References
----------
.. bibliography::
Expand All @@ -175,8 +182,8 @@ def __init__(
n_cpus: Optional[int] = None,
batch_size: int = 128,
joblib_verbosity: int = 0,
quiet: bool = False,
) -> None:

# Initialize the AnnData part
if adata is not None:
if counts is not None:
Expand Down Expand Up @@ -231,6 +238,7 @@ def __init__(
self.n_processes = get_num_processes(n_cpus)
self.batch_size = batch_size
self.joblib_verbosity = joblib_verbosity
self.quiet = quiet

def vst(
self,
Expand Down Expand Up @@ -341,7 +349,8 @@ def fit_size_factors(
fit_type : str
The normalization method to use (default: ``"ratio"``).
"""
print("Fitting size factors...")
if not self.quiet:
print("Fitting size factors...", file=sys.stderr)
start = time.time()
if fit_type == "iterative":
self._fit_iterate_size_factors()
Expand All @@ -358,7 +367,9 @@ def fit_size_factors(
else:
self.layers["normed_counts"], self.obsm["size_factors"] = deseq2_norm(self.X)
end = time.time()
print(f"... done in {end - start:.2f} seconds.\n")

if not self.quiet:
print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)

def fit_genewise_dispersions(self) -> None:
"""Fit gene-wise dispersion estimates.
Expand Down Expand Up @@ -431,7 +442,8 @@ def fit_genewise_dispersions(self) -> None:
self.layers["_mu_hat"] = np.full((self.n_obs, self.n_vars), np.NaN)
self.layers["_mu_hat"][:, self.varm["non_zero"]] = mu_hat_.T

print("Fitting dispersions...")
if not self.quiet:
print("Fitting dispersions...", file=sys.stderr)
start = time.time()
with parallel_backend("loky", inner_max_num_threads=1):
res = Parallel(
Expand All @@ -451,7 +463,9 @@ def fit_genewise_dispersions(self) -> None:
for i in self.non_zero_idx
)
end = time.time()
print(f"... done in {end - start:.2f} seconds.\n")

if not self.quiet:
print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)

dispersions_, l_bfgs_b_converged_ = zip(*res)

Expand All @@ -473,7 +487,8 @@ def fit_dispersion_trend(self) -> None:
if "genewise_dispersions" not in self.varm:
self.fit_genewise_dispersions()

print("Fitting dispersion trend curve...")
if not self.quiet:
print("Fitting dispersion trend curve...", file=sys.stderr)
start = time.time()
self.varm["_normed_means"] = self.layers["normed_counts"].mean(0)

Expand Down Expand Up @@ -528,7 +543,9 @@ def fit_dispersion_trend(self) -> None:
)

end = time.time()
print(f"... done in {end - start:.2f} seconds.\n")

if not self.quiet:
print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)

self.uns["trend_coeffs"] = pd.Series(coeffs, index=["a0", "a1"])

Expand Down Expand Up @@ -584,7 +601,8 @@ def fit_MAP_dispersions(self) -> None:
# Convert design matrix to numpy for speed
design_matrix = self.obsm["design_matrix"].values

print("Fitting MAP dispersions...")
if not self.quiet:
print("Fitting MAP dispersions...", file=sys.stderr)
start = time.time()
with parallel_backend("loky", inner_max_num_threads=1):
res = Parallel(
Expand All @@ -606,7 +624,9 @@ def fit_MAP_dispersions(self) -> None:
for i in self.non_zero_idx
)
end = time.time()
print(f"... done in {end-start:.2f} seconds.\n")

if not self.quiet:
print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr)

dispersions_, l_bfgs_b_converged_ = zip(*res)

Expand Down Expand Up @@ -641,7 +661,8 @@ def fit_LFC(self) -> None:
# Convert design matrix to numpy for speed
design_matrix = self.obsm["design_matrix"].values

print("Fitting LFCs...")
if not self.quiet:
print("Fitting LFCs...", file=sys.stderr)
start = time.time()
with parallel_backend("loky", inner_max_num_threads=1):
res = Parallel(
Expand All @@ -660,7 +681,9 @@ def fit_LFC(self) -> None:
for i in self.non_zero_idx
)
end = time.time()
print(f"... done in {end-start:.2f} seconds.\n")

if not self.quiet:
print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr)

MLE_lfcs_, mu_, hat_diagonals_, converged_ = zip(*res)
mu_ = np.array(mu_).T
Expand Down Expand Up @@ -736,7 +759,10 @@ def refit(self) -> None:
"""
# Replace outlier counts
self._replace_outliers()
print(f"Refitting {sum(self.varm['replaced']) } outliers.\n")
if not self.quiet:
print(
f"Refitting {sum(self.varm['replaced']) } outliers.\n", file=sys.stderr
)

if sum(self.varm["replaced"]) > 0:
# Refit dispersions and LFCs for genes that had outliers replaced
Expand Down Expand Up @@ -1020,15 +1046,15 @@ def objective(p):
self.obsm["size_factors"] = np.exp(res.x - np.mean(res.x))

if not res.success:
print("A size factor fitting iteration failed.")
print("A size factor fitting iteration failed.", file=sys.stderr)
break

if (i > 1) and np.sum(
(np.log(old_sf) - np.log(self.obsm["size_factors"])) ** 2
) < 1e-4:
break
elif i == niter - 1:
print("Iterative size factor fitting did not converge.")
print("Iterative size factor fitting did not converge.", file=sys.stderr)

# Restore the design matrix and free buffer
self.obsm["design_matrix"] = self.obsm["design_matrix_buffer"].copy()
Expand Down
53 changes: 36 additions & 17 deletions pydeseq2/ds.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import time
from typing import List
from typing import Optional
Expand Down Expand Up @@ -67,6 +68,9 @@ class DeseqStats:
The verbosity level for joblib tasks. The higher the value, the more updates
are reported. (default: ``0``).

quiet : bool
    Suppress deseq2 status updates during fit. (default: ``False``).

Attributes
----------
base_mean : pandas.Series
Expand Down Expand Up @@ -108,6 +112,9 @@ class DeseqStats:
n_processes : int
Number of threads to use for multiprocessing.

quiet : bool
    Suppress deseq2 status updates during fit. (default: ``False``).

References
----------
.. bibliography::
Expand All @@ -125,6 +132,7 @@ def __init__(
prior_LFC_var: Optional[np.ndarray] = None,
batch_size: int = 128,
joblib_verbosity: int = 0,
quiet: bool = False,
) -> None:
assert (
"LFC" in dds.varm
Expand Down Expand Up @@ -154,6 +162,7 @@ def __init__(
self.n_processes = get_num_processes(n_cpus)
self.batch_size = batch_size
self.joblib_verbosity = joblib_verbosity
self.quiet = quiet

# If the `refit_cooks` attribute of the dds object is True, check that outliers
# were actually refitted.
Expand Down Expand Up @@ -196,10 +205,12 @@ def summary(self) -> None:
self.results_df["pvalue"] = self.p_values
self.results_df["padj"] = self.padj

print(
f"Log2 fold change & Wald test p-value: "
f"{self.contrast[0]} {self.contrast[1]} vs {self.contrast[2]}"
)
if not self.quiet:
print(
f"Log2 fold change & Wald test p-value: "
f"{self.contrast[0]} {self.contrast[1]} vs {self.contrast[2]}",
file=sys.stderr,
)
display(self.results_df)

def run_wald_test(self) -> None:
Expand All @@ -213,11 +224,13 @@ def run_wald_test(self) -> None:

# Raise a warning if LFCs are shrunk.
if self.shrunk_LFCs:
print(
"Note: running Wald test on shrunk LFCs. "
"Some sequencing datasets show better performance with the testing "
"separated from the use of the LFC prior."
)
if not self.quiet:
print(
"Note: running Wald test on shrunk LFCs. "
"Some sequencing datasets show better performance with the testing "
"separated from the use of the LFC prior.",
file=sys.stderr,
)

mu = (
np.exp(self.design_matrix @ self.LFC.T)
Expand All @@ -234,7 +247,8 @@ def run_wald_test(self) -> None:
design_matrix = self.design_matrix.values
LFCs = self.LFC.values

print("Running Wald tests...")
if not self.quiet:
print("Running Wald tests...", file=sys.stderr)
start = time.time()
with parallel_backend("loky", inner_max_num_threads=1):
res = Parallel(
Expand All @@ -253,7 +267,8 @@ def run_wald_test(self) -> None:
for i in range(num_genes)
)
end = time.time()
print(f"... done in {end-start:.2f} seconds.\n")
if not self.quiet:
print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr)

pvals, stats, se = zip(*res)

Expand Down Expand Up @@ -326,7 +341,8 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None:

design_matrix = self.design_matrix.values

print("Fitting MAP LFCs...")
if not self.quiet:
print("Fitting MAP LFCs...", file=sys.stderr)
start = time.time()
with parallel_backend("loky", inner_max_num_threads=1):
res = Parallel(
Expand All @@ -347,7 +363,8 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None:
for i in self.dds.non_zero_idx
)
end = time.time()
print(f"... done in {end-start:.2f} seconds.\n")
if not self.quiet:
print(f"... done in {end-start:.2f} seconds.\n", file=sys.stderr)

lfcs, inv_hessians, l_bfgs_b_converged_ = zip(*res)

Expand Down Expand Up @@ -388,10 +405,12 @@ def lfc_shrink(self, coeff: Optional[str] = None) -> None:
split_coeff = coeff.split("_")
# coeffs are of the form "factor_A_vs_B", hence "factor" is split_coeff[0],
# "A" is split_coeff[1] and "B" split_coeff[3]
print(
f"Shrunk Log2 fold change & Wald test p-value: "
f"{split_coeff[0]} {split_coeff[1]} vs {split_coeff[3]}"
)
if not self.quiet:
print(
f"Shrunk Log2 fold change & Wald test p-value: "
f"{split_coeff[0]} {split_coeff[1]} vs {split_coeff[3]}",
file=sys.stderr,
)

display(self.results_df)

Expand Down