From e3c2cd8919798b4f52aa313bf4a1daa1b5bc893e Mon Sep 17 00:00:00 2001 From: stemangiola Date: Thu, 24 Sep 2020 11:14:20 +1000 Subject: [PATCH 01/12] start edits --- R/methods.R | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/R/methods.R b/R/methods.R index 12146b7a..17ad05e6 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1844,6 +1844,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' @param fill_missing_values A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed. #' @param scaling_method A character string. The scaling method passed to the back-end function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile") #' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames. +#' @param prefix A character string. The prefix you would like to add to the result columns. It is useful if you want to compare several methods. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' #' @details This function provides the option to use edgeR \url{https://doi.org/10.1093/bioinformatics/btp616}, limma-voom \url{https://doi.org/10.1186/gb-2014-15-2-r29}, or DESeq2 \url{https://doi.org/10.1186/s13059-014-0550-8} to perform the testing. @@ -1920,12 +1921,15 @@ setGeneric("test_differential_abundance", function(.data, .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - - action = "add") + prefix = "", + action = "add", + + # DEPRECATED + significance_threshold = 0.05, + fill_missing_values = FALSE + ) standardGeneric("test_differential_abundance")) # Set internal @@ -1940,8 +1944,14 @@ setGeneric("test_differential_abundance", function(.data, fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, + prefix = "", - action = "add") + action = "add", + + # DEPRECATED + significance_threshold = 0.05, + fill_missing_values = FALSE + ) { # Get column names .sample = enquo(.sample) @@ -1952,6 +1962,22 @@ setGeneric("test_differential_abundance", function(.data, .transcript = col_names$.transcript .abundance = col_names$.abundance + # DEPRECATION OF significance_threshold + if (is_present(significance_threshold) & !quo_is_null(significance_threshold)) { + + # Signal the deprecation to the user + deprecate_warn("1.1.7", "tidybulk::test_differential_abundance(significance_threshold = )", details = "The argument significance_threshold is now deprecated please look at the resulting statistics to do the filtering (e.g., filter(., FDR < 0.05))") + + } + + # DEPRECATION OF fill_missing_values + if (is_present(fill_missing_values) & !quo_is_null(fill_missing_values)) { + + # Signal the deprecation to the user + deprecate_warn("1.1.7", "tidybulk::test_differential_abundance(fill_missing_values = )", details = "The argument fill_missing_values is now deprecated, you will receive a warning/error instead. Please use externally the methods fill_missing_abundance or impute_missing_abundance instead.") + + } + # Clearly state what counts are used message("tidybulk says: All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula.") @@ -1977,7 +2003,7 @@ setGeneric("test_differential_abundance", function(.data, when( # edgeR - grepl("edgeR", method) ~ get_differential_transcript_abundance_bulk( + grepl("edger", tolower(method)) ~ get_differential_transcript_abundance_bulk( ., .formula, .sample = !!.sample, @@ -1988,7 +2014,8 @@ setGeneric("test_differential_abundance", function(.data, significance_threshold = significance_threshold, fill_missing_values = fill_missing_values, scaling_method = scaling_method, - omit_contrast_in_colnames = omit_contrast_in_colnames + omit_contrast_in_colnames = omit_contrast_in_colnames, + prefix = prefix ), # Voom @@ -2003,7 +2030,8 @@ setGeneric("test_differential_abundance", function(.data, significance_threshold = significance_threshold, fill_missing_values = fill_missing_values, scaling_method = scaling_method, - omit_contrast_in_colnames = omit_contrast_in_colnames + omit_contrast_in_colnames = omit_contrast_in_colnames, + prefix = prefix ), # DESeq2 @@ -2018,7 +2046,8 @@ setGeneric("test_differential_abundance", function(.data, significance_threshold = significance_threshold, fill_missing_values = fill_missing_values, scaling_method = scaling_method, - omit_contrast_in_colnames = omit_contrast_in_colnames + omit_contrast_in_colnames = omit_contrast_in_colnames, + prefix = prefix ), # Else error From 8cde3887cb5541b483c2cae11684b0bdbfc3af4e Mon Sep 17 00:00:00 2001 From: stemangiola Date: Thu, 24 Sep 2020 11:14:20 +1000 Subject: [PATCH 02/12] start edits --- R/methods.R | 47 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/R/methods.R b/R/methods.R index 12146b7a..17ad05e6 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1844,6 +1844,7 @@ setMethod("ensembl_to_symbol", "tidybulk", .ensembl_to_symbol) #' @param fill_missing_values A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed. #' @param scaling_method A character string. The scaling method passed to the back-end function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile") #' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames. +#' @param prefix A character string. The prefix you would like to add to the result columns. It is useful if you want to compare several methods. #' @param action A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get). #' #' @details This function provides the option to use edgeR \url{https://doi.org/10.1093/bioinformatics/btp616}, limma-voom \url{https://doi.org/10.1186/gb-2014-15-2-r29}, or DESeq2 \url{https://doi.org/10.1186/s13059-014-0550-8} to perform the testing. @@ -1920,12 +1921,15 @@ setGeneric("test_differential_abundance", function(.data, .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - - action = "add") + prefix = "", + action = "add", + + # DEPRECATED + significance_threshold = 0.05, + fill_missing_values = FALSE + ) standardGeneric("test_differential_abundance")) # Set internal @@ -1940,8 +1944,14 @@ setGeneric("test_differential_abundance", function(.data, fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, + prefix = "", - action = "add") + action = "add", + + # DEPRECATED + significance_threshold = 0.05, + fill_missing_values = FALSE + ) { # Get column names .sample = enquo(.sample) @@ -1952,6 +1962,22 @@ setGeneric("test_differential_abundance", function(.data, .transcript = col_names$.transcript .abundance = col_names$.abundance + # DEPRECATION OF significance_threshold + if (is_present(significance_threshold) & !quo_is_null(significance_threshold)) { + + # Signal the deprecation to the user + deprecate_warn("1.1.7", "tidybulk::test_differential_abundance(significance_threshold = )", details = "The argument significance_threshold is now deprecated please look at the resulting statistics to do the filtering (e.g., filter(., FDR < 0.05))") + + } + + # DEPRECATION OF fill_missing_values + if (is_present(fill_missing_values) & !quo_is_null(fill_missing_values)) { + + # Signal the deprecation to the user + deprecate_warn("1.1.7", "tidybulk::test_differential_abundance(fill_missing_values = )", details = "The argument fill_missing_values is now deprecated, you will receive a warning/error instead. Please use externally the methods fill_missing_abundance or impute_missing_abundance instead.") + + } + # Clearly state what counts are used message("tidybulk says: All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula.") @@ -1977,7 +2003,7 @@ setGeneric("test_differential_abundance", function(.data, when( # edgeR - grepl("edgeR", method) ~ get_differential_transcript_abundance_bulk( + grepl("edger", tolower(method)) ~ get_differential_transcript_abundance_bulk( ., .formula, .sample = !!.sample, @@ -1988,7 +2014,8 @@ setGeneric("test_differential_abundance", function(.data, significance_threshold = significance_threshold, fill_missing_values = fill_missing_values, scaling_method = scaling_method, - omit_contrast_in_colnames = omit_contrast_in_colnames + omit_contrast_in_colnames = omit_contrast_in_colnames, + prefix = prefix ), # Voom @@ -2003,7 +2030,8 @@ setGeneric("test_differential_abundance", function(.data, significance_threshold = significance_threshold, fill_missing_values = fill_missing_values, scaling_method = scaling_method, - omit_contrast_in_colnames = omit_contrast_in_colnames + omit_contrast_in_colnames = omit_contrast_in_colnames, + prefix = prefix ), # DESeq2 @@ -2018,7 +2046,8 @@ setGeneric("test_differential_abundance", function(.data, significance_threshold = significance_threshold, fill_missing_values = fill_missing_values, scaling_method = scaling_method, - omit_contrast_in_colnames = omit_contrast_in_colnames + omit_contrast_in_colnames = omit_contrast_in_colnames, + prefix = prefix ), # Else error From 5fbf8f4af44f79aa8f0e0329e0c5e162b4cc89a1 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 25 Sep 2020 10:41:21 +1000 Subject: [PATCH 03/12] setup framework --- R/functions.R | 71 +++++++++++++------ R/methods.R | 10 +-- R/methods_SE.R | 12 ++-- ..._differential_transcript_abundance_bulk.Rd | 3 +- ...erential_transcript_abundance_bulk_voom.Rd | 3 +- ...ifferential_transcript_abundance_deseq2.Rd | 3 +- man/test_differential_abundance-methods.Rd | 52 ++++++++------ 7 files changed, 97 insertions(+), 57 deletions(-) diff --git a/R/functions.R b/R/functions.R index f4dc95d7..d81b451d 100755 --- a/R/functions.R +++ b/R/functions.R @@ -305,7 +305,8 @@ get_differential_transcript_abundance_bulk <- function(.data, minimum_proportion = 0.7, fill_missing_values = FALSE, scaling_method = "TMM", - omit_contrast_in_colnames = FALSE) { + omit_contrast_in_colnames = FALSE, + prefix = "") { # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -370,7 +371,7 @@ get_differential_transcript_abundance_bulk <- function(.data, data = df_for_edgeR %>% select(!!.sample, one_of(parse_formula(.formula))) %>% distinct %>% arrange(!!.sample) ) - # Print the design column names in case I want constrasts + # Print the design column names in case I want contrasts message( sprintf( "tidybulk says: The design column names are \"%s\"", @@ -431,8 +432,8 @@ get_differential_transcript_abundance_bulk <- function(.data, table %>% as_tibble(rownames = quo_name(.transcript)) %>% - # Mark DE genes - mutate(significant = FDR < significance_threshold) %>% + # # Mark DE genes + # mutate(significant = FDR < significance_threshold) %>% # Arrange arrange(FDR), @@ -455,16 +456,23 @@ get_differential_transcript_abundance_bulk <- function(.data, edgeR::topTags(n = 999999) %$% table %>% as_tibble(rownames = quo_name(.transcript)) %>% - mutate(constrast = colnames(my_contrasts)[.x]) %>% - - # Mark DE genes - mutate(significant = FDR < significance_threshold) + mutate(constrast = colnames(my_contrasts)[.x]) + # %>% + # + # # Mark DE genes + # mutate(significant = FDR < significance_threshold) ) %>% pivot_wider(values_from = -c(!!.transcript, constrast), names_from = constrast, names_sep = "___") } ) %>% + # Attach prefix + setNames(c( + colnames(.)[1], + sprintf("%s%s", prefix, colnames(.)[2:ncol(.)]) + )) %>% + # Attach attributes reattach_internals(.data) %>% memorise_methods_used(c("edger", "limma")) %>% @@ -520,7 +528,8 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, minimum_proportion = 0.7, fill_missing_values = FALSE, scaling_method = "TMM", - omit_contrast_in_colnames = FALSE) { + omit_contrast_in_colnames = FALSE, + prefix = "") { # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -561,7 +570,7 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, data = df_for_voom %>% select(!!.sample, one_of(parse_formula(.formula))) %>% distinct %>% arrange(!!.sample) ) - # Print the design column names in case I want constrasts + # Print the design column names in case I want contrasts message( sprintf( "tidybulk says: The design column names are \"%s\"", @@ -612,8 +621,8 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, limma::topTable(n = 999999) %>% as_tibble(rownames = quo_name(.transcript)) %>% - # Mark DE genes - mutate(significant = adj.P.Val < significance_threshold) %>% + # # Mark DE genes + # mutate(significant = adj.P.Val < significance_threshold) %>% # Arrange arrange(adj.P.Val), @@ -633,16 +642,24 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, # Convert to tibble limma::topTable(n = 999999) %>% as_tibble(rownames = quo_name(.transcript)) %>% - mutate(constrast = colnames(my_contrasts)[.x]) %>% - - # Mark DE genes - mutate(significant = adj.P.Val < significance_threshold) + mutate(constrast = colnames(my_contrasts)[.x]) + # %>% + # + # # Mark DE genes + # mutate(significant = adj.P.Val < significance_threshold) ) %>% pivot_wider(values_from = -c(!!.transcript, constrast), names_from = constrast, names_sep = "___") } ) %>% + + # Attach prefix + setNames(c( + colnames(.)[1], + sprintf("%s%s", prefix, colnames(.)[2:ncol(.)]) + )) %>% + # Attach attributes reattach_internals(.data) %>% @@ -699,7 +716,8 @@ get_differential_transcript_abundance_deseq2 <- function(.data, minimum_proportion = 0.7, fill_missing_values = FALSE, scaling_method = "TMM", - omit_contrast_in_colnames = FALSE) { + omit_contrast_in_colnames = FALSE, + prefix = "") { # Get column names .sample = enquo(.sample) .transcript = enquo(.transcript) @@ -785,8 +803,8 @@ get_differential_transcript_abundance_deseq2 <- function(.data, DESeq2::results() %>% as_tibble(rownames = quo_name(.transcript)) %>% - # Mark DE genes - mutate(significant = padj < significance_threshold) %>% + # # Mark DE genes + # mutate(significant = padj < significance_threshold) %>% # Arrange arrange(padj), @@ -804,10 +822,11 @@ get_differential_transcript_abundance_deseq2 <- function(.data, # Convert to tibble as_tibble(rownames = quo_name(.transcript)) %>% - mutate(constrast = colnames(my_contrasts)[.x]) %>% - - # Mark DE genes - mutate(significant = padj < significance_threshold) + mutate(constrast = colnames(my_contrasts)[.x]) + # %>% + # + # # Mark DE genes + # mutate(significant = padj < significance_threshold) ) %>% pivot_wider(values_from = -c(!!.transcript, constrast), names_from = constrast, names_sep = "___") @@ -819,6 +838,12 @@ get_differential_transcript_abundance_deseq2 <- function(.data, # when(!"lowly_abundant" %in% colnames(.data) ~ (.) %>% mutate(lowly_abundant = if_else(is.na(log2FoldChange), T, F)) , # ~ (.)) %>% + # Attach prefix + setNames(c( + colnames(.)[1], + sprintf("%s%s", prefix, colnames(.)[2:ncol(.)]) + )) %>% + # Attach attributes reattach_internals(.data) %>% memorise_methods_used("deseq2") %>% diff --git a/R/methods.R b/R/methods.R index 17ad05e6..834743ce 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1940,8 +1940,6 @@ setGeneric("test_differential_abundance", function(.data, .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, prefix = "", @@ -1963,7 +1961,7 @@ setGeneric("test_differential_abundance", function(.data, .abundance = col_names$.abundance # DEPRECATION OF significance_threshold - if (is_present(significance_threshold) & !quo_is_null(significance_threshold)) { + if (is_present(significance_threshold)) { # Signal the deprecation to the user deprecate_warn("1.1.7", "tidybulk::test_differential_abundance(significance_threshold = )", details = "The argument significance_threshold is now deprecated please look at the resulting statistics to do the filtering (e.g., filter(., FDR < 0.05))") @@ -1971,7 +1969,7 @@ setGeneric("test_differential_abundance", function(.data, } # DEPRECATION OF fill_missing_values - if (is_present(fill_missing_values) & !quo_is_null(fill_missing_values)) { + if (is_present(fill_missing_values)) { # Signal the deprecation to the user deprecate_warn("1.1.7", "tidybulk::test_differential_abundance(fill_missing_values = )", details = "The argument fill_missing_values is now deprecated, you will receive a warning/error instead. Please use externally the methods fill_missing_abundance or impute_missing_abundance instead.") @@ -1979,7 +1977,9 @@ setGeneric("test_differential_abundance", function(.data, } # Clearly state what counts are used - message("tidybulk says: All methods use raw counts, irrespective of if scale_abundance or adjust_abundance have been calculated, therefore it is essential to add covariates such as batch effects (if applicable) in the formula.") + message("tidybulk says: All methods use raw counts, +irrespective of if scale_abundance or adjust_abundance have been calculated, +therefore it is essential to add covariates such as batch effects (if applicable) in the formula.") # Validate data frame if(do_validate()) { diff --git a/R/methods_SE.R b/R/methods_SE.R index 73fa6879..d4b01b83 100644 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -282,7 +282,7 @@ setMethod("reduce_dimensions", dimension_2_column_rotated = NULL, action = "add") { - # Get column names + # Get column namest .element = enquo(.element) # Parse other colnames @@ -617,11 +617,15 @@ setMethod( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - action = "add") + prefix = "", + + action = "add", + + # DEPRECATED + significance_threshold = 0.05, + fill_missing_values = FALSE) { # Make col names .sample = enquo(.sample) diff --git a/man/get_differential_transcript_abundance_bulk.Rd b/man/get_differential_transcript_abundance_bulk.Rd index de9a2354..8ffd3b1c 100644 --- a/man/get_differential_transcript_abundance_bulk.Rd +++ b/man/get_differential_transcript_abundance_bulk.Rd @@ -17,7 +17,8 @@ get_differential_transcript_abundance_bulk( minimum_proportion = 0.7, fill_missing_values = FALSE, scaling_method = "TMM", - omit_contrast_in_colnames = FALSE + omit_contrast_in_colnames = FALSE, + prefix = "" ) } \arguments{ diff --git a/man/get_differential_transcript_abundance_bulk_voom.Rd b/man/get_differential_transcript_abundance_bulk_voom.Rd index de56fb8e..336a6ccb 100644 --- a/man/get_differential_transcript_abundance_bulk_voom.Rd +++ b/man/get_differential_transcript_abundance_bulk_voom.Rd @@ -16,7 +16,8 @@ get_differential_transcript_abundance_bulk_voom( minimum_proportion = 0.7, fill_missing_values = FALSE, scaling_method = "TMM", - omit_contrast_in_colnames = FALSE + omit_contrast_in_colnames = FALSE, + prefix = "" ) } \arguments{ diff --git a/man/get_differential_transcript_abundance_deseq2.Rd b/man/get_differential_transcript_abundance_deseq2.Rd index b4bbca28..1f4f47d8 100644 --- a/man/get_differential_transcript_abundance_deseq2.Rd +++ b/man/get_differential_transcript_abundance_deseq2.Rd @@ -17,7 +17,8 @@ get_differential_transcript_abundance_deseq2( minimum_proportion = 0.7, fill_missing_values = FALSE, scaling_method = "TMM", - omit_contrast_in_colnames = FALSE + omit_contrast_in_colnames = FALSE, + prefix = "" ) } \arguments{ diff --git a/man/test_differential_abundance-methods.Rd b/man/test_differential_abundance-methods.Rd index 6bc9e4c3..0dc9bd55 100644 --- a/man/test_differential_abundance-methods.Rd +++ b/man/test_differential_abundance-methods.Rd @@ -18,11 +18,12 @@ test_differential_abundance( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - action = "add" + prefix = "", + action = "add", + significance_threshold = 0.05, + fill_missing_values = FALSE ) \S4method{test_differential_abundance}{spec_tbl_df}( @@ -33,11 +34,12 @@ test_differential_abundance( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - action = "add" + prefix = "", + action = "add", + significance_threshold = 0.05, + fill_missing_values = FALSE ) \S4method{test_differential_abundance}{tbl_df}( @@ -48,11 +50,12 @@ test_differential_abundance( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - action = "add" + prefix = "", + action = "add", + significance_threshold = 0.05, + fill_missing_values = FALSE ) \S4method{test_differential_abundance}{tidybulk}( @@ -63,11 +66,12 @@ test_differential_abundance( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - action = "add" + prefix = "", + action = "add", + significance_threshold = 0.05, + fill_missing_values = FALSE ) \S4method{test_differential_abundance}{SummarizedExperiment}( @@ -78,11 +82,12 @@ test_differential_abundance( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - action = "add" + prefix = "", + action = "add", + significance_threshold = 0.05, + fill_missing_values = FALSE ) \S4method{test_differential_abundance}{RangedSummarizedExperiment}( @@ -93,11 +98,12 @@ test_differential_abundance( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, - action = "add" + prefix = "", + action = "add", + significance_threshold = 0.05, + fill_missing_values = FALSE ) } \arguments{ @@ -115,15 +121,17 @@ test_differential_abundance( \item{method}{A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT), "DESeq2", "limma_voom"} -\item{significance_threshold}{A real between 0 and 1 (usually 0.05).} - -\item{fill_missing_values}{A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed.} - \item{scaling_method}{A character string. The scaling method passed to the back-end function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")} \item{omit_contrast_in_colnames}{If just one contrast is specified you can choose to omit the contrast label in the colnames.} +\item{prefix}{A character string. The prefix you would like to add to the result columns. It is useful if you want to compare several methods.} + \item{action}{A character string. Whether to join the new information to the input tbl (add), or just get the non-redundant tbl with the new information (get).} + +\item{significance_threshold}{A real between 0 and 1 (usually 0.05).} + +\item{fill_missing_values}{A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed.} } \value{ A `tbl` with additional columns for the statistics from the test (e.g., log fold change, p-value and false discovery rate). From 8eab5d1a5f87c3f621479efa0e65fa602754e701 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 25 Sep 2020 11:39:16 +1000 Subject: [PATCH 04/12] update tests --- tests/testthat/test-bulk_methods.R | 90 ++++++++++++------------------ 1 file changed, 37 insertions(+), 53 deletions(-) diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index 5e03967a..e1ed9e75 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -199,7 +199,7 @@ test_that("Only differential trancript abundance - no object",{ expect_equal( ncol(res), - 7 + 6 ) expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) @@ -227,7 +227,7 @@ test_that("Only differential trancript abundance - no object",{ expect_equal( ncol(res), - 7 + 6 ) expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) @@ -252,7 +252,7 @@ test_that("Only differential trancript abundance - no object",{ expect_equal( ncol(res), - 7 + 6 ) expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) @@ -271,20 +271,6 @@ test_that("Only differential trancript abundance - no object",{ "Design matrix not of full rank" ) - # # Just one sample per covariate error - # expect_message( - # test_differential_abundance( - # filter(input_df, a %in% c("SRR1740034", "SRR1740035", "SRR1740043", "SRR1740058")), - # ~ condition, - # .sample = a, - # .transcript = b, - # .abundance = c, - # method = "edgeR_likelihood_ratio", - # action="only" - # ), - # "You have less than two replicated for each factorial combination" - # ) - # Change scaling method res = test_differential_abundance( @@ -321,35 +307,9 @@ test_that("Only differential trancript abundance - no object - with contrasts",{ expect_equal( ncol(res), - 13 - ) - - expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) - -}) - -test_that("Get differential trancript abundance - no object",{ - - res = - test_differential_abundance( - input_df %>% identify_abundant(a, b, c, factor_of_interest = condition), - ~ condition, - .sample = a, - .transcript = b, - .abundance = c, - method = "edgeR_likelihood_ratio", - action="get" - ) - - expect_equal( - dplyr::pull(dplyr::slice(distinct(res, b, logFC), 1:4) , "logFC"), - c(NA, 5.477110, -6.079712 , NA), - tolerance=1e-6 + 11 ) - expect_equal( ncol(res), 8) - expect_equal( nrow(res), 527) - expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) }) @@ -375,7 +335,7 @@ test_that("Add differential trancript abundance - no object",{ expect_equal( ncol(res), - 13 + 12 ) expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) @@ -403,7 +363,7 @@ test_that("Only differential trancript abundance voom - no object",{ expect_equal( ncol(res), - 8 + 7 ) expect_equal( class(attr(res, "internals")$voom)[1], "MArrayLM" ) @@ -431,7 +391,7 @@ test_that("Only differential trancript abundance voom - no object",{ expect_equal( ncol(res), - 8 + 7 ) expect_equal( class(attr(res, "internals")$voom)[1], "MArrayLM" ) @@ -456,7 +416,7 @@ test_that("Only differential trancript abundance voom - no object",{ expect_equal( ncol(res), - 8 + 7 ) expect_equal( class(attr(res, "internals")$voom)[1], "MArrayLM" ) @@ -511,7 +471,7 @@ test_that("Only differential trancript abundance - no object - with contrasts",{ expect_equal( ncol(res), - 15 + 13 ) expect_equal( class(attr(res, "internals")$voom)[1], "MArrayLM" ) @@ -539,7 +499,7 @@ test_that("New method choice",{ expect_equal( ncol(res), - 7 + 6 ) expect_equal( class(attr(res, "internals")$edgeR)[1], "DGEGLM" ) @@ -600,7 +560,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ expect_equal( ncol(res), - 8 + 7 ) expect_equal( class(attr(res, "internals")$DESeq2)[1], "DESeqDataSet" ) @@ -628,7 +588,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ expect_equal( ncol(res), - 8 + 7 ) expect_equal( class(attr(res, "internals")$DESeq2)[1], "DESeqDataSet" ) @@ -653,7 +613,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ expect_equal( ncol(res), - 8 + 7 ) expect_equal( class(attr(res, "internals")$DESeq2)[1], "DESeqDataSet" ) @@ -674,6 +634,30 @@ test_that("DESeq2 differential trancript abundance - no object",{ }) +test_that("test prefix",{ + + library(DESeq2) + + df = input_df %>% tidybulk(a, b, c, ) %>% identify_abundant(factor_of_interest = condition) + + res_DeSEQ2 = + df %>% + test_differential_abundance(~condition, method="DeSEQ2", action="only", prefix = "prefix_") + + res_voom = + df %>% + test_differential_abundance(~condition, method="limma_voom", action="only", prefix = "prefix_") + + res_edger = + df %>% + test_differential_abundance(~condition, method="edgeR_likelihood_ratio", action="only", prefix = "prefix_") + + expect_gt(colnames(res_DeSEQ2) %>% grep("prefix_", .) %>% length, 0) + expect_gt(colnames(res_voom) %>% grep("prefix_", .) %>% length, 0) + expect_gt(colnames(res_edger) %>% grep("prefix_", .) %>% length, 0) +}) + + test_that("Get entrez from symbol - no object",{ res = From 8b2191046181183ac1f7f5735c23d05c5ba5a0a2 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 25 Sep 2020 11:39:43 +1000 Subject: [PATCH 05/12] improve DE method error --- R/methods.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/methods.R b/R/methods.R index 834743ce..b25d5c82 100755 --- a/R/methods.R +++ b/R/methods.R @@ -2003,7 +2003,7 @@ therefore it is essential to add covariates such as batch effects (if applicable when( # edgeR - grepl("edger", tolower(method)) ~ get_differential_transcript_abundance_bulk( + tolower(method) %in% c("edger_quasi_likelihood", "edger_likelihood_ratio") ~ get_differential_transcript_abundance_bulk( ., .formula, .sample = !!.sample, From da49b0e3e8207ccb99e28fc1b7a7b7e809eba1a0 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 25 Sep 2020 15:42:13 +1000 Subject: [PATCH 06/12] deprecate internal filtering and significance column --- R/functions.R | 78 +++++++------------ R/methods.R | 17 ++-- R/methods_SE.R | 6 +- ..._differential_transcript_abundance_bulk.Rd | 12 --- ...erential_transcript_abundance_bulk_voom.Rd | 12 --- ...ifferential_transcript_abundance_deseq2.Rd | 12 --- man/test_differential_abundance-methods.Rd | 24 +++--- 7 files changed, 47 insertions(+), 114 deletions(-) diff --git a/R/functions.R b/R/functions.R index d81b451d..152ade94 100755 --- a/R/functions.R +++ b/R/functions.R @@ -284,10 +284,6 @@ get_scaled_counts_bulk <- function(.data, #' @param .abundance The name of the transcript/gene abundance column #' @param .contrasts A character vector. See edgeR makeContrasts specification for the parameter `contrasts`. If contrasts are not present the first covariate is the one the model is tested against (e.g., ~ factor_of_interest) #' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT) -#' @param significance_threshold A real between 0 and 1 -#' @param minimum_counts A positive integer. Minimum counts required for at least some samples. -#' @param minimum_proportion A real positive number between 0 and 1. It is the threshold of proportion of samples for each transcripts/genes that have to be characterised by a cmp bigger than the threshold to be included for scaling procedure. -#' @param fill_missing_values A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed. #' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile") #' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames. #' @@ -300,10 +296,6 @@ get_differential_transcript_abundance_bulk <- function(.data, .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - minimum_counts = 10, - minimum_proportion = 0.7, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, prefix = "") { @@ -329,15 +321,16 @@ get_differential_transcript_abundance_bulk <- function(.data, distinct() %>% # drop factors as it can affect design matrix - mutate_if(is.factor, as.character()) %>% - - # Check if data rectangular - ifelse2_pipe( - (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & fill_missing_values, - (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & !fill_missing_values, - ~ .x %>% fill_NA_using_formula(.formula,!!.sample, !!.transcript, !!.abundance), - ~ .x %>% eliminate_sparse_transcripts(!!.transcript) - ) + mutate_if(is.factor, as.character()) + #%>% + + # # Check if data rectangular + # ifelse2_pipe( + # (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & fill_missing_values, + # (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & !fill_missing_values, + # ~ .x %>% fill_NA_using_formula(.formula,!!.sample, !!.transcript, !!.abundance), + # ~ .x %>% eliminate_sparse_transcripts(!!.transcript) + # ) # # Check if at least two samples for each group @@ -508,10 +501,6 @@ get_differential_transcript_abundance_bulk <- function(.data, #' @param .transcript The name of the transcript/gene column #' @param .abundance The name of the transcript/gene abundance column #' @param .contrasts A character vector. See voom makeContrasts specification for the parameter `contrasts`. If contrasts are not present the first covariate is the one the model is tested against (e.g., ~ factor_of_interest) -#' @param significance_threshold A real between 0 and 1 -#' @param minimum_counts A positive integer. Minimum counts required for at least some samples. -#' @param minimum_proportion A real positive number between 0 and 1. It is the threshold of proportion of samples for each transcripts/genes that have to be characterised by a cmp bigger than the threshold to be included for scaling procedure. -#' @param fill_missing_values A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed. #' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile") #' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames. #' @@ -523,10 +512,6 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, .transcript = NULL, .abundance = NULL, .contrasts = NULL, - significance_threshold = 0.05, - minimum_counts = 10, - minimum_proportion = 0.7, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, prefix = "") { @@ -552,15 +537,16 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, distinct() %>% # drop factors as it can affect design matrix - mutate_if(is.factor, as.character()) %>% - - # Check if data rectangular - ifelse2_pipe( - (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & fill_missing_values, - (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & !fill_missing_values, - ~ .x %>% fill_NA_using_formula(.formula,!!.sample, !!.transcript, !!.abundance), - ~ .x %>% eliminate_sparse_transcripts(!!.transcript) - ) + mutate_if(is.factor, as.character()) + # %>% + + # # Check if data rectangular + # ifelse2_pipe( + # (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & fill_missing_values, + # (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & !fill_missing_values, + # ~ .x %>% fill_NA_using_formula(.formula,!!.sample, !!.transcript, !!.abundance), + # ~ .x %>% eliminate_sparse_transcripts(!!.transcript) + # ) # Create design matrix @@ -695,10 +681,6 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, #' @param .abundance The name of the transcript/gene abundance column #' @param .contrasts A character vector. See edgeR makeContrasts specification for the parameter `contrasts`. If contrasts are not present the first covariate is the one the model is tested against (e.g., ~ factor_of_interest) #' @param method A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT) -#' @param significance_threshold A real between 0 and 1 -#' @param minimum_counts A positive integer. Minimum counts required for at least some samples. -#' @param minimum_proportion A real positive number between 0 and 1. It is the threshold of proportion of samples for each transcripts/genes that have to be characterised by a cmp bigger than the threshold to be included for scaling procedure. -#' @param fill_missing_values A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed. #' @param scaling_method A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile") #' @param omit_contrast_in_colnames If just one contrast is specified you can choose to omit the contrast label in the colnames. #' @@ -711,10 +693,6 @@ get_differential_transcript_abundance_deseq2 <- function(.data, .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - minimum_counts = 10, - minimum_proportion = 0.7, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, prefix = "") { @@ -742,8 +720,6 @@ get_differential_transcript_abundance_deseq2 <- function(.data, BiocManager::install("DESeq2", ask = FALSE) } - - # # Print the design column names in case I want contrasts # message( # sprintf( @@ -769,13 +745,13 @@ get_differential_transcript_abundance_deseq2 <- function(.data, # drop factors as it can affect design matrix mutate_if(is.factor, as.character()) %>% - # Check if data rectangular - ifelse2_pipe( - (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & fill_missing_values, - (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & !fill_missing_values, - ~ .x %>% fill_NA_using_formula(.formula,!!.sample, !!.transcript, !!.abundance), - ~ .x %>% eliminate_sparse_transcripts(!!.transcript) - ) %>% + # # Check if data rectangular + # ifelse2_pipe( + # (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & fill_missing_values, + # (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & !fill_missing_values, + # ~ .x %>% fill_NA_using_formula(.formula,!!.sample, !!.transcript, !!.abundance), + # ~ .x %>% eliminate_sparse_transcripts(!!.transcript) + # ) %>% # Needed for DESeq2 mutate(!!.abundance := as.integer(!!.abundance)) %>% diff --git a/R/methods.R b/R/methods.R index b25d5c82..2e65d772 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1927,8 +1927,8 @@ setGeneric("test_differential_abundance", function(.data, action = "add", # DEPRECATED - significance_threshold = 0.05, - fill_missing_values = FALSE + significance_threshold = NULL, + fill_missing_values = NULL ) standardGeneric("test_differential_abundance")) @@ -1947,8 +1947,8 @@ setGeneric("test_differential_abundance", function(.data, action = "add", # DEPRECATED - significance_threshold = 0.05, - fill_missing_values = FALSE + significance_threshold = NULL, + fill_missing_values = NULL ) { # Get column names @@ -2003,7 +2003,8 @@ therefore it is essential to add covariates such as batch effects (if applicable when( # edgeR - tolower(method) %in% c("edger_quasi_likelihood", "edger_likelihood_ratio") ~ get_differential_transcript_abundance_bulk( + tolower(method) %in% c("edger_quasi_likelihood", "edger_likelihood_ratio") ~ + get_differential_transcript_abundance_bulk( ., .formula, .sample = !!.sample, @@ -2011,8 +2012,6 @@ therefore it is essential to add covariates such as batch effects (if applicable .abundance = !!.abundance, .contrasts = .contrasts, method = method, - significance_threshold = significance_threshold, - fill_missing_values = fill_missing_values, scaling_method = scaling_method, omit_contrast_in_colnames = omit_contrast_in_colnames, prefix = prefix @@ -2027,8 +2026,6 @@ therefore it is essential to add covariates such as batch effects (if applicable .transcript = !!.transcript, .abundance = !!.abundance, .contrasts = .contrasts, - significance_threshold = significance_threshold, - fill_missing_values = fill_missing_values, scaling_method = scaling_method, omit_contrast_in_colnames = omit_contrast_in_colnames, prefix = prefix @@ -2043,8 +2040,6 @@ therefore it is essential to add covariates such as batch effects (if applicable .abundance = !!.abundance, .contrasts = .contrasts, method = method, - significance_threshold = significance_threshold, - fill_missing_values = fill_missing_values, scaling_method = scaling_method, omit_contrast_in_colnames = omit_contrast_in_colnames, prefix = prefix diff --git a/R/methods_SE.R b/R/methods_SE.R index d4b01b83..f2dbb9ef 100644 --- a/R/methods_SE.R +++ b/R/methods_SE.R @@ -624,8 +624,8 @@ setMethod( action = "add", # DEPRECATED - significance_threshold = 0.05, - fill_missing_values = FALSE) + significance_threshold = NULL, + fill_missing_values = NULL) { # Make col names .sample = enquo(.sample) @@ -645,8 +645,6 @@ setMethod( .abundance = !!.abundance, .contrasts = .contrasts, method = method, - significance_threshold = significance_threshold, - fill_missing_values = fill_missing_values, scaling_method = scaling_method, omit_contrast_in_colnames = omit_contrast_in_colnames, action = action diff --git a/man/get_differential_transcript_abundance_bulk.Rd b/man/get_differential_transcript_abundance_bulk.Rd index 8ffd3b1c..54831db6 100644 --- a/man/get_differential_transcript_abundance_bulk.Rd +++ b/man/get_differential_transcript_abundance_bulk.Rd @@ -12,10 +12,6 @@ get_differential_transcript_abundance_bulk( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - minimum_counts = 10, - minimum_proportion = 0.7, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, prefix = "" @@ -36,14 +32,6 @@ get_differential_transcript_abundance_bulk( \item{method}{A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT)} -\item{significance_threshold}{A real between 0 and 1} - -\item{minimum_counts}{A positive integer. Minimum counts required for at least some samples.} - -\item{minimum_proportion}{A real positive number between 0 and 1. It is the threshold of proportion of samples for each transcripts/genes that have to be characterised by a cmp bigger than the threshold to be included for scaling procedure.} - -\item{fill_missing_values}{A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed.} - \item{scaling_method}{A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")} \item{omit_contrast_in_colnames}{If just one contrast is specified you can choose to omit the contrast label in the colnames.} diff --git a/man/get_differential_transcript_abundance_bulk_voom.Rd b/man/get_differential_transcript_abundance_bulk_voom.Rd index 336a6ccb..8969c7e9 100644 --- a/man/get_differential_transcript_abundance_bulk_voom.Rd +++ b/man/get_differential_transcript_abundance_bulk_voom.Rd @@ -11,10 +11,6 @@ get_differential_transcript_abundance_bulk_voom( .transcript = NULL, .abundance = NULL, .contrasts = NULL, - significance_threshold = 0.05, - minimum_counts = 10, - minimum_proportion = 0.7, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, prefix = "" @@ -33,14 +29,6 @@ get_differential_transcript_abundance_bulk_voom( \item{.contrasts}{A character vector. See voom makeContrasts specification for the parameter `contrasts`. If contrasts are not present the first covariate is the one the model is tested against (e.g., ~ factor_of_interest)} -\item{significance_threshold}{A real between 0 and 1} - -\item{minimum_counts}{A positive integer. Minimum counts required for at least some samples.} - -\item{minimum_proportion}{A real positive number between 0 and 1. It is the threshold of proportion of samples for each transcripts/genes that have to be characterised by a cmp bigger than the threshold to be included for scaling procedure.} - -\item{fill_missing_values}{A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed.} - \item{scaling_method}{A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")} \item{omit_contrast_in_colnames}{If just one contrast is specified you can choose to omit the contrast label in the colnames.} diff --git a/man/get_differential_transcript_abundance_deseq2.Rd b/man/get_differential_transcript_abundance_deseq2.Rd index 1f4f47d8..ede6de8c 100644 --- a/man/get_differential_transcript_abundance_deseq2.Rd +++ b/man/get_differential_transcript_abundance_deseq2.Rd @@ -12,10 +12,6 @@ get_differential_transcript_abundance_deseq2( .abundance = NULL, .contrasts = NULL, method = "edgeR_quasi_likelihood", - significance_threshold = 0.05, - minimum_counts = 10, - minimum_proportion = 0.7, - fill_missing_values = FALSE, scaling_method = "TMM", omit_contrast_in_colnames = FALSE, prefix = "" @@ -36,14 +32,6 @@ get_differential_transcript_abundance_deseq2( \item{method}{A string character. Either "edgeR_quasi_likelihood" (i.e., QLF), "edgeR_likelihood_ratio" (i.e., LRT)} -\item{significance_threshold}{A real between 0 and 1} - -\item{minimum_counts}{A positive integer. Minimum counts required for at least some samples.} - -\item{minimum_proportion}{A real positive number between 0 and 1. It is the threshold of proportion of samples for each transcripts/genes that have to be characterised by a cmp bigger than the threshold to be included for scaling procedure.} - -\item{fill_missing_values}{A boolean. Whether to fill missing sample/transcript values with the median of the transcript. This is rarely needed.} - \item{scaling_method}{A character string. The scaling method passed to the backend function (i.e., edgeR::calcNormFactors; "TMM","TMMwsp","RLE","upperquartile")} \item{omit_contrast_in_colnames}{If just one contrast is specified you can choose to omit the contrast label in the colnames.} diff --git a/man/test_differential_abundance-methods.Rd b/man/test_differential_abundance-methods.Rd index 0dc9bd55..d7cbaf8e 100644 --- a/man/test_differential_abundance-methods.Rd +++ b/man/test_differential_abundance-methods.Rd @@ -22,8 +22,8 @@ test_differential_abundance( omit_contrast_in_colnames = FALSE, prefix = "", action = "add", - significance_threshold = 0.05, - fill_missing_values = FALSE + significance_threshold = NULL, + fill_missing_values = NULL ) \S4method{test_differential_abundance}{spec_tbl_df}( @@ -38,8 +38,8 @@ test_differential_abundance( omit_contrast_in_colnames = FALSE, prefix = "", action = "add", - significance_threshold = 0.05, - fill_missing_values = FALSE + significance_threshold = NULL, + fill_missing_values = NULL ) \S4method{test_differential_abundance}{tbl_df}( @@ -54,8 +54,8 @@ test_differential_abundance( omit_contrast_in_colnames = FALSE, prefix = "", action = "add", - significance_threshold = 0.05, - fill_missing_values = FALSE + significance_threshold = NULL, + fill_missing_values = NULL ) \S4method{test_differential_abundance}{tidybulk}( @@ -70,8 +70,8 @@ test_differential_abundance( omit_contrast_in_colnames = FALSE, prefix = "", action = "add", - significance_threshold = 0.05, - fill_missing_values = FALSE + significance_threshold = NULL, + fill_missing_values = NULL ) \S4method{test_differential_abundance}{SummarizedExperiment}( @@ -86,8 +86,8 @@ test_differential_abundance( omit_contrast_in_colnames = FALSE, prefix = "", action = "add", - significance_threshold = 0.05, - fill_missing_values = FALSE + significance_threshold = NULL, + fill_missing_values = NULL ) \S4method{test_differential_abundance}{RangedSummarizedExperiment}( @@ -102,8 +102,8 @@ test_differential_abundance( omit_contrast_in_colnames = FALSE, prefix = "", action = "add", - significance_threshold = 0.05, - fill_missing_values = FALSE + significance_threshold = NULL, + fill_missing_values = NULL ) } \arguments{ From 7731a1ec31bec001e64aeab0a32ebf55823e15ff Mon Sep 17 00:00:00 2001 From: stemangiola Date: Fri, 25 Sep 2020 21:22:25 +1000 Subject: [PATCH 07/12] fix some problems --- R/functions.R | 19 ++++++++++--------- tests/testthat/test-bulk_methods.R | 4 +--- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/R/functions.R b/R/functions.R index 152ade94..df437bb2 100755 --- a/R/functions.R +++ b/R/functions.R @@ -400,8 +400,8 @@ get_differential_transcript_abundance_bulk <- function(.data, # select method when( - method == "edgeR_likelihood_ratio" ~ (.) %>% edgeR::glmFit(design), - method == "edgeR_quasi_likelihood" ~ (.) %>% edgeR::glmQLFit(design) + tolower(method) == "edger_likelihood_ratio" ~ (.) %>% edgeR::glmFit(design), + tolower(method) == "edger_quasi_likelihood" ~ (.) %>% edgeR::glmQLFit(design) ) @@ -416,8 +416,8 @@ get_differential_transcript_abundance_bulk <- function(.data, # select method when( - method == "edgeR_likelihood_ratio" ~ (.) %>% edgeR::glmLRT(coef = 2, contrast = my_contrasts) , - method == "edgeR_quasi_likelihood" ~ (.) %>% edgeR::glmQLFTest(coef = 2, contrast = my_contrasts) + tolower(method) == "edger_likelihood_ratio" ~ (.) %>% edgeR::glmLRT(coef = 2, contrast = my_contrasts) , + tolower(method) == "edger_quasi_likelihood" ~ (.) %>% edgeR::glmQLFTest(coef = 2, contrast = my_contrasts) ) %>% # Convert to tibble @@ -441,8 +441,8 @@ get_differential_transcript_abundance_bulk <- function(.data, # select method when( - method == "edgeR_likelihood_ratio" ~ (.) %>% edgeR::glmLRT(coef = 2, contrast = my_contrasts[, .x]) , - method == "edgeR_quasi_likelihood" ~ (.) %>% edgeR::glmQLFTest(coef = 2, contrast = my_contrasts[, .x]) + tolower(method) == "edger_likelihood_ratio" ~ (.) %>% edgeR::glmLRT(coef = 2, contrast = my_contrasts[, .x]) , + tolower(method) == "edger_quasi_likelihood" ~ (.) %>% edgeR::glmQLFTest(coef = 2, contrast = my_contrasts[, .x]) ) %>% # Convert to tibble @@ -777,13 +777,14 @@ get_differential_transcript_abundance_deseq2 <- function(.data, ~ .x %>% DESeq2::results() %>% - as_tibble(rownames = quo_name(.transcript)) %>% + as_tibble(rownames = quo_name(.transcript)) + # %>% # # Mark DE genes # mutate(significant = padj < significance_threshold) %>% - # Arrange - arrange(padj), + # # Arrange + # arrange(padj), # Multiple comparisons ~ { diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index e1ed9e75..b02d28f9 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -521,11 +521,9 @@ test_that("New method choice",{ test_that("DESeq2 differential trancript abundance - no object",{ - library(DESeq2) - res_deseq2 = test_deseq2_df %>% - DESeq() %>% + DESeq2::DESeq() %>% results() res_tidybulk = From 65f1130c38bb928facf19a40cab73217866834e8 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 26 Sep 2020 12:01:18 +1000 Subject: [PATCH 08/12] select the right default contrast in deseq2 --- R/functions.R | 51 ++++++++++++++++----------------------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/R/functions.R b/R/functions.R index df437bb2..ab6a0e24 100755 --- a/R/functions.R +++ b/R/functions.R @@ -743,15 +743,7 @@ get_differential_transcript_abundance_deseq2 <- function(.data, distinct() %>% # drop factors as it can affect design matrix - mutate_if(is.factor, as.character()) %>% - - # # Check if data rectangular - # ifelse2_pipe( - # (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & fill_missing_values, - # (.) %>% check_if_data_rectangular(!!.sample,!!.transcript,!!.abundance) %>% not() & !fill_missing_values, - # ~ .x %>% fill_NA_using_formula(.formula,!!.sample, !!.transcript, !!.abundance), - # ~ .x %>% eliminate_sparse_transcripts(!!.transcript) - # ) %>% + droplevels() %>% # Needed for DESeq2 mutate(!!.abundance := as.integer(!!.abundance)) %>% @@ -770,25 +762,21 @@ get_differential_transcript_abundance_deseq2 <- function(.data, deseq2_object %>% # If I have multiple .contrasts merge the results - ifelse_pipe( - my_contrasts %>% is.null | omit_contrast_in_colnames, - + when( + # Simple comparison - ~ .x %>% - - DESeq2::results() %>% - as_tibble(rownames = quo_name(.transcript)) - # %>% - - # # Mark DE genes - # mutate(significant = padj < significance_threshold) %>% - - # # Arrange - # arrange(padj), - - # Multiple comparisons + my_contrasts %>% is.null | omit_contrast_in_colnames ~ + (.) %>% + DESeq2::results(contrast = c( + parse_formula(.formula)[1], + deseq2_object@colData[,parse_formula(.formula)[1]] %>% levels %>% .[1], + deseq2_object@colData[,parse_formula(.formula)[1]] %>% levels %>% .[2] + )) %>% + as_tibble(rownames = quo_name(.transcript)), + + # Multiple comparisons NOT USED AT THE MOMENT ~ { - deseq2_obj = .x + deseq2_obj = (.) 1:ncol(my_contrasts) %>% map_dfr( @@ -800,21 +788,13 @@ get_differential_transcript_abundance_deseq2 <- function(.data, # Convert to tibble as_tibble(rownames = quo_name(.transcript)) %>% mutate(constrast = colnames(my_contrasts)[.x]) - # %>% - # - # # Mark DE genes - # mutate(significant = padj < significance_threshold) + ) %>% pivot_wider(values_from = -c(!!.transcript, constrast), names_from = constrast, names_sep = "___") } ) %>% - # # Add filtering info - # right_join(.data %>% distinct(!!.transcript)) %>% - # when(!"lowly_abundant" %in% colnames(.data) ~ (.) %>% mutate(lowly_abundant = if_else(is.na(log2FoldChange), T, F)) , - # ~ (.)) %>% - # Attach prefix setNames(c( colnames(.)[1], @@ -827,6 +807,7 @@ get_differential_transcript_abundance_deseq2 <- function(.data, # Add raw object attach_to_internals(deseq2_object, "DESeq2") %>% + # Communicate the attribute added { message( From 7c6ec281811773a9528ecfc79527c0ccc4971a2c Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 26 Sep 2020 12:32:06 +1000 Subject: [PATCH 09/12] drop levels in the correct way --- R/functions.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/functions.R b/R/functions.R index ab6a0e24..87fc853f 100755 --- a/R/functions.R +++ b/R/functions.R @@ -321,7 +321,7 @@ get_differential_transcript_abundance_bulk <- function(.data, distinct() %>% # drop factors as it can affect design matrix - mutate_if(is.factor, as.character()) + droplevels() #%>% # # Check if data rectangular @@ -537,7 +537,7 @@ get_differential_transcript_abundance_bulk_voom <- function(.data, distinct() %>% # drop factors as it can affect design matrix - mutate_if(is.factor, as.character()) + droplevels() # %>% # # Check if data rectangular @@ -769,8 +769,8 @@ get_differential_transcript_abundance_deseq2 <- function(.data, (.) %>% DESeq2::results(contrast = c( parse_formula(.formula)[1], - deseq2_object@colData[,parse_formula(.formula)[1]] %>% levels %>% .[1], - deseq2_object@colData[,parse_formula(.formula)[1]] %>% levels %>% .[2] + deseq2_object@colData[,parse_formula(.formula)[1]] %>% levels %>% .[2], + deseq2_object@colData[,parse_formula(.formula)[1]] %>% levels %>% .[1] )) %>% as_tibble(rownames = quo_name(.transcript)), From 41ea96ada5a79301972e46ed626e4596015733d8 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 26 Sep 2020 15:17:37 +1000 Subject: [PATCH 10/12] fix some checks --- R/functions.R | 6 ++++++ tests/testthat/test-bulk_methods.R | 8 ++++---- vignettes/manuscript_transcriptional_signatures.Rmd | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/R/functions.R b/R/functions.R index 87fc853f..beb19282 100755 --- a/R/functions.R +++ b/R/functions.R @@ -764,6 +764,12 @@ get_differential_transcript_abundance_deseq2 <- function(.data, # If I have multiple .contrasts merge the results when( + # Simple comparison + (my_contrasts %>% is.null | omit_contrast_in_colnames) & (deseq2_object@colData[,parse_formula(.formula)[1]] %>% class %in% c("numeric", "integer", "double")) ~ + (.) %>% + DESeq2::results() %>% + as_tibble(rownames = quo_name(.transcript)), + # Simple comparison my_contrasts %>% is.null | omit_contrast_in_colnames ~ (.) %>% diff --git a/tests/testthat/test-bulk_methods.R b/tests/testthat/test-bulk_methods.R index b02d28f9..a65cdf33 100755 --- a/tests/testthat/test-bulk_methods.R +++ b/tests/testthat/test-bulk_methods.R @@ -524,7 +524,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ res_deseq2 = test_deseq2_df %>% DESeq2::DESeq() %>% - results() + DESeq2::results() res_tidybulk = test_deseq2_df %>% @@ -552,7 +552,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ expect_equal( unique(res$log2FoldChange)[1:4], - c(-12.322037, -11.670005 , -9.048954 ,-12.603183), + c(3.449740, 2.459516, 2.433466, 1.951263), tolerance=1e-6 ) @@ -580,7 +580,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ expect_equal( unique(res$log2FoldChange)[1:4], - c(-3.656244 ,-3.241215, -3.037658, 4.173217), + c(-1.1906895, -0.4422231, 0.9656122, -0.3328515), tolerance=1e-6 ) @@ -605,7 +605,7 @@ test_that("DESeq2 differential trancript abundance - no object",{ expect_equal( unique(res$log2FoldChange)[1:4], - c(-10.432623, -6.747017, -7.598174 , 6.598429), + c(1.1110210, 3.0077779, 0.9024256, 4.7259792), tolerance=1e-6 ) diff --git a/vignettes/manuscript_transcriptional_signatures.Rmd b/vignettes/manuscript_transcriptional_signatures.Rmd index 7a575474..b0d6b919 100644 --- a/vignettes/manuscript_transcriptional_signatures.Rmd +++ b/vignettes/manuscript_transcriptional_signatures.Rmd @@ -12,7 +12,7 @@ vignette: > %\usepackage[UTF-8]{inputenc} --- -This decument includes the code used for the manuscript, for the transcriptional signature identification. +This document includes the code used for the manuscript, for the transcriptional signature identification. ```{r, echo=FALSE, include=FALSE} From 758ba9e93ffe8651a26fbee854569b61b3f8d978 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 26 Sep 2020 15:34:48 +1000 Subject: [PATCH 11/12] fix deprecation --- R/methods.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/methods.R b/R/methods.R index 2e65d772..ef50999e 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1961,7 +1961,7 @@ setGeneric("test_differential_abundance", function(.data, .abundance = col_names$.abundance # DEPRECATION OF significance_threshold - if (is_present(significance_threshold)) { + if (is_present(significance_threshold) & !is.null(significance_threshold)) { # Signal the deprecation to the user deprecate_warn("1.1.7", "tidybulk::test_differential_abundance(significance_threshold = )", details = "The argument significance_threshold is now deprecated please look at the resulting statistics to do the filtering (e.g., filter(., FDR < 0.05))") @@ -1969,7 +1969,7 @@ setGeneric("test_differential_abundance", function(.data, } # DEPRECATION OF fill_missing_values - if (is_present(fill_missing_values)) { + if (is_present(fill_missing_values) & !is.null(significance_threshold)) { # Signal the deprecation to the user deprecate_warn("1.1.7", "tidybulk::test_differential_abundance(fill_missing_values = )", details = "The argument fill_missing_values is now deprecated, you will receive a warning/error instead. Please use externally the methods fill_missing_abundance or impute_missing_abundance instead.") From 3f30c475f6cb6a53cc3c1d8c4c88d6f12fa37fa0 Mon Sep 17 00:00:00 2001 From: stemangiola Date: Sat, 26 Sep 2020 16:04:44 +1000 Subject: [PATCH 12/12] version UP --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e06e9f7e..2e3ff2f3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: tidybulk Title: Brings transcriptomics to the tidyverse -Version: 1.1.7 +Version: 1.1.8 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org",