From 321fa6dc322fa73dae98bf532cb646e08b52b313 Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 31 May 2024 20:54:53 +0000 Subject: [PATCH 1/7] Expose parallel.core in coverage_stats() --- R/gds.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/gds.R b/R/gds.R index 6ea89f9f..373f6cb5 100644 --- a/R/gds.R +++ b/R/gds.R @@ -1099,7 +1099,8 @@ extract_coverage <- function( ad = TRUE, coverage.stats = c("sum", "mean", "median", "iqr"), subsample.info = 1, - verbose = TRUE + verbose = TRUE, + parallel.core = TRUE, ) { if (verbose) cli::cli_progress_step("Coverage ...") @@ -1112,7 +1113,8 @@ extract_coverage <- function( dp = TRUE, ad = TRUE, individuals = TRUE, - markers = TRUE + markers = TRUE, + parallel.core = TRUE, ) { coverage.stats <- match.arg( @@ -1148,7 +1150,7 @@ extract_coverage <- function( FUN = rad_cov_stats, as.is = "integer", margin = "by.variant", - parallel = TRUE + parallel = parallel.core ) } @@ -1271,7 +1273,8 @@ extract_coverage <- function( dp = dp, ad = ad, individuals = individuals, - markers = markers + markers = markers, + parallel.core = parallel.core ) # required for individuals and markers From 4d31b9c10d2b2d460f32d1108c7a922690076efb Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 31 May 2024 21:09:55 +0000 Subject: [PATCH 2/7] Update extract_coverage parameter in detect_paralogs.R --- R/detect_paralogs.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/detect_paralogs.R b/R/detect_paralogs.R index 1e54c68d..4ebb2c9c 100644 --- a/R/detect_paralogs.R +++ b/R/detect_paralogs.R @@ -236,7 +236,7 @@ detect_paralogs <- function( # Extract the depth/coverage info from GDS ----------------------------------- if (verbose) message("Extracting coverage...") - depth.info <- extract_coverage(gds = data, individuals = FALSE, coverage.stats = "sum") %>% + depth.info <- extract_coverage(gds = data, individuals = FALSE, coverage.stats = "sum", parallel.core = parallel.core) %>% dplyr::mutate(dplyr::across(where(is.factor), .fns = as.character)) %>% dplyr::left_join( gds2tidy(gds = data, parallel.core = parallel.core) %>% From ee5b23bf4b03da60267a2c1818b086e04f06a487 Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 31 May 2024 21:13:50 +0000 Subject: [PATCH 3/7] Update extract_coverage() parameters in filter_ma.R --- R/filter_ma.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/filter_ma.R b/R/filter_ma.R index 1dc590d1..daa7b824 100644 --- a/R/filter_ma.R +++ b/R/filter_ma.R @@ -922,7 +922,8 @@ minor_allele_stats <- function( ad = TRUE, coverage.stats = "sum", subsample.info = 1, - verbose = FALSE + verbose = FALSE, + parallel.core = parallel.core, ) %$% m.info } else { From 2b2a54c13ce8fdd76fe1ffc021f6b6753340aa3f Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 31 May 2024 17:26:04 -0400 Subject: [PATCH 4/7] fix typo --- R/filter_ma.R | 3 +-- R/gds.R | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/R/filter_ma.R b/R/filter_ma.R index daa7b824..d73856cc 100644 --- a/R/filter_ma.R +++ b/R/filter_ma.R @@ -923,7 +923,7 @@ minor_allele_stats <- function( coverage.stats = "sum", subsample.info = 1, verbose = FALSE, - parallel.core = parallel.core, + parallel.core = parallel.core ) %$% m.info } else { @@ -1104,4 +1104,3 @@ ma_one <- carrier::crate(function(x) { mac.data$MAC_GLOBAL %<>% as.integer(.) return(mac.data) })#End ma_one - diff --git a/R/gds.R b/R/gds.R index 373f6cb5..1df6abcd 100644 --- a/R/gds.R +++ b/R/gds.R @@ -1100,7 +1100,7 @@ extract_coverage <- function( coverage.stats = c("sum", "mean", "median", "iqr"), subsample.info = 1, verbose = TRUE, - parallel.core = TRUE, + parallel.core = TRUE ) { if (verbose) cli::cli_progress_step("Coverage ...") @@ -1114,7 +1114,7 @@ extract_coverage <- function( ad = TRUE, individuals = TRUE, markers = TRUE, - parallel.core = TRUE, + parallel.core = TRUE ) { coverage.stats <- match.arg( @@ -3363,5 +3363,3 @@ write_gds <- function( if (open) data.gds <- read_rad(data.gds, verbose = FALSE) return(data.gds) } # End write_gds - - From 703dbd595d7e3f46851169772863db2dc54e4d7e Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 31 May 2024 17:41:52 -0400 Subject: [PATCH 5/7] expose parallel.core in markers_het() --- R/gds.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/gds.R b/R/gds.R index 1df6abcd..f0c6d1cb 100644 --- a/R/gds.R +++ b/R/gds.R @@ -2322,7 +2322,7 @@ generate_stats <- function( if (!rlang::has_name(m.info, "HET_OBS") || force.stats) { m.info %<>% dplyr::mutate( - HET_OBS = round(markers_het(gds), 6), + HET_OBS = round(markers_het(gds, parallel.core), 6), FIS = round(markers_fis(gds), 6) ) } @@ -3064,7 +3064,7 @@ individual_het <- function(gds) { #' @rdname markers_het #' @keywords internal #' @export -markers_het <- function(gds) { +markers_het <- function(gds, parallel.core = TRUE) { # PLAN A SeqArray::seqApply( gdsfile = gds, @@ -3072,7 +3072,7 @@ markers_het <- function(gds) { FUN = function(x) sum(x == 1, na.rm = TRUE) / sum(!is.na(x)), margin = "by.variant", as.is = "double", - parallel = TRUE + parallel = parallel.core ) # PLAN B # not faster... strange because for sample it is faster... From 834d37df733e59590fd635f7c9fd49608f8eaa19 Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 31 May 2024 17:57:48 -0400 Subject: [PATCH 6/7] expose parallel.core in dp_f_m() --- R/gds.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/gds.R b/R/gds.R index f0c6d1cb..90e6b731 100644 --- a/R/gds.R +++ b/R/gds.R @@ -1135,7 +1135,7 @@ extract_coverage <- function( ) if (markers) { - dp_f_m <- function(gds, coverage.stats) { + dp_f_m <- function(gds, coverage.stats, parallel.core = TRUE) { # Using switch instead was not optimal for additional options in the func... if (coverage.stats == "sum") rad_cov_stats <- function(x) round(sum(x, na.rm = TRUE)) @@ -1154,7 +1154,7 @@ extract_coverage <- function( ) } - dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds) + dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds, parallel.core = parallel.core) } if (individuals) { @@ -2543,7 +2543,7 @@ generate_stats <- function( } if (markers) { - dp_f_m <- function(gds, coverage.stats, dart.data) { + dp_f_m <- function(gds, coverage.stats, dart.data, parallel.core = TRUE) { # Using switch instead was not optimal for additional options in the func... if (coverage.stats == "sum") rad_cov_stats <- function(x) round(sum(x, na.rm = TRUE)) if (coverage.stats == "mean") rad_cov_stats <- function(x) round(mean(x, na.rm = TRUE)) @@ -2561,13 +2561,13 @@ generate_stats <- function( FUN = rad_cov_stats, as.is = "integer", margin = "by.variant", - parallel = TRUE + parallel = parallel.core ) } return(x) } - dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds, dart.data = dart.data) + dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds, dart.data = dart.data, parallel.core = parallel.core) } if (individuals) { From 6b2c71ba49ce748644dc54ecb0c5ae7449c76453 Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 31 May 2024 18:29:07 -0400 Subject: [PATCH 7/7] fixes crash during plotting when an ind has no markers --- R/gds.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/gds.R b/R/gds.R index 90e6b731..f21e54ac 100644 --- a/R/gds.R +++ b/R/gds.R @@ -2863,7 +2863,7 @@ generate_stats <- function( corr.info <- stringi::stri_join(corr.info, cmt) } if (coverage) { - if (stats::sd(i.info$COVERAGE_MEAN) != 0) { + if (stats::sd(i.info$COVERAGE_MEAN, na.rm = TRUE) != 0) { cc <- ceiling(stats::cor(i.info$COVERAGE_TOTAL, i.info$COVERAGE_MEAN, use = "pairwise.complete.obs") * 100) / 100 } else { cc <- "NA"