diff --git a/DESCRIPTION b/DESCRIPTION index b8106156..0271cef4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: radiator Type: Package Title: RADseq Data Exploration, Manipulation and Visualization using R -Version: 1.2.9 -Date: 2024-01-25 +Version: 1.3.0 +Date: 2024-02-22 Encoding: UTF-8 Authors@R: c( person("Thierry", "Gosselin", email = "thierrygosselin@icloud.com", role = c("aut", "cre")), @@ -56,7 +56,7 @@ License: GPL-3 LazyLoad: yes VignetteBuilder: knitr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 ByteCompile: TRUE URL: https://thierrygosselin.github.io/radiator/ BugReports: https://github.com/thierrygosselin/radiator/issues diff --git a/NEWS.md b/NEWS.md index 345aeb08..0cb72bf0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# radiator 1.3.0 2024-02-22 + +* Bug fix using coverage and DArT data + + # radiator 1.2.9 2024-01-25 * Bug fix stemming from genalex files and genind conversion diff --git a/R/filter_coverage.R b/R/filter_coverage.R index 79ac0e65..67cb3af0 100644 --- a/R/filter_coverage.R +++ b/R/filter_coverage.R @@ -186,7 +186,7 @@ filter_coverage <- function( verbose = verbose) # Verify that coverage information is present in the data... - depth.info <- check_coverage(gds = data, stacks.haplo.check = TRUE, dart.check = TRUE) + depth.info <- check_coverage(gds = data, genotypes.metadata.check = TRUE, stacks.haplo.check = TRUE, dart.check = TRUE) if (is.null(depth.info)) { message("\n\nCoverate information is not available for this dataset, returning GDS...") return(data) @@ -204,6 +204,8 @@ filter_coverage <- function( individuals = FALSE, markers = TRUE, missing = FALSE, + coverage = TRUE, + # allele.coverage = TRUE, allele.coverage = FALSE, mac = FALSE, heterozygosity = FALSE, diff --git a/R/gds.R b/R/gds.R index 276c157f..95d54097 100644 --- a/R/gds.R +++ b/R/gds.R @@ -967,9 +967,9 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c # stacks haplotype vcf have the info fields for depth in the VCF header # but they do not have the info with genotypes... # it's laziness from stacks... + data.source <- extract_data_source(gds) if (stacks.haplo.check) { - data.source <- extract_data_source(gds) biallelic <- radiator::detect_biallelic_markers(data = gds) biallelic <- gdsfmt::read.gdsn(gdsfmt::index.gdsn( node = gds, path = "radiator/biallelic", silent = TRUE)) @@ -989,7 +989,10 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c markers.meta.select = c("AVG_COUNT_REF", "AVG_COUNT_SNP"), whitelist = TRUE ) - if (!is.null(got.coverage)) got.coverage <- c("AVG_COUNT_REF", "AVG_COUNT_SNP") + if (!is.null(got.coverage)) { + got.coverage <- c("AVG_COUNT_REF", "AVG_COUNT_SNP") + return(got.coverage) + } }#End DART 1row and 2 rows } @@ -1006,6 +1009,7 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c # this part might generate an error if you actually have genotypes metadata... # need to run tests... got.coverage <- geno.index + return(got.coverage) } geno.index <- NULL } @@ -1018,13 +1022,9 @@ check_coverage <- function(gds, genotypes.metadata.check = FALSE, stacks.haplo.c check = "none", verbose = FALSE)$ID if (length(have) > 0) { - # if (!exhaustive) { - # want <- c("DP", "CATG") - # } else { want <- c("DP", "AD", "CATG") - # } - got.coverage <- purrr::keep(.x = have, .p = have %in% want) + return(got.coverage) } else { got.coverage <- NULL } @@ -2461,7 +2461,7 @@ generate_stats <- function( genotypes.metadata.check = TRUE, dart.check = TRUE ) - if (!"DP" %in% got.coverage) coverage <- FALSE + if (!"DP" %in% got.coverage && !"READ_DEPTH" %in% got.coverage) coverage <- FALSE if (!"AD" %in% got.coverage) allele.coverage <- FALSE if (!exhaustive) allele.coverage <- FALSE got.coverage <- NULL @@ -2496,10 +2496,28 @@ generate_stats <- function( replacement = c("COVERAGE_TOTAL", "COVERAGE_MEAN", "COVERAGE_MEDIAN", "COVERAGE_IQR"), vectorize_all = FALSE ) + data.source <- radiator::extract_data_source(gds = gds) + + if ("dart" %in% data.source) { + dart.data <- radiator::extract_genotypes_metadata( + gds = gds, + genotypes.meta.select = c("M_SEQ", "ID_SEQ", "READ_DEPTH"), + whitelist = TRUE + ) %>% + radiator::rad_wide( + x = ., + formula = "ID_SEQ ~ M_SEQ", + values_from = "READ_DEPTH" + ) %>% + dplyr::select(-ID_SEQ) + colnames(dart.data) <- NULL + dart.data <- as.matrix(dart.data) + } else { + dart.data <- NULL + } if (markers) { - dp_f_m <- function(gds, coverage.stats) { - + dp_f_m <- function(gds, coverage.stats, dart.data) { # Using switch instead was not optimal for additional options in the func... if (coverage.stats == "sum") rad_cov_stats <- function(x) round(sum(x, na.rm = TRUE)) if (coverage.stats == "mean") rad_cov_stats <- function(x) round(mean(x, na.rm = TRUE)) @@ -2507,17 +2525,23 @@ generate_stats <- function( # if (coverage.stats == "iqr") rad_cov_stats <- function(x) round(abs(diff(stats::quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)))) if (coverage.stats == "iqr") rad_cov_stats <- function(x) round(matrixStats::iqr(x, na.rm = TRUE)) # faster - x <- SeqArray::seqApply( - gdsfile = gds, - var.name = "annotation/format/DP", - FUN = rad_cov_stats, - as.is = "integer", - margin = "by.variant", - parallel = TRUE - ) + if (!is.null(dart.data)) { + x <- as.integer(apply(X = dart.data, MARGIN = 2, FUN = rad_cov_stats)) + dart.data <- NULL + } else { + x <- SeqArray::seqApply( + gdsfile = gds, + var.name = "annotation/format/DP", + FUN = rad_cov_stats, + as.is = "integer", + margin = "by.variant", + parallel = TRUE + ) + } + return(x) } - dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds) + dp.m <- purrr::map_dfc(.x = coverage.stats.l, .f = dp_f_m, gds = gds, dart.data = dart.data) } if (individuals) { @@ -2527,10 +2551,16 @@ generate_stats <- function( # Note to myself: the huge speed gain by using other packages robustbse, Rfast, etc. # is not worth the unreliability of the results check your testing files... - dp.temp <- SeqArray::seqGetData( - gdsfile = gds, - var.name = "annotation/format/DP" - ) + if ("dart" %in% data.source) { + dp.temp <- dart.data + dart.data <- NULL + } else { + dp.temp <- SeqArray::seqGetData( + gdsfile = gds, + var.name = "annotation/format/DP" + ) + } + dp_f_i <- function(coverage.stats, x) { if ("sum" %in% coverage.stats) x <- rowSums(x, na.rm = TRUE) diff --git a/R/utils.R b/R/utils.R index 8e3cb0c1..f2b06484 100644 --- a/R/utils.R +++ b/R/utils.R @@ -977,9 +977,28 @@ strip_rad <- function( ) { objs <- utils::object.size(x) - # STRATA ---------- - strata.n <- intersect(colnames(x), c("STRATA", "POP_ID")) + # Check if ID_SEQ, STRATA_SEQ and M_SEQ present... + if (rlang::has_name(x, "ID_SEQ") && rlang::has_name(x, "INDIVIDUALS")) { + x %<>% dplyr::select(-ID_SEQ) + } + + if (rlang::has_name(x, "STRATA_SEQ")) { + strata.n <- intersect(colnames(x), c("STRATA", "POP_ID")) + if (length(strata.n) > 0 && strata.n %in% c("STRATA", "POP_ID")) { + x %<>% dplyr::select(-STRATA_SEQ) + } + } + if (rlang::has_name(x, "M_SEQ")) { + markers.n <- intersect(colnames(x), c("VARIANT_ID", "CHROM", "LOCUS", "POS", "MARKERS")) + if (length(markers.n) > 0) { + x %<>% dplyr::select(-M_SEQ) + } + } + strata.n <- markers.n <- NULL + + + # STRATA ---------- if (rlang::has_name(x, "POP_ID")) { strata <- radiator::generate_strata(data = x, pop.id = TRUE) %>% dplyr::mutate( @@ -1009,7 +1028,7 @@ strip_rad <- function( pos = env.arg, envir = env.arg ) - cm <- keep.strata <- pop.id <- strata.n <- strata <- NULL + cm <- keep.strata <- pop.id <- strata <- NULL # MARKERS --------- x %<>% diff --git a/README.md b/README.md index ed86be2e..7cca7e06 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active) [![minimal R version](https://img.shields.io/badge/R%3E%3D-NA-6666ff.svg)](https://cran.r-project.org/) -[![packageversion](https://img.shields.io/badge/Package%20version-1.2.9-orange.svg)](commits/master) -[![Last-changedate](https://img.shields.io/badge/last%20change-2024--01--25-brightgreen.svg)](/commits/master) +[![packageversion](https://img.shields.io/badge/Package%20version-1.3.0-orange.svg)](commits/master) +[![Last-changedate](https://img.shields.io/badge/last%20change-2024--02--22-brightgreen.svg)](/commits/master) [![R-CMD-check](https://github.com/thierrygosselin/radiator/workflows/R-CMD-check/badge.svg)](https://github.com/thierrygosselin/radiator/actions) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3687060.svg)](https://doi.org/10.5281/zenodo.3687060)