From ab2e840f0f22ccdeb5317475698aa58e7eecd345 Mon Sep 17 00:00:00 2001 From: HannahVMeyer Date: Thu, 4 Jul 2019 14:43:59 -0400 Subject: [PATCH] Check for number of fail individuals, format as input for overviewPerIndividualQC and return p_overview (fixes #6) --- R/individualQC.R | 129 +++++++++++++++-------------- tests/testthat/test-individualQC.R | 7 -- 2 files changed, 68 insertions(+), 68 deletions(-) diff --git a/R/individualQC.R b/R/individualQC.R index 77e73f0..957c6a4 100755 --- a/R/individualQC.R +++ b/R/individualQC.R @@ -300,27 +300,32 @@ perIndividualQC <- function(indir, name, qcdir=indir, } } - if(!is.null(fail_het_imiss$fail_imiss)) { + if(!is.null(fail_het_imiss$fail_imiss) && + nrow(fail_het_imiss$fail_imiss) != 0) { missing_genotype <- select_(fail_het_imiss$fail_imiss, "FID", "IID") } else { missing_genotype <- NULL } - if(!is.null(fail_relatedness$failIDs)) { + if(!is.null(fail_relatedness$failIDs) && + nrow(fail_relatedness$failIDs) != 0) { highIBD <- select_(fail_relatedness$failIDs, "FID", "IID") } else { highIBD <- NULL } - if(!is.null(fail_het_imiss$fail_het)) { + if(!is.null(fail_het_imiss$fail_het) && + nrow(fail_het_imiss$fail_het) != 0) { outlying_heterozygosity <- select_(fail_het_imiss$fail_het, "FID", "IID") } else { outlying_heterozygosity <- NULL } - if(!is.null(fail_sex$fail_sex)) { + if(!is.null(fail_sex$fail_sex) && + nrow(fail_sex$fail_sex) != 0) { mismatched_sex<- select_(fail_sex$fail_sex, "FID", "IID") } else { mismatched_sex <- NULL } - if(!is.null(fail_ancestry$fail_ancestry)) { + if(!is.null(fail_ancestry$fail_ancestry) && + nrow(fail_ancestry$fail_ancestry) != 0) { ancestry <- select_(fail_ancestry$fail_ancestry, "FID", "IID") } else { ancestry <- NULL @@ -386,12 +391,14 @@ perIndividualQC <- function(indir, name, qcdir=indir, #' other_arguments) or pdf(outfile) print(p_overview) dev.off(). #' @return Named [list] with i) nr_fail_samples: total number of samples #' [integer] failing perIndividualQC, ii) fail_QC containing a [data.frame] with -#' samples that failed QC steps (excluding ancestry): samples IIDs in rows, -#' columns are all QC steps applied by perIndividualQC (max=4), with entries=0 +#' samples that failed QC steps (excluding ancestry) with IID, FID, +#' all QC steps applied by perIndividualQC (max=4), with entries=0 #' if passing the QC and entries=1 if failing that particular QC and iii) #' fail_QC_and_ancestry containing a [data.frame] with samples that failed -#' ancestry and QC checks: samples IIDs in rows, columns are QC_fail and -#' Ancestry_fail, with entries=0 if passing and entries=1 if failing that check. +#' ancestry and QC checks with IID, FID, QC_fail and +#' Ancestry_fail, with entries=0 if passing and entries=1 if failing that check, +#' iii) p_overview, a ggplot2-object 'containing' a sub-paneled plot with the +#' QC-plots. #' @export #' @examples #' indir <- system.file("extdata", package="plinkQC") @@ -410,81 +417,81 @@ perIndividualQC <- function(indir, name, qcdir=indir, overviewPerIndividualQC <- function(results_perIndividualQC, interactive=FALSE) { - list2counts <- function(element, all_names) { - all_names[!(all_names %in% element)] <- 0 - all_names[all_names %in% element] <- 1 - return(as.numeric(all_names)) - } if (length(perIndividualQC) == 2 && !all(names(results_perIndividualQC) == c("fail_list", "p_sampleQC"))) { stop("results_perIndividualQC not direct output of perIndividualQC") } fail_list <- results_perIndividualQC$fail_list - # Remove null elements - fail_list <- fail_list[!sapply(fail_list, is.null)] - - unique_samples_fail_all <- unique(unlist(fail_list)) + samples_fail_all <- do.call(rbind, fail_list) # a) overview QC fails independent of ethnicity fail_list_wo_ancestry <- fail_list[!names(fail_list) == "ancestry"] - unique_samples_fail_wo_ancestry <- unique(unlist(fail_list_wo_ancestry)) - - fail_counts_wo_ancestry <- sapply(fail_list_wo_ancestry, list2counts, - unique_samples_fail_wo_ancestry) + id_list_wo_ancestry <- sapply(fail_list_wo_ancestry, function(x) x$IID) + unique_samples_fail_wo_ancestry <- unique(unlist(id_list_wo_ancestry)) + fail_counts_wo_ancestry <- UpSetR::fromList(id_list_wo_ancestry) rownames(fail_counts_wo_ancestry) <- unique_samples_fail_wo_ancestry if (interactive) { - if (length(fail_list_wo_ancestry) >= 2) { - UpSetR::upset(UpSetR::fromList(fail_list_wo_ancestry), - order.by = "freq", - empty.intersections = "on", text.scale=1.2, - # Include when UpSetR v1.4.1 is released - # title="Overview quality control failures", - mainbar.y.label="Samples failing multiple QC checks", - sets.x.label="Sample fails per QC check", - main.bar.color="#1b9e77", matrix.color="#1b9e77", - sets.bar.color="#d95f02") - } else { - message("overviewSampleQC for QC fails cannot be displayed with ", - "UpSetR: at least two elements in list required, but only ", - length(fail_list_wo_ancestry) ," provided") - } + p <- UpSetR::upset(fail_counts_wo_ancestry, + order.by = "freq", + empty.intersections = "on", text.scale=1.2, + # Include when UpSetR v1.4.1 is released + #title="Overview quality control failures", + mainbar.y.label="Samples failing multiple QC checks", + sets.x.label="Sample fails per QC check", + main.bar.color="#1b9e77", matrix.color="#1b9e77", + sets.bar.color="#d95f02") + p_qc <- cowplot::plot_grid(NULL, p$Main_bar, p$Sizes, p$Matrix, + nrow=2, align='v', rel_heights = c(3,1), + rel_widths = c(2,3)) + print(p_qc) + } - if ("ancestry" %in% names(fail_list)) { + fail_counts_wo_ancestry <- merge(samples_fail_all, fail_counts_wo_ancestry, + by.x=2, by.y=0) + if ("ancestry" %in% names(fail_list) && !is.null(fail_list$ancestry)) { # b) overview of QC and ancestry fails fail_all <- list(QC_fail=unique_samples_fail_wo_ancestry, - Ancestry_fail=fail_list$ancestry) - fail_counts_all <- sapply(fail_all, list2counts, - unique_samples_fail_all) + Ancestry_fail=fail_list$ancestry$IID) + + unique_samples_fail_all <- unique(unlist(fail_all)) + fail_counts_all <- UpSetR::fromList(fail_all) rownames(fail_counts_all) <- unique_samples_fail_all + if (interactive) { - if (length(fail_all) >= 2) { - UpSetR::upset(UpSetR::fromList(fail_all), - order.by = "freq", - # Include when UpSetR v1.4.1 is released - # title="Intersection between QC and ancestry failures", - mainbar.y.label="Samples failing QC and ancestry checks", - sets.x.label="Sample fails per QC check", - empty.intersections = "on", text.scale=1.2, - main.bar.color="#7570b3", matrix.color="#7570b3", - sets.bar.color="#e7298a" ) - } else { - message("overviewSampleQC for QC fails and ancestry cannot be ", - "displayed with UpSetR: as no samples are present in ", - "QC fails") - } + m <- UpSetR::upset(fail_counts_all, + order.by = "freq", + # Include when UpSetR v1.4.1 is released + # title= + # "Intersection between QC and ancestry failures", + mainbar.y.label= + "Samples failing QC and ancestry checks", + sets.x.label="Sample fails per QC check", + empty.intersections = "on", text.scale=1.2, + main.bar.color="#7570b3", matrix.color="#7570b3", + sets.bar.color="#e7298a" ) + p_all <- cowplot::plot_grid(NULL, m$Main_bar, m$Sizes, m$Matrix, + nrow=2, align='v', rel_heights = c(3,1), + rel_widths = c(2,3)) + print(p_all) } - #p_overview <- cowplot::plot_grid(p_qc, p_qc_ancestry, nrow=2) + fail_counts_all <- merge(samples_fail_all, fail_counts_all, + by.x=2, by.y=0) + + p_overview <- cowplot::plot_grid(NULL, p$Main_bar, p$Sizes, p$Matrix, + NULL, m$Main_bar, m$Sizes, m$Matrix, + nrow=4, align='v', rel_heights = c(3,1,3,1), + rel_widths = c(2,3)) } else { - #p_overview <- p_qc + p_overview <- p_qc fail_counts_all <- NULL } - nr_fail_samples <- length(unique_samples_fail_all) + nr_fail_samples <- length(unique(samples_fail_all$IID)) return(list(nr_fail_samples=nr_fail_samples, fail_QC=fail_counts_wo_ancestry, - fail_QC_and_ancestry=fail_counts_all)) - #p_overview=p_overview)) + fail_QC_and_ancestry=fail_counts_all, + p_overview=p_overview)) } #' Identification of individuals with discordant sex information diff --git a/tests/testthat/test-individualQC.R b/tests/testthat/test-individualQC.R index 0b72b58..2f43d24 100644 --- a/tests/testthat/test-individualQC.R +++ b/tests/testthat/test-individualQC.R @@ -234,10 +234,3 @@ test_that('evaluate_check_ancestry returns correct fail IDs for example data',{ expect_true(all(fail$fail_ancestry$IID %in% fail_ancestryIDs[,1])) }) -context('Test overviewPerIndividualQC') -test_that('overviewPerIndividualQC returns QC fails message', { - results_perSampleQC <- list(fail_list=list(a=1:10), p_SampleQC=NULL) - expect_message(overviewPerIndividualQC(results_perSampleQC, - interactive=TRUE), - "overviewSampleQC for QC fails cannot be displayed with") -})