From ab2e840f0f22ccdeb5317475698aa58e7eecd345 Mon Sep 17 00:00:00 2001
From: HannahVMeyer <hannah.v.meyer@googlemail.com>
Date: Thu, 4 Jul 2019 14:43:59 -0400
Subject: [PATCH] Check for number of fail individuals, format as input for
 overviewPerIndividualQC and return p_overview (fixes #6)

---
 R/individualQC.R                   | 129 +++++++++++++++--------------
 tests/testthat/test-individualQC.R |   7 --
 2 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/R/individualQC.R b/R/individualQC.R
index 77e73f0..957c6a4 100755
--- a/R/individualQC.R
+++ b/R/individualQC.R
@@ -300,27 +300,32 @@ perIndividualQC <- function(indir, name, qcdir=indir,
         }
     }
 
-    if(!is.null(fail_het_imiss$fail_imiss)) {
+    if(!is.null(fail_het_imiss$fail_imiss) &&
+       nrow(fail_het_imiss$fail_imiss) != 0) {
         missing_genotype <- select_(fail_het_imiss$fail_imiss, "FID", "IID")
         } else {
         missing_genotype <- NULL
     }
-    if(!is.null(fail_relatedness$failIDs)) {
+    if(!is.null(fail_relatedness$failIDs) &&
+       nrow(fail_relatedness$failIDs) != 0) {
         highIBD <- select_(fail_relatedness$failIDs, "FID", "IID")
     } else {
         highIBD <- NULL
     }
-    if(!is.null(fail_het_imiss$fail_het)) {
+    if(!is.null(fail_het_imiss$fail_het) &&
+       nrow(fail_het_imiss$fail_het) != 0) {
         outlying_heterozygosity <- select_(fail_het_imiss$fail_het, "FID", "IID")
     } else {
         outlying_heterozygosity <- NULL
     }
-    if(!is.null(fail_sex$fail_sex)) {
+    if(!is.null(fail_sex$fail_sex) &&
+       nrow(fail_sex$fail_sex) != 0) {
         mismatched_sex<- select_(fail_sex$fail_sex, "FID", "IID")
     } else {
         mismatched_sex <- NULL
     }
-    if(!is.null(fail_ancestry$fail_ancestry)) {
+    if(!is.null(fail_ancestry$fail_ancestry) &&
+       nrow(fail_ancestry$fail_ancestry) != 0) {
         ancestry <- select_(fail_ancestry$fail_ancestry, "FID", "IID")
     } else {
         ancestry <- NULL
@@ -386,12 +391,14 @@ perIndividualQC <- function(indir, name, qcdir=indir,
 #' other_arguments) or pdf(outfile) print(p_overview) dev.off().
 #' @return Named [list] with i) nr_fail_samples: total number of samples
 #' [integer] failing perIndividualQC, ii) fail_QC containing a [data.frame] with
-#' samples that failed QC steps (excluding ancestry): samples IIDs in rows,
-#' columns are all QC steps applied by perIndividualQC (max=4), with entries=0
+#' samples that failed QC steps (excluding ancestry) with IID, FID,
+#' all QC steps applied by perIndividualQC (max=4), with entries=0
 #' if passing the QC and entries=1 if failing that particular QC and iii)
 #' fail_QC_and_ancestry containing a [data.frame] with samples that failed
-#' ancestry and QC checks: samples IIDs in rows, columns are QC_fail and
-#' Ancestry_fail, with entries=0 if passing and entries=1 if failing that check.
+#' ancestry and QC checks with IID, FID, QC_fail and
+#' Ancestry_fail, with entries=0 if passing and entries=1 if failing that check,
+#' iii) p_overview, a ggplot2-object 'containing' a sub-paneled plot with the
+#' QC-plots.
 #' @export
 #' @examples
 #' indir <- system.file("extdata", package="plinkQC")
@@ -410,81 +417,81 @@ perIndividualQC <- function(indir, name, qcdir=indir,
 
 overviewPerIndividualQC <- function(results_perIndividualQC,
                                     interactive=FALSE) {
-    list2counts <- function(element, all_names) {
-        all_names[!(all_names %in% element)] <- 0
-        all_names[all_names %in% element] <- 1
-        return(as.numeric(all_names))
-    }
     if (length(perIndividualQC) == 2 &&
         !all(names(results_perIndividualQC) == c("fail_list", "p_sampleQC"))) {
         stop("results_perIndividualQC not direct output of perIndividualQC")
     }
     fail_list <- results_perIndividualQC$fail_list
-    # Remove null elements
-    fail_list <- fail_list[!sapply(fail_list, is.null)]
-
-    unique_samples_fail_all <- unique(unlist(fail_list))
+    samples_fail_all <- do.call(rbind, fail_list)
 
     # a) overview QC fails independent of ethnicity
     fail_list_wo_ancestry <- fail_list[!names(fail_list) == "ancestry"]
-    unique_samples_fail_wo_ancestry <- unique(unlist(fail_list_wo_ancestry))
-
-    fail_counts_wo_ancestry <- sapply(fail_list_wo_ancestry, list2counts,
-                                      unique_samples_fail_wo_ancestry)
+    id_list_wo_ancestry <- sapply(fail_list_wo_ancestry, function(x) x$IID)
+    unique_samples_fail_wo_ancestry <- unique(unlist(id_list_wo_ancestry))
+    fail_counts_wo_ancestry <- UpSetR::fromList(id_list_wo_ancestry)
     rownames(fail_counts_wo_ancestry) <- unique_samples_fail_wo_ancestry
 
     if (interactive) {
-        if (length(fail_list_wo_ancestry) >= 2) {
-            UpSetR::upset(UpSetR::fromList(fail_list_wo_ancestry),
-                  order.by = "freq",
-                  empty.intersections = "on", text.scale=1.2,
-                  # Include when UpSetR v1.4.1 is released
-                  # title="Overview quality control failures",
-                  mainbar.y.label="Samples failing multiple QC checks",
-                  sets.x.label="Sample fails per QC check",
-                  main.bar.color="#1b9e77", matrix.color="#1b9e77",
-                  sets.bar.color="#d95f02")
-        } else {
-            message("overviewSampleQC for QC fails cannot be displayed with ",
-                    "UpSetR: at least two elements in list required, but only ",
-                    length(fail_list_wo_ancestry) ," provided")
-        }
+        p <- UpSetR::upset(fail_counts_wo_ancestry,
+                           order.by = "freq",
+                           empty.intersections = "on", text.scale=1.2,
+                           # Include when UpSetR v1.4.1 is released
+                           #title="Overview quality control failures",
+                           mainbar.y.label="Samples failing multiple QC checks",
+                           sets.x.label="Sample fails per QC check",
+                           main.bar.color="#1b9e77", matrix.color="#1b9e77",
+                           sets.bar.color="#d95f02")
+        p_qc <- cowplot::plot_grid(NULL, p$Main_bar, p$Sizes, p$Matrix,
+                                   nrow=2, align='v', rel_heights = c(3,1),
+                                   rel_widths = c(2,3))
+        print(p_qc)
+
     }
 
-    if ("ancestry" %in% names(fail_list)) {
+    fail_counts_wo_ancestry <- merge(samples_fail_all, fail_counts_wo_ancestry,
+                                     by.x=2, by.y=0)
+    if ("ancestry" %in% names(fail_list) && !is.null(fail_list$ancestry)) {
         # b) overview of QC and ancestry fails
         fail_all <- list(QC_fail=unique_samples_fail_wo_ancestry,
-                    Ancestry_fail=fail_list$ancestry)
-        fail_counts_all <- sapply(fail_all, list2counts,
-                                  unique_samples_fail_all)
+                    Ancestry_fail=fail_list$ancestry$IID)
+
+        unique_samples_fail_all <- unique(unlist(fail_all))
+        fail_counts_all <- UpSetR::fromList(fail_all)
         rownames(fail_counts_all) <- unique_samples_fail_all
+
         if (interactive) {
-            if (length(fail_all) >= 2) {
-                UpSetR::upset(UpSetR::fromList(fail_all),
-                              order.by = "freq",
-                            # Include when UpSetR v1.4.1 is released
-                      # title="Intersection between QC and ancestry failures",
-                      mainbar.y.label="Samples failing QC and ancestry checks",
-                      sets.x.label="Sample fails per QC check",
-                      empty.intersections = "on", text.scale=1.2,
-                      main.bar.color="#7570b3", matrix.color="#7570b3",
-                    sets.bar.color="#e7298a" )
-            } else {
-                message("overviewSampleQC for QC fails and ancestry cannot be ",
-                        "displayed with UpSetR: as no samples are present in ",
-                        "QC fails")
-            }
+            m <- UpSetR::upset(fail_counts_all,
+                          order.by = "freq",
+                          # Include when UpSetR v1.4.1 is released
+                          # title=
+                          # "Intersection between QC and ancestry failures",
+                          mainbar.y.label=
+                              "Samples failing QC and ancestry checks",
+                                      sets.x.label="Sample fails per QC check",
+                          empty.intersections = "on", text.scale=1.2,
+                          main.bar.color="#7570b3", matrix.color="#7570b3",
+                          sets.bar.color="#e7298a" )
+            p_all <- cowplot::plot_grid(NULL, m$Main_bar, m$Sizes, m$Matrix,
+                                       nrow=2, align='v', rel_heights = c(3,1),
+                                       rel_widths = c(2,3))
+            print(p_all)
         }
-        #p_overview <- cowplot::plot_grid(p_qc, p_qc_ancestry, nrow=2)
+        fail_counts_all <- merge(samples_fail_all, fail_counts_all,
+                                         by.x=2, by.y=0)
+
+        p_overview <- cowplot::plot_grid(NULL, p$Main_bar, p$Sizes, p$Matrix,
+                                         NULL, m$Main_bar, m$Sizes, m$Matrix,
+                                   nrow=4, align='v', rel_heights = c(3,1,3,1),
+                                   rel_widths = c(2,3))
     } else {
-        #p_overview <- p_qc
+        p_overview <- p_qc
         fail_counts_all <- NULL
     }
-    nr_fail_samples <- length(unique_samples_fail_all)
+    nr_fail_samples <- length(unique(samples_fail_all$IID))
     return(list(nr_fail_samples=nr_fail_samples,
                 fail_QC=fail_counts_wo_ancestry,
-                fail_QC_and_ancestry=fail_counts_all))
-                #p_overview=p_overview))
+                fail_QC_and_ancestry=fail_counts_all,
+                p_overview=p_overview))
 }
 
 #' Identification of individuals with discordant sex information
diff --git a/tests/testthat/test-individualQC.R b/tests/testthat/test-individualQC.R
index 0b72b58..2f43d24 100644
--- a/tests/testthat/test-individualQC.R
+++ b/tests/testthat/test-individualQC.R
@@ -234,10 +234,3 @@ test_that('evaluate_check_ancestry returns correct fail IDs for example data',{
     expect_true(all(fail$fail_ancestry$IID %in% fail_ancestryIDs[,1]))
 })
 
-context('Test overviewPerIndividualQC')
-test_that('overviewPerIndividualQC returns QC fails message', {
-    results_perSampleQC <- list(fail_list=list(a=1:10), p_SampleQC=NULL)
-    expect_message(overviewPerIndividualQC(results_perSampleQC,
-                                           interactive=TRUE),
-                   "overviewSampleQC for QC fails cannot be displayed with")
-})