Skip to content

Commit

Permalink
Check for number of fail individuals, format as input for overviewPer…
Browse files Browse the repository at this point in the history
…IndividualQC and return p_overview (fixes #6)
  • Loading branch information
HannahVMeyer committed Jul 4, 2019
1 parent 0464224 commit ab2e840
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 68 deletions.
129 changes: 68 additions & 61 deletions R/individualQC.R
Original file line number Diff line number Diff line change
Expand Up @@ -300,27 +300,32 @@ perIndividualQC <- function(indir, name, qcdir=indir,
}
}

if(!is.null(fail_het_imiss$fail_imiss)) {
if(!is.null(fail_het_imiss$fail_imiss) &&
nrow(fail_het_imiss$fail_imiss) != 0) {
missing_genotype <- select_(fail_het_imiss$fail_imiss, "FID", "IID")
} else {
missing_genotype <- NULL
}
if(!is.null(fail_relatedness$failIDs)) {
if(!is.null(fail_relatedness$failIDs) &&
nrow(fail_relatedness$failIDs) != 0) {
highIBD <- select_(fail_relatedness$failIDs, "FID", "IID")
} else {
highIBD <- NULL
}
if(!is.null(fail_het_imiss$fail_het)) {
if(!is.null(fail_het_imiss$fail_het) &&
nrow(fail_het_imiss$fail_het) != 0) {
outlying_heterozygosity <- select_(fail_het_imiss$fail_het, "FID", "IID")
} else {
outlying_heterozygosity <- NULL
}
if(!is.null(fail_sex$fail_sex)) {
if(!is.null(fail_sex$fail_sex) &&
nrow(fail_sex$fail_sex) != 0) {
mismatched_sex<- select_(fail_sex$fail_sex, "FID", "IID")
} else {
mismatched_sex <- NULL
}
if(!is.null(fail_ancestry$fail_ancestry)) {
if(!is.null(fail_ancestry$fail_ancestry) &&
nrow(fail_ancestry$fail_ancestry) != 0) {
ancestry <- select_(fail_ancestry$fail_ancestry, "FID", "IID")
} else {
ancestry <- NULL
Expand Down Expand Up @@ -386,12 +391,14 @@ perIndividualQC <- function(indir, name, qcdir=indir,
#' other_arguments) or pdf(outfile) print(p_overview) dev.off().
#' @return Named [list] with i) nr_fail_samples: total number of samples
#' [integer] failing perIndividualQC, ii) fail_QC containing a [data.frame] with
#' samples that failed QC steps (excluding ancestry): samples IIDs in rows,
#' columns are all QC steps applied by perIndividualQC (max=4), with entries=0
#' samples that failed QC steps (excluding ancestry) with IID, FID,
#' all QC steps applied by perIndividualQC (max=4), with entries=0
#' if passing the QC and entries=1 if failing that particular QC and iii)
#' fail_QC_and_ancestry containing a [data.frame] with samples that failed
#' ancestry and QC checks: samples IIDs in rows, columns are QC_fail and
#' Ancestry_fail, with entries=0 if passing and entries=1 if failing that check.
#' ancestry and QC checks with IID, FID, QC_fail and
#' Ancestry_fail, with entries=0 if passing and entries=1 if failing that check,
#' iii) p_overview, a ggplot2-object 'containing' a sub-paneled plot with the
#' QC-plots.
#' @export
#' @examples
#' indir <- system.file("extdata", package="plinkQC")
Expand All @@ -410,81 +417,81 @@ perIndividualQC <- function(indir, name, qcdir=indir,

overviewPerIndividualQC <- function(results_perIndividualQC,
interactive=FALSE) {
list2counts <- function(element, all_names) {
all_names[!(all_names %in% element)] <- 0
all_names[all_names %in% element] <- 1
return(as.numeric(all_names))
}
if (length(perIndividualQC) == 2 &&
!all(names(results_perIndividualQC) == c("fail_list", "p_sampleQC"))) {
stop("results_perIndividualQC not direct output of perIndividualQC")
}
fail_list <- results_perIndividualQC$fail_list
# Remove null elements
fail_list <- fail_list[!sapply(fail_list, is.null)]

unique_samples_fail_all <- unique(unlist(fail_list))
samples_fail_all <- do.call(rbind, fail_list)

# a) overview QC fails independent of ethnicity
fail_list_wo_ancestry <- fail_list[!names(fail_list) == "ancestry"]
unique_samples_fail_wo_ancestry <- unique(unlist(fail_list_wo_ancestry))

fail_counts_wo_ancestry <- sapply(fail_list_wo_ancestry, list2counts,
unique_samples_fail_wo_ancestry)
id_list_wo_ancestry <- sapply(fail_list_wo_ancestry, function(x) x$IID)
unique_samples_fail_wo_ancestry <- unique(unlist(id_list_wo_ancestry))
fail_counts_wo_ancestry <- UpSetR::fromList(id_list_wo_ancestry)
rownames(fail_counts_wo_ancestry) <- unique_samples_fail_wo_ancestry

if (interactive) {
if (length(fail_list_wo_ancestry) >= 2) {
UpSetR::upset(UpSetR::fromList(fail_list_wo_ancestry),
order.by = "freq",
empty.intersections = "on", text.scale=1.2,
# Include when UpSetR v1.4.1 is released
# title="Overview quality control failures",
mainbar.y.label="Samples failing multiple QC checks",
sets.x.label="Sample fails per QC check",
main.bar.color="#1b9e77", matrix.color="#1b9e77",
sets.bar.color="#d95f02")
} else {
message("overviewSampleQC for QC fails cannot be displayed with ",
"UpSetR: at least two elements in list required, but only ",
length(fail_list_wo_ancestry) ," provided")
}
p <- UpSetR::upset(fail_counts_wo_ancestry,
order.by = "freq",
empty.intersections = "on", text.scale=1.2,
# Include when UpSetR v1.4.1 is released
#title="Overview quality control failures",
mainbar.y.label="Samples failing multiple QC checks",
sets.x.label="Sample fails per QC check",
main.bar.color="#1b9e77", matrix.color="#1b9e77",
sets.bar.color="#d95f02")
p_qc <- cowplot::plot_grid(NULL, p$Main_bar, p$Sizes, p$Matrix,
nrow=2, align='v', rel_heights = c(3,1),
rel_widths = c(2,3))
print(p_qc)

}

if ("ancestry" %in% names(fail_list)) {
fail_counts_wo_ancestry <- merge(samples_fail_all, fail_counts_wo_ancestry,
by.x=2, by.y=0)
if ("ancestry" %in% names(fail_list) && !is.null(fail_list$ancestry)) {
# b) overview of QC and ancestry fails
fail_all <- list(QC_fail=unique_samples_fail_wo_ancestry,
Ancestry_fail=fail_list$ancestry)
fail_counts_all <- sapply(fail_all, list2counts,
unique_samples_fail_all)
Ancestry_fail=fail_list$ancestry$IID)

unique_samples_fail_all <- unique(unlist(fail_all))
fail_counts_all <- UpSetR::fromList(fail_all)
rownames(fail_counts_all) <- unique_samples_fail_all

if (interactive) {
if (length(fail_all) >= 2) {
UpSetR::upset(UpSetR::fromList(fail_all),
order.by = "freq",
# Include when UpSetR v1.4.1 is released
# title="Intersection between QC and ancestry failures",
mainbar.y.label="Samples failing QC and ancestry checks",
sets.x.label="Sample fails per QC check",
empty.intersections = "on", text.scale=1.2,
main.bar.color="#7570b3", matrix.color="#7570b3",
sets.bar.color="#e7298a" )
} else {
message("overviewSampleQC for QC fails and ancestry cannot be ",
"displayed with UpSetR: as no samples are present in ",
"QC fails")
}
m <- UpSetR::upset(fail_counts_all,
order.by = "freq",
# Include when UpSetR v1.4.1 is released
# title=
# "Intersection between QC and ancestry failures",
mainbar.y.label=
"Samples failing QC and ancestry checks",
sets.x.label="Sample fails per QC check",
empty.intersections = "on", text.scale=1.2,
main.bar.color="#7570b3", matrix.color="#7570b3",
sets.bar.color="#e7298a" )
p_all <- cowplot::plot_grid(NULL, m$Main_bar, m$Sizes, m$Matrix,
nrow=2, align='v', rel_heights = c(3,1),
rel_widths = c(2,3))
print(p_all)
}
#p_overview <- cowplot::plot_grid(p_qc, p_qc_ancestry, nrow=2)
fail_counts_all <- merge(samples_fail_all, fail_counts_all,
by.x=2, by.y=0)

p_overview <- cowplot::plot_grid(NULL, p$Main_bar, p$Sizes, p$Matrix,
NULL, m$Main_bar, m$Sizes, m$Matrix,
nrow=4, align='v', rel_heights = c(3,1,3,1),
rel_widths = c(2,3))
} else {
#p_overview <- p_qc
p_overview <- p_qc
fail_counts_all <- NULL
}
nr_fail_samples <- length(unique_samples_fail_all)
nr_fail_samples <- length(unique(samples_fail_all$IID))
return(list(nr_fail_samples=nr_fail_samples,
fail_QC=fail_counts_wo_ancestry,
fail_QC_and_ancestry=fail_counts_all))
#p_overview=p_overview))
fail_QC_and_ancestry=fail_counts_all,
p_overview=p_overview))
}

#' Identification of individuals with discordant sex information
Expand Down
7 changes: 0 additions & 7 deletions tests/testthat/test-individualQC.R
Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,3 @@ test_that('evaluate_check_ancestry returns correct fail IDs for example data',{
expect_true(all(fail$fail_ancestry$IID %in% fail_ancestryIDs[,1]))
})

context('Test overviewPerIndividualQC')
test_that('overviewPerIndividualQC returns QC fails message', {
results_perSampleQC <- list(fail_list=list(a=1:10), p_SampleQC=NULL)
expect_message(overviewPerIndividualQC(results_perSampleQC,
interactive=TRUE),
"overviewSampleQC for QC fails cannot be displayed with")
})

0 comments on commit ab2e840

Please sign in to comment.