@@ -300,27 +300,32 @@ perIndividualQC <- function(indir, name, qcdir=indir,
}
}
if (! is.null(fail_het_imiss $ fail_imiss )) {
if (! is.null(fail_het_imiss $ fail_imiss ) &&
nrow(fail_het_imiss $ fail_imiss ) != 0 ) {
missing_genotype <- select_(fail_het_imiss $ fail_imiss , " FID" , " IID" )
} else {
missing_genotype <- NULL
}
if (! is.null(fail_relatedness $ failIDs )) {
if (! is.null(fail_relatedness $ failIDs ) &&
nrow(fail_relatedness $ failIDs ) != 0 ) {
highIBD <- select_(fail_relatedness $ failIDs , " FID" , " IID" )
} else {
highIBD <- NULL
}
if (! is.null(fail_het_imiss $ fail_het )) {
if (! is.null(fail_het_imiss $ fail_het ) &&
nrow(fail_het_imiss $ fail_het ) != 0 ) {
outlying_heterozygosity <- select_(fail_het_imiss $ fail_het , " FID" , " IID" )
} else {
outlying_heterozygosity <- NULL
}
if (! is.null(fail_sex $ fail_sex )) {
if (! is.null(fail_sex $ fail_sex ) &&
nrow(fail_sex $ fail_sex ) != 0 ) {
mismatched_sex <- select_(fail_sex $ fail_sex , " FID" , " IID" )
} else {
mismatched_sex <- NULL
}
if (! is.null(fail_ancestry $ fail_ancestry )) {
if (! is.null(fail_ancestry $ fail_ancestry ) &&
nrow(fail_ancestry $ fail_ancestry ) != 0 ) {
ancestry <- select_(fail_ancestry $ fail_ancestry , " FID" , " IID" )
} else {
ancestry <- NULL
@@ -386,12 +391,14 @@ perIndividualQC <- function(indir, name, qcdir=indir,
# ' other_arguments) or pdf(outfile) print(p_overview) dev.off().
# ' @return Named [list] with i) nr_fail_samples: total number of samples
# ' [integer] failing perIndividualQC, ii) fail_QC containing a [data.frame] with
# ' samples that failed QC steps (excluding ancestry): samples IIDs in rows ,
# ' columns are all QC steps applied by perIndividualQC (max=4), with entries=0
# ' samples that failed QC steps (excluding ancestry) with IID, FID ,
# ' all QC steps applied by perIndividualQC (max=4), with entries=0
# ' if passing the QC and entries=1 if failing that particular QC and iii)
# ' fail_QC_and_ancestry containing a [data.frame] with samples that failed
# ' ancestry and QC checks: samples IIDs in rows, columns are QC_fail and
# ' Ancestry_fail, with entries=0 if passing and entries=1 if failing that check.
# ' ancestry and QC checks with IID, FID, QC_fail and
# ' Ancestry_fail, with entries=0 if passing and entries=1 if failing that check,
# ' iii) p_overview, a ggplot2-object 'containing' a sub-paneled plot with the
# ' QC-plots.
# ' @export
# ' @examples
# ' indir <- system.file("extdata", package="plinkQC")
@@ -410,81 +417,81 @@ perIndividualQC <- function(indir, name, qcdir=indir,
overviewPerIndividualQC <- function (results_perIndividualQC ,
interactive = FALSE ) {
list2counts <- function (element , all_names ) {
all_names [! (all_names %in% element )] <- 0
all_names [all_names %in% element ] <- 1
return (as.numeric(all_names ))
}
if (length(perIndividualQC ) == 2 &&
! all(names(results_perIndividualQC ) == c(" fail_list" , " p_sampleQC" ))) {
stop(" results_perIndividualQC not direct output of perIndividualQC" )
}
fail_list <- results_perIndividualQC $ fail_list
# Remove null elements
fail_list <- fail_list [! sapply(fail_list , is.null )]
unique_samples_fail_all <- unique(unlist(fail_list ))
samples_fail_all <- do.call(rbind , fail_list )
# a) overview QC fails independent of ethnicity
fail_list_wo_ancestry <- fail_list [! names(fail_list ) == " ancestry" ]
unique_samples_fail_wo_ancestry <- unique(unlist(fail_list_wo_ancestry ))
fail_counts_wo_ancestry <- sapply(fail_list_wo_ancestry , list2counts ,
unique_samples_fail_wo_ancestry )
id_list_wo_ancestry <- sapply(fail_list_wo_ancestry , function (x ) x $ IID )
unique_samples_fail_wo_ancestry <- unique(unlist(id_list_wo_ancestry ))
fail_counts_wo_ancestry <- UpSetR :: fromList(id_list_wo_ancestry )
rownames(fail_counts_wo_ancestry ) <- unique_samples_fail_wo_ancestry
if (interactive ) {
if (length(fail_list_wo_ancestry ) > = 2 ) {
UpSetR :: upset(UpSetR :: fromList(fail_list_wo_ancestry ),
order.by = " freq" ,
empty.intersections = " on" , text.scale = 1.2 ,
# Include when UpSetR v1.4.1 is released
# title="Overview quality control failures",
mainbar.y.label = " Samples failing multiple QC checks" ,
sets.x.label = " Sample fails per QC check" ,
main.bar.color = " #1b9e77" , matrix.color = " #1b9e77" ,
sets.bar.color = " #d95f02" )
} else {
message(" overviewSampleQC for QC fails cannot be displayed with " ,
" UpSetR: at least two elements in list required, but only " ,
length(fail_list_wo_ancestry ) ," provided" )
}
p <- UpSetR :: upset(fail_counts_wo_ancestry ,
order.by = " freq" ,
empty.intersections = " on" , text.scale = 1.2 ,
# Include when UpSetR v1.4.1 is released
# title="Overview quality control failures",
mainbar.y.label = " Samples failing multiple QC checks" ,
sets.x.label = " Sample fails per QC check" ,
main.bar.color = " #1b9e77" , matrix.color = " #1b9e77" ,
sets.bar.color = " #d95f02" )
p_qc <- cowplot :: plot_grid(NULL , p $ Main_bar , p $ Sizes , p $ Matrix ,
nrow = 2 , align = ' v' , rel_heights = c(3 ,1 ),
rel_widths = c(2 ,3 ))
print(p_qc )
}
if (" ancestry" %in% names(fail_list )) {
fail_counts_wo_ancestry <- merge(samples_fail_all , fail_counts_wo_ancestry ,
by.x = 2 , by.y = 0 )
if (" ancestry" %in% names(fail_list ) && ! is.null(fail_list $ ancestry )) {
# b) overview of QC and ancestry fails
fail_all <- list (QC_fail = unique_samples_fail_wo_ancestry ,
Ancestry_fail = fail_list $ ancestry )
fail_counts_all <- sapply(fail_all , list2counts ,
unique_samples_fail_all )
Ancestry_fail = fail_list $ ancestry $ IID )
unique_samples_fail_all <- unique(unlist(fail_all ))
fail_counts_all <- UpSetR :: fromList(fail_all )
rownames(fail_counts_all ) <- unique_samples_fail_all
if (interactive ) {
if (length( fail_all ) > = 2 ) {
UpSetR :: upset( UpSetR :: fromList( fail_all ) ,
order.by = " freq " ,
# Include when UpSetR v1.4.1 is released
# title= "Intersection between QC and ancestry failures",
mainbar.y.label = " Samples failing QC and ancestry checks " ,
sets.x.label = " Sample fails per QC check " ,
empty.intersections = " on " , text.scale = 1.2 ,
main.bar.color = " #7570b3 " , matrix.color = " #7570b3 " ,
sets .bar.color= " #e7298a " )
} else {
message( " overviewSampleQC for QC fails and ancestry cannot be " ,
" displayed with UpSetR: as no samples are present in " ,
" QC fails " )
}
m <- UpSetR :: upset( fail_counts_all ,
order.by = " freq " ,
# Include when UpSetR v1.4.1 is released
# title=
# "Intersection between QC and ancestry failures",
mainbar.y.label =
" Samples failing QC and ancestry checks " ,
sets.x.label = " Sample fails per QC check " ,
empty.intersections = " on " , text.scale = 1.2 ,
main .bar.color= " #7570b3 " , matrix.color = " #7570b3 " ,
sets.bar.color = " #e7298a " )
p_all <- cowplot :: plot_grid( NULL , m $ Main_bar , m $ Sizes , m $ Matrix ,
nrow = 2 , align = ' v ' , rel_heights = c( 3 , 1 ) ,
rel_widths = c( 2 , 3 ) )
print( p_all )
}
# p_overview <- cowplot::plot_grid(p_qc, p_qc_ancestry, nrow=2)
fail_counts_all <- merge(samples_fail_all , fail_counts_all ,
by.x = 2 , by.y = 0 )
p_overview <- cowplot :: plot_grid(NULL , p $ Main_bar , p $ Sizes , p $ Matrix ,
NULL , m $ Main_bar , m $ Sizes , m $ Matrix ,
nrow = 4 , align = ' v' , rel_heights = c(3 ,1 ,3 ,1 ),
rel_widths = c(2 ,3 ))
} else {
# p_overview <- p_qc
p_overview <- p_qc
fail_counts_all <- NULL
}
nr_fail_samples <- length(unique_samples_fail_all )
nr_fail_samples <- length(unique( samples_fail_all $ IID ) )
return (list (nr_fail_samples = nr_fail_samples ,
fail_QC = fail_counts_wo_ancestry ,
fail_QC_and_ancestry = fail_counts_all ))
# p_overview=p_overview))
fail_QC_and_ancestry = fail_counts_all ,
p_overview = p_overview ))
}
# ' Identification of individuals with discordant sex information