diff --git a/DESCRIPTION b/DESCRIPTION index eb35b1b..4baa3ac 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DIscBIO Date: 2020-05-04 Title: A User-Friendly Pipeline for Biomarker Discovery in Single-Cell Transcriptomics -Version: 0.99.7 +Version: 0.99.8 Authors@R: c( person(given = "Salim", diff --git a/R/Jaccard.R b/R/Jaccard.R index a9fcaf6..b4fd7ea 100644 --- a/R/Jaccard.R +++ b/R/Jaccard.R @@ -9,6 +9,8 @@ #' ["K-means","MB"]. Default is "K-means" #' @param K A numeric value of the number of clusters #' @param plot if `TRUE`, plots the mean Jaccard similarities +#' @param R number of bootstrap replicates +#' @param ... Further arguments passed to \code{boot::boot} #' @importFrom philentropy distance #' @importFrom boot boot #' @importFrom graphics barplot box @@ -17,7 +19,14 @@ #' sc <- DISCBIO(valuesG1msReduced) #' sc <- Clustexp(sc, cln=3, quiet=TRUE) # K-means clustering #' Jaccard(sc, Clustering="K-means", K=3) -Jaccard <- function(object, Clustering = "K-means", K, plot = TRUE) { +Jaccard <- function( + object, + Clustering = "K-means", + K, + plot = TRUE, + R = 100, + ... +) { JACCARD <- c() # Validation @@ -29,46 +38,40 @@ Jaccard <- function(object, Clustering = "K-means", K, plot = TRUE) { } JS <- function(data, indices) { - d <- data[indices,] # allows boot to select sample - jac <- suppressMessages(distance(t(d), method = "jaccard")) - jac1 <- 1 - jac + d <- data[indices,] # allows boot to select sample + jac <- suppressMessages(distance(t(d), method = "jaccard")) + jac1 <- 1 - jac JSmean <- mean(jac1) return(JSmean) } for (i in 1:K) { # Optimize by avoiding if every loop. Only thing variable is data if (Clustering == "K-means") { - results <- boot( - data = object@fdata[, which(object@kmeans$kpart == i)], - statistic = JS, - R = 100, - stype = "f" - ) - # to get the mean of all bootstrappings (mean of mean Jaccard values) - JACCARD[i] <- round(mean(results$t), digits = 3) - } - if (Clustering == "MB") { - results <- boot( - data = object@fdata[, which(object@MBclusters$clusterid == i)], - statistic = JS, - R = 100, - stype = "f" - ) - # to get the mean of all bootstrappings (mean of mean Jaccard values) - JACCARD[i] <- round(mean(results$t), digits = 3) + target_col <- object@kmeans$kpart + } else if (Clustering == "MB") { + target_col <- object@MBclusters$clusterid } + results <- boot( + data = object@fdata[, which(target_col == i)], + statistic = JS, + R = R, + stype = "f", + ... + ) + # to get the mean of all bootstrappings (mean of mean Jaccard values) + JACCARD[i] <- round(mean(results$t), digits = 3) } if (plot) { - barplot( - height = JACCARD, - names.arg = 1:length(JACCARD), - ylab = "Mean Jaccard's similarity values", - xlab = "Clusters", - las = 1, - ylim = c(0, 1), - col = c("black", "blue", "green", "red", "yellow", "gray") - ) - box() + barplot( + height = JACCARD, + names.arg = 1: length(JACCARD), + ylab = "Mean Jaccard's similarity values", + xlab = "Clusters", + las = 1, + ylim = c(0, 1), + col = c("black", "blue", "green", "red", "yellow", "gray") + ) + box() } return(JACCARD) } \ No newline at end of file diff --git a/man/Jaccard.Rd b/man/Jaccard.Rd index 841f412..56dc870 100644 --- a/man/Jaccard.Rd +++ b/man/Jaccard.Rd @@ -4,7 +4,7 @@ \alias{Jaccard} \title{Jaccard’s similarity} \usage{ -Jaccard(object, Clustering = "K-means", K, plot = TRUE) +Jaccard(object, Clustering = "K-means", K, plot = TRUE, R = 100, ...) } \arguments{ \item{object}{\code{DISCBIO} class object.} @@ -15,6 +15,10 @@ Jaccard(object, Clustering = "K-means", K, plot = TRUE) \item{K}{A numeric value of the number of clusters} \item{plot}{if `TRUE`, plots the mean Jaccard similarities} + +\item{R}{number of bootstrap replicates} + +\item{...}{Further arguments passed to \code{boot::boot}} } \value{ A plot of the mean Jaccard similarity coefficient per cluster. diff --git a/tests/testthat/test.DIscBIO.IMP.R b/tests/testthat/test.DIscBIO.IMP.R index 5b365e2..8ade01f 100644 --- a/tests/testthat/test.DIscBIO.IMP.R +++ b/tests/testthat/test.DIscBIO.IMP.R @@ -56,7 +56,7 @@ test_that("tSNE is computed", { test_that("Cluster plots output is as expexted", { expect_equivalent( - object = Jaccard(sc, Clustering="K-means", K=2, plot = FALSE), + object = Jaccard(sc, Clustering="K-means", K=2, plot=FALSE, R=5), expected = c(.790, .653), tolerance = .01 ) @@ -111,7 +111,7 @@ cdiff2 <- DEGanalysis( # differential expression analysis between two particular clusters. cdiff3 <- DEGanalysis2clust( sc, Clustering="K-means", K=2, fdr=.15, name="Name", First="CL1", - Second="CL2", export=FALSE, quiet=TRUE, plot=FALSE + Second="CL2", export=FALSE, quiet=TRUE, plot=FALSE, nperms=5, nresamp=2 ) test_that("DEGs are calculated", { @@ -150,9 +150,9 @@ test_that("Decision tree elements are defined", { expect_output(str(DATAforDT), "3 obs. of 30 variables") expect_s3_class(j48dt, "J48") expect_s3_class(summary(j48dt), "Weka_classifier_evaluation") - expect_identical(j48dt_eval, c(TP = 14, FN = 4, FP = 5, TN = 7)) + expect_identical(j48dt_eval, c(TP = 14, FN = 4, FP = 3, TN = 9)) expect_s3_class(rpartDT, "rpart") - expect_identical(rpartEVAL, c(TP = 15, FN = 3, FP = 3, TN = 9)) + expect_identical(rpartEVAL, c(TP = 16, FN = 2, FP = 4, TN = 8)) }) # ---------------------------------------------------------------------------- # @@ -196,7 +196,7 @@ Outliers2 <- FindOutliersMB( test_that("MB clustering and outliers work as expected", { expect_equivalent( - object = Jaccard(sc, Clustering="MB", K=2, plot = FALSE), + object = Jaccard(sc, Clustering="MB", K=2, plot = FALSE, R=5), expected = c(.819, .499), tolerance = 0.01 ) @@ -229,13 +229,13 @@ cdiff1 <- MBClustDiffGenes(sc, K=2, fdr=.2, export=FALSE, quiet=TRUE) # DE analysis between all clusters cdiff2 <- DEGanalysis( sc, Clustering="MB", K=2, fdr=.2, name="Name", export=FALSE, quiet=TRUE, - plot = FALSE + plot = FALSE, nperms=5, nresamp=2 ) # differential expression analysis between particular clusters. cdiff3 <- DEGanalysis2clust( sc, Clustering="MB", K=2, fdr=.15, name="Name", First="CL1", Second="CL2", - export = FALSE, plot = FALSE, quiet = TRUE + export = FALSE, plot = FALSE, quiet = TRUE, nperms=5, nresamp=2 ) test_that("DEGs are calculated", { @@ -268,10 +268,10 @@ rpartEVAL <- RpartEVAL( ) test_that("Decision tree elements are defined", { - expect_output(str(DATAforDT), "23 obs. of 30 variables") # used to be 31 + expect_output(str(DATAforDT), "29 obs. of 30 variables") # used to be 31 expect_s3_class(j48dt, "J48") expect_s3_class(summary(j48dt), "Weka_classifier_evaluation") expect_identical(j48dt_eval, c(TP = 21, FN = 2, FP = 4, TN = 3)) expect_s3_class(rpartDT, "rpart") - expect_identical(rpartEVAL, c(TP = 19, FN = 4, FP = 4, TN = 3)) + expect_identical(rpartEVAL, c(TP = 20, FN = 3, FP = 4, TN = 3)) }) diff --git a/tests/testthat/test.integration.R b/tests/testthat/test.integration.R index f3a48cf..df48a5c 100644 --- a/tests/testthat/test.integration.R +++ b/tests/testthat/test.integration.R @@ -2,17 +2,21 @@ # Testing integration with SingleCellExperiment # ============================================================================== context("Converting other formats to DISCBIO") - +# ------------------------------------------------------------------------------ +# Setting up datasets +# ------------------------------------------------------------------------------ pmbc_seurat <- Seurat::pbmc_small pmbc_sce <- Seurat::as.SingleCellExperiment(pmbc_seurat) g1_sce <- SingleCellExperiment::SingleCellExperiment( list(counts=as.matrix(valuesG1msReduced)) ) - +# ------------------------------------------------------------------------------ +# Performing unit tests +# ------------------------------------------------------------------------------ test_that("Pure text gets formatted as DISCBIO", { - expect_true(is(as.DISCBIO(pmbc_seurat), "DISCBIO")) + expect_s4_class(as.DISCBIO(pmbc_seurat), "DISCBIO") }) test_that("SCE file gets formatted as DISCBIO", { - expect_true(is(as.DISCBIO(pmbc_sce), "DISCBIO")) - expect_true(is(as.DISCBIO(g1_sce), "DISCBIO")) + expect_s4_class(as.DISCBIO(pmbc_sce), "DISCBIO") + expect_s4_class(as.DISCBIO(g1_sce), "DISCBIO") })