data tax table updates and plot_composition

microbiome · Nov 30, 2018 · 387dc86 · 387dc86
1 parent 0a14f1a
commit 387dc86
Show file tree

Hide file tree

Showing 19 changed files with 181 additions and 116 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,12 +3,12 @@ Type: Package
 Title: Microbiome Analytics
 Description: Utilities for microbiome analysis.
 Encoding: UTF-8
-Version: 1.5.10004
-Date: 2018-11-20
+Version: 1.5.10006
+Date: 2018-11-29
 Authors@R: c(
     person("Leo", "Lahti", email = "leo.lahti@iki.fi", role = c("aut", "cre")),
     person("Sudarshan", "Shetty", role = "aut"))
-biocViews: Clustering, Metagenomics, Microbiome, Sequencing, SystemsBiology
+biocViews: Clustering, Metagenomics, Microbiome, Sequencing,SystemsBiology,ImmunoOncology
 License: BSD_2_clause + file LICENSE
 Depends:
     R (>= 3.5.0),

diff --git a/NAMESPACE b/NAMESPACE
@@ -12,10 +12,12 @@ export(core)
 export(core_abundance)
 export(core_members)
 export(coverage)
+export(default_colors)
 export(divergence)
 export(diversities)
 export(diversity)
 export(dominance)
+export(dominant)
 export(evenness)
 export(gktau)
 export(global)
@@ -34,7 +36,6 @@ export(neat)
 export(neatsort)
 export(noncore_abundance)
 export(noncore_members)
-export(plot_abundances)
 export(plot_atlas)
 export(plot_composition)
 export(plot_core)

diff --git a/R/aggregate_taxa.R b/R/aggregate_taxa.R
@@ -23,10 +23,7 @@ aggregate_taxa <- function(x, level, top = NULL) {
     # FIXME: this function contains quick hacks to circumvent
     # missing tax_table and sample_data. Those would be better handled
     # in the original reading functions.
-
-    x <- check_phyloseq(x)
-
-    mypseq <- x
+    mypseq <- check_phyloseq(x)
 
     if (!is.null(mypseq@phy_tree)) {
 
@@ -37,9 +34,9 @@ aggregate_taxa <- function(x, level, top = NULL) {
 
         # Agglomerate taxa
         mypseq2 <- tax_glom(mypseq, level)
-    mypseq2@phy_tree <- NULL # Remove tree 
-    a <- abundances(mypseq2)
-    nams <- as.character(tax_table(mypseq2)[, level])
+        mypseq2@phy_tree <- NULL # Remove tree 
+        a <- abundances(mypseq2)
+        nams <- as.character(tax_table(mypseq2)[, level])
         rownames(a) <- nams
         tt <- tax_table(mypseq2)[, seq_len(match(level,
         colnames(tax_table(mypseq2))))]
@@ -62,45 +59,51 @@ aggregate_taxa <- function(x, level, top = NULL) {
             tt[which(!tt[, level] %in% top), level] <- "Other"
             tax_table(mypseq) <- tt
         }
-        
+
         # Split the OTUs in tax_table by the given taxonomic level otus <-
         # split(rownames(tax_table(mypseq)), tax_table(mypseq)[, level])
-        current.level <- names(which.max(apply(tt, 2, function(x) {
-            mean(taxa(mypseq) %in% unique(x))
-        })))
-    if (length(current.level) == 0) {
+        v <- apply(tt, 2, function(x) {mean(taxa(mypseq) %in% unique(x))})
+        if (max(v) > 0) {
+            current.level <- names(which.max(v))
+        } else {
+            stop("The taxa are not found in tax_table in aggregate_taxa") 
+        }
+        if (length(current.level) == 0) {
             current.level <- "unique"
-        tax_table(mypseq) <- tax_table(cbind(tax_table(mypseq),
-        unique = rownames(tax_table(mypseq))))
+            tax_table(mypseq) <- tax_table(cbind(tax_table(mypseq),
+            unique = rownames(tax_table(mypseq))))
         }
 
         otus <- map_levels(data=mypseq, to=current.level, from=level)
-        
+
         ab <- matrix(NA, nrow=length(otus), ncol=nsamples(mypseq))
         rownames(ab) <- names(otus)
         colnames(ab) <- sample_names(mypseq)
-        
+
         d <- abundances(mypseq)
-        
+
         for (nam in names(otus)) {
             taxa <- otus[[nam]]
-            ab[nam, ] <- colSums(matrix(d[taxa, ], ncol=nsamples(mypseq)))
+            ab[nam, ] <- colSums(matrix(d[taxa, ], ncol=nsamples(mypseq)),
+            na.rm = TRUE)
         }
-
+
+
         # Create phyloseq object
         OTU <- otu_table(ab, taxa_are_rows=TRUE)
         mypseq2 <- phyloseq(OTU)
-
-    # Remove ambiguous levels
-    ## First remove NA entries from the target level    
-    tax_table(mypseq) <- tax_table(mypseq)[!is.na(tax_table(mypseq)[, level]),]
+
+        # Remove ambiguous levels
+        ## First remove NA entries from the target level
+        keep <- !is.na(tax_table(mypseq)[, level])
+        tax_table(mypseq) <- tax_table(mypseq)[keep,]
         keep <- colnames(
         tax_table(mypseq))[
-    which(
-        vapply(seq(ncol(tax_table(mypseq))),
+        which(
+            vapply(seq(ncol(tax_table(mypseq))),
             function(k)
             sum(
-        vapply(split(as.character(tax_table(mypseq)[, k]),
+            vapply(split(as.character(tax_table(mypseq)[, k]),
             as.character(tax_table(mypseq)[, level])), function(x) {
             length(unique(x))
         }, 1) > 1), 1) == 0)]

diff --git a/R/bimodality.R b/R/bimodality.R
@@ -76,8 +76,8 @@
 #' # function (x) {1 - unname(dip.test(x)$p.value)})
 #'
 #' @keywords utilities
-bimodality <- function(x, method="potential_analysis", peak.threshold=1, bw.adjust=1,
-bs.iter=100, min.density=1, verbose=TRUE) {
+bimodality <- function(x, method="potential_analysis", peak.threshold=1,
+    bw.adjust=1, bs.iter=100, min.density=1, verbose=TRUE) {
 
     accepted <- intersect(method, c("potential_analysis",
         "Sarle.finite.sample", "Sarle.asymptotic"))

diff --git a/R/colors.R b/R/colors.R
@@ -0,0 +1,30 @@
+#' @title Default Colors
+#' @description Default colors for different variables.
+#' @param x Name of the variable type ("Phylum")
+#' @param v Optional. Vector of elements to color.
+#' @return Named character vector of default colors
+#' @author Leo Lahti \email{leo.lahti@@iki.fi}
+#' @references See citation("microbiome")
+#' @export
+#' @examples \dontrun{col <- default_colors("Phylum")}
+#' @keywords utilities
+default_colors <- function (x, v=NULL) {
+
+  if (x == "Phylum") {
+    #http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
+    #https://www.r-graph-gallery.com/42-colors-names/
+    col <- c(
+        "Actinobacteria" = "darkgreen",
+	"Firmicutes" = "blue",
+	"Proteobacteria" = "black",
+	"Verrucomicrobia" = "darkblue",
+	"Bacteroidetes" = "red",
+	"Spirochaetes" = "darkgray",
+	"Fusobacteria" = "lightblue",
+	"Cyanobacteria" = "deepskyblue3")
+  }      
+
+  col
+
+}
+
diff --git a/R/dominant.R b/R/dominant.R
@@ -0,0 +1,17 @@
+#' @title Dominant taxa
+#' @description Returns the dominant taxonomic group for each sample.
+#' @param x A \code{\link{phyloseq-class}} object
+#' @return A vector of dominance indices
+#' @export
+#' @examples
+#' data(dietswap)
+#' # vector
+#' d <- dominant(dietswap)
+#' @author Leo Lahti \email{microbiome-admin@@googlegroups.com}
+#' @keywords utilities
+dominant <- function(x) {
+
+    # TODO add support to other taxonomic levels
+    taxa(x)[apply(abundances(x), 2, which.max)]
+
+}
diff --git a/R/plot_abundances.R b/R/plot_abundances.R
diff --git a/R/plot_composition.R b/R/plot_composition.R
@@ -5,7 +5,8 @@
 #' \itemize{
 #' \item NULL or 'none': No sorting
 #' \item A single character string: indicate the metadata field to be used
-#'  for ordering
+#'  for ordering. Or: if this string is found from the tax_table, then sort by the
+#'  corresponding taxonomic group.
 #' \item A character vector: sample IDs indicating the sample ordering.
 #' \item 'neatmap' Order samples based on the neatmap approach.
 #' See \code{\link{neatsort}}. By default, 'NMDS' method with 'bray'
@@ -22,31 +23,35 @@
 #'  (but not in sample/taxon ordering).
 #' The options are 'Z-OTU', 'Z-Sample', 'log10' and 'compositional'.
 #' See the \code{\link{transform}} function.
-#' @param mar Figure margins
 #' @param average_by Average the samples by the average_by variable 
 #' @param ... Arguments to be passed (for \code{\link{neatsort}} function)
 #' @return A \code{\link{ggplot}} plot object.
 #' @export
 #' @examples
-#' data(dietswap)
-#' pseq <- subset_samples(dietswap, group == 'DI' & nationality == 'AFR' &
-#'    sex == "female")
-#' p <- plot_composition(pseq, verbose = TRUE)
+#' library(dplyr)
+#' data(atlas1006)
+#' pseq <- atlas1006 %>%
+#'    subset_samples(DNA_extraction_method == "r") %>%
+#'    aggregate_taxa(level = "Phylum") %>%
+#'    transform(transform = "compositional")
+#' p <- plot_composition(pseq, sample.sort = "Firmicutes", otu.sort = "abudance", verbose = TRUE) +
+#'          scale_fill_manual(values = default_colors("Phylum")[taxa(pseq)]) # Use custom colors
 #' @keywords utilities
 plot_composition <- function(x, sample.sort=NULL,
     otu.sort=NULL, x.label="sample", plot.type="barplot",
     verbose=FALSE, 
-    mar=c(5, 12, 1, 1), average_by=NULL, ...) {
+    average_by=NULL, ...) {
 
     # Avoid warnings
-    Sample <- Abundance <- Taxon <- horiz <- value <- scales <- ID <- 
+    Sample <- Abundance <- Taxon <- Tax <- horiz <- value <- scales <- ID <- 
         meta <- OTU <- taxic <- otu.df <- taxmat <-  new.tax<- NULL
     if (!is.null(x@phy_tree)){
         x@phy_tree <- NULL
     }
 
     xorig <- x
 
+
     if (verbose) {
         message("Pick the abundance matrix taxa x samples")
     }
@@ -79,6 +84,9 @@ plot_composition <- function(x, sample.sort=NULL,
         !is.null(average_by)) {
         # No sorting sample.sort <- sample_names(x)
         sample.sort <- colnames(abu)
+    } else if (length(sample.sort) == 1 && sample.sort %in% taxa(xorig)) {
+        tax <- sample.sort
+        sample.sort <- rev(sample_names(x)[order(abundances(x)[tax,])])
     } else if (length(sample.sort) == 1 &&
         sample.sort %in% names(sample_data(x)) && 
         is.null(average_by)) {
@@ -104,6 +112,8 @@ plot_composition <- function(x, sample.sort=NULL,
     if (is.null(otu.sort) || otu.sort == "none") {
         # No sorting
         otu.sort <- taxa(x)
+    } else if (length(otu.sort) == 1 && otu.sort == "abundance2") {
+        otu.sort <- rev(c(rev(names(sort(rowSums(abu)))[seq(1, nrow(abu), 2)]), names(sort(rowSums(abu)))[seq(2, nrow(abu), 2)]))
     } else if (length(otu.sort) == 1 && otu.sort == "abundance") {
         otu.sort <- rev(names(sort(rowSums(abu))))
     } else if (length(otu.sort) == 1 && otu.sort %in% names(tax_table(x))) {
@@ -129,10 +139,10 @@ plot_composition <- function(x, sample.sort=NULL,
 
     # Abundances as data.frame dfm <- psmelt(x)
     dfm <- psmelt(otu_table(abu, taxa_are_rows = TRUE))
-    names(dfm) <- c("OTU", "Sample", "Abundance")
+    names(dfm) <- c("Tax", "Sample", "Abundance")
 
     dfm$Sample <- factor(dfm$Sample, levels=sample.sort)
-    dfm$OTU <- factor(dfm$OTU, levels=otu.sort)
+    dfm$Tax <- factor(dfm$Tax, levels=otu.sort)
 
     # SampleIDs for plotting
     if (x.label %in% colnames(sample_data(x)) & is.null(average_by)) {
@@ -166,9 +176,9 @@ plot_composition <- function(x, sample.sort=NULL,
     if (plot.type == "barplot") {
 
         # Provide barplot
-        dfm <- dfm %>% arrange(OTU)  # Show OTUs always in the same order
+        dfm <- dfm %>% arrange(Tax)  # Show Taxs always in the same order
 
-        p <- ggplot(dfm, aes(x=Sample, y=Abundance, fill=OTU))
+        p <- ggplot(dfm, aes(x=Sample, y=Abundance, fill=Tax))
         p <- p + geom_bar(position="stack", stat="identity")
         p <- p + scale_x_discrete(labels=dfm$xlabel, breaks=dfm$Sample)
 
@@ -195,17 +205,14 @@ plot_composition <- function(x, sample.sort=NULL,
         otu.sort <- otu.sort[otu.sort %in% rownames(otu)]
         sample.sort <- sample.sort[sample.sort %in% colnames(otu)]
 
-        # Plot TODO: move it in here from netresponse and return the
-        # #ggplot object as well
-        # p <- plot_matrix(otu[otu.sort, sample.sort], type="twoway", mar=mar)
         tmp <- melt(otu[otu.sort, sample.sort])
         p <- heat(tmp, colnames(tmp)[[1]], colnames(tmp)[[2]],
             colnames(tmp)[[3]])
 
     } else if (plot.type == "lineplot") {
 
-        dfm <- dfm %>% arrange(OTU)  # Show OTUs always in the same order
-        p <- ggplot(dfm, aes(x=Sample, y=Abundance, color=OTU, group = OTU))
+        dfm <- dfm %>% arrange(Tax)  # Show Taxs always in the same order
+        p <- ggplot(dfm, aes(x=Sample, y=Abundance, color=Tax, group = Tax))
         p <- p + geom_point()
         p <- p + geom_line()    
         p <- p + scale_x_discrete(labels=dfm$xlabel, breaks=dfm$Sample)

diff --git a/README.md b/README.md
@@ -40,7 +40,7 @@ Tools for the exploration and analysis of microbiome profiling data sets, in par
 
 See the package [tutorial](http://microbiome.github.io/microbiome/).
 
-**Kindly cite** as follows: "Leo Lahti, Sudarshan Shetty [et al.](https://github.com/microbiome/microbiome/graphs/contributors) ([Bioconductor, 2017](https://bioconductor.org/packages/devel/bioc/html/microbiome.html)). Tools for microbiome analysis in R. Microbiome package version 1.5.10004. URL: [http://microbiome.github.com/microbiome](http://microbiome.github.com/microbiome). See also the relevant references listed in the manual page of each function. 
+**Kindly cite** as follows: "Leo Lahti, Sudarshan Shetty [et al.](https://github.com/microbiome/microbiome/graphs/contributors) ([Bioconductor, 2017](https://bioconductor.org/packages/devel/bioc/html/microbiome.html)). Tools for microbiome analysis in R. Microbiome package version 1.5.10006. URL: [http://microbiome.github.com/microbiome](http://microbiome.github.com/microbiome). See also the relevant references listed in the manual page of each function. 
 
 ### Contribute
 

diff --git a/data/atlas1006.rda b/data/atlas1006.rda
diff --git a/data/dietswap.rda b/data/dietswap.rda
diff --git a/data/peerj32.rda b/data/peerj32.rda
diff --git a/inst/NEWS b/inst/NEWS
@@ -1,4 +1,7 @@
 Changes in version 1.5.2 (2018-11-20)
++ plot_composition function: new options for sample.sort and otu.sort
++ Added Phylum level to taxonomy tables in example data sets 
+* New function: dominant
 + The diversities function is now replaces by alpha function. The alpha is more
   generic and can return also other alpha diversity indices.
 + plot_frequencies function now only returns the ggplot object

diff --git a/inst/extras/build.sh b/inst/extras/build.sh
@@ -1,4 +1,4 @@
-# https://support.rstudio.com/hc/en-us/articles/200626518-Customizing-Package-Build-Options
+# https://support.rstudio.com/hc/en-us/articles/200626518-Customizing-Package-Bubuiild-Options
 #/usr/bin/R CMD BATCH document.R
 /usr/bin/R CMD build ../../ --resave-data #--no-examples  --no-build-vignettes 
 /usr/bin/R CMD check microbiome_1.5.10003.tar.gz #--no-build-vignettes --no-examples

diff --git a/man/default_colors.Rd b/man/default_colors.Rd