Skip to content

Commit

Permalink
data tax table updates and plot_composition
Browse files Browse the repository at this point in the history
  • Loading branch information
antagomir committed Nov 30, 2018
1 parent 0a14f1a commit 387dc86
Show file tree
Hide file tree
Showing 19 changed files with 181 additions and 116 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Expand Up @@ -3,12 +3,12 @@ Type: Package
Title: Microbiome Analytics
Description: Utilities for microbiome analysis.
Encoding: UTF-8
Version: 1.5.10004
Date: 2018-11-20
Version: 1.5.10006
Date: 2018-11-29
Authors@R: c(
person("Leo", "Lahti", email = "leo.lahti@iki.fi", role = c("aut", "cre")),
person("Sudarshan", "Shetty", role = "aut"))
biocViews: Clustering, Metagenomics, Microbiome, Sequencing, SystemsBiology
biocViews: Clustering, Metagenomics, Microbiome, Sequencing,SystemsBiology,ImmunoOncology
License: BSD_2_clause + file LICENSE
Depends:
R (>= 3.5.0),
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Expand Up @@ -12,10 +12,12 @@ export(core)
export(core_abundance)
export(core_members)
export(coverage)
export(default_colors)
export(divergence)
export(diversities)
export(diversity)
export(dominance)
export(dominant)
export(evenness)
export(gktau)
export(global)
Expand All @@ -34,7 +36,6 @@ export(neat)
export(neatsort)
export(noncore_abundance)
export(noncore_members)
export(plot_abundances)
export(plot_atlas)
export(plot_composition)
export(plot_core)
Expand Down
55 changes: 29 additions & 26 deletions R/aggregate_taxa.R
Expand Up @@ -23,10 +23,7 @@ aggregate_taxa <- function(x, level, top = NULL) {
# FIXME: this function contains quick hacks to circumvent
# missing tax_table and sample_data. Those would be better handled
# in the original reading functions.

x <- check_phyloseq(x)

mypseq <- x
mypseq <- check_phyloseq(x)

if (!is.null(mypseq@phy_tree)) {

Expand All @@ -37,9 +34,9 @@ aggregate_taxa <- function(x, level, top = NULL) {

# Agglomerate taxa
mypseq2 <- tax_glom(mypseq, level)
mypseq2@phy_tree <- NULL # Remove tree
a <- abundances(mypseq2)
nams <- as.character(tax_table(mypseq2)[, level])
mypseq2@phy_tree <- NULL # Remove tree
a <- abundances(mypseq2)
nams <- as.character(tax_table(mypseq2)[, level])
rownames(a) <- nams
tt <- tax_table(mypseq2)[, seq_len(match(level,
colnames(tax_table(mypseq2))))]
Expand All @@ -62,45 +59,51 @@ aggregate_taxa <- function(x, level, top = NULL) {
tt[which(!tt[, level] %in% top), level] <- "Other"
tax_table(mypseq) <- tt
}

# Split the OTUs in tax_table by the given taxonomic level otus <-
# split(rownames(tax_table(mypseq)), tax_table(mypseq)[, level])
current.level <- names(which.max(apply(tt, 2, function(x) {
mean(taxa(mypseq) %in% unique(x))
})))
if (length(current.level) == 0) {
v <- apply(tt, 2, function(x) {mean(taxa(mypseq) %in% unique(x))})
if (max(v) > 0) {
current.level <- names(which.max(v))
} else {
stop("The taxa are not found in tax_table in aggregate_taxa")
}
if (length(current.level) == 0) {
current.level <- "unique"
tax_table(mypseq) <- tax_table(cbind(tax_table(mypseq),
unique = rownames(tax_table(mypseq))))
tax_table(mypseq) <- tax_table(cbind(tax_table(mypseq),
unique = rownames(tax_table(mypseq))))
}

otus <- map_levels(data=mypseq, to=current.level, from=level)

ab <- matrix(NA, nrow=length(otus), ncol=nsamples(mypseq))
rownames(ab) <- names(otus)
colnames(ab) <- sample_names(mypseq)

d <- abundances(mypseq)

for (nam in names(otus)) {
taxa <- otus[[nam]]
ab[nam, ] <- colSums(matrix(d[taxa, ], ncol=nsamples(mypseq)))
ab[nam, ] <- colSums(matrix(d[taxa, ], ncol=nsamples(mypseq)),
na.rm = TRUE)
}



# Create phyloseq object
OTU <- otu_table(ab, taxa_are_rows=TRUE)
mypseq2 <- phyloseq(OTU)

# Remove ambiguous levels
## First remove NA entries from the target level
tax_table(mypseq) <- tax_table(mypseq)[!is.na(tax_table(mypseq)[, level]),]

# Remove ambiguous levels
## First remove NA entries from the target level
keep <- !is.na(tax_table(mypseq)[, level])
tax_table(mypseq) <- tax_table(mypseq)[keep,]
keep <- colnames(
tax_table(mypseq))[
which(
vapply(seq(ncol(tax_table(mypseq))),
which(
vapply(seq(ncol(tax_table(mypseq))),
function(k)
sum(
vapply(split(as.character(tax_table(mypseq)[, k]),
vapply(split(as.character(tax_table(mypseq)[, k]),
as.character(tax_table(mypseq)[, level])), function(x) {
length(unique(x))
}, 1) > 1), 1) == 0)]
Expand Down
4 changes: 2 additions & 2 deletions R/bimodality.R
Expand Up @@ -76,8 +76,8 @@
#' # function (x) {1 - unname(dip.test(x)$p.value)})
#'
#' @keywords utilities
bimodality <- function(x, method="potential_analysis", peak.threshold=1, bw.adjust=1,
bs.iter=100, min.density=1, verbose=TRUE) {
bimodality <- function(x, method="potential_analysis", peak.threshold=1,
bw.adjust=1, bs.iter=100, min.density=1, verbose=TRUE) {

accepted <- intersect(method, c("potential_analysis",
"Sarle.finite.sample", "Sarle.asymptotic"))
Expand Down
30 changes: 30 additions & 0 deletions R/colors.R
@@ -0,0 +1,30 @@
#' @title Default Colors
#' @description Default colors for different variables.
#' @param x Name of the variable type ("Phylum")
#' @param v Optional. Vector of elements to color.
#' @return Named character vector of default colors
#' @author Leo Lahti \email{leo.lahti@@iki.fi}
#' @references See citation("microbiome")
#' @export
#' @examples \dontrun{col <- default_colors("Phylum")}
#' @keywords utilities
default_colors <- function (x, v=NULL) {

if (x == "Phylum") {
#http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf
#https://www.r-graph-gallery.com/42-colors-names/
col <- c(
"Actinobacteria" = "darkgreen",
"Firmicutes" = "blue",
"Proteobacteria" = "black",
"Verrucomicrobia" = "darkblue",
"Bacteroidetes" = "red",
"Spirochaetes" = "darkgray",
"Fusobacteria" = "lightblue",
"Cyanobacteria" = "deepskyblue3")
}

col

}

17 changes: 17 additions & 0 deletions R/dominant.R
@@ -0,0 +1,17 @@
#' @title Dominant taxa
#' @description Returns the dominant taxonomic group for each sample.
#' @param x A \code{\link{phyloseq-class}} object
#' @return A vector of dominance indices
#' @export
#' @examples
#' data(dietswap)
#' # vector
#' d <- dominant(dietswap)
#' @author Leo Lahti \email{microbiome-admin@@googlegroups.com}
#' @keywords utilities
dominant <- function(x) {

# TODO add support to other taxonomic levels
taxa(x)[apply(abundances(x), 2, which.max)]

}
28 changes: 0 additions & 28 deletions R/plot_abundances.R

This file was deleted.

41 changes: 24 additions & 17 deletions R/plot_composition.R
Expand Up @@ -5,7 +5,8 @@
#' \itemize{
#' \item NULL or 'none': No sorting
#' \item A single character string: indicate the metadata field to be used
#' for ordering
#' for ordering. Or: if this string is found from the tax_table, then sort by the
#' corresponding taxonomic group.
#' \item A character vector: sample IDs indicating the sample ordering.
#' \item 'neatmap' Order samples based on the neatmap approach.
#' See \code{\link{neatsort}}. By default, 'NMDS' method with 'bray'
Expand All @@ -22,31 +23,35 @@
#' (but not in sample/taxon ordering).
#' The options are 'Z-OTU', 'Z-Sample', 'log10' and 'compositional'.
#' See the \code{\link{transform}} function.
#' @param mar Figure margins
#' @param average_by Average the samples by the average_by variable
#' @param ... Arguments to be passed (for \code{\link{neatsort}} function)
#' @return A \code{\link{ggplot}} plot object.
#' @export
#' @examples
#' data(dietswap)
#' pseq <- subset_samples(dietswap, group == 'DI' & nationality == 'AFR' &
#' sex == "female")
#' p <- plot_composition(pseq, verbose = TRUE)
#' library(dplyr)
#' data(atlas1006)
#' pseq <- atlas1006 %>%
#' subset_samples(DNA_extraction_method == "r") %>%
#' aggregate_taxa(level = "Phylum") %>%
#' transform(transform = "compositional")
#' p <- plot_composition(pseq, sample.sort = "Firmicutes", otu.sort = "abudance", verbose = TRUE) +
#' scale_fill_manual(values = default_colors("Phylum")[taxa(pseq)]) # Use custom colors
#' @keywords utilities
plot_composition <- function(x, sample.sort=NULL,
otu.sort=NULL, x.label="sample", plot.type="barplot",
verbose=FALSE,
mar=c(5, 12, 1, 1), average_by=NULL, ...) {
average_by=NULL, ...) {

# Avoid warnings
Sample <- Abundance <- Taxon <- horiz <- value <- scales <- ID <-
Sample <- Abundance <- Taxon <- Tax <- horiz <- value <- scales <- ID <-
meta <- OTU <- taxic <- otu.df <- taxmat <- new.tax<- NULL
if (!is.null(x@phy_tree)){
x@phy_tree <- NULL
}

xorig <- x


if (verbose) {
message("Pick the abundance matrix taxa x samples")
}
Expand Down Expand Up @@ -79,6 +84,9 @@ plot_composition <- function(x, sample.sort=NULL,
!is.null(average_by)) {
# No sorting sample.sort <- sample_names(x)
sample.sort <- colnames(abu)
} else if (length(sample.sort) == 1 && sample.sort %in% taxa(xorig)) {
tax <- sample.sort
sample.sort <- rev(sample_names(x)[order(abundances(x)[tax,])])
} else if (length(sample.sort) == 1 &&
sample.sort %in% names(sample_data(x)) &&
is.null(average_by)) {
Expand All @@ -104,6 +112,8 @@ plot_composition <- function(x, sample.sort=NULL,
if (is.null(otu.sort) || otu.sort == "none") {
# No sorting
otu.sort <- taxa(x)
} else if (length(otu.sort) == 1 && otu.sort == "abundance2") {
otu.sort <- rev(c(rev(names(sort(rowSums(abu)))[seq(1, nrow(abu), 2)]), names(sort(rowSums(abu)))[seq(2, nrow(abu), 2)]))
} else if (length(otu.sort) == 1 && otu.sort == "abundance") {
otu.sort <- rev(names(sort(rowSums(abu))))
} else if (length(otu.sort) == 1 && otu.sort %in% names(tax_table(x))) {
Expand All @@ -129,10 +139,10 @@ plot_composition <- function(x, sample.sort=NULL,

# Abundances as data.frame dfm <- psmelt(x)
dfm <- psmelt(otu_table(abu, taxa_are_rows = TRUE))
names(dfm) <- c("OTU", "Sample", "Abundance")
names(dfm) <- c("Tax", "Sample", "Abundance")

dfm$Sample <- factor(dfm$Sample, levels=sample.sort)
dfm$OTU <- factor(dfm$OTU, levels=otu.sort)
dfm$Tax <- factor(dfm$Tax, levels=otu.sort)

# SampleIDs for plotting
if (x.label %in% colnames(sample_data(x)) & is.null(average_by)) {
Expand Down Expand Up @@ -166,9 +176,9 @@ plot_composition <- function(x, sample.sort=NULL,
if (plot.type == "barplot") {

# Provide barplot
dfm <- dfm %>% arrange(OTU) # Show OTUs always in the same order
dfm <- dfm %>% arrange(Tax) # Show Taxs always in the same order

p <- ggplot(dfm, aes(x=Sample, y=Abundance, fill=OTU))
p <- ggplot(dfm, aes(x=Sample, y=Abundance, fill=Tax))
p <- p + geom_bar(position="stack", stat="identity")
p <- p + scale_x_discrete(labels=dfm$xlabel, breaks=dfm$Sample)

Expand All @@ -195,17 +205,14 @@ plot_composition <- function(x, sample.sort=NULL,
otu.sort <- otu.sort[otu.sort %in% rownames(otu)]
sample.sort <- sample.sort[sample.sort %in% colnames(otu)]

# Plot TODO: move it in here from netresponse and return the
# #ggplot object as well
# p <- plot_matrix(otu[otu.sort, sample.sort], type="twoway", mar=mar)
tmp <- melt(otu[otu.sort, sample.sort])
p <- heat(tmp, colnames(tmp)[[1]], colnames(tmp)[[2]],
colnames(tmp)[[3]])

} else if (plot.type == "lineplot") {

dfm <- dfm %>% arrange(OTU) # Show OTUs always in the same order
p <- ggplot(dfm, aes(x=Sample, y=Abundance, color=OTU, group = OTU))
dfm <- dfm %>% arrange(Tax) # Show Taxs always in the same order
p <- ggplot(dfm, aes(x=Sample, y=Abundance, color=Tax, group = Tax))
p <- p + geom_point()
p <- p + geom_line()
p <- p + scale_x_discrete(labels=dfm$xlabel, breaks=dfm$Sample)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -40,7 +40,7 @@ Tools for the exploration and analysis of microbiome profiling data sets, in par

See the package [tutorial](http://microbiome.github.io/microbiome/).

**Kindly cite** as follows: "Leo Lahti, Sudarshan Shetty [et al.](https://github.com/microbiome/microbiome/graphs/contributors) ([Bioconductor, 2017](https://bioconductor.org/packages/devel/bioc/html/microbiome.html)). Tools for microbiome analysis in R. Microbiome package version 1.5.10004. URL: [http://microbiome.github.com/microbiome](http://microbiome.github.com/microbiome). See also the relevant references listed in the manual page of each function.
**Kindly cite** as follows: "Leo Lahti, Sudarshan Shetty [et al.](https://github.com/microbiome/microbiome/graphs/contributors) ([Bioconductor, 2017](https://bioconductor.org/packages/devel/bioc/html/microbiome.html)). Tools for microbiome analysis in R. Microbiome package version 1.5.10006. URL: [http://microbiome.github.com/microbiome](http://microbiome.github.com/microbiome). See also the relevant references listed in the manual page of each function.

### Contribute

Expand Down
Binary file modified data/atlas1006.rda
Binary file not shown.
Binary file modified data/dietswap.rda
Binary file not shown.
Binary file modified data/peerj32.rda
Binary file not shown.
3 changes: 3 additions & 0 deletions inst/NEWS
@@ -1,4 +1,7 @@
Changes in version 1.5.2 (2018-11-20)
+ plot_composition function: new options for sample.sort and otu.sort
+ Added Phylum level to taxonomy tables in example data sets
* New function: dominant
+ The diversities function is now replaces by alpha function. The alpha is more
generic and can return also other alpha diversity indices.
+ plot_frequencies function now only returns the ggplot object
Expand Down
2 changes: 1 addition & 1 deletion inst/extras/build.sh
@@ -1,4 +1,4 @@
# https://support.rstudio.com/hc/en-us/articles/200626518-Customizing-Package-Build-Options
# https://support.rstudio.com/hc/en-us/articles/200626518-Customizing-Package-Bubuiild-Options
#/usr/bin/R CMD BATCH document.R
/usr/bin/R CMD build ../../ --resave-data #--no-examples --no-build-vignettes
/usr/bin/R CMD check microbiome_1.5.10003.tar.gz #--no-build-vignettes --no-examples
Expand Down
29 changes: 29 additions & 0 deletions man/default_colors.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 387dc86

Please sign in to comment.