ok

microbiome · Feb 16, 2022 · 1db2fdb · 1db2fdb
2 parents 2740451 + d2efd89
commit 1db2fdb
Show file tree

Hide file tree

Showing 15 changed files with 661 additions and 47 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,7 +3,7 @@ Type: Package
 Title: Microbiome Analytics
 Description: Utilities for microbiome analysis.
 Encoding: UTF-8
-Version: 1.17.4
+Version: 1.17.41
 Date: 2022-02-16
 Authors@R: c(
     person("Leo", "Lahti", email = "leo.lahti@iki.fi",
@@ -18,6 +18,7 @@ Depends:
     phyloseq,
     ggplot2
 Imports:
+    Biostrings,
     compositions,
     dplyr,
     reshape2,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,14 +1,18 @@
 # Generated by roxygen2: do not edit by hand
 
 export(abundances)
+export(add_besthit)
+export(add_refseq)
 export(aggregate_rare)
 export(aggregate_taxa)
 export(alpha)
 export(associate)
 export(baseline)
 export(bimodality)
 export(boxplot_abundance)
+export(boxplot_alpha)
 export(collapse_replicates)
+export(combine_otu_tax)
 export(core)
 export(core_abundance)
 export(core_members)
@@ -35,6 +39,7 @@ export(meta)
 export(multimodality)
 export(neat)
 export(neatsort)
+export(otu_tibble)
 export(overlap)
 export(plot_atlas)
 export(plot_composition)
@@ -47,6 +52,7 @@ export(plot_taxa_prevalence)
 export(plot_tipping)
 export(potential_analysis)
 export(prevalence)
+export(psmelt2)
 export(quiet)
 export(rare)
 export(rare_abundance)
@@ -60,15 +66,18 @@ export(readcount)
 export(remove_samples)
 export(remove_taxa)
 export(richness)
+export(sample_tibble)
 export(spreadplot)
 export(summarize_phyloseq)
+export(tax_tibble)
 export(taxa)
 export(time_normalize)
 export(time_sort)
 export(timesplit)
 export(top)
 export(top_taxa)
 export(transform)
+importFrom(Biostrings,DNAStringSet)
 importFrom(Rtsne,Rtsne)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,arrange)

diff --git a/NEWS b/NEWS
@@ -1,3 +1,14 @@
+Changes in version 1.17.2 (2022-02-15)
++ Bug fix error in transform when taxa_are_rows is FALSE
+
+Changes in version 1.17.2 (2022-02-03)
++ Merge microbiomeutilites functionality
+  * Convert phyloseq slots to tibbles
+  * Combine_otu_tax joins otu_table and tax_table
+  * Merged add_besthit a fine tuned version from format_to_besthit 
+  * Merged psmelt2 a fine tuned version from phy_to_ldf 
+  * Bug fix in transform method alr 
+
 Changes in version 1.17.2 (2022-01-15)
 + alr transformation added
 

diff --git a/R/add_besthit.R b/R/add_besthit.R
@@ -0,0 +1,79 @@
+#' @title Adds \code{best_hist} to a \code{\link{phyloseq-class}} Object
+#' @description Add the lowest classification for an OTU or ASV.
+#' @details Most commonly it is observed that taxa names are either OTU ids or 
+#'          ASV ids. In such cases it is useful to know the taxonomic identity.
+#'          For this purpose, \code{best_hist} identifies the best available 
+#'          taxonomic identity and adds it to the OTU ids or ASV ids. If genus 
+#'          and species columns are present in input the function internally 
+#'          combines the names.  
+#' @param x \code{\link{phyloseq-class}} object
+#' @param sep separator e.g. ASV161:Roseburia
+#' @return  \code{\link{phyloseq-class}} object \code{\link{phyloseq-class}}
+#' @export
+#' @examples
+#' \dontrun{
+#' # Example data
+#' library(microbiome)
+#' data(dietswap)
+#' p0.f <- add_besthit(atlas1006, sep=":")
+#' }
+#' @author Contact: Sudarshan A. Shetty \email{sudarshanshetty9@@gmail.com}
+#' @keywords utilities
+
+add_besthit <- function(x, sep=":"){
+
+  Class<-Domain<- Family<- Genus<- Genus.Species<- NULL
+  Order<- Phylum<- Species<-NULL
+
+  x.nw <- x
+  if(length(rank_names(x.nw))== 6){
+    colnames(tax_table(x.nw)) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus")
+  }
+  if(length(rank_names(x.nw))==7){
+    colnames(tax_table(x.nw)) <- c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species")
+  }
+
+  tax.tib <- .get_taxa_tib_unite(x)
+
+  tax.tib <- tax.tib %>% 
+    dplyr::mutate(Domain =ifelse(is.na(Domain), "Unclassifed", Domain),
+                  Phylum =ifelse(is.na(Phylum), Domain, Phylum),
+                  Class =ifelse(is.na(Class), Phylum, Class),
+                  Order =ifelse(is.na(Order), Class, Order),
+                  Family =ifelse(is.na(Family), Order, Family),
+                  Genus =ifelse(is.na(Genus), Family, Genus)) 
+  if(length(rank_names(x))==7){
+    tax.tib <- tax.tib %>%
+      dplyr::mutate(Species =ifelse(is.na(Species), Genus, Species))
+  }
+
+  best_hit <- paste0(taxa_names(x), sep,tax.tib[,ncol(tax.tib)])
+
+  taxa_names(x) <- best_hit
+  return(x)
+}
+
+
+
+.get_taxa_tib_unite <- function(x){
+
+  Genus<- Species <- Genus.Species<- NULL
+  tax.tib <- tax_table(x) %>% 
+    as.matrix() %>% 
+    as.data.frame() 
+
+  #n.rk <- length(rank_names(x))
+  if(any(rank_names(x) == "Species") && any(rank_names(x) == "Genus")){
+
+    tax.tib <- tax.tib %>% 
+      dplyr::mutate(Genus.Species = ifelse(!is.na(Species), 
+                                           paste0(Genus, ".", Species), Species)) %>%
+      dplyr::select(-Species) %>%
+      dplyr::rename(Species = Genus.Species)
+
+  }
+  return(tax.tib)
+}
+
+
+
diff --git a/R/add_refseq.R b/R/add_refseq.R
@@ -0,0 +1,38 @@
+#' @title Add \code{refseq} Slot for \code{dada2} based \code{phyloseq} Object
+#' @description Utility to add refseq slot for \code{dada2} based 
+#'              \code{phyloseq} Object. Here, the taxa_names which are unique 
+#'              sequences, are stored in \code{refseq} slot of \code{phyloseq}.
+#'              Sequence ids are converted to ids using tag option.  
+#' @param x \code{\link{phyloseq-class}} object with sequences as rownames.
+#' @param tag Provide name for Ids, Default="ASV".
+#' @return \code{\link{phyloseq-class}} object 
+#' @examples
+#' 
+#' # ps <- add_refseq(p0,tag="ASV")
+#' # ps
+#' 
+#' @export
+#' @author Contact: Sudarshan A. Shetty \email{sudarshanshetty9@@gmail.com}
+#' @keywords utilities
+#' @importFrom Biostrings DNAStringSet
+add_refseq <- function(x, tag="ASV"){
+
+    if (class(x)!="phyloseq"){
+        stop("Input is not an object of phyloseq class")
+    }
+
+    nucl <- Biostrings::DNAStringSet(taxa_names(x))
+    names(nucl) <- taxa_names(x)
+    x <- merge_phyloseq(x, nucl)
+
+    rm(nucl)
+
+    if(is.na(tag) || is.null(tag)){
+        taxa_names(x) <- paste0("taxa", seq(ntaxa(x)))
+        return(x)
+    } else{
+        taxa_names(x) <- paste0(tag, seq(ntaxa(x)))
+        return(x)
+    }
+
+}
diff --git a/R/boxplot_alpha.R b/R/boxplot_alpha.R
@@ -0,0 +1,113 @@
+#' @title Alpha Boxplot
+#' @description Plot alpha index.
+#' @param x \code{\link{phyloseq-class}} object
+#' @param x_var Metadata variable to map to the horizontal axis.
+#' @param index Alpha index to plot. See function \code{alpha}. 
+#' @param zeroes Include zero counts in diversity estimation. Default is TRUE
+#' @param violin Use violin version of the boxplot
+#' @param na.rm Remove NAs
+#' @param show.points Include data points in the figure
+#' @param element.alpha Alpha value for plot elements. Controls the 
+#'                      transparency of plots elements.
+#' @param element.width Width value for plot elements. Controls the 
+#'                      transparency of plots elements.
+#' @param fill.colors Specify a list of colors passed on to ggplot2 
+#'                    \code{scale_fill_manual}
+#' @param outlier.fill If using boxplot and and points together how to deal with 
+#'                     outliers. See ggplot2 outlier.fill argument in 
+#'                     geom_ elements.  
+#' @details A simple wrapper to visualize alpha diversity index.
+#' @return A \code{\link{ggplot}} plot object
+#' @export
+#' @examples
+#' data("dietswap")
+#' p <- boxplot_alpha(dietswap, x_var = "sex", index="observed", violin=FALSE, 
+#'                    na.rm=FALSE, show.points=TRUE, zeroes=TRUE, 
+#'                    element.alpha=0.5, element.width=0.2, 
+#'                    fill.colors= c("steelblue", "firebrick"),
+#'                    outlier.fill="white")
+#' p
+#' 
+#' @keywords utilities
+boxplot_alpha <- function(x,
+                          x_var = NULL,
+                          index=NULL, 
+                          violin=FALSE, 
+                          na.rm=FALSE, 
+                          show.points=TRUE,
+                          zeroes=TRUE,
+                          element.alpha=0.5,
+                          element.width=0.2,
+                          fill.colors= NA,
+                          outlier.fill="grey50"){
+
+    if(length(index) >1){
+        stop("Please provide a single alpha index, e.g. index='shannon'")
+    }
+
+    d <- suppressMessages(alpha(x, index=index,zeroes=zeroes) )
+    meta.df <-  cbind(meta(x),d) %>% 
+        tibble::rownames_to_column(".sampleid")
+
+    index.name <- colnames(meta.df)[ncol(meta.df)]
+
+    if(is.null(x_var)){
+        # Visualize example data with a boxplot
+        p <- ggplot(meta.df, aes_string(".sampleid", index.name)) +
+            geom_point() + 
+            theme(axis.text.x = element_text(angle = 90)) +
+            theme_bw()
+        return(p)
+    }
+
+    if(!any(phyloseq::sample_variables(x) %in% x_var)){
+      stop("'x_var' not available in `sample_data`")  
+    }
+
+    if(!is.na(fill.colors)[1]){
+        .check.colors(meta.df, x_var, fill.colors)
+    }
+
+    p <- ggplot(meta.df, aes_string(x_var, index.name, fill=x_var)) 
+
+
+    if (show.points) {
+        p <- p + geom_point(size=2,
+                            position=position_jitter(width=element.width), 
+                            alpha=element.alpha,
+                            shape=21)
+    }
+
+    # Box or Violin plot ?
+    if (show.points & !violin) {
+        p <- p + geom_boxplot(width=element.width,
+                              alpha=element.alpha,
+                              outlier.colour = outlier.fill,
+                              outlier.fill = outlier.fill,
+                              na.rm = na.rm)
+    } else {
+        p <- p + geom_violin(width=element.width,
+                             alpha=element.alpha,
+                             na.rm = na.rm)
+    }  
+
+    if(!is.na(fill.colors)[1]){
+        p <- p + ggplot2::scale_fill_manual(values = fill.colors) +
+            theme_bw()
+    }
+    p <- p + theme_bw()
+
+    return(p)
+}
+
+
+.check.colors <- function(df, x_var=NULL, fill.colors=NULL){
+
+    pl.vars = unique(df[,x_var])
+
+    if(length(pl.vars) != length(fill.colors))
+        stop("No. of fill.colors not equal to number of unique 'x_var'")
+}
+
+
+
diff --git a/R/psmelt2.R b/R/psmelt2.R
@@ -0,0 +1,52 @@
+#' @title Convert \code{\link{phyloseq-class}} object to long data format
+#' @description An alternative to psmelt function from 
+#'              \code{\link{phyloseq-class}} object.
+#' @param x \code{\link{phyloseq-class}} object
+#' @param x \code{\link{phyloseq-class}} object
+#' @param sample.column A single character string specifying name
+#'                      of the column to hold sample names.
+#' @param feature.column A single character string specifying name
+#'                      of the column to hold OTU or ASV names.
+#'                      
+#' @examples
+#' data("dietswap")
+#' ps.melt <- psmelt2(dietswap, sample.column="SampleID", 
+#'                    feature.column="Feature") 
+#' head(ps.melt)                                         
+#' @return A \code{tibble} in long format
+#' @author Contact: Sudarshan A. Shetty \email{sudarshanshetty9@@gmail.com}
+#' @keywords utilities
+#' @export                     
+psmelt2 <- function(x, sample.column=NULL, feature.column=NULL){
+
+    if (class(x)!="phyloseq"){
+        stop("Input is not an object of phyloseq class")
+    }
+
+    if(is.null(sample.column) && any(sample_names(x) %in% sample.column)){
+
+        sample.column = "SampleID"
+
+    }
+
+    if(is.null(sample.column) && !any(sample_names(x) %in% sample.column)){
+
+        sample.column = ".SampleID"
+
+    }
+
+    if(is.null(feature.column)){
+
+        feature.column = "FeatureID"
+
+    }
+
+    otu_tib <- otu_tibble(x, column.id = feature.column)
+    tax_tib <- tax_tibble(x, column.id = feature.column)
+    sam_tib <- sample_tibble(x, column.id = sample.column)
+    otu_tib.m <- otu_tib %>% 
+        tidyr::pivot_longer(cols = sample_names(x), names_to = sample.column) %>% 
+        dplyr::left_join(tax_tib, by=feature.column) %>% 
+        dplyr::left_join(sam_tib, by=sample.column)
+    return(otu_tib.m)
+}