Skip to content

Latest commit

 

History

History
5523 lines (5198 loc) · 111 KB

BRC_growth_rate_microbiome.md

File metadata and controls

5523 lines (5198 loc) · 111 KB

How fast does biological rock crust grow?

20 April, 2021

Setting general parameters:

set.seed(15102020)
bootstraps <- 1000
data_path <- "./Data/"
Proj_name <- "BRC_growth_rate"

Browns <- RColorBrewer::brewer.pal(n = 9, "YlOrBr")[9:6]
Greens <- RColorBrewer::brewer.pal(n = 9, "YlGn")[9:6]
Blues <- RColorBrewer::brewer.pal(n = 9, "YlGnBu")[9:6]
Gradient.colours <- c(Browns[1], Greens[1], Browns[2], Greens[2], Browns[3], Greens[3], Browns[4], Greens[4], Blues)

Description

This script reproduces all sequence analysis steps and plots included in the paper plus some additional exploratory analyses.

Load data

OTUmat <- t(read.csv(paste0(data_path, "Shivta_site_otuTab2.txt"), header = TRUE, row.names = 1))
sort.order <- as.numeric(gsub("OTU([0-9]+)", "\\1", colnames( OTUmat )))
OTUmat <- OTUmat[, order(sort.order )]

Metadata <- read.csv(paste0(data_path, "Shivta_metadata.csv"), row.names = 1, header = TRUE)

read_csv(paste0(data_path, "Shivta_metadata.csv"),
                     trim_ws = TRUE) %>%
  mutate_at(
    c(
      "Rock.type",
      "Location"
    ), 
    ~(factor(.))
  ) %>% 
  column_to_rownames("Sample.code") ->
  Metadata

row.names(OTUmat) <- gsub("(.*)Nimrod[0-9]+|Osnat[0-9]+", "\\1", row.names( OTUmat))
Metadata <- Metadata[order(row.names(Metadata)), ]
OTUmat <- OTUmat[order(row.names(OTUmat)), ]
# calculate sample size
Metadata$Library.size = rowSums(OTUmat)
Metadata$Location.rock <- with(Metadata, Location:Rock.type)

# Load taxonomy data
tax.file <- "Shivta_site_silva.nrv119.taxonomy"
Taxonomy <- read.table(paste0(data_path, tax.file), stringsAsFactors = FALSE) # read taxonomy file

# count how many ';' in each cell and add up to 6
for (i in 1:nrow(Taxonomy)){
  semicolons <- length(gregexpr(";", Taxonomy$V2[i] )[[1]])
  if (semicolons < 6){
    x <- paste0( rep("Unclassified;", 6 - semicolons ), collapse = "")
    Taxonomy$V2[i] <- paste0( Taxonomy$V2[i], x, sep = "")
  }
}
# split taxonomy to columns
do.call( "rbind", strsplit( Taxonomy$V1, ";", fixed = TRUE)) %>% 
  gsub( "size=([0-9]+)", "\\1", .) %>%
  data.frame( ., do.call( "rbind", strsplit( Taxonomy$V2, ";", fixed = TRUE)), stringsAsFactors = F) %>% 
  apply(., 2, function(x) gsub( "\\(.*\\)", "", x)) %>% 
  replace(., . == "unclassified", "Unclassified") -> 
  Taxonomy
colnames( Taxonomy ) <- c( "OTU", "Frequency", "Domain", "Phylum", "Class", "Order", "Family", "Genus" )
# rownames(Taxonomy) <- colnames(Rock_weathering_OTUmat)
rownames(Taxonomy) <- Taxonomy[, 1]

Tree_IQ <- read_tree(paste0(data_path, "Shivta_site_otuReps.filtered.align.treefile"))

# generate phyloseq object
Ps_obj <- phyloseq(otu_table(OTUmat, taxa_are_rows = FALSE),
                   tax_table(Taxonomy[, -c(1, 2)]),
                   sample_data(Metadata),
                   phy_tree(Tree_IQ)
)
# Reorder factors for plotting
sample_data(Ps_obj)$Location %<>% fct_relevel("Slope", "City")

Remove un- and mis-classified sequences, chloroplasts and mitochondria

domains2remove <- c("", "Archaea", "Eukaryota", "Unclassified")
classes2remove <- c("Chloroplast")
families2remove <- c("Mitochondria")
Ps_obj_filt <- subset_taxa(Ps_obj, !is.na(Phylum) &
                        !Domain %in% domains2remove &
                      !Class %in% classes2remove &
                      !Family %in% families2remove)

Inspect library size and number of OTU

Ps_obj_df <-
  as.data.frame(sample_data(Ps_obj_filt)) # Put sample_data into a ggplot-friendly data.frame
Ps_obj_df <- Ps_obj_df[order(Ps_obj_df$Library.size), ]
Ps_obj_df$Index <- seq(nrow(Ps_obj_df))
ggplot(data = Ps_obj_df, 
       aes(x = Index, y = Library.size, color = Location.rock)) + 
  geom_point2(size = 4) + 
  scale_colour_manual(values = ggpomological:::pomological_palette[c(2, 1, 9, 3)], name = "Location.rock")

summary(sample_sums(Ps_obj_filt))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   23966   44989   53000   52663   61348   77459
summary(taxa_sums(Ps_obj_filt))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       3      70     198    1852     771   76180

Explore the prevalence of different taxa in the database

prevdf <- apply(X = otu_table(Ps_obj_filt),
                 MARGIN = ifelse(taxa_are_rows(Ps_obj_filt), yes = 1, no = 2),
                 FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf <- data.frame(Prevalence = prevdf,
                      TotalAbundance = taxa_sums(Ps_obj_filt),
                      tax_table(Ps_obj_filt))
prevdf %>%
  group_by(Phylum) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_phylum_summary

Prevalence_phylum_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Phylum Mean prevalence Sum prevalence
Acidobacteria 11.7 152
Actinobacteria 16.3 3414
Aquificae 4.0 12
Armatimonadetes 7.6 76
Bacteroidetes 11.8 1027
Candidate\_division\_KB1 8.0 8
Candidate\_division\_OD1 4.0 4
Candidate\_division\_OP11 4.5 9
Candidate\_division\_TM7 7.9 158
Chloroflexi 11.0 1124
Cyanobacteria 14.9 506
Deinococcus-Thermus 15.8 174
Firmicutes 13.4 214
Gemmatimonadetes 11.2 617
Nitrospirae 6.0 12
Planctomycetes 10.8 140
Proteobacteria 14.4 1817
Tenericutes 3.0 3
Verrucomicrobia 23.0 23
WCHB1-60 7.0 28
prevdf %>%
  group_by(Order) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_Order_summary

Prevalence_Order_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Order Mean prevalence Sum prevalence
AKIW781 9.4 198
AKYG1722 10.7 75
AT425-EubC11\_terrestrial\_group 10.2 378
Acidimicrobiales 14.7 368
Alteromonadales 12.0 12
Aquificales 4.0 12
Ardenticatenales 8.0 16
B103G10 7.0 7
BD2-11\_terrestrial\_group 5.0 5
Bacillales 13.7 164
Bacteroidales 17.0 17
Bdellovibrionales 8.0 40
Burkholderiales 18.5 204
C0119 5.0 5
Caldilineales 18.0 18
Campylobacterales 6.0 6
Caulobacterales 23.7 71
Chromatiales 20.0 20
Chthoniobacterales 23.0 23
Clostridiales 6.5 13
Corynebacteriales 8.0 40
Cytophagales 11.8 695
Deinococcales 15.1 151
Desulfovibrionales 5.0 5
E6aD10 11.0 11
EMP-G18 3.0 3
Enterobacteriales 22.0 44
Euzebyales 13.5 202
Flavobacteriales 12.7 38
Frankiales 20.9 356
Gaiellales 10.4 94
Gemmatimonadales 14.3 214
JG30-KF-CM45 13.0 468
Kineosporiales 20.2 121
Lactobacillales 18.5 37
Micrococcales 18.0 198
Micromonosporales 19.5 39
Myxococcales 10.2 51
Nitriliruptorales 9.7 58
Nitrosomonadales 11.5 23
Nitrospirales 6.0 12
Oceanospirillales 16.0 16
Orbales 7.0 7
Order\_II 4.8 24
Order\_III 8.7 26
PAUC43f\_marine\_benthic\_group 2.0 2
Planctomycetales 9.2 74
Propionibacteriales 14.5 318
Pseudomonadales 14.7 103
Pseudonocardiales 18.0 306
Rhizobiales 16.1 386
Rhodobacterales 14.8 163
Rhodospirillales 11.8 271
Rickettsiales 11.2 146
Rubrobacterales 22.3 491
S0134\_terrestrial\_group 18.0 18
SBYG-4553 10.0 10
Solirubrobacterales 17.2 638
Sphaerobacterales 3.3 10
Sphingobacteriales 14.2 227
Sphingomonadales 19.6 196
Streptomycetales 24.5 49
Streptosporangiales 3.0 3
Subgroup\_3 15.0 15
Subgroup\_4 10.6 85
Subgroup\_6 13.0 52
SubsectionI 13.0 13
SubsectionII 15.8 315
SubsectionIII 14.8 118
SubsectionIV 13.5 54
TRA3-20 14.0 14
Thermales 23.0 23
Thermophilales 15.0 30
Unclassified 9.2 642
Unknown\_Order 9.8 98
Vampirovibrionales 6.0 6
WD2101\_soil\_group 13.0 39
Xanthomonadales 18.0 18

Based on that we will remove all phyla with a prevalence of under 8

Prevalence_phylum_summary %>% 
  filter(`Sum prevalence` < 8) %>% 
  dplyr::select(Phylum) %>% 
  map(as.character) %>% 
  unlist() ->
  filterPhyla
Ps_obj_filt %<>% subset_taxa(!Phylum %in% filterPhyla)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 767 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 767 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
print(Ps_obj_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 709 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 709 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 709 tips and 707 internal nodes ]
## taxa are columns

Plot general prevalence features of the phyla

# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_filt, "Phylum"))
ggplot(prevdf_phylum_filt,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Phylum)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point2(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Phylum) + theme(legend.position = "none")

Plot general prevalence features of the top 20 orders

# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_filt, "Order"))
# grab the top 30 most abundant orders
prevdf_order_filt %>% 
  group_by(Order) %>%
  summarise(Combined.abundance = sum(TotalAbundance)) %>% 
  arrange(desc(Combined.abundance)) %>% 
  .[1:30, "Order"]  ->
  Orders2plot
prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)
ggplot(prevdf_order_filt2,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Order)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point2(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Order) + theme(legend.position = "none")

Unsupervised filtering by prevalence

We will remove all sequences which appear in less than 10% of the samples

# Define prevalence threshold as 10% of total samples
prevalenceThreshold <- 0.1 * nsamples(Ps_obj_filt)
prevalenceThreshold
## [1] 2.5
# Execute prevalence filter, using `prune_taxa()` function
keepTaxa <-
  row.names(prevdf_phylum_filt)[(prevdf_phylum_filt$Prevalence >= prevalenceThreshold)]
Ps_obj_filt  %<>%  prune_taxa(keepTaxa, .)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 767 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 767 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns
print(Ps_obj_filt)
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 694 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 694 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 694 tips and 692 internal nodes ]
## taxa are columns

This removed 73 or 10% of the sequences.

Exploring the dataset features

First let’s look at the count data distribution after filtering:

PlotLibDist(Ps_obj_filt)

sample_data(Ps_obj_filt) %>% 
  as_tibble() %>% 
  dplyr::select(Sample.name, Library.size) %>% 
  as(., "data.frame") %>% 
  kable(.) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Sample.name Library.size
City Chalk 2 49453
City Chalk 3 54155
City Chalk 4 44943
City Chalk 5 58835
City Chalk 6 77459
City Limestone 1 36122
City Limestone 2 52816
City Limestone 3 48378
City Limestone 4 47108
City Limestone 5 61346
City Limestone 6 54505
Slope Limestone 1 75578
Slope Chalk 1 51651
Slope Chalk 2 45197
Slope Chalk 3 33625
Slope Chalk 4 70886
Slope Chalk 5 71458
Slope Chalk 6 58959
Slope Chalk 7 36756
City Chalk 1 23962
Slope Limestone 2 62456
Slope Limestone 3 36628
Slope Limestone 4 57283
Slope Limestone 5 64509
Slope Limestone 6 41564

The figure and table indicate only a small deviation in the number of reads per samples.

(mod1 <- adonis(
  otu_table(Ps_obj_filt) ~ Library.size,
  data = as(sample_data(Ps_obj_filt), "data.frame"), 
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt) ~ Library.size, data = as(sample_data(Ps_obj_filt),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## Library.size  1    0.4246 0.42456  1.3337 0.05481 0.2192
## Residuals    23    7.3218 0.31834         0.94519       
## Total        24    7.7464                 1.00000
PlotReadHist(as(otu_table(Ps_obj_filt), "matrix"))

notAllZero <- (rowSums(t(otu_table(Ps_obj_filt))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt))[notAllZero, ] + 1)))

The difference in library sizes is low and its effect on the community composition is minimal. We’ll use the GMPR method for library size normalisation (Chen and Chen 2017)

Ps_obj_filt_GMPR <- Ps_obj_filt
Ps_obj_filt %>%
  otu_table(.) %>%
  t() %>%
  as(., "matrix") %>%
  GMPR() ->
  GMPR_factors
## Begin GMPR size factor calculation ...
## Completed!
## Please watch for the samples with limited sharing with other samples based on NSS! They may be outliers!
Ps_obj_filt %>%
  otu_table(.) %>%
  t() %*% diag(1 / GMPR_factors$gmpr) %>%
  t() %>%
  as.data.frame(., row.names = sample_names(Ps_obj_filt)) %>%
  otu_table(., taxa_are_rows = FALSE) ->
  otu_table(Ps_obj_filt_GMPR)
sample_data(Ps_obj_filt_GMPR)$Library.size <- sample_sums(Ps_obj_filt)
adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Library.size,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
)
## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Library.size,      data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999,      method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## Library.size  1    0.4246 0.42456  1.3337 0.05481  0.228
## Residuals    23    7.3218 0.31834         0.94519       
## Total        24    7.7464                 1.00000
PlotLibDist(Ps_obj_filt_GMPR)

Did it improve anything?

PlotReadHist(as(otu_table(Ps_obj_filt_GMPR), "matrix"))

notAllZero <- (rowSums(t(otu_table(Ps_obj_filt_GMPR))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt_GMPR))[notAllZero, ] + 1)))

Alpha diversity

Calculate and plot alpha diversity metrics

We do that by simulating 1000 rarefaction events and calculating the metrics each time. Then, the result is averaged.

rarefaction.mat <- matrix(0, nrow = nsamples(Ps_obj_filt), ncol = bootstraps)
rownames(rarefaction.mat) <- sample_names(Ps_obj_filt)
rich.ests <- list(S.obs = rarefaction.mat, S.chao1 = rarefaction.mat, se.chao1 = rarefaction.mat,
                   S.ACE = rarefaction.mat, se.ACE = rarefaction.mat)
for (i in seq(bootstraps)) {
  sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
  for (j in seq(length(rich.ests))) {
    rich.ests[[j]][, i] <- t(estimateR(sub.OTUmat))[, j]
  }
}
Richness <- data.frame(row.names = row.names(rich.ests[[1]]))
for (i in c(1, seq(2, length(rich.ests), 2))) {
  S <- apply(rich.ests[[i]], 1, mean)
  if (i == 1) { 
    se <- apply(rich.ests[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
    } else se <- apply(rich.ests[[i + 1]], 1, mean)
  Richness <- cbind(Richness, S, se)
}
colnames(Richness) <- c("S.obs", "S.obs.se", "S.chao1", "S.chao1.se", "S.ACE", "S.ACE.se")
saveRDS(Richness, file = paste0("./Results/", Proj_name, "_richness.RDS"))
write.csv(Richness, file = paste0("./Results/", Proj_name, "_richness.csv"))
ses <- grep("\\.se", colnames(Richness))
Richness[, ses] %>% 
  gather(key = "est.se") -> se.dat
Richness[, -unique(ses)] %>% 
  gather(key = "est") -> mean.dat
n <- length(unique(mean.dat$est))
# diversity indices
diversity.inds <- list(Shannon = rarefaction.mat, inv.simpson = rarefaction.mat, BP = rarefaction.mat)
for (i in seq(bootstraps)) {
  sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
  diversity.inds$Shannon[, i] <- diversityresult(sub.OTUmat, index = 'Shannon', method = 'each site', digits = 3)[, 1]
  diversity.inds$inv.simpson[, i] <- diversityresult(sub.OTUmat, index = 'inverseSimpson', method = 'each site', digits = 3)[, 1]
  diversity.inds$BP[, i] <- diversityresult(sub.OTUmat, index = 'Berger', method = 'each site', digits = 3)[, 1]
}
Diversity <- data.frame(row.names = row.names(diversity.inds[[1]]))
for (i in seq(length(diversity.inds))) {
  S <- apply(diversity.inds[[i]], 1, mean)
  se <- apply(diversity.inds[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
  Diversity <- cbind(Diversity, S, se)
}
colnames(Diversity) <- c("Shannon", "Shannon.se", "Inv.simpson", "Inv.simpson.se", "BP", "BP.se")
ses <- grep("\\.se", colnames(Diversity))
Diversity[, ses] %>% gather(key = "est.se") -> se.dat
Diversity[, -unique(ses)] %>% gather(key = "est") -> mean.dat
saveRDS(Diversity, file = paste0("./Results/", Proj_name, "_diversity.RDS"))
write.csv(Diversity, file = paste0("./Results/", Proj_name, "_diversity.csv"))

Test the differences in alpha diversity.

# make combined richness diversity
Richness_Diversity <- cbind(Richness, Diversity)
ses <- grep("\\.se", colnames(Richness_Diversity))
Richness_Diversity[, ses] %>% 
  gather(key = "est.se") -> 
  se.dat
Richness_Diversity[, -unique(ses)] %>% 
  gather(key = "Metric", 
         value = "Estimate") -> 
  mean.dat
Richness_Diversity_long <-
  cbind(
    Sample = rep(rownames(Richness_Diversity), times = length(unique(mean.dat$Metric))),
    mean.dat,
    lerr = mean.dat$Estimate - se.dat$value,
    herr = mean.dat$Estimate + se.dat$value
  )
Richness_Diversity_long$Metric <-
  factor(
    Richness_Diversity_long$Metric,
    levels = c("S.obs", "S.chao1", "S.ACE", "Shannon", "Inv.simpson", "BP"),
    labels = c("S obs.", "Chao1", "ACE", "Shannon", "Inv. Simpson" , "Berger Parker")
  )
Richness_Diversity_long %<>%
  cbind(., 
        sample_data(Ps_obj_filt))

# S Obs
(mod_obsS <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "S obs.")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares     67.75    647.17           10123.07  62200.46
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 54.42358
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                    Sum Sq Df  F value    Pr(>F)    
## (Intercept)        588166  1 198.5755 3.566e-12 ***
## Location             3993  1   1.3480   0.25866    
## Rock.type            7767  1   2.6224   0.12028    
## Location:Rock.type  10123  1   3.4177   0.07863 .  
## Residuals           62200 21                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##          
## 310.9173 
## 
##  Location 
##     Slope City
##       312  309
## rep    13   12
## 
##  Rock.type 
##     Chalk Limestone
##       306       316
## rep    13        12
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 290   339      
##    rep     7     6      
##    City  325   293      
##    rep     6     6

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares     67.75    647.17           10123.07  62200.46
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 54.42358
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_obsS,
                   ~ Location : Rock.type)
summary(marginal)
Location Rock.type emmean SE df lower.CL upper.CL
Slope Chalk 289.8684 20.57018 21 247.0904 332.6465
City Chalk 325.0227 22.21834 21 278.8171 371.2282
Slope Limestone 338.9012 22.21834 21 292.6956 385.1067
City Limestone 293.3852 22.21834 21 247.1796 339.5907
contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")
##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk           -35.15 30.3 21 -1.161  0.6571 
##  Slope Chalk - Slope Limestone      -49.03 30.3 21 -1.619  0.3897 
##  Slope Chalk - City Limestone        -3.52 30.3 21 -0.116  0.9994 
##  City Chalk - Slope Limestone       -13.88 31.4 21 -0.442  0.9705 
##  City Chalk - City Limestone         31.64 31.4 21  1.007  0.7472 
##  Slope Limestone - City Limestone    45.52 31.4 21  1.449  0.4847 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
(obsS_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART
Location Rock.type emmean SE df lower.CL upper.CL .group
1 Slope Chalk 289.8684 20.57018 21 233.8552 345.8817 a
4 City Limestone 293.3852 22.21834 21 232.8840 353.8864 a
2 City Chalk 325.0227 22.21834 21 264.5215 385.5239 a
3 Slope Limestone 338.9012 22.21834 21 278.4000 399.4024 a
(mod_obsS %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_obsS_ANOVA)
Df Sum Sq Mean Sq F value Pr(>F) Part Eta Sq
Location 1 67.7481 67.7481 0.0228730 0.8812313 0.0009276
Rock.type 1 647.1708 647.1708 0.2184966 0.6450019 0.0088607
Location:Rock.type 1 10123.0692 10123.0692 3.4177313 0.0786297 0.1385992
Residuals 21 62200.4587 2961.9266 NA NA 0.8516126
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_obsS, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

# Shannon
(mod_Shannon <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Shannon")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares  1.325063  0.093952           0.262865  4.794581
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 0.4778215
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                     Sum Sq Df   F value  Pr(>F)    
## (Intercept)        308.927  1 1353.0847 < 2e-16 ***
## Location             1.393  1    6.1011 0.02216 *  
## Rock.type            0.083  1    0.3616 0.55407    
## Location:Rock.type   0.263  1    1.1513 0.29545    
## Residuals            4.795 21                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##          
## 3.526152 
## 
##  Location 
##     Slope  City
##      3.75  3.29
## rep 13.00 12.00
## 
##  Rock.type 
##     Chalk Limestone
##      3.47      3.59
## rep 13.00     12.00
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 3.60  3.92     
##    rep   7.00  6.00     
##    City  3.33  3.24     
##    rep   6.00  6.00

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares  1.325063  0.093952           0.262865  4.794581
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 0.4778215
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_Shannon,
                   ~ Location : Rock.type)
summary(marginal)
Location Rock.type emmean SE df lower.CL upper.CL
Slope Chalk 3.599318 0.1805996 21 3.223740 3.974895
City Chalk 3.331707 0.1950698 21 2.926037 3.737377
Slope Limestone 3.920039 0.1950698 21 3.514369 4.325709
City Limestone 3.241351 0.1950698 21 2.835681 3.647020
contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")
##  contrast                         estimate    SE df t.ratio p.value
##  Slope Chalk - City Chalk           0.2676 0.266 21  1.007  0.7473 
##  Slope Chalk - Slope Limestone     -0.3207 0.266 21 -1.206  0.6297 
##  Slope Chalk - City Limestone       0.3580 0.266 21  1.347  0.5450 
##  City Chalk - Slope Limestone      -0.5883 0.276 21 -2.133  0.1754 
##  City Chalk - City Limestone        0.0904 0.276 21  0.328  0.9875 
##  Slope Limestone - City Limestone   0.6787 0.276 21  2.460  0.0962 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
(Shannon_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART
Location Rock.type emmean SE df lower.CL upper.CL .group
4 City Limestone 3.241351 0.1950698 21 2.710170 3.772532 a
2 City Chalk 3.331707 0.1950698 21 2.800526 3.862888 a
1 Slope Chalk 3.599318 0.1805996 21 3.107540 4.091096 a
3 Slope Limestone 3.920039 0.1950698 21 3.388858 4.451220 a
(mod_Shannon %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_Shannon_ANOVA)
Df Sum Sq Mean Sq F value Pr(>F) Part Eta Sq
Location 1 1.3250630 1.3250630 5.8037028 0.0252549 0.2045968
Rock.type 1 0.0939515 0.0939515 0.4115025 0.5281480 0.0145066
Location:Rock.type 1 0.2628651 0.2628651 1.1513345 0.2954460 0.0405878
Residuals 21 4.7945811 0.2283134 NA NA 0.7403088
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_Shannon, Location ~ Rock.type)

# ACE
(mod_ACE <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "ACE")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares     10.12   1969.32            8755.53  68809.75
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 57.24207
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                     Sum Sq Df   F value Pr(>F)    
## (Intercept)        4281315  1 1306.6116 <2e-16 ***
## Location                 4  1    0.0012 0.9725    
## Rock.type             1671  1    0.5100 0.4830    
## Location:Rock.type    8756  1    2.6721 0.1170    
## Residuals            68810 21                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##          
## 413.6874 
## 
##  Location 
##     Slope City
##       413  414
## rep    13   12
## 
##  Rock.type 
##     Chalk Limestone
##       405       423
## rep    13        12
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 388   442      
##    rep     7     6      
##    City  425   404      
##    rep     6     6

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares     10.12   1969.32            8755.53  68809.75
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 57.24207
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_ACE,
                   ~ Location : Rock.type)
summary(marginal)
Location Rock.type emmean SE df lower.CL upper.CL
Slope Chalk 388.1990 21.63547 21 343.2056 433.1924
City Chalk 424.9111 23.36898 21 376.3126 473.5095
Slope Limestone 442.0997 23.36898 21 393.5012 490.6981
City Limestone 403.7881 23.36898 21 355.1896 452.3865
contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")
##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk            -36.7 31.8 21 -1.153  0.6620 
##  Slope Chalk - Slope Limestone       -53.9 31.8 21 -1.693  0.3522 
##  Slope Chalk - City Limestone        -15.6 31.8 21 -0.490  0.9606 
##  City Chalk - Slope Limestone        -17.2 33.0 21 -0.520  0.9533 
##  City Chalk - City Limestone          21.1 33.0 21  0.639  0.9181 
##  Slope Limestone - City Limestone     38.3 33.0 21  1.159  0.6581 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
(ACE_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART
Location Rock.type emmean SE df lower.CL upper.CL .group
1 Slope Chalk 388.1990 21.63547 21 329.2849 447.1130 a
4 City Limestone 403.7881 23.36898 21 340.1536 467.4225 a
2 City Chalk 424.9111 23.36898 21 361.2766 488.5455 a
3 Slope Limestone 442.0997 23.36898 21 378.4653 505.7341 a
(mod_ACE %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_ACE_ANOVA)
Df Sum Sq Mean Sq F value Pr(>F) Part Eta Sq
Location 1 10.11758 10.11758 0.0030878 0.9562114 0.0001272
Rock.type 1 1969.32310 1969.32310 0.6010164 0.4468331 0.0247574
Location:Rock.type 1 8755.52902 8755.52902 2.6720940 0.1170242 0.1100705
Residuals 21 68809.74549 3276.65455 NA NA 0.8650448
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_ACE, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

#Inv. Simpson
(mod_InvSim <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Inv. Simpson")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                  Location Rock.type Location:Rock.type Residuals
## Sum of Squares   507.6609  107.4251           117.0279 1059.8590
## Deg. of Freedom         1         1                  1        21
## 
## Residual standard error: 7.104187
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                    Sum Sq Df  F value    Pr(>F)    
## (Intercept)        7346.2  1 145.5569 6.594e-11 ***
## Location            542.8  1  10.7552  0.003577 ** 
## Rock.type            99.1  1   1.9643  0.175658    
## Location:Rock.type  117.0  1   2.3188  0.142737    
## Residuals          1059.9 21                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##          
## 17.20043 
## 
##  Location 
##     Slope City
##      21.5 12.5
## rep  13.0 12.0
## 
##  Rock.type 
##     Chalk Limestone
##      15.2      19.4
## rep  13.0      12.0
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 17.69 26.01    
##    rep    7.00  6.00    
##    City  12.68 12.34    
##    rep    6.00  6.00

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                  Location Rock.type Location:Rock.type Residuals
## Sum of Squares   507.6609  107.4251           117.0279 1059.8590
## Deg. of Freedom         1         1                  1        21
## 
## Residual standard error: 7.104187
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_InvSim,
                   ~ Location : Rock.type)
summary(marginal)
Location Rock.type emmean SE df lower.CL upper.CL
Slope Chalk 17.68601 2.685130 21 12.101979 23.27005
City Chalk 12.68276 2.900272 21 6.651317 18.71421
Slope Limestone 26.01445 2.900272 21 19.983005 32.04590
City Limestone 12.33755 2.900272 21 6.306105 18.36900
contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")
##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk            5.003 3.95 21  1.266  0.5937 
##  Slope Chalk - Slope Limestone      -8.328 3.95 21 -2.107  0.1834 
##  Slope Chalk - City Limestone        5.348 3.95 21  1.353  0.5410 
##  City Chalk - Slope Limestone      -13.332 4.10 21 -3.250  0.0185 
##  City Chalk - City Limestone         0.345 4.10 21  0.084  0.9998 
##  Slope Limestone - City Limestone   13.677 4.10 21  3.335  0.0154 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
(InvSim_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART
Location Rock.type emmean SE df lower.CL upper.CL .group
4 City Limestone 12.33755 2.900272 21 4.440021 20.23508 a
2 City Chalk 12.68276 2.900272 21 4.785233 20.58029 a
1 Slope Chalk 17.68601 2.685130 21 10.374321 24.99771 ab
3 Slope Limestone 26.01445 2.900272 21 18.116921 33.91198 b
(mod_InvSim %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_InvSim_ANOVA)
Df Sum Sq Mean Sq F value Pr(>F) Part Eta Sq
Location 1 507.6609 507.66088 10.058770 0.0045968 0.2832972
Rock.type 1 107.4251 107.42510 2.128516 0.1593772 0.0599479
Location:Rock.type 1 117.0279 117.02787 2.318785 0.1427372 0.0653067
Residuals 21 1059.8590 50.46948 NA NA 0.5914481
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_InvSim, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design


#Berger Parker
(mod_BP <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Berger Parker")))
## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                   Location  Rock.type Location:Rock.type  Residuals
## Sum of Squares  0.07180240 0.01029748         0.00019359 0.11203611
## Deg. of Freedom          1          1                  1         21
## 
## Residual standard error: 0.07304145
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                     Sum Sq Df  F value    Pr(>F)    
## (Intercept)        0.95185  1 178.4141 9.851e-12 ***
## Location           0.07398  1  13.8668  0.001255 ** 
## Rock.type          0.01018  1   1.9080  0.181711    
## Location:Rock.type 0.00019  1   0.0363  0.850753    
## Residuals          0.11204 21                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##           
## 0.1943002 
## 
##  Location 
##      Slope  City
##      0.143  0.25
## rep 13.000 12.00
## 
##  Rock.type 
##      Chalk Limestone
##      0.214     0.173
## rep 13.000    12.000
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 0.16  0.12     
##    rep   7.00  6.00     
##    City  0.27  0.23     
##    rep   6.00  6.00

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                   Location  Rock.type Location:Rock.type  Residuals
## Sum of Squares  0.07180240 0.01029748         0.00019359 0.11203611
## Deg. of Freedom          1          1                  1         21
## 
## Residual standard error: 0.07304145
## Estimated effects may be unbalanced
# Post-hoc test
marginal <- emmeans(mod_BP,
                   ~ Location : Rock.type)
summary(marginal)
Location Rock.type emmean SE df lower.CL upper.CL
Slope Chalk 0.1640529 0.0276071 21 0.1066408 0.2214649
City Chalk 0.2675148 0.0298190 21 0.2055027 0.3295269
Slope Limestone 0.1180283 0.0298190 21 0.0560162 0.1800404
City Limestone 0.2326462 0.0298190 21 0.1706341 0.2946583
contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")
##  contrast                         estimate     SE df t.ratio p.value
##  Slope Chalk - City Chalk          -0.1035 0.0406 21 -2.546  0.0814 
##  Slope Chalk - Slope Limestone      0.0460 0.0406 21  1.133  0.6741 
##  Slope Chalk - City Limestone      -0.0686 0.0406 21 -1.688  0.3545 
##  City Chalk - Slope Limestone       0.1495 0.0422 21  3.545  0.0096 
##  City Chalk - City Limestone        0.0349 0.0422 21  0.827  0.8411 
##  Slope Limestone - City Limestone  -0.1146 0.0422 21 -2.718  0.0577 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates
(BP_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART
Location Rock.type emmean SE df lower.CL upper.CL .group
3 Slope Limestone 0.1180283 0.0298190 21 0.0368302 0.1992265 a
1 Slope Chalk 0.1640529 0.0276071 21 0.0888780 0.2392278 ab
4 City Limestone 0.2326462 0.0298190 21 0.1514480 0.3138443 ab
2 City Chalk 0.2675148 0.0298190 21 0.1863167 0.3487130 b
(mod_BP %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_BP_ANOVA)
Df Sum Sq Mean Sq F value Pr(>F) Part Eta Sq
Location 1 0.0718024 0.0718024 13.4586112 0.0014309 0.3694878
Rock.type 1 0.0102975 0.0102975 1.9301546 0.1792983 0.0529898
Location:Rock.type 1 0.0001936 0.0001936 0.0362871 0.8507532 0.0009962
Residuals 21 0.1120361 0.0053351 NA NA 0.5765263
# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_BP, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

Plot all alpha diversity metrics together

Richness_Diversity_long %>% 
  dplyr::filter(!Metric %in% c("Chao1", "ACE")) %>% 
    mutate_at(., "Metric", ~fct_recode(., "Observed S" = "S obs.", "Inv. Simpson" = "Inv. Simpson", "Berger Parker" = "Berger Parker")) %>% 
  mutate_at(., "Metric", ~fct_relevel(., "Observed S", "Inv. Simpson", "Shannon", "Berger Parker")) %>% 
  droplevels() ->
  Richness_Diversity_long2plot

p_alpha <- ggplot() +
  geom_violin(data = Richness_Diversity_long2plot,
             aes(
               x = Location,
               y = Estimate,
               ymin = lerr,
               ymax = herr
             ), colour = "grey",
              fill = "grey",
              alpha = 1 / 3) +
  geom_jitter2(data = Richness_Diversity_long2plot,
               aes(x = Location,
               y = Estimate,
               ymin = lerr,
               ymax = herr,
               colour = Location), size = 3, width = 0.2, alpha = 2/3) +
  scale_colour_manual(values = Gradient.colours[c(5, 6, 11)], name = "") +
  # geom_errorbar(alpha = 1 / 2, width = 0.3) +
  xlab("") +
  ylab("") +
  theme(axis.text.x = element_text(
    angle = 45,
    vjust = 0.9,
    hjust = 1
  ),
  legend.position="none") +
  facet_grid(Metric ~ Rock.type, scale = "free") +
  theme(strip.text = element_text(size = f_size - 4)) +
  background_grid(major = "y",
                  minor = "none",
                  size.major = 0.8) 

dat_text <- data.frame(
  label = str_remove_all(c(obsS_pairwise$.group[1:4], 
                           Shannon_pairwise$.group[1:4], 
                           InvSim_pairwise$.group[1:4],
                           BP_pairwise$.group[1:4]), 
                         pattern = " "),
  Metric = fct_inorder(rep(levels(Richness_Diversity_long2plot$Metric), each = 4)),
  Rock.type = fct_c(obsS_pairwise$Rock.type[1:4], 
                           Shannon_pairwise$Rock.type[1:4], 
                           InvSim_pairwise$Rock.type[1:4],
                           BP_pairwise$Rock.type[1:4]), 
  x = fct_c(obsS_pairwise$Location[1:4], 
                           Shannon_pairwise$Location[1:4], 
                           InvSim_pairwise$Location[1:4],
                           BP_pairwise$Location[1:4]),
  # x     = as.factor(levels(Richness_Diversity_long2plot$Climate.Source)),
  y = rep(c(520, 45, 5.2, 0.52), each = 4)
  # y = rep(c(40, 140, 0.5), each = 6)
)
p_alpha <- p_alpha + geom_text(
  data = dat_text,
  mapping = aes(x = x, y = y, label = label),
  nudge_x = 0,
  nudge_y = 0
)
print(p_alpha)

Beta diversity

Calculate and plot beta diversity metrics.

(mod1 <-  adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,      data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999,      method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location            1      0.63   0.629    2.12 0.081  0.024 *
## Rock.type           1      0.39   0.386    1.30 0.050  0.231  
## Location:Rock.type  1      0.49   0.486    1.63 0.063  0.101  
## Residuals          21      6.25   0.297         0.806         
## Total              24      7.75                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(mod2 <- adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Location,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location   1      0.63   0.629    2.03 0.081  0.035 *
## Residuals 23      7.12   0.309         0.919         
## Total     24      7.75                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod1_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Location,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod1_pairwise)
##          pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1 City - Slope       24    2.03 0.0812   0.038      0.038   .
(sig_pairs1 <- list(as.character(mod1_pairwise$pairs[mod1_pairwise$p.adjusted < 0.05])))
## [[1]]
## [1] "City - Slope"
mod2_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Rock.type,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod2_pairwise)
##               pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1 Chalk - Limestone       24    1.21 0.0501   0.281      0.281
(sig_pairs2 <- list(as.character(mod2_pairwise$pairs[mod2_pairwise$p.adjusted < 0.05])))
## [[1]]
## character(0)
mod3_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Location.rock,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod3_pairwise)
##                              pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1      City:Chalk - City:Limestone       11    1.20 0.1069   0.329      0.395    
## 2     City:Chalk - Slope:Limestone       11    1.43 0.1251   0.182      0.273    
## 3         City:Chalk - Slope:Chalk       12    2.92 0.2099   0.012      0.072    
## 4 City:Limestone - Slope:Limestone       11    1.07 0.0967   0.424      0.424    
## 5     City:Limestone - Slope:Chalk       12    1.99 0.1531   0.089      0.178    
## 6    Slope:Limestone - Slope:Chalk       12    1.90 0.1471   0.074      0.178
(sig_pairs3 <- list(as.character(mod3_pairwise$pairs[mod3_pairwise$p.adjusted < 0.1])))
## [[1]]
## [1] "City:Chalk - Slope:Chalk"
Unifrac_mat <- UniFrac(Ps_obj_filt, 
                       weighted = TRUE, 
                       normalized = TRUE, 
                       parallel = TRUE, 
                       fast = TRUE)

(mod1 <-  adonis(
  Unifrac_mat ~ Location * Rock.type,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = Unifrac_mat ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location            1     0.044  0.0443    1.24 0.049  0.268  
## Rock.type           1     0.050  0.0496    1.39 0.055  0.193  
## Location:Rock.type  1     0.060  0.0601    1.68 0.066  0.097 .
## Residuals          21     0.751  0.0357         0.830         
## Total              24     0.905                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(mod2 <- adonis(
  Unifrac_mat ~ Location,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))
## 
## Call:
## adonis(formula = Unifrac_mat ~ Location, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)
## Location   1     0.044  0.0443    1.19 0.049   0.28
## Residuals 23     0.860  0.0374         0.951       
## Total     24     0.905                 1.000

The difference between city and slope is significant based on the Morisita-Horn distances between OTUs but not based on UniFrac.

Calculate ordinations
Ps_obj_ord1 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~  Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~  Location)

explained <- eigenvals(Ps_obj_ord2)/sum( eigenvals(Ps_obj_ord2)) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))

Ps_obj_filt_GMPR %>% 
  plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) -> 
  ord_df

p_ord <- ggplot(ord_df,
             aes(
               x = CAP1,
               y = MDS1,
               shape = Rock.type,
               color = Location
             )) +
  stat_ellipse(
    aes(x = CAP1, 
        y = MDS1, 
        fill = Location
        ),
    geom = "polygon",
    alpha = 1/4,
    type = "t",
    level = 0.9,
    # linetype = 2,
    inherit.aes = FALSE
  ) +
  geom_point2(size = 4, alpha = 2 / 3) +
  guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
  scale_colour_manual(values = Gradient.colours) +
  scale_fill_manual(values = Gradient.colours, guide = "none") +
  labs(x = sprintf("CAP1 (%s%%)", explained[1]), 
       y = sprintf("CAP2 (%s%%)", explained[2])) +
  coord_fixed(ratio = sqrt(explained[2] / explained[1])) +
   theme(legend.justification = "top")
  # facet_wrap(. ~ Rock.type)
print(p_ord)

Ps_obj_ord1 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location)

explained <- Ps_obj_ord2$values$Relative_eig/sum(Ps_obj_ord2$values$Relative_eig) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))

Ps_obj_filt %>% 
  plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) -> 
  ord_df

p_ord_phylo <- ggplot(ord_df,
             aes(
               x = Axis.1,
               y = Axis.2,
               shape = Rock.type,
               color = Location
             )) +
  stat_ellipse(
    aes(x = Axis.1, 
        y = Axis.2, 
        fill = Location
        ),
    geom = "polygon",
    alpha = 1/4,
    type = "t",
    level = 0.9,
    # linetype = 2,
    inherit.aes = FALSE
  ) +
  geom_point2(size = 4, alpha = 2 / 3) +
  # theme_bw(base_size = f_size) +
  guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
  scale_colour_manual(values = Gradient.colours) +
  scale_fill_manual(values = Gradient.colours, guide = "none") +
  labs(x = sprintf("CAP1 (%s%%)", explained[1]), 
       y = sprintf("CAP2 (%s%%)", explained[2])) +
  coord_fixed(ratio = sqrt(explained[2] / explained[1])) #+ 
  # facet_wrap(. ~ Rock.type)
print(p_ord_phylo)

Test differences between samples on the phylum level

STAMPR analysis of the differences of each phylum between locations using Aligned Rank Transformed ANOVA test and a post-hoc estimated marginal means.

Taxa_tests_phylum1 <- STAMPR2(Ps_obj_filt, vars2test = "Location", threshold = 0.05, outputfile = paste0(Proj_name, "_Location"))

pSTAMPR1 <- plotSTAMPR(Taxa_tests_phylum1, pair = "Slope - City", f_size = f_size)
print(pSTAMPR1)

Taxa_tests_phylum2 <- STAMPR2(Ps_obj_filt, vars2test = c("Location", "Rock.type"), threshold = 0.05, outputfile = paste0(Proj_name, "_Location_Rock"))

pSTAMPR2 <- plotSTAMPR(Taxa_tests_phylum2, pair = "Slope:Chalk - City:Chalk", f_size = f_size)
print(pSTAMPR2)

Taxonmic distribution analysis

Agglomerate data and tag rare taxa

Ps_obj_filt_GMPR %>% 
  transform_sample_counts(., function(x){x / sum(x)} * 100) %>% 
  tax_glom(., 
           "Phylum",
           NArm = TRUE) ->
  Ps_obj_filt_GMPR_glom

Ps_obj_filt_GMPR_glom_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom)
Ps_obj_filt_GMPR_glom_DF$Phylum %<>% as.character()
# Ps_obj_filt3_glom_DF %<>% mutate(Species = fct_relevel(Species, "NA", after = Inf))

# group dataframe by Phylum, calculate sum rel. abundance
Ps_obj_filt_GMPR_glom_DF %>%
  group_by(Phylum) %>%
  summarise(Sum = sum(Abundance) / nsamples(Ps_obj_filt_GMPR_glom) ) ->
  Sums

# find Phyla whose rel. abund. is less than 5%
Rare_phyla0.05 <- Sums[Sums$Sum <= 0.05, ]$Phylum

# change their name to "Rare"
Ps_obj_filt_GMPR_glom_DF[Ps_obj_filt_GMPR_glom_DF$Phylum %in% Rare_phyla0.05, ]$Phylum <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_DF %>%
  group_by(Sample, Phylum, Location, Rock.type, Location.rock) %>%
  summarise(Abundance = sum(Abundance)) ->
  Ps_obj_filt_GMPR_glom_DF_2plot

# ab.taxonomy$Freq <- sqrt(ab.taxonomy$Freq)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("unclassified", "Unclassified", .)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("uncultured", "Unclassified", .)

Ps_obj_filt_GMPR_glom_DF_2plot %>% 
  group_by(Sample) %>% 
  filter(Phylum == "Rare") %>% 
  summarise(`Rares (%)` = sum(Abundance)) -> 
  Rares

Summarise taxonomy

# Percentage of reads classified as rare 
Rares %>%
  kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Percentage of reads per sample classified as rare:
Sample Rares (%)
ShivSClk1S12 0.01
ShivSClk2S15 0.59
ShivSClk3S16 0.02
ShivSClk4S17 0.01
ShivSClk5S18 0.00
ShivSLimek1S13 0.02
ShivSLimek2S14 0.04
ShivSLimek3S19 0.11
ShivSLimek4S20 0.24
ShivSLimek5S21 0.01
ShivSLimek6S25 0.01
SlpClk1S3 0.24
SlpClk2S4 0.03
SlpClk3S5 0.03
SlpClk4S6 0.01
SlpClk5S7 0.01
SlpClk6S8 0.03
SlpClk7S9 0.01
SlpClk8S10 0.02
SlpClk9S11 0.03
SlpLime1S1 0.01
SlpLime2S2 0.02
SlpLime3S22 0.07
SlpLime4S23 0.01
SlpLime5S24 0.02
sample_order <- match(Rares$Sample, row.names(sample_data(Ps_obj_filt_GMPR_glom)))
Rares %<>% arrange(., sample_order)

Rares %>% 
  cbind(., sample_data(Ps_obj_filt_GMPR_glom)) %>% 
  group_by(Location.rock) %>% 
  setNames(make.names(names(.), unique = TRUE)) %>% # fails for some reason without it
  summarise(`Rares (%)` = mean(`Rares....`)) -> 
  Rares_merged

# Percentage of reads classified as rare 
Rares_merged %>%
  kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)
Percentage of reads per sample classified as rare:
Location.rock Rares (%)
City:Chalk 0.11
City:Limestone 0.07
Slope:Chalk 0.02
Slope:Limestone 0.06

Plot taxonomy box-plot

Ps_obj_filt_GMPR_glom_DF_2plot %>% 
  group_by(Phylum) %>% 
  summarise(sum.Taxa = sum(Abundance)) %>% 
  arrange(desc(sum.Taxa)) -> Taxa_rank
Ps_obj_filt_GMPR_glom_DF_2plot$Phylum %<>% 
  factor(., levels = Taxa_rank$Phylum) %>% 
  fct_relevel(., "Rare", after = Inf)
  
p_taxa_box <-
  ggplot(Ps_obj_filt_GMPR_glom_DF_2plot, aes(x = Phylum, y = (Abundance))) +
  geom_boxplot(aes(group = interaction(Phylum, Location)), position = position_dodge(width = 0.9), fatten = 1) +
  geom_point2(
    aes(colour = Rock.type),
    position = position_jitterdodge(dodge.width = 1),
    alpha = 1 / 2,
    stroke = 0,
    size = 2
  ) +
  scale_colour_manual(values = Gradient.colours, name = "") +
  # theme_bw()+
  labs(x = NULL, y = "Relative abundance (%)") +
  guides(colour = guide_legend(override.aes = list(size = 5))) +
  facet_grid(Location ~ .) +
  background_grid(major = "xy",
                  minor = "none") +
  theme(axis.text.x = element_text(
    angle = 45,
    vjust = 0.9,
    hjust = 0.9
  ),
  legend.position = c(.99, .99),
  legend.justification = c("right", "top"),
  legend.box.just = "top",
  legend.margin = margin(0, 3, 3, 3))
print(p_taxa_box)

Differential abundance models

Tag rare phyla (for plotting purposes only)

Ps_obj_filt_GMPR_glom <- tax_glom(Ps_obj_filt_GMPR, 
                             "Phylum", 
                             NArm = TRUE) # glomerate to the phylum level
Ps_obj_filt_GMPR_glom_rel <- transform_sample_counts(Ps_obj_filt_GMPR_glom, function(x) x / sum(x)) # transform to rel. ab.
Ps_obj_filt_GMPR_glom_rel_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom_rel) # generate a df
Ps_obj_filt_GMPR_glom_rel_DF$Phylum %<>% as.character() # factor to char

# group dataframe by Phylum, calculate sum rel. abundance
Ps_obj_filt_GMPR_glom_rel_DF %>%
  group_by(Phylum) %>%
  summarise(Sum = sum(Abundance) / nsamples(Ps_obj_filt_GMPR_glom) ) ->
  Sums

# find Phyla whose mean rel. abund. is less than 0.5%
Rare_phyla0.05 <- Sums[Sums$Sum <= 0.05, ]$Phylum

# change their name to "Rare"
Ps_obj_filt_GMPR_glom_rel_DF[Ps_obj_filt_GMPR_glom_rel_DF$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' 

# re-group
Ps_obj_filt_GMPR_glom_rel_DF %>%
  group_by(Phylum) %>%
  summarise(Abundance = sum(Abundance)) %>% 
  arrange(desc(Abundance)) -> Taxa_rank

Detect differentially abundant OTUs using corncob (Martin, Witten, and Willis 2020)

comparison_string <- c("City", "Slope")

Ps_obj_filt %>%
  subset_samples(Location %in% c(comparison_string[1], comparison_string[2])) %>%
  tax_glom("Order") ->
  Ps_obj_filt_pairwise_glom

# Test differential abundance for location
da_Loc <- differentialTest(formula = ~ Location,
                           phi.formula = ~ Location,
                           formula_null = ~ 1,
                           phi.formula_null = ~ Location, 
                           test = "Wald", boot = FALSE,
                           data = Ps_obj_filt,
                           fdr_cutoff = 0.05,
                           full_output = TRUE)
da_Loc_intervals <- plot(da_Loc, level = "Class", data_only = T)
which(is.na(da_Loc$p)) %>% names
## [1] "OTU398" "OTU164" "OTU476" "OTU641" "OTU239" "OTU481" "OTU727" "OTU245"
Ps_obj_filt %>%
  transform_sample_counts(., function(x) x / sum(x) * 100) %>% 
  taxa_sums(.) %>% 
  map_dbl(~(.x / nsamples(Ps_obj_filt))) %>% 
  enframe(name = "OTU", value = "Mean abundance (%)") -> 
  baseMean

map(da_Loc$all_models,15) %>% 
  map(.,2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Loc_df

da_Loc_df %<>% rows_update(., tibble(ymin = da_Loc_intervals$xmin, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df %<>% rows_update(., tibble(ymax = da_Loc_intervals$xmax, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df[da_Loc_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is

p_corncob_loc <- GGPlotCorncob(da_Loc_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

corncob_summary <- tibble(Label = c(paste0("", sum(da_Loc_df$`Differential abundance` > 0 &  da_Loc_df$Significance == "Pass"), "", sum(da_Loc_df$`Differential abundance` < 0 &  da_Loc_df$Significance == "Pass"), " (", nrow(da_Loc_df), ")")))

p_corncob_loc <- p_corncob_loc +
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-10, 10))
print(p_corncob_loc)

write.csv(da_Loc_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], ".csv"))

Modelling differential abundance and variance between locations discovered length(da_Loc$significant_taxa)

comparison_string <- c("Limestone", "Chalk")
# Test differential abundance and variance for rock type
da_Rock <- differentialTest(formula = ~ Rock.type,
                                 phi.formula = ~ Rock.type,
                                 formula_null = ~ 1,
                                 phi.formula_null = ~ Rock.type, 
                                 test = "Wald", boot = FALSE,
                                 data = Ps_obj_filt,
                                 fdr_cutoff = 0.05,
                                full_output = TRUE)
da_Rock_intervals <- plot(da_Rock, level = "Class", data_only = TRUE)
which(is.na(da_Rock$p)) %>% names
##  [1] "OTU780" "OTU164" "OTU580" "OTU679" "OTU696" "OTU712" "OTU519" "OTU503" "OTU542"
## [10] "OTU245" "OTU229"
map(da_Rock$all_models,15) %>% 
  map(.,2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Rock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Rock_df

da_Rock_df %<>% rows_update(., tibble(ymin = da_Rock_intervals$xmin, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df %<>% rows_update(., tibble(ymax = da_Rock_intervals$xmax, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df[da_Rock_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is

p_corncob_rock <- GGPlotCorncob(da_Rock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

corncob_summary <- tibble(Label = c(paste0("", sum(da_Rock_df$`Differential abundance` > 0 &  da_Rock_df$Significance == "Pass"), "", sum(da_Rock_df$`Differential abundance` < 0 &  da_Rock_df$Significance == "Pass"), " (", nrow(da_Rock_df), ")")))

p_corncob_rock <- p_corncob_rock +
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-10, 10))
print(p_corncob_rock)

write.csv(da_Rock_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], ".csv"))

Modelling differential abundance and variance between rock types discovered length(da_Rock$significant_taxa)

# Test differential abundance for location, control for Rock.type for both cases
comparison_string <- c("City", "Slope")
da_Loc_exRock <- differentialTest(formula = ~ Location + Rock.type,
                                 phi.formula = ~ Location + Rock.type,
                                 formula_null = ~ Rock.type,
                                 phi.formula_null = ~ Location + Rock.type, 
                                 test = "Wald", boot = FALSE,
                                 data = Ps_obj_filt,
                                 fdr_cutoff = 0.05,
                                full_output = TRUE)
da_Loc_exRock_intervals <- plot(da_Loc_exRock, level = "Class", data_only = TRUE)

which(is.na(da_Loc_exRock$p)) %>% names
##  [1] "OTU780" "OTU398" "OTU164" "OTU476" "OTU580" "OTU679" "OTU696" "OTU712" "OTU641"
## [10] "OTU519" "OTU239" "OTU481" "OTU503" "OTU542" "OTU727" "OTU245" "OTU229"
map(da_Loc_exRock$all_models, 15) %>% 
  map(., 2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc_exRock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Loc_exRock_df

da_Loc_exRock_df %<>% rows_update(., tibble(ymin = da_Loc_exRock_intervals$xmin, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df %<>% rows_update(., tibble(ymax = da_Loc_exRock_intervals$xmax, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df[da_Loc_exRock_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is

p_corncob_locExroc <- GGPlotCorncob(da_Loc_exRock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

corncob_summary <- tibble(Label = c(paste0("", sum(da_Loc_exRock_df$`Differential abundance` > 0 &  da_Loc_exRock_df$Significance == "Pass"), "", sum(da_Loc_exRock_df$`Differential abundance` < 0 &  da_Loc_exRock_df$Significance == "Pass"), " (", nrow(da_Loc_exRock_df), ")")))

p_corncob_locExroc <- p_corncob_locExroc +
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-10, 10))
print(p_corncob_locExroc)

write.csv(da_Loc_exRock_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], "_ExRockType.csv"))

Modelling differential abundance between locations, while controlling for rock type discovered length(da_Loc_exRock$significant_taxa)

mod260 <- bbdml(formula = OTU260 ~ 1,
             phi.formula = ~ 1,
             data = Ps_obj_filt)
mod260_Loc <- bbdml(formula = OTU260 ~ Location,
             phi.formula = ~ Location,
             data = Ps_obj_filt)
mod260_Loc_rock <- bbdml(formula = OTU97 ~ Location*Rock.type,
             phi.formula = ~ Location*Rock.type,
             data = Ps_obj_filt)
lrtest(mod_null = mod260, mod = mod260_Loc)
## [1] 0.00401
# lrtest(mod_null = mod260_Loc, mod = mod260_Loc_rock)
summary(mod260_Loc)
## 
## Call:
## bbdml(formula = OTU260 ~ Location, phi.formula = ~Location, data = Ps_obj_filt)
## 
## 
## Coefficients associated with abundance:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.544      0.241  -35.51   <2e-16 ***
## Location1      0.658      0.241    2.73    0.012 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Coefficients associated with dispersion:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.486      0.406   -20.9  1.5e-15 ***
## Location1      1.379      0.406     3.4   0.0027 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Log-likelihood: -80.86
plot(mod260_Loc, color = "Location", shape = "Rock.type") # add total = TRUE for total counts (i.e. not relative abundance)

Compose figures

# composite_plot <- ((p_alpha + p_taxa_box +  plot_layout(widths = c(1, 2))) /(p_ord + pSTAMPR1) + plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size)))
composite_plot <- (p_alpha + p_ord) /(p_taxa_box) / (pSTAMPR1) + 
  plot_layout(heights = c(1.5, 1, 1)) +
  plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size)) 

plot_file <- "./Results/Microbiome_1"
svglite(paste0(plot_file, ".svg"), 
        width = 10, 
        height = 11)
print(composite_plot)
invisible(dev.off())

agg_png(paste0(plot_file, ".png"),
        width = 10, 
        height = 11, 
        units = 'cm',
        res = 900, 
        scaling = 0.38)
print(composite_plot)
invisible(invisible(dev.off()))

gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))
knitr::include_graphics(paste0(plot_file, ".png"))

plot_file <- "./Results/Microbiome_2"
svglite(paste0(plot_file, ".svg"), 
        width = 12, 
        height = 10)
print(p_corncob_locExroc)
invisible(dev.off())

agg_png(paste0(plot_file, ".png"),
        width = 12, 
        height = 10.5, 
        units = 'cm',
        res = 900, 
        scaling = 0.38)
print(p_corncob_locExroc)
invisible(invisible(dev.off()))
gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))

References

Chen, Jun, and Li Chen. 2017. “GMPR: A novel normalization method for microbiome sequencing data.” bioRxiv, February, 112565. https://doi.org/10.1101/112565.

Martin, Bryan D., Daniela Witten, and Amy D. Willis. 2020. “Modeling Microbial Abundances and Dysbiosis with Beta-Binomial Regression.” Annals of Applied Statistics 14 (1): 94–115. https://doi.org/10.1214/19-AOAS1283.