How fast does biological rock crust grow?

20 April, 2021

Setting general parameters:
Description

Setting general parameters:

set.seed(15102020)
bootstraps <- 1000
data_path <- "./Data/"
Proj_name <- "BRC_growth_rate"

Browns <- RColorBrewer::brewer.pal(n = 9, "YlOrBr")[9:6]
Greens <- RColorBrewer::brewer.pal(n = 9, "YlGn")[9:6]
Blues <- RColorBrewer::brewer.pal(n = 9, "YlGnBu")[9:6]
Gradient.colours <- c(Browns[1], Greens[1], Browns[2], Greens[2], Browns[3], Greens[3], Browns[4], Greens[4], Blues)

Description

This script reproduces all sequence analysis steps and plots included in the paper plus some additional exploratory analyses.

Load data

OTUmat <- t(read.csv(paste0(data_path, "Shivta_site_otuTab2.txt"), header = TRUE, row.names = 1))
sort.order <- as.numeric(gsub("OTU([0-9]+)", "\\1", colnames( OTUmat )))
OTUmat <- OTUmat[, order(sort.order )]

Metadata <- read.csv(paste0(data_path, "Shivta_metadata.csv"), row.names = 1, header = TRUE)

read_csv(paste0(data_path, "Shivta_metadata.csv"),
                     trim_ws = TRUE) %>%
  mutate_at(
    c(
      "Rock.type",
      "Location"
    ), 
    ~(factor(.))
  ) %>% 
  column_to_rownames("Sample.code") ->
  Metadata

row.names(OTUmat) <- gsub("(.*)Nimrod[0-9]+|Osnat[0-9]+", "\\1", row.names( OTUmat))
Metadata <- Metadata[order(row.names(Metadata)), ]
OTUmat <- OTUmat[order(row.names(OTUmat)), ]
# calculate sample size
Metadata$Library.size = rowSums(OTUmat)
Metadata$Location.rock <- with(Metadata, Location:Rock.type)

# Load taxonomy data
tax.file <- "Shivta_site_silva.nrv119.taxonomy"
Taxonomy <- read.table(paste0(data_path, tax.file), stringsAsFactors = FALSE) # read taxonomy file

# count how many ';' in each cell and add up to 6
for (i in 1:nrow(Taxonomy)){
  semicolons <- length(gregexpr(";", Taxonomy$V2[i] )[[1]])
  if (semicolons < 6){
    x <- paste0( rep("Unclassified;", 6 - semicolons ), collapse = "")
    Taxonomy$V2[i] <- paste0( Taxonomy$V2[i], x, sep = "")
  }
}
# split taxonomy to columns
do.call( "rbind", strsplit( Taxonomy$V1, ";", fixed = TRUE)) %>% 
  gsub( "size=([0-9]+)", "\\1", .) %>%
  data.frame( ., do.call( "rbind", strsplit( Taxonomy$V2, ";", fixed = TRUE)), stringsAsFactors = F) %>% 
  apply(., 2, function(x) gsub( "\\(.*\\)", "", x)) %>% 
  replace(., . == "unclassified", "Unclassified") -> 
  Taxonomy
colnames( Taxonomy ) <- c( "OTU", "Frequency", "Domain", "Phylum", "Class", "Order", "Family", "Genus" )
# rownames(Taxonomy) <- colnames(Rock_weathering_OTUmat)
rownames(Taxonomy) <- Taxonomy[, 1]

Tree_IQ <- read_tree(paste0(data_path, "Shivta_site_otuReps.filtered.align.treefile"))

# generate phyloseq object
Ps_obj <- phyloseq(otu_table(OTUmat, taxa_are_rows = FALSE),
                   tax_table(Taxonomy[, -c(1, 2)]),
                   sample_data(Metadata),
                   phy_tree(Tree_IQ)
)
# Reorder factors for plotting
sample_data(Ps_obj)$Location %<>% fct_relevel("Slope", "City")

Remove un- and mis-classified sequences, chloroplasts and mitochondria

domains2remove <- c("", "Archaea", "Eukaryota", "Unclassified")
classes2remove <- c("Chloroplast")
families2remove <- c("Mitochondria")
Ps_obj_filt <- subset_taxa(Ps_obj, !is.na(Phylum) &
                        !Domain %in% domains2remove &
                      !Class %in% classes2remove &
                      !Family %in% families2remove)

Inspect library size and number of OTU

Ps_obj_df <-
  as.data.frame(sample_data(Ps_obj_filt)) # Put sample_data into a ggplot-friendly data.frame
Ps_obj_df <- Ps_obj_df[order(Ps_obj_df$Library.size), ]
Ps_obj_df$Index <- seq(nrow(Ps_obj_df))
ggplot(data = Ps_obj_df, 
       aes(x = Index, y = Library.size, color = Location.rock)) + 
  geom_point2(size = 4) + 
  scale_colour_manual(values = ggpomological:::pomological_palette[c(2, 1, 9, 3)], name = "Location.rock")

summary(sample_sums(Ps_obj_filt))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   23966   44989   53000   52663   61348   77459

summary(taxa_sums(Ps_obj_filt))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       3      70     198    1852     771   76180

Explore the prevalence of different taxa in the database

prevdf <- apply(X = otu_table(Ps_obj_filt),
                 MARGIN = ifelse(taxa_are_rows(Ps_obj_filt), yes = 1, no = 2),
                 FUN = function(x){sum(x > 0)})
# Add taxonomy and total read counts to this data.frame
prevdf <- data.frame(Prevalence = prevdf,
                      TotalAbundance = taxa_sums(Ps_obj_filt),
                      tax_table(Ps_obj_filt))
prevdf %>%
  group_by(Phylum) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_phylum_summary

Prevalence_phylum_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)

Phylum	Mean prevalence	Sum prevalence
Acidobacteria	11.7	152
Actinobacteria	16.3	3414
Aquificae	4.0	12
Armatimonadetes	7.6	76
Bacteroidetes	11.8	1027
Candidate\_division\_KB1	8.0	8
Candidate\_division\_OD1	4.0	4
Candidate\_division\_OP11	4.5	9
Candidate\_division\_TM7	7.9	158
Chloroflexi	11.0	1124
Cyanobacteria	14.9	506
Deinococcus-Thermus	15.8	174
Firmicutes	13.4	214
Gemmatimonadetes	11.2	617
Nitrospirae	6.0	12
Planctomycetes	10.8	140
Proteobacteria	14.4	1817
Tenericutes	3.0	3
Verrucomicrobia	23.0	23
WCHB1-60	7.0	28

prevdf %>%
  group_by(Order) %>%
  summarise(`Mean prevalence` = mean(Prevalence),
            `Sum prevalence` = sum(Prevalence)) ->
  Prevalence_Order_summary

Prevalence_Order_summary %>% 
  kable(., digits = c(0, 1, 0)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)

Order	Mean prevalence	Sum prevalence
AKIW781	9.4	198
AKYG1722	10.7	75
AT425-EubC11\_terrestrial\_group	10.2	378
Acidimicrobiales	14.7	368
Alteromonadales	12.0	12
Aquificales	4.0	12
Ardenticatenales	8.0	16
B103G10	7.0	7
BD2-11\_terrestrial\_group	5.0	5
Bacillales	13.7	164
Bacteroidales	17.0	17
Bdellovibrionales	8.0	40
Burkholderiales	18.5	204
C0119	5.0	5
Caldilineales	18.0	18
Campylobacterales	6.0	6
Caulobacterales	23.7	71
Chromatiales	20.0	20
Chthoniobacterales	23.0	23
Clostridiales	6.5	13
Corynebacteriales	8.0	40
Cytophagales	11.8	695
Deinococcales	15.1	151
Desulfovibrionales	5.0	5
E6aD10	11.0	11
EMP-G18	3.0	3
Enterobacteriales	22.0	44
Euzebyales	13.5	202
Flavobacteriales	12.7	38
Frankiales	20.9	356
Gaiellales	10.4	94
Gemmatimonadales	14.3	214
JG30-KF-CM45	13.0	468
Kineosporiales	20.2	121
Lactobacillales	18.5	37
Micrococcales	18.0	198
Micromonosporales	19.5	39
Myxococcales	10.2	51
Nitriliruptorales	9.7	58
Nitrosomonadales	11.5	23
Nitrospirales	6.0	12
Oceanospirillales	16.0	16
Orbales	7.0	7
Order\_II	4.8	24
Order\_III	8.7	26
PAUC43f\_marine\_benthic\_group	2.0	2
Planctomycetales	9.2	74
Propionibacteriales	14.5	318
Pseudomonadales	14.7	103
Pseudonocardiales	18.0	306
Rhizobiales	16.1	386
Rhodobacterales	14.8	163
Rhodospirillales	11.8	271
Rickettsiales	11.2	146
Rubrobacterales	22.3	491
S0134\_terrestrial\_group	18.0	18
SBYG-4553	10.0	10
Solirubrobacterales	17.2	638
Sphaerobacterales	3.3	10
Sphingobacteriales	14.2	227
Sphingomonadales	19.6	196
Streptomycetales	24.5	49
Streptosporangiales	3.0	3
Subgroup\_3	15.0	15
Subgroup\_4	10.6	85
Subgroup\_6	13.0	52
SubsectionI	13.0	13
SubsectionII	15.8	315
SubsectionIII	14.8	118
SubsectionIV	13.5	54
TRA3-20	14.0	14
Thermales	23.0	23
Thermophilales	15.0	30
Unclassified	9.2	642
Unknown\_Order	9.8	98
Vampirovibrionales	6.0	6
WD2101\_soil\_group	13.0	39
Xanthomonadales	18.0	18

Based on that we will remove all phyla with a prevalence of under 8

Prevalence_phylum_summary %>% 
  filter(`Sum prevalence` < 8) %>% 
  dplyr::select(Phylum) %>% 
  map(as.character) %>% 
  unlist() ->
  filterPhyla
Ps_obj_filt %<>% subset_taxa(!Phylum %in% filterPhyla)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 767 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 767 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns

print(Ps_obj_filt)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 709 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 709 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 709 tips and 707 internal nodes ]
## taxa are columns

Plot general prevalence features of the phyla

# Subset to the remaining phyla
prevdf_phylum_filt <- subset(prevdf, Phylum %in% get_taxa_unique(Ps_obj_filt, "Phylum"))
ggplot(prevdf_phylum_filt,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Phylum)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point2(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Phylum) + theme(legend.position = "none")

Plot general prevalence features of the top 20 orders

# Subset to the remaining phyla
prevdf_order_filt <- subset(prevdf, Order %in% get_taxa_unique(Ps_obj_filt, "Order"))
# grab the top 30 most abundant orders
prevdf_order_filt %>% 
  group_by(Order) %>%
  summarise(Combined.abundance = sum(TotalAbundance)) %>% 
  arrange(desc(Combined.abundance)) %>% 
  .[1:30, "Order"]  ->
  Orders2plot
prevdf_order_filt2 <- subset(prevdf, Order %in% Orders2plot$Order)
ggplot(prevdf_order_filt2,
       aes(TotalAbundance, Prevalence / nsamples(Ps_obj_filt), color = Order)) +
  # Include a guess for parameter
  geom_hline(yintercept = 0.05,
             alpha = 0.5,
             linetype = 2) + geom_point2(size = 2, alpha = 0.7) +
  scale_x_log10() +  xlab("Total Abundance") + ylab("Prevalence [Frac. Samples]") +
  facet_wrap( ~ Order) + theme(legend.position = "none")

Unsupervised filtering by prevalence

We will remove all sequences which appear in less than 10% of the samples

# Define prevalence threshold as 10% of total samples
prevalenceThreshold <- 0.1 * nsamples(Ps_obj_filt)
prevalenceThreshold

## [1] 2.5

# Execute prevalence filter, using `prune_taxa()` function
keepTaxa <-
  row.names(prevdf_phylum_filt)[(prevdf_phylum_filt$Prevalence >= prevalenceThreshold)]
Ps_obj_filt  %<>%  prune_taxa(keepTaxa, .)
sample_data(Ps_obj_filt)$Library.size <- rowSums(otu_table(Ps_obj_filt))
print(Ps_obj)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 767 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 767 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 767 tips and 765 internal nodes ]
## taxa are columns

print(Ps_obj_filt)

## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 694 taxa and 25 samples ]
## sample_data() Sample Data:       [ 25 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 694 taxa by 6 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 694 tips and 692 internal nodes ]
## taxa are columns

This removed 73 or 10% of the sequences.

Exploring the dataset features

First let’s look at the count data distribution after filtering:

PlotLibDist(Ps_obj_filt)

sample_data(Ps_obj_filt) %>% 
  as_tibble() %>% 
  dplyr::select(Sample.name, Library.size) %>% 
  as(., "data.frame") %>% 
  kable(.) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)

Sample.name	Library.size
City Chalk 2	49453
City Chalk 3	54155
City Chalk 4	44943
City Chalk 5	58835
City Chalk 6	77459
City Limestone 1	36122
City Limestone 2	52816
City Limestone 3	48378
City Limestone 4	47108
City Limestone 5	61346
City Limestone 6	54505
Slope Limestone 1	75578
Slope Chalk 1	51651
Slope Chalk 2	45197
Slope Chalk 3	33625
Slope Chalk 4	70886
Slope Chalk 5	71458
Slope Chalk 6	58959
Slope Chalk 7	36756
City Chalk 1	23962
Slope Limestone 2	62456
Slope Limestone 3	36628
Slope Limestone 4	57283
Slope Limestone 5	64509
Slope Limestone 6	41564

The figure and table indicate only a small deviation in the number of reads per samples.

(mod1 <- adonis(
  otu_table(Ps_obj_filt) ~ Library.size,
  data = as(sample_data(Ps_obj_filt), "data.frame"), 
  method = "horn",
  permutations = 9999
))

## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt) ~ Library.size, data = as(sample_data(Ps_obj_filt),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## Library.size  1    0.4246 0.42456  1.3337 0.05481 0.2192
## Residuals    23    7.3218 0.31834         0.94519       
## Total        24    7.7464                 1.00000

PlotReadHist(as(otu_table(Ps_obj_filt), "matrix"))

notAllZero <- (rowSums(t(otu_table(Ps_obj_filt))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt))[notAllZero, ] + 1)))

The difference in library sizes is low and its effect on the community composition is minimal. We’ll use the GMPR method for library size normalisation (Chen and Chen 2017)

Ps_obj_filt_GMPR <- Ps_obj_filt
Ps_obj_filt %>%
  otu_table(.) %>%
  t() %>%
  as(., "matrix") %>%
  GMPR() ->
  GMPR_factors

## Begin GMPR size factor calculation ...
## Completed!
## Please watch for the samples with limited sharing with other samples based on NSS! They may be outliers!

Ps_obj_filt %>%
  otu_table(.) %>%
  t() %*% diag(1 / GMPR_factors$gmpr) %>%
  t() %>%
  as.data.frame(., row.names = sample_names(Ps_obj_filt)) %>%
  otu_table(., taxa_are_rows = FALSE) ->
  otu_table(Ps_obj_filt_GMPR)
sample_data(Ps_obj_filt_GMPR)$Library.size <- sample_sums(Ps_obj_filt)
adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Library.size,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
)

## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Library.size,      data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999,      method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##              Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)
## Library.size  1    0.4246 0.42456  1.3337 0.05481  0.228
## Residuals    23    7.3218 0.31834         0.94519       
## Total        24    7.7464                 1.00000

PlotLibDist(Ps_obj_filt_GMPR)

Did it improve anything?

PlotReadHist(as(otu_table(Ps_obj_filt_GMPR), "matrix"))

notAllZero <- (rowSums(t(otu_table(Ps_obj_filt_GMPR))) > 0)
vsn::meanSdPlot(as.matrix(log2(t(otu_table(Ps_obj_filt_GMPR))[notAllZero, ] + 1)))

Alpha diversity

Calculate and plot alpha diversity metrics

We do that by simulating 1000 rarefaction events and calculating the metrics each time. Then, the result is averaged.

rarefaction.mat <- matrix(0, nrow = nsamples(Ps_obj_filt), ncol = bootstraps)
rownames(rarefaction.mat) <- sample_names(Ps_obj_filt)
rich.ests <- list(S.obs = rarefaction.mat, S.chao1 = rarefaction.mat, se.chao1 = rarefaction.mat,
                   S.ACE = rarefaction.mat, se.ACE = rarefaction.mat)
for (i in seq(bootstraps)) {
  sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
  for (j in seq(length(rich.ests))) {
    rich.ests[[j]][, i] <- t(estimateR(sub.OTUmat))[, j]
  }
}
Richness <- data.frame(row.names = row.names(rich.ests[[1]]))
for (i in c(1, seq(2, length(rich.ests), 2))) {
  S <- apply(rich.ests[[i]], 1, mean)
  if (i == 1) { 
    se <- apply(rich.ests[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
    } else se <- apply(rich.ests[[i + 1]], 1, mean)
  Richness <- cbind(Richness, S, se)
}
colnames(Richness) <- c("S.obs", "S.obs.se", "S.chao1", "S.chao1.se", "S.ACE", "S.ACE.se")
saveRDS(Richness, file = paste0("./Results/", Proj_name, "_richness.RDS"))
write.csv(Richness, file = paste0("./Results/", Proj_name, "_richness.csv"))
ses <- grep("\\.se", colnames(Richness))
Richness[, ses] %>% 
  gather(key = "est.se") -> se.dat
Richness[, -unique(ses)] %>% 
  gather(key = "est") -> mean.dat
n <- length(unique(mean.dat$est))
# diversity indices
diversity.inds <- list(Shannon = rarefaction.mat, inv.simpson = rarefaction.mat, BP = rarefaction.mat)
for (i in seq(bootstraps)) {
  sub.OTUmat <- rrarefy(otu_table(Ps_obj_filt), min(rowSums(otu_table(Ps_obj_filt))))
  diversity.inds$Shannon[, i] <- diversityresult(sub.OTUmat, index = 'Shannon', method = 'each site', digits = 3)[, 1]
  diversity.inds$inv.simpson[, i] <- diversityresult(sub.OTUmat, index = 'inverseSimpson', method = 'each site', digits = 3)[, 1]
  diversity.inds$BP[, i] <- diversityresult(sub.OTUmat, index = 'Berger', method = 'each site', digits = 3)[, 1]
}
Diversity <- data.frame(row.names = row.names(diversity.inds[[1]]))
for (i in seq(length(diversity.inds))) {
  S <- apply(diversity.inds[[i]], 1, mean)
  se <- apply(diversity.inds[[i]], 1, function(x) (mean(x)/sqrt(length(x))))
  Diversity <- cbind(Diversity, S, se)
}
colnames(Diversity) <- c("Shannon", "Shannon.se", "Inv.simpson", "Inv.simpson.se", "BP", "BP.se")
ses <- grep("\\.se", colnames(Diversity))
Diversity[, ses] %>% gather(key = "est.se") -> se.dat
Diversity[, -unique(ses)] %>% gather(key = "est") -> mean.dat
saveRDS(Diversity, file = paste0("./Results/", Proj_name, "_diversity.RDS"))
write.csv(Diversity, file = paste0("./Results/", Proj_name, "_diversity.csv"))

Test the differences in alpha diversity.

# make combined richness diversity
Richness_Diversity <- cbind(Richness, Diversity)
ses <- grep("\\.se", colnames(Richness_Diversity))
Richness_Diversity[, ses] %>% 
  gather(key = "est.se") -> 
  se.dat
Richness_Diversity[, -unique(ses)] %>% 
  gather(key = "Metric", 
         value = "Estimate") -> 
  mean.dat
Richness_Diversity_long <-
  cbind(
    Sample = rep(rownames(Richness_Diversity), times = length(unique(mean.dat$Metric))),
    mean.dat,
    lerr = mean.dat$Estimate - se.dat$value,
    herr = mean.dat$Estimate + se.dat$value
  )
Richness_Diversity_long$Metric <-
  factor(
    Richness_Diversity_long$Metric,
    levels = c("S.obs", "S.chao1", "S.ACE", "Shannon", "Inv.simpson", "BP"),
    labels = c("S obs.", "Chao1", "ACE", "Shannon", "Inv. Simpson" , "Berger Parker")
  )
Richness_Diversity_long %<>%
  cbind(., 
        sample_data(Ps_obj_filt))

# S Obs
(mod_obsS <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "S obs.")))

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares     67.75    647.17           10123.07  62200.46
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 54.42358
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                    Sum Sq Df  F value    Pr(>F)    
## (Intercept)        588166  1 198.5755 3.566e-12 ***
## Location             3993  1   1.3480   0.25866    
## Rock.type            7767  1   2.6224   0.12028    
## Location:Rock.type  10123  1   3.4177   0.07863 .  
## Residuals           62200 21                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##          
## 310.9173 
## 
##  Location 
##     Slope City
##       312  309
## rep    13   12
## 
##  Rock.type 
##     Chalk Limestone
##       306       316
## rep    13        12
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 290   339      
##    rep     7     6      
##    City  325   293      
##    rep     6     6

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares     67.75    647.17           10123.07  62200.46
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 54.42358
## Estimated effects may be unbalanced

# Post-hoc test
marginal <- emmeans(mod_obsS,
                   ~ Location : Rock.type)
summary(marginal)

Location	Rock.type	emmean	SE	df	lower.CL	upper.CL
Slope	Chalk	289.8684	20.57018	21	247.0904	332.6465
City	Chalk	325.0227	22.21834	21	278.8171	371.2282
Slope	Limestone	338.9012	22.21834	21	292.6956	385.1067
City	Limestone	293.3852	22.21834	21	247.1796	339.5907

contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")

##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk           -35.15 30.3 21 -1.161  0.6571 
##  Slope Chalk - Slope Limestone      -49.03 30.3 21 -1.619  0.3897 
##  Slope Chalk - City Limestone        -3.52 30.3 21 -0.116  0.9994 
##  City Chalk - Slope Limestone       -13.88 31.4 21 -0.442  0.9705 
##  City Chalk - City Limestone         31.64 31.4 21  1.007  0.7472 
##  Slope Limestone - City Limestone    45.52 31.4 21  1.449  0.4847 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

(obsS_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

	Location	Rock.type	emmean	SE	df	lower.CL	upper.CL	.group
1	Slope	Chalk	289.8684	20.57018	21	233.8552	345.8817	a
4	City	Limestone	293.3852	22.21834	21	232.8840	353.8864	a
2	City	Chalk	325.0227	22.21834	21	264.5215	385.5239	a
3	Slope	Limestone	338.9012	22.21834	21	278.4000	399.4024	a

(mod_obsS %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_obsS_ANOVA)

	Df	Sum Sq	Mean Sq	F value	Pr(>F)	Part Eta Sq
Location	1	67.7481	67.7481	0.0228730	0.8812313	0.0009276
Rock.type	1	647.1708	647.1708	0.2184966	0.6450019	0.0088607
Location:Rock.type	1	10123.0692	10123.0692	3.4177313	0.0786297	0.1385992
Residuals	21	62200.4587	2961.9266	NA	NA	0.8516126

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_obsS, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

# Shannon
(mod_Shannon <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Shannon")))

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares  1.325063  0.093952           0.262865  4.794581
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 0.4778215
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                     Sum Sq Df   F value  Pr(>F)    
## (Intercept)        308.927  1 1353.0847 < 2e-16 ***
## Location             1.393  1    6.1011 0.02216 *  
## Rock.type            0.083  1    0.3616 0.55407    
## Location:Rock.type   0.263  1    1.1513 0.29545    
## Residuals            4.795 21                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##          
## 3.526152 
## 
##  Location 
##     Slope  City
##      3.75  3.29
## rep 13.00 12.00
## 
##  Rock.type 
##     Chalk Limestone
##      3.47      3.59
## rep 13.00     12.00
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 3.60  3.92     
##    rep   7.00  6.00     
##    City  3.33  3.24     
##    rep   6.00  6.00

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares  1.325063  0.093952           0.262865  4.794581
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 0.4778215
## Estimated effects may be unbalanced

# Post-hoc test
marginal <- emmeans(mod_Shannon,
                   ~ Location : Rock.type)
summary(marginal)

Location	Rock.type	emmean	SE	df	lower.CL	upper.CL
Slope	Chalk	3.599318	0.1805996	21	3.223740	3.974895
City	Chalk	3.331707	0.1950698	21	2.926037	3.737377
Slope	Limestone	3.920039	0.1950698	21	3.514369	4.325709
City	Limestone	3.241351	0.1950698	21	2.835681	3.647020

contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")

##  contrast                         estimate    SE df t.ratio p.value
##  Slope Chalk - City Chalk           0.2676 0.266 21  1.007  0.7473 
##  Slope Chalk - Slope Limestone     -0.3207 0.266 21 -1.206  0.6297 
##  Slope Chalk - City Limestone       0.3580 0.266 21  1.347  0.5450 
##  City Chalk - Slope Limestone      -0.5883 0.276 21 -2.133  0.1754 
##  City Chalk - City Limestone        0.0904 0.276 21  0.328  0.9875 
##  Slope Limestone - City Limestone   0.6787 0.276 21  2.460  0.0962 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

(Shannon_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

	Location	Rock.type	emmean	SE	df	lower.CL	upper.CL	.group
4	City	Limestone	3.241351	0.1950698	21	2.710170	3.772532	a
2	City	Chalk	3.331707	0.1950698	21	2.800526	3.862888	a
1	Slope	Chalk	3.599318	0.1805996	21	3.107540	4.091096	a
3	Slope	Limestone	3.920039	0.1950698	21	3.388858	4.451220	a

(mod_Shannon %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_Shannon_ANOVA)

	Df	Sum Sq	Mean Sq	F value	Pr(>F)	Part Eta Sq
Location	1	1.3250630	1.3250630	5.8037028	0.0252549	0.2045968
Rock.type	1	0.0939515	0.0939515	0.4115025	0.5281480	0.0145066
Location:Rock.type	1	0.2628651	0.2628651	1.1513345	0.2954460	0.0405878
Residuals	21	4.7945811	0.2283134	NA	NA	0.7403088

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_Shannon, Location ~ Rock.type)

# ACE
(mod_ACE <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "ACE")))

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares     10.12   1969.32            8755.53  68809.75
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 57.24207
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                     Sum Sq Df   F value Pr(>F)    
## (Intercept)        4281315  1 1306.6116 <2e-16 ***
## Location                 4  1    0.0012 0.9725    
## Rock.type             1671  1    0.5100 0.4830    
## Location:Rock.type    8756  1    2.6721 0.1170    
## Residuals            68810 21                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##          
## 413.6874 
## 
##  Location 
##     Slope City
##       413  414
## rep    13   12
## 
##  Rock.type 
##     Chalk Limestone
##       405       423
## rep    13        12
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 388   442      
##    rep     7     6      
##    City  425   404      
##    rep     6     6

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                 Location Rock.type Location:Rock.type Residuals
## Sum of Squares     10.12   1969.32            8755.53  68809.75
## Deg. of Freedom        1         1                  1        21
## 
## Residual standard error: 57.24207
## Estimated effects may be unbalanced

# Post-hoc test
marginal <- emmeans(mod_ACE,
                   ~ Location : Rock.type)
summary(marginal)

Location	Rock.type	emmean	SE	df	lower.CL	upper.CL
Slope	Chalk	388.1990	21.63547	21	343.2056	433.1924
City	Chalk	424.9111	23.36898	21	376.3126	473.5095
Slope	Limestone	442.0997	23.36898	21	393.5012	490.6981
City	Limestone	403.7881	23.36898	21	355.1896	452.3865

contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")

##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk            -36.7 31.8 21 -1.153  0.6620 
##  Slope Chalk - Slope Limestone       -53.9 31.8 21 -1.693  0.3522 
##  Slope Chalk - City Limestone        -15.6 31.8 21 -0.490  0.9606 
##  City Chalk - Slope Limestone        -17.2 33.0 21 -0.520  0.9533 
##  City Chalk - City Limestone          21.1 33.0 21  0.639  0.9181 
##  Slope Limestone - City Limestone     38.3 33.0 21  1.159  0.6581 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

(ACE_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

	Location	Rock.type	emmean	SE	df	lower.CL	upper.CL	.group
1	Slope	Chalk	388.1990	21.63547	21	329.2849	447.1130	a
4	City	Limestone	403.7881	23.36898	21	340.1536	467.4225	a
2	City	Chalk	424.9111	23.36898	21	361.2766	488.5455	a
3	Slope	Limestone	442.0997	23.36898	21	378.4653	505.7341	a

(mod_ACE %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_ACE_ANOVA)

	Df	Sum Sq	Mean Sq	F value	Pr(>F)	Part Eta Sq
Location	1	10.11758	10.11758	0.0030878	0.9562114	0.0001272
Rock.type	1	1969.32310	1969.32310	0.6010164	0.4468331	0.0247574
Location:Rock.type	1	8755.52902	8755.52902	2.6720940	0.1170242	0.1100705
Residuals	21	68809.74549	3276.65455	NA	NA	0.8650448

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_ACE, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

#Inv. Simpson
(mod_InvSim <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Inv. Simpson")))

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                  Location Rock.type Location:Rock.type Residuals
## Sum of Squares   507.6609  107.4251           117.0279 1059.8590
## Deg. of Freedom         1         1                  1        21
## 
## Residual standard error: 7.104187
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                    Sum Sq Df  F value    Pr(>F)    
## (Intercept)        7346.2  1 145.5569 6.594e-11 ***
## Location            542.8  1  10.7552  0.003577 ** 
## Rock.type            99.1  1   1.9643  0.175658    
## Location:Rock.type  117.0  1   2.3188  0.142737    
## Residuals          1059.9 21                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##          
## 17.20043 
## 
##  Location 
##     Slope City
##      21.5 12.5
## rep  13.0 12.0
## 
##  Rock.type 
##     Chalk Limestone
##      15.2      19.4
## rep  13.0      12.0
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 17.69 26.01    
##    rep    7.00  6.00    
##    City  12.68 12.34    
##    rep    6.00  6.00

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                  Location Rock.type Location:Rock.type Residuals
## Sum of Squares   507.6609  107.4251           117.0279 1059.8590
## Deg. of Freedom         1         1                  1        21
## 
## Residual standard error: 7.104187
## Estimated effects may be unbalanced

# Post-hoc test
marginal <- emmeans(mod_InvSim,
                   ~ Location : Rock.type)
summary(marginal)

Location	Rock.type	emmean	SE	df	lower.CL	upper.CL
Slope	Chalk	17.68601	2.685130	21	12.101979	23.27005
City	Chalk	12.68276	2.900272	21	6.651317	18.71421
Slope	Limestone	26.01445	2.900272	21	19.983005	32.04590
City	Limestone	12.33755	2.900272	21	6.306105	18.36900

contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")

##  contrast                         estimate   SE df t.ratio p.value
##  Slope Chalk - City Chalk            5.003 3.95 21  1.266  0.5937 
##  Slope Chalk - Slope Limestone      -8.328 3.95 21 -2.107  0.1834 
##  Slope Chalk - City Limestone        5.348 3.95 21  1.353  0.5410 
##  City Chalk - Slope Limestone      -13.332 4.10 21 -3.250  0.0185 
##  City Chalk - City Limestone         0.345 4.10 21  0.084  0.9998 
##  Slope Limestone - City Limestone   13.677 4.10 21  3.335  0.0154 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

(InvSim_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

	Location	Rock.type	emmean	SE	df	lower.CL	upper.CL	.group
4	City	Limestone	12.33755	2.900272	21	4.440021	20.23508	a
2	City	Chalk	12.68276	2.900272	21	4.785233	20.58029	a
1	Slope	Chalk	17.68601	2.685130	21	10.374321	24.99771	ab
3	Slope	Limestone	26.01445	2.900272	21	18.116921	33.91198	b

(mod_InvSim %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_InvSim_ANOVA)

	Df	Sum Sq	Mean Sq	F value	Pr(>F)	Part Eta Sq
Location	1	507.6609	507.66088	10.058770	0.0045968	0.2832972
Rock.type	1	107.4251	107.42510	2.128516	0.1593772	0.0599479
Location:Rock.type	1	117.0279	117.02787	2.318785	0.1427372	0.0653067
Residuals	21	1059.8590	50.46948	NA	NA	0.5914481

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_InvSim, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design


#Berger Parker
(mod_BP <- TestAlphaV3(filter(Richness_Diversity_long, Metric == "Berger Parker")))

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                   Location  Rock.type Location:Rock.type  Residuals
## Sum of Squares  0.07180240 0.01029748         0.00019359 0.11203611
## Deg. of Freedom          1          1                  1         21
## 
## Residual standard error: 0.07304145
## Estimated effects may be unbalanced

## [1] "Unequal group sizes - showing SS type III"
## Anova Table (Type III tests)
## 
## Response: Estimate
##                     Sum Sq Df  F value    Pr(>F)    
## (Intercept)        0.95185  1 178.4141 9.851e-12 ***
## Location           0.07398  1  13.8668  0.001255 ** 
## Rock.type          0.01018  1   1.9080  0.181711    
## Location:Rock.type 0.00019  1   0.0363  0.850753    
## Residuals          0.11204 21                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Tables of means
## Grand mean
##           
## 0.1943002 
## 
##  Location 
##      Slope  City
##      0.143  0.25
## rep 13.000 12.00
## 
##  Rock.type 
##      Chalk Limestone
##      0.214     0.173
## rep 13.000    12.000
## 
##  Location:Rock.type 
##         Rock.type
## Location Chalk Limestone
##    Slope 0.16  0.12     
##    rep   7.00  6.00     
##    City  0.27  0.23     
##    rep   6.00  6.00

## Call:
##    aov(formula = as.formula(paste(response_name, paste(factor_names[1], 
##     factor_names[2], sep = " * "), sep = " ~ ")), data = data2test)
## 
## Terms:
##                   Location  Rock.type Location:Rock.type  Residuals
## Sum of Squares  0.07180240 0.01029748         0.00019359 0.11203611
## Deg. of Freedom          1          1                  1         21
## 
## Residual standard error: 0.07304145
## Estimated effects may be unbalanced

# Post-hoc test
marginal <- emmeans(mod_BP,
                   ~ Location : Rock.type)
summary(marginal)

Location	Rock.type	emmean	SE	df	lower.CL	upper.CL
Slope	Chalk	0.1640529	0.0276071	21	0.1066408	0.2214649
City	Chalk	0.2675148	0.0298190	21	0.2055027	0.3295269
Slope	Limestone	0.1180283	0.0298190	21	0.0560162	0.1800404
City	Limestone	0.2326462	0.0298190	21	0.1706341	0.2946583

contrast(marginal, 
         method = "pairwise", 
         adjust = "tukey")

##  contrast                         estimate     SE df t.ratio p.value
##  Slope Chalk - City Chalk          -0.1035 0.0406 21 -2.546  0.0814 
##  Slope Chalk - Slope Limestone      0.0460 0.0406 21  1.133  0.6741 
##  Slope Chalk - City Limestone      -0.0686 0.0406 21 -1.688  0.3545 
##  City Chalk - Slope Limestone       0.1495 0.0422 21  3.545  0.0096 
##  City Chalk - City Limestone        0.0349 0.0422 21  0.827  0.8411 
##  Slope Limestone - City Limestone  -0.1146 0.0422 21 -2.718  0.0577 
## 
## P value adjustment: tukey method for comparing a family of 4 estimates

(BP_pairwise <- cld(marginal,
                      alpha = 0.05,
                      Letters = letters,
                      adjust = "tukey")) # works with lm but not with two-factor ART

	Location	Rock.type	emmean	SE	df	lower.CL	upper.CL	.group
3	Slope	Limestone	0.1180283	0.0298190	21	0.0368302	0.1992265	a
1	Slope	Chalk	0.1640529	0.0276071	21	0.0888780	0.2392278	ab
4	City	Limestone	0.2326462	0.0298190	21	0.1514480	0.3138443	ab
2	City	Chalk	0.2675148	0.0298190	21	0.1863167	0.3487130	b

(mod_BP %>% 
  anova() %>% 
  mutate(`Part Eta Sq`=`Sum Sq`/sum(`Sum Sq`) ) ->
  mod_BP_ANOVA)

	Df	Sum Sq	Mean Sq	F value	Pr(>F)	Part Eta Sq
Location	1	0.0718024	0.0718024	13.4586112	0.0014309	0.3694878
Rock.type	1	0.0102975	0.0102975	1.9301546	0.1792983	0.0529898
Location:Rock.type	1	0.0001936	0.0001936	0.0362871	0.8507532	0.0009962
Residuals	21	0.1120361	0.0053351	NA	NA	0.5765263

# pwpp(marginal) # Pairwise P-value plot. Fails for unbalanced design
emmip(mod_BP, Location ~ Rock.type)

# summary(as.glht(pairs(marginal))) # fails because of unbalanced design

Plot all alpha diversity metrics together

Richness_Diversity_long %>% 
  dplyr::filter(!Metric %in% c("Chao1", "ACE")) %>% 
    mutate_at(., "Metric", ~fct_recode(., "Observed S" = "S obs.", "Inv. Simpson" = "Inv. Simpson", "Berger Parker" = "Berger Parker")) %>% 
  mutate_at(., "Metric", ~fct_relevel(., "Observed S", "Inv. Simpson", "Shannon", "Berger Parker")) %>% 
  droplevels() ->
  Richness_Diversity_long2plot

p_alpha <- ggplot() +
  geom_violin(data = Richness_Diversity_long2plot,
             aes(
               x = Location,
               y = Estimate,
               ymin = lerr,
               ymax = herr
             ), colour = "grey",
              fill = "grey",
              alpha = 1 / 3) +
  geom_jitter2(data = Richness_Diversity_long2plot,
               aes(x = Location,
               y = Estimate,
               ymin = lerr,
               ymax = herr,
               colour = Location), size = 3, width = 0.2, alpha = 2/3) +
  scale_colour_manual(values = Gradient.colours[c(5, 6, 11)], name = "") +
  # geom_errorbar(alpha = 1 / 2, width = 0.3) +
  xlab("") +
  ylab("") +
  theme(axis.text.x = element_text(
    angle = 45,
    vjust = 0.9,
    hjust = 1
  ),
  legend.position="none") +
  facet_grid(Metric ~ Rock.type, scale = "free") +
  theme(strip.text = element_text(size = f_size - 4)) +
  background_grid(major = "y",
                  minor = "none",
                  size.major = 0.8) 

dat_text <- data.frame(
  label = str_remove_all(c(obsS_pairwise$.group[1:4], 
                           Shannon_pairwise$.group[1:4], 
                           InvSim_pairwise$.group[1:4],
                           BP_pairwise$.group[1:4]), 
                         pattern = " "),
  Metric = fct_inorder(rep(levels(Richness_Diversity_long2plot$Metric), each = 4)),
  Rock.type = fct_c(obsS_pairwise$Rock.type[1:4], 
                           Shannon_pairwise$Rock.type[1:4], 
                           InvSim_pairwise$Rock.type[1:4],
                           BP_pairwise$Rock.type[1:4]), 
  x = fct_c(obsS_pairwise$Location[1:4], 
                           Shannon_pairwise$Location[1:4], 
                           InvSim_pairwise$Location[1:4],
                           BP_pairwise$Location[1:4]),
  # x     = as.factor(levels(Richness_Diversity_long2plot$Climate.Source)),
  y = rep(c(520, 45, 5.2, 0.52), each = 4)
  # y = rep(c(40, 140, 0.5), each = 6)
)
p_alpha <- p_alpha + geom_text(
  data = dat_text,
  mapping = aes(x = x, y = y, label = label),
  nudge_x = 0,
  nudge_y = 0
)
print(p_alpha)

Beta diversity

Calculate and plot beta diversity metrics.

(mod1 <-  adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))

## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location * Rock.type,      data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"), permutations = 9999,      method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location            1      0.63   0.629    2.12 0.081  0.024 *
## Rock.type           1      0.39   0.386    1.30 0.050  0.231  
## Location:Rock.type  1      0.49   0.486    1.63 0.063  0.101  
## Residuals          21      6.25   0.297         0.806         
## Total              24      7.75                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(mod2 <- adonis(
  otu_table(Ps_obj_filt_GMPR) ~ Location,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))

## 
## Call:
## adonis(formula = otu_table(Ps_obj_filt_GMPR) ~ Location, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location   1      0.63   0.629    2.03 0.081  0.035 *
## Residuals 23      7.12   0.309         0.919         
## Total     24      7.75                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

mod1_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Location,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod1_pairwise)

##          pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1 City - Slope       24    2.03 0.0812   0.038      0.038   .

(sig_pairs1 <- list(as.character(mod1_pairwise$pairs[mod1_pairwise$p.adjusted < 0.05])))

## [[1]]
## [1] "City - Slope"

mod2_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Rock.type,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod2_pairwise)

##               pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1 Chalk - Limestone       24    1.21 0.0501   0.281      0.281

(sig_pairs2 <- list(as.character(mod2_pairwise$pairs[mod2_pairwise$p.adjusted < 0.05])))

## [[1]]
## character(0)

mod3_pairwise <- PairwiseAdonis(
  otu_table(Ps_obj_filt_GMPR),
  sample_data(Ps_obj_filt_GMPR)$Location.rock,
  sim.function = "vegdist",
  sim.method = "horn",
  p.adjust.m = "BH"
)
print(mod3_pairwise)

##                              pairs total.DF F.Model     R2 p.value p.adjusted sig
## 1      City:Chalk - City:Limestone       11    1.20 0.1069   0.329      0.395    
## 2     City:Chalk - Slope:Limestone       11    1.43 0.1251   0.182      0.273    
## 3         City:Chalk - Slope:Chalk       12    2.92 0.2099   0.012      0.072    
## 4 City:Limestone - Slope:Limestone       11    1.07 0.0967   0.424      0.424    
## 5     City:Limestone - Slope:Chalk       12    1.99 0.1531   0.089      0.178    
## 6    Slope:Limestone - Slope:Chalk       12    1.90 0.1471   0.074      0.178

(sig_pairs3 <- list(as.character(mod3_pairwise$pairs[mod3_pairwise$p.adjusted < 0.1])))

## [[1]]
## [1] "City:Chalk - Slope:Chalk"

Unifrac_mat <- UniFrac(Ps_obj_filt, 
                       weighted = TRUE, 
                       normalized = TRUE, 
                       parallel = TRUE, 
                       fast = TRUE)

(mod1 <-  adonis(
  Unifrac_mat ~ Location * Rock.type,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))

## 
## Call:
## adonis(formula = Unifrac_mat ~ Location * Rock.type, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##                    Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)  
## Location            1     0.044  0.0443    1.24 0.049  0.268  
## Rock.type           1     0.050  0.0496    1.39 0.055  0.193  
## Location:Rock.type  1     0.060  0.0601    1.68 0.066  0.097 .
## Residuals          21     0.751  0.0357         0.830         
## Total              24     0.905                 1.000         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(mod2 <- adonis(
  Unifrac_mat ~ Location,
  data = as(sample_data(Ps_obj_filt_GMPR), "data.frame"),
  method = "horn",
  permutations = 9999
))

## 
## Call:
## adonis(formula = Unifrac_mat ~ Location, data = as(sample_data(Ps_obj_filt_GMPR),      "data.frame"), permutations = 9999, method = "horn") 
## 
## Permutation: free
## Number of permutations: 9999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model    R2 Pr(>F)
## Location   1     0.044  0.0443    1.19 0.049   0.28
## Residuals 23     0.860  0.0374         0.951       
## Total     24     0.905                 1.000

The difference between city and slope is significant based on the Morisita-Horn distances between OTUs but not based on UniFrac.

Calculate ordinations

Ps_obj_ord1 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~  Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt_GMPR, "CAP", "horn", formula = Ps_obj_filt_GMPR ~  Location)

explained <- eigenvals(Ps_obj_ord2)/sum( eigenvals(Ps_obj_ord2)) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))

Ps_obj_filt_GMPR %>% 
  plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) -> 
  ord_df

p_ord <- ggplot(ord_df,
             aes(
               x = CAP1,
               y = MDS1,
               shape = Rock.type,
               color = Location
             )) +
  stat_ellipse(
    aes(x = CAP1, 
        y = MDS1, 
        fill = Location
        ),
    geom = "polygon",
    alpha = 1/4,
    type = "t",
    level = 0.9,
    # linetype = 2,
    inherit.aes = FALSE
  ) +
  geom_point2(size = 4, alpha = 2 / 3) +
  guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
  scale_colour_manual(values = Gradient.colours) +
  scale_fill_manual(values = Gradient.colours, guide = "none") +
  labs(x = sprintf("CAP1 (%s%%)", explained[1]), 
       y = sprintf("CAP2 (%s%%)", explained[2])) +
  coord_fixed(ratio = sqrt(explained[2] / explained[1])) +
   theme(legend.justification = "top")
  # facet_wrap(. ~ Rock.type)
print(p_ord)

Ps_obj_ord1 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location * Rock.type)
Ps_obj_ord2 <- ordinate(Ps_obj_filt, "PCoA", "Unifrac", formula = Ps_obj_filt ~ Location)

explained <- Ps_obj_ord2$values$Relative_eig/sum(Ps_obj_ord2$values$Relative_eig) * 100
explained <- as.numeric(format(round(explained, 1), nsmall = 1))

Ps_obj_filt %>% 
  plot_ordination(., Ps_obj_ord2, type = "samples", shape = "Rock.type", color = "Location", justDF = TRUE) -> 
  ord_df

p_ord_phylo <- ggplot(ord_df,
             aes(
               x = Axis.1,
               y = Axis.2,
               shape = Rock.type,
               color = Location
             )) +
  stat_ellipse(
    aes(x = Axis.1, 
        y = Axis.2, 
        fill = Location
        ),
    geom = "polygon",
    alpha = 1/4,
    type = "t",
    level = 0.9,
    # linetype = 2,
    inherit.aes = FALSE
  ) +
  geom_point2(size = 4, alpha = 2 / 3) +
  # theme_bw(base_size = f_size) +
  guides(colour = guide_legend(title = "Location"), shape = guide_legend(title = "Rock.type")) +
  scale_colour_manual(values = Gradient.colours) +
  scale_fill_manual(values = Gradient.colours, guide = "none") +
  labs(x = sprintf("CAP1 (%s%%)", explained[1]), 
       y = sprintf("CAP2 (%s%%)", explained[2])) +
  coord_fixed(ratio = sqrt(explained[2] / explained[1])) #+ 
  # facet_wrap(. ~ Rock.type)
print(p_ord_phylo)

Test differences between samples on the phylum level

STAMPR analysis of the differences of each phylum between locations using Aligned Rank Transformed ANOVA test and a post-hoc estimated marginal means.

Taxa_tests_phylum1 <- STAMPR2(Ps_obj_filt, vars2test = "Location", threshold = 0.05, outputfile = paste0(Proj_name, "_Location"))

pSTAMPR1 <- plotSTAMPR(Taxa_tests_phylum1, pair = "Slope - City", f_size = f_size)
print(pSTAMPR1)

Taxa_tests_phylum2 <- STAMPR2(Ps_obj_filt, vars2test = c("Location", "Rock.type"), threshold = 0.05, outputfile = paste0(Proj_name, "_Location_Rock"))

pSTAMPR2 <- plotSTAMPR(Taxa_tests_phylum2, pair = "Slope:Chalk - City:Chalk", f_size = f_size)
print(pSTAMPR2)

Taxonmic distribution analysis

Agglomerate data and tag rare taxa

Ps_obj_filt_GMPR %>% 
  transform_sample_counts(., function(x){x / sum(x)} * 100) %>% 
  tax_glom(., 
           "Phylum",
           NArm = TRUE) ->
  Ps_obj_filt_GMPR_glom

Ps_obj_filt_GMPR_glom_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom)
Ps_obj_filt_GMPR_glom_DF$Phylum %<>% as.character()
# Ps_obj_filt3_glom_DF %<>% mutate(Species = fct_relevel(Species, "NA", after = Inf))

# group dataframe by Phylum, calculate sum rel. abundance
Ps_obj_filt_GMPR_glom_DF %>%
  group_by(Phylum) %>%
  summarise(Sum = sum(Abundance) / nsamples(Ps_obj_filt_GMPR_glom) ) ->
  Sums

# find Phyla whose rel. abund. is less than 5%
Rare_phyla0.05 <- Sums[Sums$Sum <= 0.05, ]$Phylum

# change their name to "Rare"
Ps_obj_filt_GMPR_glom_DF[Ps_obj_filt_GMPR_glom_DF$Phylum %in% Rare_phyla0.05, ]$Phylum <- 'Rare'
# re-group
Ps_obj_filt_GMPR_glom_DF %>%
  group_by(Sample, Phylum, Location, Rock.type, Location.rock) %>%
  summarise(Abundance = sum(Abundance)) ->
  Ps_obj_filt_GMPR_glom_DF_2plot

# ab.taxonomy$Freq <- sqrt(ab.taxonomy$Freq)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("unclassified", "Unclassified", .)
# Ps_obj_filt3_glom_rel_DF$Phylum %<>% sub("uncultured", "Unclassified", .)

Ps_obj_filt_GMPR_glom_DF_2plot %>% 
  group_by(Sample) %>% 
  filter(Phylum == "Rare") %>% 
  summarise(`Rares (%)` = sum(Abundance)) -> 
  Rares

Summarise taxonomy

# Percentage of reads classified as rare 
Rares %>%
  kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)

Percentage of reads per sample classified as rare:

Sample	Rares (%)
ShivSClk1S12	0.01
ShivSClk2S15	0.59
ShivSClk3S16	0.02
ShivSClk4S17	0.01
ShivSClk5S18	0.00
ShivSLimek1S13	0.02
ShivSLimek2S14	0.04
ShivSLimek3S19	0.11
ShivSLimek4S20	0.24
ShivSLimek5S21	0.01
ShivSLimek6S25	0.01
SlpClk1S3	0.24
SlpClk2S4	0.03
SlpClk3S5	0.03
SlpClk4S6	0.01
SlpClk5S7	0.01
SlpClk6S8	0.03
SlpClk7S9	0.01
SlpClk8S10	0.02
SlpClk9S11	0.03
SlpLime1S1	0.01
SlpLime2S2	0.02
SlpLime3S22	0.07
SlpLime4S23	0.01
SlpLime5S24	0.02

sample_order <- match(Rares$Sample, row.names(sample_data(Ps_obj_filt_GMPR_glom)))
Rares %<>% arrange(., sample_order)

Rares %>% 
  cbind(., sample_data(Ps_obj_filt_GMPR_glom)) %>% 
  group_by(Location.rock) %>% 
  setNames(make.names(names(.), unique = TRUE)) %>% # fails for some reason without it
  summarise(`Rares (%)` = mean(`Rares....`)) -> 
  Rares_merged

# Percentage of reads classified as rare 
Rares_merged %>%
  kable(., digits = c(2), caption = "Percentage of reads per sample classified as rare:") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = F)

Percentage of reads per sample classified as rare:

Location.rock	Rares (%)
City:Chalk	0.11
City:Limestone	0.07
Slope:Chalk	0.02
Slope:Limestone	0.06

Plot taxonomy box-plot

Ps_obj_filt_GMPR_glom_DF_2plot %>% 
  group_by(Phylum) %>% 
  summarise(sum.Taxa = sum(Abundance)) %>% 
  arrange(desc(sum.Taxa)) -> Taxa_rank
Ps_obj_filt_GMPR_glom_DF_2plot$Phylum %<>% 
  factor(., levels = Taxa_rank$Phylum) %>% 
  fct_relevel(., "Rare", after = Inf)
  
p_taxa_box <-
  ggplot(Ps_obj_filt_GMPR_glom_DF_2plot, aes(x = Phylum, y = (Abundance))) +
  geom_boxplot(aes(group = interaction(Phylum, Location)), position = position_dodge(width = 0.9), fatten = 1) +
  geom_point2(
    aes(colour = Rock.type),
    position = position_jitterdodge(dodge.width = 1),
    alpha = 1 / 2,
    stroke = 0,
    size = 2
  ) +
  scale_colour_manual(values = Gradient.colours, name = "") +
  # theme_bw()+
  labs(x = NULL, y = "Relative abundance (%)") +
  guides(colour = guide_legend(override.aes = list(size = 5))) +
  facet_grid(Location ~ .) +
  background_grid(major = "xy",
                  minor = "none") +
  theme(axis.text.x = element_text(
    angle = 45,
    vjust = 0.9,
    hjust = 0.9
  ),
  legend.position = c(.99, .99),
  legend.justification = c("right", "top"),
  legend.box.just = "top",
  legend.margin = margin(0, 3, 3, 3))
print(p_taxa_box)

Differential abundance models

Tag rare phyla (for plotting purposes only)

Ps_obj_filt_GMPR_glom <- tax_glom(Ps_obj_filt_GMPR, 
                             "Phylum", 
                             NArm = TRUE) # glomerate to the phylum level
Ps_obj_filt_GMPR_glom_rel <- transform_sample_counts(Ps_obj_filt_GMPR_glom, function(x) x / sum(x)) # transform to rel. ab.
Ps_obj_filt_GMPR_glom_rel_DF <- speedyseq::psmelt(Ps_obj_filt_GMPR_glom_rel) # generate a df
Ps_obj_filt_GMPR_glom_rel_DF$Phylum %<>% as.character() # factor to char

# group dataframe by Phylum, calculate sum rel. abundance
Ps_obj_filt_GMPR_glom_rel_DF %>%
  group_by(Phylum) %>%
  summarise(Sum = sum(Abundance) / nsamples(Ps_obj_filt_GMPR_glom) ) ->
  Sums

# find Phyla whose mean rel. abund. is less than 0.5%
Rare_phyla0.05 <- Sums[Sums$Sum <= 0.05, ]$Phylum

# change their name to "Rare"
Ps_obj_filt_GMPR_glom_rel_DF[Ps_obj_filt_GMPR_glom_rel_DF$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' 

# re-group
Ps_obj_filt_GMPR_glom_rel_DF %>%
  group_by(Phylum) %>%
  summarise(Abundance = sum(Abundance)) %>% 
  arrange(desc(Abundance)) -> Taxa_rank

Detect differentially abundant OTUs using corncob (Martin, Witten, and Willis 2020)

comparison_string <- c("City", "Slope")

Ps_obj_filt %>%
  subset_samples(Location %in% c(comparison_string[1], comparison_string[2])) %>%
  tax_glom("Order") ->
  Ps_obj_filt_pairwise_glom

# Test differential abundance for location
da_Loc <- differentialTest(formula = ~ Location,
                           phi.formula = ~ Location,
                           formula_null = ~ 1,
                           phi.formula_null = ~ Location, 
                           test = "Wald", boot = FALSE,
                           data = Ps_obj_filt,
                           fdr_cutoff = 0.05,
                           full_output = TRUE)
da_Loc_intervals <- plot(da_Loc, level = "Class", data_only = T)
which(is.na(da_Loc$p)) %>% names

## [1] "OTU398" "OTU164" "OTU476" "OTU641" "OTU239" "OTU481" "OTU727" "OTU245"

Ps_obj_filt %>%
  transform_sample_counts(., function(x) x / sum(x) * 100) %>% 
  taxa_sums(.) %>% 
  map_dbl(~(.x / nsamples(Ps_obj_filt))) %>% 
  enframe(name = "OTU", value = "Mean abundance (%)") -> 
  baseMean

map(da_Loc$all_models,15) %>% 
  map(.,2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Loc_df

da_Loc_df %<>% rows_update(., tibble(ymin = da_Loc_intervals$xmin, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df %<>% rows_update(., tibble(ymax = da_Loc_intervals$xmax, OTU = da_Loc$significant_taxa), by = "OTU")
da_Loc_df[da_Loc_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is

p_corncob_loc <- GGPlotCorncob(da_Loc_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_df$`Differential abundance` > 0 &  da_Loc_df$Significance == "Pass"), " ⬇", sum(da_Loc_df$`Differential abundance` < 0 &  da_Loc_df$Significance == "Pass"), " (", nrow(da_Loc_df), ")")))

p_corncob_loc <- p_corncob_loc +
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-10, 10))
print(p_corncob_loc)

write.csv(da_Loc_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], ".csv"))

Modelling differential abundance and variance between locations discovered length(da_Loc$significant_taxa)

comparison_string <- c("Limestone", "Chalk")
# Test differential abundance and variance for rock type
da_Rock <- differentialTest(formula = ~ Rock.type,
                                 phi.formula = ~ Rock.type,
                                 formula_null = ~ 1,
                                 phi.formula_null = ~ Rock.type, 
                                 test = "Wald", boot = FALSE,
                                 data = Ps_obj_filt,
                                 fdr_cutoff = 0.05,
                                full_output = TRUE)
da_Rock_intervals <- plot(da_Rock, level = "Class", data_only = TRUE)
which(is.na(da_Rock$p)) %>% names

##  [1] "OTU780" "OTU164" "OTU580" "OTU679" "OTU696" "OTU712" "OTU519" "OTU503" "OTU542"
## [10] "OTU245" "OTU229"

map(da_Rock$all_models,15) %>% 
  map(.,2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Rock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Rock_df

da_Rock_df %<>% rows_update(., tibble(ymin = da_Rock_intervals$xmin, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df %<>% rows_update(., tibble(ymax = da_Rock_intervals$xmax, OTU = da_Rock$significant_taxa), by = "OTU")
da_Rock_df[da_Rock_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is

p_corncob_rock <- GGPlotCorncob(da_Rock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Rock_df$`Differential abundance` > 0 &  da_Rock_df$Significance == "Pass"), " ⬇", sum(da_Rock_df$`Differential abundance` < 0 &  da_Rock_df$Significance == "Pass"), " (", nrow(da_Rock_df), ")")))

p_corncob_rock <- p_corncob_rock +
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-10, 10))
print(p_corncob_rock)

write.csv(da_Rock_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], ".csv"))

Modelling differential abundance and variance between rock types discovered length(da_Rock$significant_taxa)

# Test differential abundance for location, control for Rock.type for both cases
comparison_string <- c("City", "Slope")
da_Loc_exRock <- differentialTest(formula = ~ Location + Rock.type,
                                 phi.formula = ~ Location + Rock.type,
                                 formula_null = ~ Rock.type,
                                 phi.formula_null = ~ Location + Rock.type, 
                                 test = "Wald", boot = FALSE,
                                 data = Ps_obj_filt,
                                 fdr_cutoff = 0.05,
                                full_output = TRUE)
da_Loc_exRock_intervals <- plot(da_Loc_exRock, level = "Class", data_only = TRUE)

which(is.na(da_Loc_exRock$p)) %>% names

##  [1] "OTU780" "OTU398" "OTU164" "OTU476" "OTU580" "OTU679" "OTU696" "OTU712" "OTU641"
## [10] "OTU519" "OTU239" "OTU481" "OTU503" "OTU542" "OTU727" "OTU245" "OTU229"

map(da_Loc_exRock$all_models, 15) %>% 
  map(., 2) %>% 
  unlist %>%  # grab all mu.LocationSlope Estimates (differences in estimated population relative abundance)
  bind_cols(OTU = taxa_names(Ps_obj_filt), 
            tax_table(Ps_obj_filt), 
            `Differential abundance` = .,
            Significance = fct_recode(as_factor(taxa_names(Ps_obj_filt) %in% da_Loc_exRock$significant_taxa), Pass = "TRUE", Fail = "FALSE"),
            ymin = as.numeric(NA),
            ymax = as.numeric(NA)
            ) %>%
  left_join(., baseMean, by = "OTU") ->
  da_Loc_exRock_df

da_Loc_exRock_df %<>% rows_update(., tibble(ymin = da_Loc_exRock_intervals$xmin, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df %<>% rows_update(., tibble(ymax = da_Loc_exRock_intervals$xmax, OTU = da_Loc_exRock$significant_taxa), by = "OTU")
da_Loc_exRock_df[da_Loc_exRock_df$Phylum %in% Rare_phyla0.05, "Phylum"] <- 'Rare' # rare_phyla is

p_corncob_locExroc <- GGPlotCorncob(da_Loc_exRock_df, OTU_labels = FALSE, Taxa = "Phylum", Y_val = "Differential abundance", sig_level = 0.05, Rank = Taxa_rank)

corncob_summary <- tibble(Label = c(paste0("⬆", sum(da_Loc_exRock_df$`Differential abundance` > 0 &  da_Loc_exRock_df$Significance == "Pass"), " ⬇", sum(da_Loc_exRock_df$`Differential abundance` < 0 &  da_Loc_exRock_df$Significance == "Pass"), " (", nrow(da_Loc_exRock_df), ")")))

p_corncob_locExroc <- p_corncob_locExroc +
  labs(title = paste(comparison_string, collapse = " - ")) +
  coord_cartesian(ylim = c(-10, 10))
print(p_corncob_locExroc)

write.csv(da_Loc_exRock_df, file = paste0("./Results/corncob_", comparison_string[1], "_vs_", comparison_string[2], "_ExRockType.csv"))

Modelling differential abundance between locations, while controlling for rock type discovered length(da_Loc_exRock$significant_taxa)

mod260 <- bbdml(formula = OTU260 ~ 1,
             phi.formula = ~ 1,
             data = Ps_obj_filt)
mod260_Loc <- bbdml(formula = OTU260 ~ Location,
             phi.formula = ~ Location,
             data = Ps_obj_filt)
mod260_Loc_rock <- bbdml(formula = OTU97 ~ Location*Rock.type,
             phi.formula = ~ Location*Rock.type,
             data = Ps_obj_filt)
lrtest(mod_null = mod260, mod = mod260_Loc)

## [1] 0.00401

# lrtest(mod_null = mod260_Loc, mod = mod260_Loc_rock)
summary(mod260_Loc)

## 
## Call:
## bbdml(formula = OTU260 ~ Location, phi.formula = ~Location, data = Ps_obj_filt)
## 
## 
## Coefficients associated with abundance:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.544      0.241  -35.51   <2e-16 ***
## Location1      0.658      0.241    2.73    0.012 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Coefficients associated with dispersion:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -8.486      0.406   -20.9  1.5e-15 ***
## Location1      1.379      0.406     3.4   0.0027 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Log-likelihood: -80.86

plot(mod260_Loc, color = "Location", shape = "Rock.type") # add total = TRUE for total counts (i.e. not relative abundance)

Compose figures

# composite_plot <- ((p_alpha + p_taxa_box +  plot_layout(widths = c(1, 2))) /(p_ord + pSTAMPR1) + plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size)))
composite_plot <- (p_alpha + p_ord) /(p_taxa_box) / (pSTAMPR1) + 
  plot_layout(heights = c(1.5, 1, 1)) +
  plot_annotation(tag_levels = 'A') & theme(plot.tag = element_text(size = f_size)) 

plot_file <- "./Results/Microbiome_1"
svglite(paste0(plot_file, ".svg"), 
        width = 10, 
        height = 11)
print(composite_plot)
invisible(dev.off())

agg_png(paste0(plot_file, ".png"),
        width = 10, 
        height = 11, 
        units = 'cm',
        res = 900, 
        scaling = 0.38)
print(composite_plot)
invisible(invisible(dev.off()))

gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))
knitr::include_graphics(paste0(plot_file, ".png"))

plot_file <- "./Results/Microbiome_2"
svglite(paste0(plot_file, ".svg"), 
        width = 12, 
        height = 10)
print(p_corncob_locExroc)
invisible(dev.off())

agg_png(paste0(plot_file, ".png"),
        width = 12, 
        height = 10.5, 
        units = 'cm',
        res = 900, 
        scaling = 0.38)
print(p_corncob_locExroc)
invisible(invisible(dev.off()))
gz(paste0(plot_file, ".svg"), paste0(plot_file, ".svgz"))

References

Chen, Jun, and Li Chen. 2017. “GMPR: A novel normalization method for microbiome sequencing data.” bioRxiv, February, 112565. https://doi.org/10.1101/112565.

Martin, Bryan D., Daniela Witten, and Amy D. Willis. 2020. “Modeling Microbial Abundances and Dysbiosis with Beta-Binomial Regression.” Annals of Applied Statistics 14 (1): 94–115. https://doi.org/10.1214/19-AOAS1283.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

BRC_growth_rate_microbiome.md

BRC_growth_rate_microbiome.md

How fast does biological rock crust grow?

Setting general parameters:

Description

Load data

Inspect library size and number of OTU

Explore the prevalence of different taxa in the database

Plot general prevalence features of the phyla

Plot general prevalence features of the top 20 orders

Unsupervised filtering by prevalence

Exploring the dataset features

Alpha diversity

Calculate and plot alpha diversity metrics

Test the differences in alpha diversity.

Plot all alpha diversity metrics together

Beta diversity

Calculate ordinations

Test differences between samples on the phylum level

Taxonmic distribution analysis

Differential abundance models

Compose figures

References

Files

BRC_growth_rate_microbiome.md

Latest commit

History

BRC_growth_rate_microbiome.md

File metadata and controls

How fast does biological rock crust grow?

Setting general parameters:

Description

Load data

Inspect library size and number of OTU

Explore the prevalence of different taxa in the database

Plot general prevalence features of the phyla

Plot general prevalence features of the top 20 orders

Unsupervised filtering by prevalence

Exploring the dataset features

Alpha diversity

Calculate and plot alpha diversity metrics

Test the differences in alpha diversity.

Plot all alpha diversity metrics together

Beta diversity

Calculate ordinations

Test differences between samples on the phylum level

Taxonmic distribution analysis

Differential abundance models

Compose figures

References