In [1]:
library(tidyverse)
library(phyloseq)
library(ape)
library(microbiome)

── [1mAttaching packages[22m ─────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.3     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.6     [32m✔[39m [34mdplyr  [39m 1.0.4
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


microbiome R package (microbiome.github.com)
    


 Copyright (C) 2011-2020 Leo Lahti, 
    Sudarshan Shetty et al. <microbiome.github.io>



Attaching package: ‘microbiome’


The following object is masked from ‘package:ggplot2’:

    alpha


The following object is masked from ‘p

In [2]:
otu <- read.delim("./data/otu.capitalist.98.tsv", sep = "\t", header = TRUE)

In [3]:
tax_map <- read.delim('./data/r95.gtdb.tax', sep="\t", header=F)

In [4]:
colnames(tax_map) <- c('X.OTU.ID', 'tax')

In [5]:
otu <- tax_map %>% inner_join(otu, by = 'X.OTU.ID')

rownames(otu)<- otu$X.OTU.ID

In [6]:
taxa <- otu %>% 
  select(tax) %>% 
  separate(tax, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"),
           ";")  

In [7]:
#the output is a data frame of characters, and we need taxa to be recognized as factors
taxa<- taxa %>% 
  mutate_if(is.character, as.factor)

# taxa<- cbind(otu$X.OTU.ID, taxa)

# colnames(taxa)[1] <- "X.OTU.ID"

otu<- otu %>% 
  select(-X.OTU.ID, -tax)

# rownames(taxa) <- taxa$X.OTU.ID

# taxa <- taxa %>% 
#   select(-X.OTU.ID)

In [8]:
meta <- read.delim("./data/metadata.csv", sep = ",", header = TRUE)

rownames(meta) <- meta$sample_id

#and delete the first column because it is now redundant
meta<- meta %>% 
        select(-sample_id)

ids <- intersect(rownames(meta), colnames(otu))

In [9]:
out <- 1:length(colnames(otu))
for (i in 1:length(colnames(otu))) {
    name <- colnames(otu)[i]
    split <- unlist(strsplit(name, "\\."))
    if ( str_starts(split[1], "pos")) {
        out[i] <- toupper(paste0(c(split[1], split[3]), collapse="_"))
    } else {
        out[i] <- toupper(paste0(split[1:2], collapse="_"))
    }    
}
colnames(otu) <- out

In [10]:
meta <- read.delim("./data/metadata.csv", sep = ",", header = TRUE)

rownames(meta) <- toupper(meta$sample_id)

#and delete the first column because it is now redundant
meta<- meta %>% 
        select(-sample_id)

ids <- intersect(rownames(meta), colnames(otu))

In [11]:
# otu <- otu %>% rownames_to_column('rn') %>% filter(rn %in% ids)
# rownames(otu) <- otu$rnb
# otu <- otu %>% select(-rn)

In [12]:
ids

In [13]:
otu <- otu %>% select(ids)

Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(ids)` instead of `ids` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m



In [14]:
meta <- meta %>% rownames_to_column('rn') %>% filter(rn %in% ids)
rownames(meta) <- meta$rn
meta <- meta %>% select(-rn)

In [None]:
# taxa <- taxa %>% rownames_to_column('rn') %>% filter(rn %in% ids)
# rownames(taxa) <- taxa$rn
# taxa <- taxa %>% select(-rn)

In [15]:
otu_mat<- as.matrix(otu)
tax_mat<- as.matrix(taxa)

#transform data to phyloseq objects
phylo_OTU<- otu_table(otu_mat, taxa_are_rows = TRUE)
phylo_TAX<- tax_table(tax_mat)
phylo_samples<- sample_data(meta)

tree <- read.tree("data/r95.gtdb.tree")
#and put them in one object
phylo_object<- phyloseq(phylo_OTU, phylo_TAX, phylo_samples, tree)

In [16]:
phylo_object

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 7513 taxa and 34 samples ]
sample_data() Sample Data:       [ 34 samples by 75 sample variables ]
tax_table()   Taxonomy Table:    [ 7513 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 7513 tips and 7512 internal nodes ]

In [17]:
sample_sums(phylo_object)

In [18]:
sample_names(phylo_object)      

In [19]:
alpha(phylo_object)

Observed richness

Other forms of richness

Diversity

Evenness

“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer 

Unnamed: 0_level_0,observed,chao1,diversity_inverse_simpson,diversity_gini_simpson,diversity_shannon,diversity_fisher,diversity_coverage,evenness_camargo,evenness_pielou,evenness_simpson,⋯,dominance_dbp,dominance_dmn,dominance_absolute,dominance_relative,dominance_simpson,dominance_core_abundance,dominance_gini,rarity_log_modulo_skewness,rarity_low_abundance,rarity_rare_abundance
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
MUNA_98,2817,3284.04,15.673254,0.936197,3.866205,267.5548,7,,0.4867175,0.0055638103,⋯,0.20157774,0.28356309,2015545,0.20157774,0.06380296,0.089601031,0.9912279,2.06142,0.14062151,0.354027
BODA_52,3258,3390.036,61.755086,0.983807,5.053914,304.7165,24,,0.6247986,0.0189549068,⋯,0.05063426,0.09977732,678838,0.05063426,0.016193,0.073948805,0.9778273,2.061419,0.22344412,0.7680897
BUMA_05,3399,3640.661,17.914212,0.9441784,4.189017,313.848,8,,0.5151758,0.0052704359,⋯,0.17042311,0.28047041,2701995,0.17042311,0.0558216,0.39305066,0.9859729,2.061422,0.17179161,0.3191869
DEFI_14,3230,3536.57,8.719329,0.8853123,3.339032,293.5231,4,,0.4132343,0.0026994828,⋯,0.28106352,0.41751383,4960456,0.28106352,0.11468772,0.111848804,0.9922809,2.061421,0.1161327,0.2658082
ROAL_75,2417,2724.347,15.709258,0.9363433,3.632457,232.2653,5,,0.4662805,0.0064994861,⋯,0.14300762,0.28142621,1098214,0.14300762,0.06365673,0.092568712,0.9934851,2.06142,0.10694026,0.1766503
CHMA_61,2488,2770.488,15.462326,0.9353267,3.796813,219.78,6,,0.4855735,0.0062147613,⋯,0.19253567,0.29465517,3490484,0.19253567,0.06467332,0.374385495,0.9924707,2.061419,0.11616703,0.2404192
RODE_80,3026,3233.792,14.257733,0.9298626,3.797458,284.697,6,,0.4737941,0.0047117426,⋯,0.19042401,0.32002083,2239453,0.19042401,0.07013738,0.356697207,0.990555,2.06142,0.12519653,0.3011653
LAES_91,3432,3730.414,26.514556,0.9622849,4.440524,330.245,10,,0.5454587,0.0077256866,⋯,0.0957026,0.18910812,1030526,0.0957026,0.03771513,0.170191059,0.983281,2.061422,0.20045767,0.4487129
ASTER_02,2654,2959.511,4.673362,0.7860213,3.038853,238.9765,2,,0.3854542,0.0017608747,⋯,0.44969836,0.52435866,7151741,0.44969836,0.21397874,0.547855624,0.9937142,2.06142,0.11458115,0.2270447
DOEC_81,3391,3551.046,37.681441,0.9734617,4.731816,314.3283,16,,0.5820993,0.0111121915,⋯,0.10238205,0.16105144,1558846,0.10238205,0.02653826,0.268223654,0.9815149,2.06142,0.19952062,0.4595187


In [20]:
microbiome::alpha(phylo_object)

Observed richness

Other forms of richness

Diversity

Evenness

“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer overflow”
“NAs produced by integer 

Unnamed: 0_level_0,observed,chao1,diversity_inverse_simpson,diversity_gini_simpson,diversity_shannon,diversity_fisher,diversity_coverage,evenness_camargo,evenness_pielou,evenness_simpson,⋯,dominance_dbp,dominance_dmn,dominance_absolute,dominance_relative,dominance_simpson,dominance_core_abundance,dominance_gini,rarity_log_modulo_skewness,rarity_low_abundance,rarity_rare_abundance
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
MUNA_98,2817,3284.04,15.673254,0.936197,3.866205,267.5548,7,,0.4867175,0.0055638103,⋯,0.20157774,0.28356309,2015545,0.20157774,0.06380296,0.089601031,0.9912279,2.06142,0.14062151,0.354027
BODA_52,3258,3390.036,61.755086,0.983807,5.053914,304.7165,24,,0.6247986,0.0189549068,⋯,0.05063426,0.09977732,678838,0.05063426,0.016193,0.073948805,0.9778273,2.061419,0.22344412,0.7680897
BUMA_05,3399,3640.661,17.914212,0.9441784,4.189017,313.848,8,,0.5151758,0.0052704359,⋯,0.17042311,0.28047041,2701995,0.17042311,0.0558216,0.39305066,0.9859729,2.061422,0.17179161,0.3191869
DEFI_14,3230,3536.57,8.719329,0.8853123,3.339032,293.5231,4,,0.4132343,0.0026994828,⋯,0.28106352,0.41751383,4960456,0.28106352,0.11468772,0.111848804,0.9922809,2.061421,0.1161327,0.2658082
ROAL_75,2417,2724.347,15.709258,0.9363433,3.632457,232.2653,5,,0.4662805,0.0064994861,⋯,0.14300762,0.28142621,1098214,0.14300762,0.06365673,0.092568712,0.9934851,2.06142,0.10694026,0.1766503
CHMA_61,2488,2770.488,15.462326,0.9353267,3.796813,219.78,6,,0.4855735,0.0062147613,⋯,0.19253567,0.29465517,3490484,0.19253567,0.06467332,0.374385495,0.9924707,2.061419,0.11616703,0.2404192
RODE_80,3026,3233.792,14.257733,0.9298626,3.797458,284.697,6,,0.4737941,0.0047117426,⋯,0.19042401,0.32002083,2239453,0.19042401,0.07013738,0.356697207,0.990555,2.06142,0.12519653,0.3011653
LAES_91,3432,3730.414,26.514556,0.9622849,4.440524,330.245,10,,0.5454587,0.0077256866,⋯,0.0957026,0.18910812,1030526,0.0957026,0.03771513,0.170191059,0.983281,2.061422,0.20045767,0.4487129
ASTER_02,2654,2959.511,4.673362,0.7860213,3.038853,238.9765,2,,0.3854542,0.0017608747,⋯,0.44969836,0.52435866,7151741,0.44969836,0.21397874,0.547855624,0.9937142,2.06142,0.11458115,0.2270447
DOEC_81,3391,3551.046,37.681441,0.9734617,4.731816,314.3283,16,,0.5820993,0.0111121915,⋯,0.10238205,0.16105144,1558846,0.10238205,0.02653826,0.268223654,0.9815149,2.06142,0.19952062,0.4595187


In [21]:
save.image("data/environment.RData")