In [1]:
library(tidyverse)
library(phyloseq)
library(ape)
library(microbiome)

── [1mAttaching packages[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.3     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.6     [32m✔[39m [34mdplyr  [39m 1.0.4
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


microbiome R package (microbiome.github.com)
    


 Copyright (C) 2011-2020 Leo Lahti, 


In [2]:
# Load the tab-separated OTU count table; the first line holds column names.
# NOTE(review): later cells rely on an `X.OTU.ID` column, presumably produced
# by read.delim()'s default name mangling of the file's ID header -- confirm.
otu <- read.delim(file = "../data/otu.GTDB98.txt", header = TRUE, sep = "\t")

In [3]:
# Load the headerless, tab-separated taxonomy map.
# NOTE(review): this is an absolute, machine-specific path, and it names GTDB
# r95 while the OTU table file is named GTDB98 -- confirm the releases match.
tax_map <- read.delim(
  "/mnt/btrfs/data/shogun/gtdb_95/r95.gtdb.tax",
  sep = "\t",
  header = FALSE
)

In [4]:
# Name the two taxonomy-map columns: the ID used as the join key with the OTU
# table, and the taxonomy string.
names(tax_map) <- c("X.OTU.ID", "tax")

In [5]:
# Attach the taxonomy string to each OTU row, keeping only IDs present in
# both tables, then index the rows by OTU ID.
otu <- inner_join(tax_map, otu, by = "X.OTU.ID")
rownames(otu) <- otu[["X.OTU.ID"]]

In [6]:
# Split the ";"-delimited taxonomy string into one column per rank.
taxa <- separate(
  select(otu, tax),
  col = tax,
  into = c(
    "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"
  ),
  sep = ";"
)

In [7]:
# separate() returns character columns; convert every character column of
# `taxa` to a factor. Uses across(), which supersedes mutate_if().
taxa <- taxa %>%
  mutate(across(where(is.character), as.factor))

# taxa<- cbind(otu$X.OTU.ID, taxa)

# colnames(taxa)[1] <- "X.OTU.ID"

# Drop the ID and taxonomy columns so only the per-sample columns remain
# (the OTU IDs were already stored as row names).
otu <- select(otu, -c(X.OTU.ID, tax))

# rownames(taxa) <- taxa$X.OTU.ID

# taxa <- taxa %>% 
#   select(-X.OTU.ID)

In [8]:
# Load the sample metadata and index its rows by sample ID.
meta <- read.delim(file = "../data/SampleID_map.txt", header = TRUE, sep = "\t")
rownames(meta) <- meta[["X.SampleID"]]

# The ID now lives in the row names, so the column itself is redundant.
meta <- select(meta, -X.SampleID)

# Sample IDs that appear both in the metadata and as OTU-table columns.
ids <- intersect(rownames(meta), colnames(otu))

In [9]:
# otu <- otu %>% rownames_to_column('rn') %>% filter(rn %in% ids)
# rownames(otu) <- otu$rnb
# otu <- otu %>% select(-rn)

In [10]:
# Keep only the sample columns listed in `ids`, in that order. all_of() makes
# it explicit that `ids` is an external character vector rather than a column
# of `otu`, and errors if any listed sample is missing.
otu <- otu %>% select(all_of(ids))

Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(ids)` instead of `ids` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m



In [11]:
# Keep only the metadata rows whose sample ID is in `ids`; row names, row
# order and columns are left as they were.
meta <- meta[rownames(meta) %in% ids, , drop = FALSE]

In [12]:
# taxa <- taxa %>% rownames_to_column('rn') %>% filter(rn %in% ids)
# rownames(taxa) <- taxa$rn
# taxa <- taxa %>% select(-rn)

In [13]:
# Matrix forms of the count and taxonomy tables, as passed to the phyloseq
# component constructors below.
otu_mat <- as.matrix(otu)
tax_mat <- as.matrix(taxa)

# Wrap each piece in its phyloseq component class; OTUs are the rows of
# `otu_mat`, samples the columns.
phylo_OTU <- otu_table(otu_mat, taxa_are_rows = TRUE)
phylo_TAX <- tax_table(tax_mat)
phylo_samples <- sample_data(meta)

# Reference phylogeny in Newick format, read with ape.
tree <- read.tree("../data/shogun.tre")

# Combine the four components into one experiment-level object. The tree is
# passed positionally: phyloseq() takes its components through `...`.
phylo_object <- phyloseq(phylo_OTU, phylo_TAX, phylo_samples, tree)

In [14]:
# Display a summary of the tree as read from ../data/shogun.tre.
tree


Phylogenetic tree with 31910 tips and 31909 internal nodes.

Tip labels:
  GCF_002286985.1, GCF_003781945.1, GCF_000739595.1, GCF_002727125.1, GCF_000337915.1, GCF_004114995.1, ...
Node labels:
  , d__Archaea, 100, 74, 100, 100:p__Halobacteriota, ...

Rooted; includes branch lengths.

In [15]:
# List the sample names carried by the assembled phyloseq object.
sample_names(phylo_object)    

In [16]:
# Display the phyloseq object's summary (dimensions of each component).
phylo_object

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 477 taxa and 415 samples ]
sample_data() Sample Data:       [ 415 samples by 23 sample variables ]
tax_table()   Taxonomy Table:    [ 477 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 477 tips and 476 internal nodes ]

In [17]:
# Display the taxonomy table stored in the phyloseq object.
tax_table(phylo_object)

Unnamed: 0,Domain,Phylum,Class,Order,Family,Genus,Species
GCF_000404225.1,k__Archaea,p__Thermoplasmatota,c__Thermoplasmata,o__Methanomassiliicoccales,f__Methanomassiliicoccaceae,g__Methanomassiliicoccus,s__Methanomassiliicoccus_intestinalis
GCF_000026325.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Escherichia,s__Escherichia_coli_D
GCF_003697165.2,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Escherichia,s__Escherichia_coli
GCF_002950215.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Escherichia,s__Escherichia_flexneri
GCF_001729805.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Enterobacter,s__Enterobacter_roggenkampii
GCF_001750725.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Enterobacter,s__Enterobacter_ludwigii
GCF_900200035.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Klebsiella_A,s__Klebsiella_A_grimontii
GCF_000191405.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Pasteurellaceae,g__Haemophilus_D,s__Haemophilus_D_parainfluenzae
GCF_001815355.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Pasteurellaceae,g__Haemophilus_D,s__Haemophilus_D_sp001815355
GCF_000210895.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Pasteurellaceae,g__Haemophilus_D,s__Haemophilus_D_parainfluenzae_A


In [18]:
# Weighted UniFrac distances between all samples. The `fast` argument is
# omitted: phyloseq reports `fast=FALSE` as deprecated and only supports the
# fast implementation, so passing it only produced a warning.
unifrac_weighted <- UniFrac(phylo_object, weighted = TRUE)

“Option `fast=FALSE` is deprecated. Only 'fast' UniFrac is supported in phyloseq.”


In [19]:
# Unweighted UniFrac distances between all samples. The `fast` argument is
# omitted: phyloseq reports `fast=FALSE` as deprecated and only supports the
# fast implementation, so passing it only produced a warning.
unifrac_unweighted <- UniFrac(phylo_object, weighted = FALSE)

“Option `fast=FALSE` is deprecated. Only 'fast' UniFrac is supported in phyloseq.”


In [20]:
# Export both distance objects as square, tab-separated matrices with sample
# names as row names.
# NOTE(review): with row names written and default column names, the header
# line has no field for the row-name column; confirm downstream readers
# expect that layout before changing it.
write.table(
  as.matrix(unifrac_weighted),
  file = "../data/weighted.unifrac.txt",
  sep = "\t",
  quote = FALSE,
  row.names = TRUE
)
write.table(
  as.matrix(unifrac_unweighted),
  file = "../data/unweighted.unifrac.txt",
  sep = "\t",
  quote = FALSE,
  row.names = TRUE
)

In [21]:
# Per-sample alpha-diversity indices. The call is namespace-qualified because
# ggplot2 (attached via tidyverse) also exports a function named alpha(), so
# the unqualified name depends on package load order.
microbiome::alpha(phylo_object)

Observed richness

Other forms of richness

Diversity

Evenness

Dominance

Rarity



Unnamed: 0_level_0,observed,chao1,diversity_inverse_simpson,diversity_gini_simpson,diversity_shannon,diversity_fisher,diversity_coverage,evenness_camargo,evenness_pielou,evenness_simpson,⋯,dominance_dbp,dominance_dmn,dominance_absolute,dominance_relative,dominance_simpson,dominance_core_abundance,dominance_gini,rarity_log_modulo_skewness,rarity_low_abundance,rarity_rare_abundance
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
MCT.f.0035,292,292.0000,12.249676,0.9183652,3.577229,32.82235,5,0.17897924,0.6301540,0.04195094,⋯,0.23087703,0.3509725,55353,0.23087703,0.08163481,0.69910032,0.9190019,2.059527,0.09588281,0.11308399
MCT.f.0052,314,314.0000,15.001803,0.9333413,3.659039,35.62320,7,0.33753983,0.6364218,0.04777644,⋯,0.18830424,0.2955640,45145,0.18830424,0.06665865,0.35717533,0.9190603,2.059000,0.08882354,0.32280548
MCT.f.0069,333,348.5741,13.671594,0.9268556,3.376505,38.06476,5,0.17264706,0.5813399,0.04105584,⋯,0.15086108,0.2966962,36170,0.15086108,0.07314436,0.51759490,0.9410301,2.060870,0.08016867,0.21118049
MCT.f.0417,365,434.1364,8.554677,0.8831049,3.181298,42.22315,3,0.08878411,0.5392124,0.02343747,⋯,0.25742137,0.4563477,61716,0.25742137,0.11689512,0.15478400,0.9379981,2.061122,0.09388230,0.72697469
MCT.f.0103,383,407.5000,21.097570,0.9526012,3.793565,44.58596,8,0.13202947,0.6377845,0.05508504,⋯,0.11754401,0.2183211,28182,0.11754401,0.04739882,0.40496002,0.9157124,2.059277,0.08394333,0.35694891
MCT.f.0120,334,359.8065,6.870727,0.8544550,2.966498,38.19403,2,0.09512095,0.5104847,0.02057104,⋯,0.28605452,0.5199668,68581,0.28605452,0.14554501,0.81691610,0.9463791,2.061133,0.08272436,0.10103108
MCT.f.0137,334,334.4310,22.116700,0.9547853,3.754024,38.19366,8,0.11761049,0.6460046,0.06621766,⋯,0.11036594,0.2068350,26462,0.11036594,0.04521470,0.58413203,0.9224552,2.060280,0.08721420,0.23684759
MCT.f.0154,402,451.8462,22.757050,0.9560576,3.957415,47.09837,9,0.12491273,0.6599595,0.05660958,⋯,0.11690316,0.2282381,28028,0.11690316,0.04394243,0.46107677,0.8988067,2.060154,0.09785447,0.20307065
MCT.f.0222,364,364.0000,30.152887,0.9668357,4.204436,42.09189,12,0.17832916,0.7129602,0.08283760,⋯,0.09020803,0.1766291,21629,0.09020803,0.03316432,0.32126889,0.8758264,2.060014,0.10762070,0.47123886
MCT.f.0256,387,400.7812,19.971075,0.9499276,3.930663,45.11355,8,0.17322293,0.6596815,0.05160485,⋯,0.13213402,0.2457111,31679,0.13213402,0.05007242,0.46850248,0.8949728,2.060820,0.11017356,0.34805985


In [22]:
# Per-sample alpha-diversity indices from the microbiome package, called with
# an explicit namespace. The result is displayed only, not stored.
microbiome::alpha(phylo_object)

Observed richness

Other forms of richness

Diversity

Evenness

Dominance

Rarity



Unnamed: 0_level_0,observed,chao1,diversity_inverse_simpson,diversity_gini_simpson,diversity_shannon,diversity_fisher,diversity_coverage,evenness_camargo,evenness_pielou,evenness_simpson,⋯,dominance_dbp,dominance_dmn,dominance_absolute,dominance_relative,dominance_simpson,dominance_core_abundance,dominance_gini,rarity_log_modulo_skewness,rarity_low_abundance,rarity_rare_abundance
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
MCT.f.0035,292,292.0000,12.249676,0.9183652,3.577229,32.82235,5,0.17897924,0.6301540,0.04195094,⋯,0.23087703,0.3509725,55353,0.23087703,0.08163481,0.69910032,0.9190019,2.059527,0.09588281,0.11308399
MCT.f.0052,314,314.0000,15.001803,0.9333413,3.659039,35.62320,7,0.33753983,0.6364218,0.04777644,⋯,0.18830424,0.2955640,45145,0.18830424,0.06665865,0.35717533,0.9190603,2.059000,0.08882354,0.32280548
MCT.f.0069,333,348.5741,13.671594,0.9268556,3.376505,38.06476,5,0.17264706,0.5813399,0.04105584,⋯,0.15086108,0.2966962,36170,0.15086108,0.07314436,0.51759490,0.9410301,2.060870,0.08016867,0.21118049
MCT.f.0417,365,434.1364,8.554677,0.8831049,3.181298,42.22315,3,0.08878411,0.5392124,0.02343747,⋯,0.25742137,0.4563477,61716,0.25742137,0.11689512,0.15478400,0.9379981,2.061122,0.09388230,0.72697469
MCT.f.0103,383,407.5000,21.097570,0.9526012,3.793565,44.58596,8,0.13202947,0.6377845,0.05508504,⋯,0.11754401,0.2183211,28182,0.11754401,0.04739882,0.40496002,0.9157124,2.059277,0.08394333,0.35694891
MCT.f.0120,334,359.8065,6.870727,0.8544550,2.966498,38.19403,2,0.09512095,0.5104847,0.02057104,⋯,0.28605452,0.5199668,68581,0.28605452,0.14554501,0.81691610,0.9463791,2.061133,0.08272436,0.10103108
MCT.f.0137,334,334.4310,22.116700,0.9547853,3.754024,38.19366,8,0.11761049,0.6460046,0.06621766,⋯,0.11036594,0.2068350,26462,0.11036594,0.04521470,0.58413203,0.9224552,2.060280,0.08721420,0.23684759
MCT.f.0154,402,451.8462,22.757050,0.9560576,3.957415,47.09837,9,0.12491273,0.6599595,0.05660958,⋯,0.11690316,0.2282381,28028,0.11690316,0.04394243,0.46107677,0.8988067,2.060154,0.09785447,0.20307065
MCT.f.0222,364,364.0000,30.152887,0.9668357,4.204436,42.09189,12,0.17832916,0.7129602,0.08283760,⋯,0.09020803,0.1766291,21629,0.09020803,0.03316432,0.32126889,0.8758264,2.060014,0.10762070,0.47123886
MCT.f.0256,387,400.7812,19.971075,0.9499276,3.930663,45.11355,8,0.17322293,0.6596815,0.05160485,⋯,0.13213402,0.2457111,31679,0.13213402,0.05007242,0.46850248,0.8949728,2.060820,0.11017356,0.34805985


In [23]:
# Persist every object in the current workspace for later sessions.
save.image(file = "../data/environment.RData")