In [1]:
library(tidyverse)
library(phyloseq)
library(ape)
library(microbiome)

── [1mAttaching packages[22m ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

[32m✔[39m [34mggplot2[39m 3.3.3     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.0.6     [32m✔[39m [34mdplyr  [39m 1.0.4
[32m✔[39m [34mtidyr  [39m 1.1.2     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 1.4.0     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()


microbiome R package (microbiome.github.com)
    


 Copyright (C) 2011-2020 Leo Lahti, 


In [2]:
otu <- read.delim("../data/otu.GTDB98.txt", sep = "\t", header = TRUE)

In [3]:
tax_map <- read.delim('/mnt/btrfs/data/shogun/gtdb_95/r95.gtdb.tax', sep="\t", header=F)

In [4]:
colnames(tax_map) <- c('X.OTU.ID', 'tax')

In [5]:
otu <- tax_map %>% inner_join(otu, by = 'X.OTU.ID')

rownames(otu)<- otu$X.OTU.ID

In [6]:
taxa <- otu %>% 
  select(tax) %>% 
  separate(tax, c("Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"),
           ";")  

In [7]:
#the output is a data frame of characters, and we need taxa to be recognized as factors
taxa<- taxa %>% 
  mutate_if(is.character, as.factor)

# taxa<- cbind(otu$X.OTU.ID, taxa)

# colnames(taxa)[1] <- "X.OTU.ID"

otu<- otu %>% 
  select(-X.OTU.ID, -tax)

# rownames(taxa) <- taxa$X.OTU.ID

# taxa <- taxa %>% 
#   select(-X.OTU.ID)

In [8]:
meta <- read.delim("../data/SampleID_map.txt", sep = "\t", header = TRUE)

rownames(meta) <- meta$X.SampleID

#and delete the first column because it is now redundant
meta<- meta %>% 
        select(-X.SampleID)

ids <- intersect(rownames(meta), colnames(otu))

In [9]:
# otu <- otu %>% rownames_to_column('rn') %>% filter(rn %in% ids)
# rownames(otu) <- otu$rnb
# otu <- otu %>% select(-rn)

In [10]:
otu <- otu %>% select(ids)

Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(ids)` instead of `ids` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m



In [11]:
meta <- meta %>% rownames_to_column('rn') %>% filter(rn %in% ids)
rownames(meta) <- meta$rn
meta <- meta %>% select(-rn)

In [12]:
# taxa <- taxa %>% rownames_to_column('rn') %>% filter(rn %in% ids)
# rownames(taxa) <- taxa$rn
# taxa <- taxa %>% select(-rn)

In [13]:
otu_mat<- as.matrix(otu)
tax_mat<- as.matrix(taxa)

#transform data to phyloseq objects
phylo_OTU<- otu_table(otu_mat, taxa_are_rows = TRUE)
phylo_TAX<- tax_table(tax_mat)
phylo_samples<- sample_data(meta)

tree <- read.tree("../data/shogun.tre")

#and put them in one object
phylo_object<- phyloseq(phylo_OTU, phylo_TAX, phylo_samples, tree_file=tree)

In [14]:
tree


Phylogenetic tree with 31910 tips and 31909 internal nodes.

Tip labels:
  GCF_002286985.1, GCF_003781945.1, GCF_000739595.1, GCF_002727125.1, GCF_000337915.1, GCF_004114995.1, ...
Node labels:
  , d__Archaea, 100, 74, 100, 100:p__Halobacteriota, ...

Rooted; includes branch lengths.

In [15]:
sample_names(phylo_object)    

In [16]:
phylo_object

phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 268 taxa and 409 samples ]
sample_data() Sample Data:       [ 409 samples by 23 sample variables ]
tax_table()   Taxonomy Table:    [ 268 taxa by 7 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 268 tips and 267 internal nodes ]

In [17]:
tax_table(phylo_object)

Unnamed: 0,Domain,Phylum,Class,Order,Family,Genus,Species
GCF_003697165.2,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Escherichia,s__Escherichia_coli
GCF_001729805.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Enterobacter,s__Enterobacter_roggenkampii
GCF_900200035.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Enterobacterales,f__Enterobacteriaceae,g__Klebsiella_A,s__Klebsiella_A_grimontii
GCF_003315195.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Burkholderiales,f__Burkholderiaceae,g__Sutterella,s__Sutterella_wadsworthensis
GCF_000297775.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Burkholderiales,f__Burkholderiaceae,g__Sutterella,s__Sutterella_wadsworthensis_A
GCF_001182045.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Burkholderiales,f__Burkholderiaceae,g__Mesosutterella,s__Mesosutterella_massiliensis
GCF_000205025.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Burkholderiales,f__Burkholderiaceae,g__Parasutterella,s__Parasutterella_excrementihominis
GCA_000980495.1,k__Bacteria,p__Proteobacteria,c__Gammaproteobacteria,o__Burkholderiales,f__Burkholderiaceae,g__Parasutterella,s__Parasutterella_sp000980495
GCA_001917135.1,k__Bacteria,p__Proteobacteria,c__Alphaproteobacteria,o__RF32,f__CAG-239,g__CAG-267,s__CAG-267_sp001917135
GCA_000432275.1,k__Bacteria,p__Proteobacteria,c__Alphaproteobacteria,o__RF32,f__CAG-239,g__CAG-495,s__CAG-495_sp000432275


In [18]:
unifrac_weighted = UniFrac(phylo_object, weighted=T, fast=F)

“Option `fast=FALSE` is deprecated. Only 'fast' UniFrac is supported in phyloseq.”


In [19]:
unifrac_unweighted = UniFrac(phylo_object, weighted=F, fast=F)

“Option `fast=FALSE` is deprecated. Only 'fast' UniFrac is supported in phyloseq.”


In [20]:
write.table(as.matrix(unifrac_weighted), file = "../data/weighted.unifrac.txt", sep = "\t", quote = F, row.names = T)
write.table(as.matrix(unifrac_unweighted), file = "../data/unweighted.unifrac.txt", sep = "\t", quote = F, row.names = T)

In [21]:
alpha(phylo_object)

Observed richness

Other forms of richness

Diversity

Evenness

Dominance

Rarity



Unnamed: 0_level_0,observed,chao1,diversity_inverse_simpson,diversity_gini_simpson,diversity_shannon,diversity_fisher,diversity_coverage,evenness_camargo,evenness_pielou,evenness_simpson,⋯,dominance_dbp,dominance_dmn,dominance_absolute,dominance_relative,dominance_simpson,dominance_core_abundance,dominance_gini,rarity_log_modulo_skewness,rarity_low_abundance,rarity_rare_abundance
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
MCT.f.0035,180,180.0000,11.013986,0.9092063,3.363357,19.04765,5,0.19522577,0.6476767,0.06118881,⋯,0.24373859,0.3705245,58994,0.24373859,0.09079365,0.7948339,0.8919612,2.057064,0.06824548,0.08342492
MCT.f.0052,187,187.0000,12.920237,0.9226020,3.378223,19.87822,5,0.35515395,0.6457949,0.06909217,⋯,0.20346070,0.3193503,49244,0.20346070,0.07739796,0.5187950,0.9010057,2.055345,0.05674869,0.28899898
MCT.f.0069,199,207.1667,11.212783,0.9108161,3.052713,21.31163,4,0.20278680,0.5767121,0.05634564,⋯,0.16757911,0.3295802,40558,0.16757911,0.08918392,0.5781723,0.9299944,2.060668,0.05206530,0.14699429
MCT.f.0417,220,248.1250,7.811913,0.8719904,2.967629,23.84761,3,0.10006641,0.5502103,0.03550869,⋯,0.26948805,0.4777406,65225,0.26948805,0.12800963,0.2051910,0.9218521,2.060647,0.06526383,0.71790210
MCT.f.0103,221,230.0000,19.128450,0.9477218,3.578039,23.96912,7,0.27412014,0.6628253,0.08655407,⋯,0.12354105,0.2294611,29902,0.12354105,0.05227815,0.4769812,0.8902007,2.057012,0.05058234,0.32708095
MCT.f.0120,194,203.0312,6.218211,0.8391820,2.727275,20.71287,2,0.10983280,0.5177198,0.03205263,⋯,0.30083212,0.5468285,72811,0.30083212,0.16081796,0.8666003,0.9354010,2.060563,0.05402591,0.06844549
MCT.f.0137,206,206.0294,19.789758,0.9494688,3.537941,22.15300,7,0.13510632,0.6640434,0.09606679,⋯,0.11686643,0.2190120,28286,0.11686643,0.05053119,0.6360474,0.8968720,2.057718,0.06679144,0.09104806
MCT.f.0154,235,245.2857,19.155332,0.9477952,3.692459,25.67965,7,0.15146544,0.6763259,0.08151205,⋯,0.12892723,0.2517147,31204,0.12892723,0.05220479,0.6160279,0.8705284,2.059313,0.05837341,0.13081957
MCT.f.0222,223,223.0000,22.194954,0.9549447,3.832436,24.21272,9,0.18652701,0.7087691,0.09952894,⋯,0.10967281,0.2147429,26544,0.10967281,0.04505529,0.4423561,0.8520681,2.058175,0.07482574,0.36083692
MCT.f.0256,226,227.0417,15.576310,0.9357999,3.544757,24.57846,6,0.22018099,0.6539497,0.06892173,⋯,0.15091167,0.2806317,36525,0.15091167,0.06420006,0.5626392,0.8821809,2.059892,0.06409562,0.26339406


In [22]:
microbiome::alpha(phylo_object)

Observed richness

Other forms of richness

Diversity

Evenness

Dominance

Rarity



Unnamed: 0_level_0,observed,chao1,diversity_inverse_simpson,diversity_gini_simpson,diversity_shannon,diversity_fisher,diversity_coverage,evenness_camargo,evenness_pielou,evenness_simpson,⋯,dominance_dbp,dominance_dmn,dominance_absolute,dominance_relative,dominance_simpson,dominance_core_abundance,dominance_gini,rarity_log_modulo_skewness,rarity_low_abundance,rarity_rare_abundance
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
MCT.f.0035,180,180.0000,11.013986,0.9092063,3.363357,19.04765,5,0.19522577,0.6476767,0.06118881,⋯,0.24373859,0.3705245,58994,0.24373859,0.09079365,0.7948339,0.8919612,2.057064,0.06824548,0.08342492
MCT.f.0052,187,187.0000,12.920237,0.9226020,3.378223,19.87822,5,0.35515395,0.6457949,0.06909217,⋯,0.20346070,0.3193503,49244,0.20346070,0.07739796,0.5187950,0.9010057,2.055345,0.05674869,0.28899898
MCT.f.0069,199,207.1667,11.212783,0.9108161,3.052713,21.31163,4,0.20278680,0.5767121,0.05634564,⋯,0.16757911,0.3295802,40558,0.16757911,0.08918392,0.5781723,0.9299944,2.060668,0.05206530,0.14699429
MCT.f.0417,220,248.1250,7.811913,0.8719904,2.967629,23.84761,3,0.10006641,0.5502103,0.03550869,⋯,0.26948805,0.4777406,65225,0.26948805,0.12800963,0.2051910,0.9218521,2.060647,0.06526383,0.71790210
MCT.f.0103,221,230.0000,19.128450,0.9477218,3.578039,23.96912,7,0.27412014,0.6628253,0.08655407,⋯,0.12354105,0.2294611,29902,0.12354105,0.05227815,0.4769812,0.8902007,2.057012,0.05058234,0.32708095
MCT.f.0120,194,203.0312,6.218211,0.8391820,2.727275,20.71287,2,0.10983280,0.5177198,0.03205263,⋯,0.30083212,0.5468285,72811,0.30083212,0.16081796,0.8666003,0.9354010,2.060563,0.05402591,0.06844549
MCT.f.0137,206,206.0294,19.789758,0.9494688,3.537941,22.15300,7,0.13510632,0.6640434,0.09606679,⋯,0.11686643,0.2190120,28286,0.11686643,0.05053119,0.6360474,0.8968720,2.057718,0.06679144,0.09104806
MCT.f.0154,235,245.2857,19.155332,0.9477952,3.692459,25.67965,7,0.15146544,0.6763259,0.08151205,⋯,0.12892723,0.2517147,31204,0.12892723,0.05220479,0.6160279,0.8705284,2.059313,0.05837341,0.13081957
MCT.f.0222,223,223.0000,22.194954,0.9549447,3.832436,24.21272,9,0.18652701,0.7087691,0.09952894,⋯,0.10967281,0.2147429,26544,0.10967281,0.04505529,0.4423561,0.8520681,2.058175,0.07482574,0.36083692
MCT.f.0256,226,227.0417,15.576310,0.9357999,3.544757,24.57846,6,0.22018099,0.6539497,0.06892173,⋯,0.15091167,0.2806317,36525,0.15091167,0.06420006,0.5626392,0.8821809,2.059892,0.06409562,0.26339406


In [23]:
save.image("../data/environment.RData")