/
stacked.bar.2.R
109 lines (75 loc) · 4.33 KB
/
stacked.bar.2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
### new stacked bar plot script based on https://joey711.github.io/phyloseq/plot_bar-examples.html
# 1800830 script was updated to include all Sites (include POST in the analysis)
library("magrittr")
library(ggplot2)
library(vegan)
library(dplyr)
library(scales)
library(grid)
library(reshape2)
library(phyloseq)
###### Simple plots for exploration
physeq.rel = transform_sample_counts(physeq.f, function(OTU) OTU/sum(OTU) )
subset = subset_taxa(physeq.rel, family == "Pasteurellaceae")
plot_bar(subset)
plot_bar(subset, fill="genus")
plot_bar(subset, x="C_vs_I",fill="genus")
plot_bar(subset, facet_grid = ~NewPastedVar,fill="genus")
plot_bar(subset, x = "Study_day", facet_grid = ~NewPastedVar,fill="genus")
# this produces a nice chart of Haemophilus by site and infection
plot_bar(subset, x = "Study_day", facet_grid = C_vs_I~Site,fill="genus")
### this ends up with a nice plot, but still is summing the relative abundances
### http://joey711.github.io/phyloseq-demo/Restroom-Biogeography this describes, about
### halfway down, how to average the relative abundances
### it revolves around merge_data function
### this has to be merged by infection, site, and day
### the only way to accomplish this it to make a new merged metadata variable
### described here https://github.com/joey711/phyloseq/issues/293
###### main plots, plot all taxa agglomerated and by high level organization
### Data preparation:
physeq.rel = transform_sample_counts(physeq.f, function(OTU) OTU/sum(OTU) )
sample_data(physeq.rel)$Site <- gsub("POST","PO",sample_data(physeq.rel)$Site) # POST has to be shortened for this merge process
#physeq.rel = subset_samples(physeq.rel, Site != "POST") #POST SHOULD BE INCLUDED IN ANALYSES
variable1 = as.character(get_variable(physeq.rel, "Site"))
variable2 = as.character(get_variable(physeq.rel, "C_vs_I"))
variable3 = as.character(get_variable(physeq.rel, "Study_day"))
sample_data(physeq.rel)$merge <- mapply(paste0, variable1, variable2, variable3,
collapse = "_")
physeq.merge = physeq.rel
merge_samples(physeq.merge, "merge")
#metadata is lost here at the merge
physeq.merge = merge_samples(physeq.rel, "merge")
#recovering the lost metadata
variable1 = row.names(sample_data(physeq.merge))
variable2 = substring(variable1, 1,2)
variable3 = substring(variable1, 3,3)
sample_data(physeq.merge)$Site <- variable2
sample_data(physeq.merge)$C_vs_I <- variable3
# then I have to get relative abundances again
physeq.merge = transform_sample_counts(physeq.merge, function(OTU) OTU/sum(OTU) )
sample_data(physeq.merge)$Site <- gsub('PO',"POST",sample_data(physeq.merge)$Site) #replace PO with POST
## this produces top phyla/class/order etc plots, but must be customized!
physeq_phylum <- tax_glom(physeq.merge, taxrank = "family")
TopNOTUs = names(sort(taxa_sums(physeq_phylum), TRUE)[1:12]) #find the top taxa
physeq.top.phyla = prune_taxa(TopNOTUs, physeq_phylum) #filter the data set to include only the top taxa
plot_bar(physeq.top.phyla, x = "Study_day", facet_grid = C_vs_I~Site,fill="family") +
ggtitle("Relative Abundances of Top 12 Most Abundant Families") +scale_fill_manual(values = phylum_colors)
## CUSTOM TARGETTED PLOTS BEGIN HERE!!!!
## this first formula is for focused analysis
## you have to set which taxon you want to explore
physeq.m.subset = subset_taxa(physeq.merge, genus == "Peptoniphilus")
#consider if you want to look only at one site
physeq.m.subset = subset_samples(physeq.m.subset, Site == "POST")
plot_bar(physeq.m.subset, x = "Study_day", facet_grid = C_vs_I~Site,fill="genus") +
ggtitle("Relative Abundances of Mannheimia in POST") +scale_fill_manual(values = phylum_colors)
## this section is for a chart with too many members, focus on more common
physeq.m.subset = tax_glom(physeq.merge, taxrank = "genus")
topOTUs = names(sort(taxa_sums(physeq.m.subset), TRUE)[1:16])
physeq.top = prune_taxa(topOTUs, order = "Pseudomonadales")
phylum_colors <- c(
"#CBD588", "#5F7FC7", "orange","#DA5724", "#508578", "#CD9BCD",
"#AD6F3B", "#673770","#D14285", "#652926", "#C84248",
"#8569D5", "#5E738F","#D1A33D", "#8A7C64", "#599861")
plot_bar(physeq.top, x = "Study_day", facet_grid = C_vs_I~Site,fill="genus") +
ggtitle("Relative Abundances of top 16 most abundant Genera") +scale_fill_manual(values = phylum_colors) +
ylab("Relative Abundance")