#### This notebook is to perform Non-Redundant Analysis on alpha diversities

##### reference: Falony G, Joossens M, Vieira-Silva S, et al. Population-level analysis of gut microbiome variation. Science 2016;352:560-4

In [2]:
library(vegan)

Loading required package: permute
Loading required package: lattice
This is vegan 2.4-5


In [3]:
mf = read.csv('../data/mros_mapping_alpha.txt', stringsAsFactors=FALSE, sep='\t')
colnames(mf)[1] = 'SampleName'
dim(mf)

In [4]:
names(mf)

In [5]:
head(mf)

SampleName,BarcodeSequence,LinkerPrimerSequence,Experiment_Design_Description,Library_Construction_Protocol,Linker,Platform,Center_Name,Center_Project,Instrument_Model,⋯,M1STATIN,Antihistamine,Laxative,MIDATA,Rstarch_total,Rstarches_c,alpha_pd,observed_otus,shannon,pielou_e
BI0023,TCTGGTGACATT,GGACTACHVGGGTWTCTAAT,16S stool samples sequenced for MrOS Vitamin D study,16S rRNA v4,GT,Illumina,BI,MrOS,Illumina MiSeq,⋯,0: No,0:No,0:No,1: Yes,3.066569,0,27.77117,302,5.727116,0.6951729
BI0056,CAAGCATGCCTA,GGACTACHVGGGTWTCTAAT,16S stool samples sequenced for MrOS Vitamin D study,16S rRNA v4,GT,Illumina,BI,MrOS,Illumina MiSeq,⋯,0: No,0:No,0:No,1: Yes,3.038136,0,17.93266,173,3.888281,0.522996
BI0131,CTATTTGCGACA,GGACTACHVGGGTWTCTAAT,16S stool samples sequenced for MrOS Vitamin D study,16S rRNA v4,GT,Illumina,BI,MrOS,Illumina MiSeq,⋯,1: Yes,0:No,0:No,1: Yes,2.840599,0,21.45366,223,4.05073,0.5192645
BI0153,ATCGGCGTTACA,GGACTACHVGGGTWTCTAAT,16S stool samples sequenced for MrOS Vitamin D study,16S rRNA v4,GT,Illumina,BI,MrOS,Illumina MiSeq,⋯,1: Yes,0:No,0:No,1: Yes,4.106798,0,18.46968,223,4.894253,0.6273959
BI0215,CCTCTCGTGATC,GGACTACHVGGGTWTCTAAT,16S stool samples sequenced for MrOS Vitamin D study,16S rRNA v4,GT,Illumina,BI,MrOS,Illumina MiSeq,⋯,0: No,0:No,0:No,1: Yes,0.971114,0,20.04983,222,5.295055,0.6793396
BI0353,TGCCATCTGAAT,GGACTACHVGGGTWTCTAAT,16S stool samples sequenced for MrOS Vitamin D study,16S rRNA v4,GT,Illumina,BI,MrOS,Illumina MiSeq,⋯,1: Yes,0:No,0:No,1: Yes,4.813568,0,13.75183,155,4.213016,0.5790192


In [6]:
sapply(mf, class)

In [7]:
# not include correlated variables
# convert categorial to factors
vars_cat = c('Sex','GIERACE', 'SITE', 'TUDRAMT', 'TURSMOKE', 'M1ADEPR', 'M1VITMND', 'M1ANTIB', 'M1PROBI', 
               'OHSEAS', 'QLCOMP', 'VDstatus', 'M1STATIN', 'Antihistamine', 'Laxative')
mf[vars_cat] = lapply(mf[vars_cat], factor)

# convert continuous to numeric
vars_cts = c('Latitude', 'Longitude', 'Age', 'BMI', 'PASCORE', 'DTVITD', 'Rstarch_total',
             'OHV1D3', 'OHV24D3', 'OHVD3', 'alpha_pd')

In [8]:
length(vars_cat)
length(vars_cts)

In [9]:
dat = mf[c(vars_cat, vars_cts)]
dim(dat)
head(dat)

Sex,GIERACE,SITE,TUDRAMT,TURSMOKE,M1ADEPR,M1VITMND,M1ANTIB,M1PROBI,OHSEAS,⋯,Longitude,Age,BMI,PASCORE,DTVITD,Rstarch_total,OHV1D3,OHV24D3,OHVD3,alpha_pd
male,1:WHITE,Birmingham,1: Less than one drink per week,M:Not Applicable,0: No,0: No,0: No,0: No,3:SUMMER,⋯,-86.80249,83,28.89012,91.0,250.9,3.066569,0.0393,1.77,25.8,27.77117
male,1:WHITE,Birmingham,0:None drinker,1:PAST,0: No,1: Yes,1: Yes,0: No,2:SPRING,⋯,-86.80249,81,28.5398,199.17857,72.97,3.038136,0.0619,3.91,39.2,17.93266
male,1:WHITE,Birmingham,0:None drinker,1:PAST,0: No,1: Yes,0: No,0: No,2:SPRING,⋯,-86.80249,83,25.01424,161.71429,312.15,2.840599,0.0521,1.49,23.1,21.45366
male,1:WHITE,Birmingham,4: 6-13 drinks per week,1:PAST,0: No,1: Yes,0: No,0: No,2:SPRING,⋯,-86.80249,79,30.87637,88.21429,323.52,4.106798,0.0431,2.14,27.3,18.46968
male,1:WHITE,Birmingham,3: 3-5 drinks per week,1:PAST,0: No,0: No,0: No,0: No,4:FALL,⋯,-86.80249,81,33.58739,256.82143,31.95,0.971114,0.0502,3.62,33.0,20.04983
male,1:WHITE,Birmingham,0:None drinker,1:PAST,0: No,0: No,1: Yes,0: No,2:SPRING,⋯,-86.80249,80,26.41523,179.57143,156.06,4.813568,0.0455,1.79,19.5,13.75183


In [10]:
summary(dat)

   Sex                    GIERACE             SITE    
 male:599   1:WHITE           :520   Birmingham : 75  
            2:AFRICAN AMERICAN: 24   Minneapolis: 91  
            3:ASIAN           : 34   Palo Alto  : 86  
            4:HISPANIC        : 12   Pittsburgh : 92  
            5:OTHER           :  9   Portland   :121  
                                     San Diego  :134  
                                                      
                            TUDRAMT                TURSMOKE     M1ADEPR   
 .                              :  2   0:NO            :230   0: No :546  
 0:None drinker                 :230   1:PAST          :289   1: Yes: 53  
 1: Less than one drink per week: 78   2:CURRENT       :  9               
 2: 1-2drinks per week          : 66   M:Not Applicable: 71               
 3: 3-5 drinks per week         : 90                                      
 4: 6-13 drinks per week        :106                                      
 5: 14 or more drinks per week  : 2

In [11]:
dat = dat[complete.cases(dat), ]
print(dim(dat))
alpha = dat$alpha_pd
dat = dat[, -which(names(dat) %in% 'alpha_pd')]
dim(dat)

[1] 545  26


In [12]:
mod0 <- rda(alpha ~ 1., dat)  # Model with intercept only
mod1 <- rda(alpha ~ ., dat)  # Model with all explanatory variables

In [13]:
step.res <- ordiR2step(mod0, mod1, perm.max = 1000)

Step: R2.adj= 0 
Call: alpha ~ 1 
 
                  R2.adjusted
<All variables>  0.1358048025
+ OHV1D3         0.0528995868
+ M1ANTIB        0.0227745101
+ GIERACE        0.0202594070
+ TUDRAMT        0.0193169647
+ SITE           0.0192963887
+ M1ADEPR        0.0134676017
+ OHV24D3        0.0126697060
+ Rstarch_total  0.0113206998
+ Latitude       0.0083159884
+ BMI            0.0047408113
+ OHSEAS         0.0042540575
+ PASCORE        0.0040791226
+ Age            0.0014860303
+ M1STATIN       0.0010203332
+ Longitude      0.0007589433
+ Laxative       0.0006233309
+ QLCOMP         0.0001640037
<none>           0.0000000000
+ Sex            0.0000000000
+ VDstatus      -0.0003741624
+ OHVD3         -0.0011360122
+ M1PROBI       -0.0015039770
+ DTVITD        -0.0016382488
+ Antihistamine -0.0016647839
+ M1VITMND      -0.0016727506
+ TURSMOKE      -0.0037498847

         Df  AIC      F Pr(>F)   
+ OHV1D3  1 1983 31.385  0.002 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.

In [14]:
table = step.res$anova
table

Unnamed: 0,R2.adj,Df,AIC,F,Pr(>F)
+ OHV1D3,0.05289959,1.0,1983.022,31.384714,0.002
+ GIERACE,0.07071292,4.0,1976.645,3.602167,0.012
+ M1ANTIB,0.08771212,1.0,1967.571,11.043508,0.004
+ SITE,0.10115806,5.0,1964.39,2.609608,0.03
+ M1ADEPR,0.11372319,1.0,1957.694,8.556572,0.004
<All variables>,0.1358048,,,,


In [15]:
table$ES.RDA = c(table$R2.adj[1], table$R2.adj[2]-table$R2.adj[1], 
                       table$R2.adj[3]-table$R2.adj[2], table$R2.adj[4]-table$R2.adj[3],
                       table$R2.adj[5]-table$R2.adj[4], table$R2.adj[6]-table$R2.adj[5])
table = table[-6, ]

In [16]:
step.res$call

rda(formula = alpha ~ OHV1D3 + GIERACE + M1ANTIB + SITE + M1ADEPR, 
    data = dat)

In [17]:
table

Unnamed: 0,R2.adj,Df,AIC,F,Pr(>F),ES.RDA
+ OHV1D3,0.05289959,1,1983.022,31.384714,0.002,0.05289959
+ GIERACE,0.07071292,4,1976.645,3.602167,0.012,0.01781333
+ M1ANTIB,0.08771212,1,1967.571,11.043508,0.004,0.0169992
+ SITE,0.10115806,5,1964.39,2.609608,0.03,0.01344594
+ M1ADEPR,0.11372319,1,1957.694,8.556572,0.004,0.01256513


In [18]:
rownames(table) = c('1,25-(OH)2D', 'Race', 'Oral Antibiotic Use', 'Site', 'Antidepressant Use')

In [22]:
library(ggplot2)
pdf('../figures/RDA_alpha.pdf')
covariates = rownames(table)
ggplot(table, aes(x=reorder(covariates, ES.RDA), y=ES.RDA, fill=covariates)) +
  labs(x = 'Non-redundant Covariants', y = 'Effect Size (PD Alpha Diversity)') +
  geom_bar(stat='identity') +
 theme(axis.text=element_text(size=14, face='bold'), 
       axis.title=element_text(size=17,face="bold"), 
       legend.position="none") + 
  coord_flip()
dev.off()

In [23]:
attach(mf)

In [25]:
pdf('../figures/boxplot_AlphaRDAcovariates.pdf')
par(mfrow=c(2,2))
boxplot(alpha_pd ~ GIERACE, main='Race', ylab='PD Alpha Diversity', 
       names=c('White', 'Black', 'Asian', 'Hispanic', 'Other'), las=2)
boxplot(alpha_pd ~ SITE, main='Site', ylab='', 
       names=c('BI', 'MN', 'PA', 'PI', 'PO', 'SD'))
boxplot(alpha_pd ~ M1ANTIB, main='Antibiotic Use', ylab='PD Alpha Diversity')
boxplot(alpha_pd ~ M1ADEPR, main='Antidepressent Use')
dev.off()