In [None]:
#Load Packages
library(DESeq2)
library(edgeR)
library(limma)
library(gplots)
library(RColorBrewer)
library(pheatmap)
library(ggplot2)
library(ggrepel)
library(pathfindR)
library(scales)
library(data.table)
library(fBasics)
library(forcats)
library(omu)
library(maptools)
library(phyloseq)
library(vegan)

In [None]:
# Shared ggplot2 theme for the figures: blank panel background with a border,
# no grid lines, no facet-strip background, black axis text and ticks,
# a one-line margin on every side, and no legend.
# Note: the variable deliberately keeps the name `theme`; calls to the
# function theme() still resolve to the function.
theme <- theme(
    panel.background = element_blank(),
    panel.border = element_rect(fill = NA),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black"),
    axis.ticks = element_line(colour = "black"),
    plot.margin = unit(c(1, 1, 1, 1), "line"),
    legend.position = "none"
)

In [None]:
# Significance threshold (alpha / FDR cutoff) used by the analysis.
alpha <- 0.01

In [None]:
# Input files: the BIOM abundance table and the tab-separated sample map.
map <- "Map.COPD.SmNV.b2.txt"
file <- "Merged.otu_table.biom"

In [None]:
# Load the abundance table and the mapping table.
# The BIOM file is imported with phyloseq; the mapping file is read as a
# tab-separated table with a header row, using its first column as row names,
# and wrapped as phyloseq sample data.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
abundance.table <- import_biom(file, taxaPrefix = FALSE)
mapping.table <- sample_data(
    read.table(map, header = TRUE, sep = "\t", row.names = 1)
)

In [None]:
# Combine the OTU counts, the taxonomy and the sample metadata into a single
# phyloseq object.
lung.physeq <- phyloseq(
    otu_table(abundance.table),
    tax_table(abundance.table),
    mapping.table
)

In [None]:
# Name the columns of the taxonomy table of the phyloseq object with the
# seven taxonomic ranks.
colnames(tax_table(lung.physeq)) <- c(
    "Domain", "Phylum", "Class", "Order", "Family", "Genus", "Species"
)

In [None]:
# Load the phylogenetic tree file (use the unannotated tree).
treefile <- "97_otus.tree"
tree.obj <- import_qiime(treefilename = treefile)

In [None]:
# Merge the phyloseq object, the sample metadata and the tree into a single
# phyloseq object.
otu.table <- merge_phyloseq(lung.physeq, mapping.table, tree.obj)

# Remove taxa whose total abundance across all samples is 0.
# taxa_sums() is used instead of rowSums() so the filter does not depend on
# whether taxa are stored as rows or as columns of the OTU table.
otu.table <- prune_taxa(taxa_sums(otu.table) != 0, otu.table)

In [None]:
## Normalization helper, used to turn an OTU table into relative abundances.
## Divides every element of `x` by the sum of `x`, so the values of one
## sample become proportions of that sample's total.
normalizeSample <- function(x) {
    sample.total <- sum(x)
    x / sample.total
}
# Apply the per-sample normalization to obtain the relative-abundance table.
otu.relative.table <- transformSampleCounts(otu.table, normalizeSample)

In [None]:
# Create per-rank tables by agglomerating the relative-abundance table
# (done after normalization, from the relative table).
Phylum.rel.table <- tax_glom(otu.relative.table, taxrank = "Phylum")
Class.rel.table <- tax_glom(otu.relative.table, taxrank = "Class")
Order.rel.table <- tax_glom(otu.relative.table, taxrank = "Order")
Family.rel.table <- tax_glom(otu.relative.table, taxrank = "Family")

# "OTU" is not one of the rank names of the taxonomy table (Domain ... Species),
# so tax_glom(taxrank = "OTU") stops with a bad-taxrank error. The OTU-level
# relative table is simply the un-agglomerated relative table.
OTU.rel.table <- otu.relative.table

# Genus level: agglomerate the raw counts first, then normalize. The earlier
# tax_glom() of the relative table at genus level was overwritten by this
# result before ever being used, so it is no longer computed.
Genus.table <- tax_glom(otu.table, taxrank = "Genus")
Genus.rel.table <- transformSampleCounts(Genus.table, normalizeSample)

In [None]:
# Subset samples: keep samples with Subset_1 equal to 1, Subject_Type_code
# 1 or 2, and Metatranscriptome_plus_BKG equal to 1. The same three filters
# are applied to the genus-level and to the OTU-level relative tables.
MetaTranscript.Genus.rel.table <- subset_samples(
    Genus.rel.table,
    Subset_1 == 1 &
        Subject_Type_code %in% c(1, 2) &
        Metatranscriptome_plus_BKG %in% c(1)
)

MetaTranscript.otu.rel.table <- subset_samples(
    otu.relative.table,
    Subset_1 == 1 &
        Subject_Type_code %in% c(1, 2) &
        Metatranscriptome_plus_BKG %in% c(1)
)

In [None]:
# HeatMap
# Select the most abundant genera: those above 3% relative abundance (0.03)
# in at least 1% of the samples.
# NOTE(review): an earlier version of this comment said 2%; the code filters
# at 0.03 - confirm which threshold is intended.
Genus.rel.wh1 <- genefilter_sample(
    MetaTranscript.Genus.rel.table,
    filterfun_sample(function(x) x > 0.03),
    A = 0.01 * nsamples(MetaTranscript.Genus.rel.table)
)
Genus.rel.table1B <- prune_taxa(Genus.rel.wh1, MetaTranscript.Genus.rel.table)

# Abundance matrix of the selected genera
GenusData <- otu_table(Genus.rel.table1B)

# Bray-Curtis distances between genera (rows), used to cluster rows
GenusData.Bray.dist <- vegdist(GenusData, method = "bray")

# Bray-Curtis distances between samples (columns), used to cluster columns
Samples.Bray.dist <- distance(GenusData, method = "bray")

# Color scale for the heatmap
mypalette <- colorRampPalette(c('#ffffff','#4169E1','#0000CD'))(100)
# Annotation colors for each sample type
annon_colors <- list(Sample_Type_Simple=c(BKG="#000001", BAL="#8EFA00", Sup="#932CE7"))

# Column annotation: the sample type of every sample
df2 <- data.frame(Sample_Type_Simple = sample_data(Genus.rel.table1B)[,c("Sample_Type_Simple")], row.names = rownames(sample_data(Genus.rel.table1B)))

# Data frame of abundances, one row per genus
df <- as.data.frame(GenusData)
# Append the taxonomy of every row, taken from the phyloseq object
df <- cbind(as(df, "data.frame"), as(tax_table(Genus.rel.table1B)[rownames(df), ], "matrix"))

# Treat empty rank annotations as missing
df[df=="g__"] <- NA
df[df=="f__"] <- NA
df[df=="o__"] <- NA
df[df=="c__"] <- NA

# Row label: the genus when annotated, otherwise the lowest annotated rank
# followed by "(u.g.)"
df$gs <- ifelse(is.na(df$Genus), paste(df$Family,"(u.g.)"), paste(df$Genus))
df$gs <- ifelse(is.na(df$Family), paste(df$Order,"(u.g.)"), df$gs)
df$gs <- ifelse(is.na(df$Order), paste(df$Class,"(u.g.)"), df$gs)
df$gs <- ifelse(is.na(df$Class), paste(df$Phylum,"(u.g.)"), df$gs)

# Set row names. make.unique() leaves already-unique labels untouched and
# only disambiguates repeated labels, which would otherwise make the
# row-name assignment fail.
rownames(df) <- make.unique(df$gs)

# Keep only the abundance columns
drops <- c("Domain","Phylum","Class","Order","Family","Genus","OTU","gs","Species")
df <- df[ , !(names(df) %in% drops)]

# Shorten the sample names. The tags are matched literally (fixed = TRUE):
# as regular expressions the "." would match any character, so e.g. ".171"
# would also strip "0171" from inside a sample ID.
# The same tags are removed from the heatmap columns and from the annotation
# rows so the two sets of names stay aligned.
for (tag in c("COPD.", "SmNV.", ".171", ".172", ".173")) {
    colnames(df) <- gsub(tag, "", colnames(df), fixed = TRUE)
    rownames(df2) <- gsub(tag, "", rownames(df2), fixed = TRUE)
}

# Print HeatMap
# NOTE(review): pheatmap documents column gaps as applying only when columns
# are not clustered, so gaps_col presumably has no effect here - confirm.
pheatmap(df, cluster_rows=TRUE, show_rownames=TRUE,
    cluster_cols=TRUE, annotation_col=df2, scale="row",
    clustering_distance_rows = GenusData.Bray.dist, clustering_distance_cols = Samples.Bray.dist,
    clustering_method="average",
    gaps_col=50,
    border_color="black",
    color = colorRampPalette(c('#4169E1','#ffffff','#0000CD'))(100),
    annotation_colors=annon_colors[1], legend=FALSE)

In [None]:
# Convert the phyloseq object to a DESeq2 data set. The design has a single
# term, Sample_Type_DMM_Class_ReSeq; no additional covariates are modelled.
# The OTU-level table is the one analysed. A genus-level conversion used to
# run first but was overwritten immediately; it is kept here as a commented
# alternative instead of being computed and discarded.
# diagdds <- phyloseq_to_deseq2(Genus.table, ~ Sample_Type_DMM_Class_ReSeq)
diagdds <- phyloseq_to_deseq2(otu.table, ~ Sample_Type_DMM_Class_ReSeq)

In [None]:
# Geometric-mean helper used before estimating size factors.
# Sums the logs of the strictly positive values of `x`, divides by the full
# length of `x` (zeros and NAs still count in the denominator) and
# exponentiates. `na.rm` is forwarded to sum().
gm_mean <- function(x, na.rm=TRUE) {
    positive.values <- x[x > 0]
    log.total <- sum(log(positive.values), na.rm = na.rm)
    exp(log.total / length(x))
}
# Per-row geometric means of the count matrix
geoMeans <- apply(counts(diagdds), 1, gm_mean)

# Estimate size factors (using the geometric means above) and dispersions
diagdds <- estimateSizeFactors(diagdds, geoMeans = geoMeans)
diagdds <- estimateDispersions(diagdds)

In [None]:
# Subset the samples for analysis: keep Subset_1 == 1, Subject_Type_code
# 1 or 2, and Metatranscriptome_plus_BKG == 1.
diagdds <- diagdds[
    ,
    diagdds$Subset_1 %in% c(1) &
        diagdds$Subject_Type_code %in% c(1, 2) &
        diagdds$Metatranscriptome_plus_BKG %in% c(1)
]
# Further subset with Sample_Description_s_code == 5
# (presumably the BAL samples - confirm against the mapping file).
diagdds.bal <- diagdds[, diagdds$Sample_Description_s_code %in% c(5)]

In [None]:
# Drop factor levels of the design variable that no longer occur after the
# sample subsetting.
diagdds$Sample_Type_DMM_Class_ReSeq <- droplevels(
    diagdds$Sample_Type_DMM_Class_ReSeq
)

In [None]:
# Run the differential analysis on the design variable
# Sample_Type_DMM_Class_ReSeq. With no explicit contrast, results() compares
# the last level of the design variable against its reference level: a
# positive log2 fold change means higher abundance in the last level, a
# negative one means lower.
# NOTE(review): the previous comment described "Lung Cancer vs Wild Type";
# check levels(diagdds$Sample_Type_DMM_Class_ReSeq) for the actual groups.
diagdds <- DESeq(diagdds)
# Pass the notebook's chosen FDR cutoff so independent filtering is tuned
# for it; results() otherwise uses its default alpha of 0.1.
res4 <- results(diagdds, alpha = alpha)