# Clear the decks
rm(list = ls())

# Set Working Directory
WorkingDirectory <- "~/projects/MicrobiomeWorkflowII"   # assign a value to a variable
system( paste0("mkdir -p ", WorkingDirectory) )          # invoke a system command
setwd(WorkingDirectory); getwd()                         # set and get the working directory
dir()                                                    # list the files in the directory

# Workflow for Microbiome Data Analysis: from raw reads to community analyses
# https://f1000research.com/articles/5-1492

# Introduction

# Methods

# Amplicon bioinformatics: from raw reads to tables

# First we load the necessary packages.
library("knitr")
library("BiocStyle")   # BiocManager::install("BiocStyle")

.cran_packages <- c("ggplot2", "gridExtra")
.bioc_packages <- c("dada2", "phyloseq", "DECIPHER", "phangorn")

# Install any missing CRAN packages
.inst <- .cran_packages %in% installed.packages()
if(any(!.inst)) {
  install.packages(.cran_packages[!.inst])
}

# Install any missing Bioconductor packages
.inst <- .bioc_packages %in% installed.packages()
if(any(!.inst)) {
  if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
  BiocManager::install(.bioc_packages[!.inst])
}

# Load packages into the session, and print package versions
sapply(c(.cran_packages, .bioc_packages), require, character.only = TRUE)
sessionInfo()

set.seed(100)

# Download data
miseq_path <- file.path("data", "MiSeq_SOP")
if(!file_test("-d", miseq_path)) {
  dir.create(miseq_path, recursive = TRUE)
}
url <- "http://www.mothur.org/w/images/d/d6/MiSeqSOPData.zip"
filename <- basename(url)
download.file(url = url, destfile = file.path("data", filename))
system( paste0("unzip ", file.path("data", filename), " -d data") )

# Sort the fastq files and separate the forward (R1) and reverse (R2) reads
fns <- sort(list.files(miseq_path, full.names = TRUE))
fnFs <- fns[grepl("R1", fns)]
fnRs <- fns[grepl("R2", fns)]

# Trim and Filter
# Inspect the quality profiles of a few randomly chosen samples before proceeding
ii <- sample(length(fnFs), 3)
for(i in ii) { print(plotQualityProfile(fnFs[i]) + ggtitle("Fwd")) }
for(i in ii) { print(plotQualityProfile(fnRs[i]) + ggtitle("Rev")) }
# Figure 1. Forward and Reverse Error Profiles.

# Trimming and filtering are performed on paired reads jointly:
# both reads of a pair must pass the filter for the pair to be kept.
filt_path <- file.path("data", "filtered")
if(!file_test("-d", filt_path)) dir.create(filt_path)
filtFs <- file.path(filt_path, basename(fnFs))
filtRs <- file.path(filt_path, basename(fnRs))
for(i in seq_along(fnFs)) {
  fastqPairedFilter(c(fnFs[[i]], fnRs[[i]]), c(filtFs[[i]], filtRs[[i]]),
                    trimLeft = 10, truncLen = c(245, 160),
                    maxN = 0, maxEE = 2, truncQ = 2, compress = TRUE)
}

# Infer sequence variants
# Dereplicate the filtered reads to remove redundancy,
# and name the resulting derep-class objects by their sample name.
derepFs <- derepFastq(filtFs)
derepRs <- derepFastq(filtRs)
sam.names <- sapply(strsplit(basename(filtFs), "_"), `[`, 1)
names(derepFs) <- sam.names
names(derepRs) <- sam.names

# Estimate the error rates: with err=NULL and selfConsist=TRUE, dada()
# alternates error-rate estimation and sample inference until convergence.
# Use at most 40 samples for the fit, guarding against data sets
# (such as the MiSeq SOP example data) that contain fewer than 40 samples.
n.sam <- min(40, length(derepFs))
ddF <- dada(derepFs[1:n.sam], err = NULL, selfConsist = TRUE)
ddR <- dada(derepRs[1:n.sam], err = NULL, selfConsist = TRUE)
# Figure 2. Forward and Reverse Read Error Profiles.
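
# The "Figure 2" caption above refers to the fitted error-rate profiles. A minimal
# sketch of how they can be inspected with dada2's plotErrors(); nominalQ = TRUE
# additionally overlays the error rates expected from the nominal quality scores.
plotErrors(ddF, nominalQ = TRUE)
plotErrors(ddR, nominalQ = TRUE)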
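
# Note on the filtering step above: fastqPairedFilter() processes one pair of files
# per call. Newer dada2 releases also provide filterAndTrim(), which handles all
# pairs in one call and returns a matrix of read counts before and after filtering.
# A sketch with the same parameters as the loop above; multithread = TRUE is an
# assumption about the local machine, and the call simply repeats the filtering
# already performed.
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs,
                     trimLeft = 10, truncLen = c(245, 160),
                     maxN = 0, maxEE = 2, truncQ = 2,
                     compress = TRUE, multithread = TRUE)
head(out)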
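
# Portability note: the system("mkdir -p ...") and system("unzip ...") calls above
# assume a Unix-like shell. A sketch of an equivalent setup using only base R
# (same paths and variables as above), which also works on Windows:
dir.create(WorkingDirectory, recursive = TRUE, showWarnings = FALSE)
unzip(zipfile = file.path("data", filename), exdir = "data")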