16S Sequencing Clean-up

In [None]:
%%bash
# 16S clean-up: import the raw reads, demultiplex them by barcode, summarize
# read quality, and denoise with DADA2.
# Stop at the first failing command so later steps never run on missing artifacts.
set -euo pipefail

# Import raw sequence data into QIIME 2
qiime tools import \
  --type MultiplexedSingleEndBarcodeInSequence \
  --input-path seqs/ \
  --output-path multiplexed_seqs.qza

# Demultiplex sequences (use barcodes to determine which sequences belong to which sample)
qiime cutadapt demux-single \
  --i-seqs multiplexed_seqs.qza \
  --m-barcodes-file metadata.tsv \
  --m-barcodes-column barcodes \
  --p-error-rate 0 \
  --o-per-sample-sequences demultiplexed-seqs.qza \
  --o-untrimmed-sequences untrimmed.qza

# Check quality of sequences
qiime demux summarize \
  --i-data demultiplexed-seqs.qza \
  --o-visualization demux.qzv

# -p: do not fail when the directory is left over from a previous run
mkdir -p trim250

# Denoise, truncating reads at 250 bp: the length up to which the mean quality
# stays at roughly >30 (determined by looking at demux.qzv)
qiime dada2 denoise-single \
  --i-demultiplexed-seqs demultiplexed-seqs.qza \
  --p-trim-left 0 \
  --p-trunc-len 250 \
  --o-representative-sequences trim250/repseqs.qza \
  --o-table trim250/table.qza \
  --o-denoising-stats stats-dada2.qza

16S Taxonomy and filtering

In [None]:
%%bash
# 16S taxonomy assignment and feature-table filtering.
# Stop at the first failing command so later steps never run on missing artifacts.
set -euo pipefail

# Resolve the sample metadata once, before any `cd`, so every step below reads
# the same file no matter how deep the working directory is.
# NOTE(review): assumes metadata.tsv sits in the directory this cell starts in
# (where the demultiplexing step reads it) - confirm.
METADATA="$PWD/metadata.tsv"

# Download the pre-trained SILVA classifier only if it is not already present
# NOTE(review): this classifier was published for QIIME 2 2022.8 - confirm it
# matches the installed QIIME 2 release.
CLASSIFIER=silva-138-99-515-806-nb-classifier.qza
[ -f "$CLASSIFIER" ] || wget "https://data.qiime2.org/2022.8/common/$CLASSIFIER"

# Assign taxonomy using the SILVA classifier
qiime feature-classifier classify-sklearn \
  --i-classifier "$CLASSIFIER" \
  --i-reads trim250/repseqs.qza \
  --o-classification trim250/taxonomy.qza

cd trim250/

# Create taxa feature table to assign taxonomy (exclude mitochondria and chloroplasts)
qiime taxa filter-table \
  --i-table table.qza \
  --i-taxonomy taxonomy.qza \
  --p-exclude mitochondria,chloroplast \
  --o-filtered-table ex-mito_ex-chloro.qza

# Collapse feature table to taxonomic level 7
qiime taxa collapse \
  --i-table ex-mito_ex-chloro.qza \
  --i-taxonomy taxonomy.qza \
  --p-level 7 \
  --o-collapsed-table 16S-taxon-table.qza

# Create a new folder for filtering
mkdir -p filtering
cd filtering

# Visualize feature table and determine where to cut off samples (500 in this case),
# then remove samples below the cut-off
qiime feature-table summarize \
  --i-table ../16S-taxon-table.qza \
  --o-visualization taxontable.qzv \
  --m-sample-metadata-file "$METADATA"

qiime feature-table filter-samples \
  --i-table ../16S-taxon-table.qza \
  --p-min-frequency 500 \
  --m-metadata-file "$METADATA" \
  --o-filtered-table sampfilt500.qza

# Run Adrian's code to determine the feature cut-off (3.25 in this case, rounded up to 4).
# The frequency table has to be exported and converted to .tsv for Adrian's code to work.
qiime tools export \
  --input-path sampfilt500.qza \
  --output-path .
biom convert -i feature-table.biom -o 16Sfeaturetable.tsv --to-tsv

# NOTE(review): assumes calccuttoff.py lives in trim250/ - confirm its location.
python3 ../calccuttoff.py ./16Sfeaturetable.tsv

# Remove features below the cut-off of 4.
# No --m-metadata-file here: filter-features interprets it as *feature* metadata,
# so passing the sample metadata would not select any feature of this table.
# NOTE(review): the original read nomitochloro.qza, which no earlier step creates;
# the sample-filtered table is used instead - confirm.
# NOTE(review): "how many samples a feature must be in" describes --p-min-samples,
# not --p-min-frequency - confirm which cut-off is intended.
qiime feature-table filter-features \
  --i-table sampfilt500.qza \
  --p-min-frequency 4 \
  --o-filtered-table freqfiltered4.qza

# Remove positive/negative controls.
# Build a one-column metadata file of the sample IDs to keep: a fresh header line,
# then every ID from the metadata (NR > 1 skips the metadata's own header row)
# except IDs containing an uppercase P or N.
# NOTE(review): assumes control sample IDs are the ones containing "P" or "N" - confirm.
echo "sampleid" > sampleids.txt
awk -F '\t' 'NR > 1 {print $1}' "$METADATA" >> sampleids.txt
grep -v '[PN]' sampleids.txt > samples_to_keep.tsv
head samples_to_keep.tsv

# NOTE(review): the original read sampfilt4-500.qza, which no earlier step creates;
# the output of the feature filter above is used instead - confirm.
qiime feature-table filter-samples \
  --i-table freqfiltered4.qza \
  --m-metadata-file samples_to_keep.tsv \
  --o-filtered-table fully-filt.qza

# Create barplot for visualization.
# NOTE(review): the original left --i-table blank. The barplot needs a table whose
# feature IDs appear in taxonomy.qza, so the uncollapsed mitochondria/chloroplast-
# filtered table is used here - confirm this is the intended input.
qiime taxa barplot \
  --i-table ../ex-mito_ex-chloro.qza \
  --i-taxonomy ../taxonomy.qza \
  --m-metadata-file "$METADATA" \
  --o-visualization taxabarplot.qzv

# Export feature table.
# NOTE(review): the original left --input-path blank; the fully filtered table is
# assumed to be the one to export - confirm.
qiime tools export \
  --input-path fully-filt.qza \
  --output-path taxon-exported-feature-table

cd taxon-exported-feature-table
# Convert biom table to tsv file
biom convert -i feature-table.biom -o 16Sfeaturetable.tsv --to-tsv

16S Basic Analysis

In [None]:
%%bash
# 16S basic analysis: build a phylogenetic tree, compute core diversity metrics,
# and test alpha diversity between metadata groups.
# Stop at the first failing command so later steps never run on missing artifacts.
set -euo pipefail

# table.qza and repseqs.qza are written into trim250/ by the DADA2 step, so work there.
# NOTE(review): assumes metadata.tsv sits one level up, next to trim250/ - confirm.
cd trim250/

# Create tree (must run before core metrics, which reads rooted-tree.qza)
qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences repseqs.qza \
  --o-alignment aligned-repseqs.qza \
  --o-masked-alignment masked-align-repseqs.qza \
  --o-tree unrooted-tree.qza \
  --o-rooted-tree rooted-tree.qza

# Calculate core diversity metrics
# NOTE(review): QIIME 2 refuses to write into an existing --output-dir; remove
# core-metrics-results before re-running this cell.
qiime diversity core-metrics-phylogenetic \
  --i-phylogeny rooted-tree.qza \
  --i-table table.qza \
  --p-sampling-depth 2000 \
  --m-metadata-file ../metadata.tsv \
  --output-dir core-metrics-results

# Faith's PD Alpha Diversity
qiime diversity alpha-group-significance \
  --i-alpha-diversity core-metrics-results/faith_pd_vector.qza \
  --m-metadata-file ../metadata.tsv \
  --o-visualization core-metrics-results/faithpd-group-sig.qzv

# Evenness Alpha Diversity
qiime diversity alpha-group-significance \
  --i-alpha-diversity core-metrics-results/evenness_vector.qza \
  --m-metadata-file ../metadata.tsv \
  --o-visualization core-metrics-results/evenness-group-sig.qzv

ITS Sequencing Clean-up

In [None]:
%%bash
# ITS clean-up: import the raw reads, demultiplex them by barcode, summarize
# read quality, and denoise with DADA2.
# Stop at the first failing command so later steps never run on missing artifacts.
set -euo pipefail

# NOTE(review): the artifact names below (multiplexed_seqs.qza, demultiplexed-seqs.qza,
# untrimmed.qza, stats-dada2.qza) are the same ones the 16S clean-up writes; run
# this cell in a separate directory or the 16S files are overwritten - confirm.

# Import raw sequence data into QIIME 2
qiime tools import \
  --type MultiplexedSingleEndBarcodeInSequence \
  --input-path seqs/ \
  --output-path multiplexed_seqs.qza

# Demultiplex sequences using the barcodes column of the ITS metadata
qiime cutadapt demux-single \
  --i-seqs multiplexed_seqs.qza \
  --m-barcodes-file its_metadata.tsv \
  --m-barcodes-column barcodes \
  --p-error-rate 0 \
  --o-per-sample-sequences demultiplexed-seqs.qza \
  --o-untrimmed-sequences untrimmed.qza

# Check quality of sequences. The visualization gets its own .qzv file so it does
# not overwrite demultiplexed-seqs.qza, which DADA2 reads next.
qiime demux summarize \
  --i-data demultiplexed-seqs.qza \
  --o-visualization demultiplexed-seqs.qzv

# Output directory for the denoised artifacts; -p tolerates re-runs
mkdir -p trim240

# Denoise, truncating reads at 240 bp
qiime dada2 denoise-single \
  --i-demultiplexed-seqs demultiplexed-seqs.qza \
  --p-trim-left 0 \
  --p-trunc-len 240 \
  --o-representative-sequences trim240/repseqs.qza \
  --o-table trim240/table.qza \
  --o-denoising-stats stats-dada2.qza


UNITE Database Classifier Training

In [None]:
%%bash
# Train a naive Bayes classifier on the UNITE reference release (10.05.2021, 99%, developer set).
# Stop at the first failing command so later steps never run on missing artifacts.
set -euo pipefail

# Remember the starting directory: the trained classifier is written here because
# the ITS analysis reads it as ../classifier.qza from inside trim240/.
START_DIR="$PWD"

# Download the reference release only if it is not already present
ARCHIVE=C5547B97AAA979E45F79DC4C8C4B12113389343D7588716B5AD330F8BDB300C9.tgz
[ -f "$ARCHIVE" ] || wget "https://files.plutof.ut.ee/public/orig/C5/54/$ARCHIVE"
tar xzf "$ARCHIVE"
cd sh_qiime_release_10.05.2021/developer/

# Upper-case every sequence line (header lines starting with ">" are kept as-is)
# and strip spaces
awk '/^>/ {print($0)}; /^[^>]/ {print(toupper($0))}' sh_refs_qiime_ver8_99_10.05.2021_dev.fasta | tr -d ' ' > sh_refs_qiime_ver8_99_10.05.2021_dev_uppercase.fasta

# Import the upper-cased reference sequences produced above (the original cell
# imported the unmodified FASTA, leaving the upper-cased file unused).
# The type is quoted so the shell cannot treat [...] as a glob pattern.
qiime tools import \
  --type 'FeatureData[Sequence]' \
  --input-path sh_refs_qiime_ver8_99_10.05.2021_dev_uppercase.fasta \
  --output-path unite-ver8-seqs_99_10.05.2021.qza

# Import the matching reference taxonomy (tab-separated, no header line)
qiime tools import \
  --type 'FeatureData[Taxonomy]' \
  --input-path sh_taxonomy_qiime_ver8_99_10.05.2021_dev.txt \
  --output-path unite-ver8-taxonomy_99_10-05-2021.qza \
  --input-format HeaderlessTSVTaxonomyFormat

# Train the classifier
qiime feature-classifier fit-classifier-naive-bayes \
  --i-reference-reads unite-ver8-seqs_99_10.05.2021.qza \
  --i-reference-taxonomy unite-ver8-taxonomy_99_10-05-2021.qza \
  --o-classifier "$START_DIR/classifier.qza"

ITS Basic Analysis

In [None]:
%%bash
# ITS basic analysis: assign taxonomy with the trained UNITE classifier, draw a
# taxa barplot, collapse the table to level 7, and export it as a .tsv.
# Stop at the first failing command so later steps never run on missing artifacts.
set -euo pipefail

cd trim240/

# Assign taxonomy using the trained classifier
qiime feature-classifier classify-sklearn \
  --i-classifier ../classifier.qza \
  --i-reads repseqs.qza \
  --o-classification its_taxonomy.qza

# Create barplot for visualization
qiime taxa barplot \
  --i-table table.qza \
  --i-taxonomy its_taxonomy.qza \
  --m-metadata-file ../its_metadata.tsv \
  --o-visualization its_taxa_barplot.qzv

# Collapse feature table to taxonomic level 7.
# The only output is named explicitly, so the original's extra
# `--output-dir ./taxtable` is dropped: it named no additional output and an
# already-existing directory would block a re-run.
qiime taxa collapse \
  --i-table table.qza \
  --i-taxonomy its_taxonomy.qza \
  --p-level 7 \
  --o-collapsed-table its_taxtable.qza

# Export the collapsed feature table
qiime tools export \
  --input-path its_taxtable.qza \
  --output-path taxon-exported-feature-table

cd taxon-exported-feature-table
# Convert biom table to tsv file
biom convert -i feature-table.biom -o itsfeaturetable.tsv --to-tsv