<a href="https://colab.research.google.com/github/pjd-code/millipede-microplastic/blob/main/millipede_microplastic_qiime%2Bgreengenes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook walks through the steps I used to process the raw sequencing reads with QIIME 2, using Greengenes as the reference database.

In [None]:
# Clean up the Colab environment: remove the bundled sample files so the
# working directory only holds data for this analysis.
%cd /content/sample_data
# -f keeps the cell quiet and re-runnable when the files are already gone.
!rm -f *.csv *.md *.json

In [None]:
from google.colab import drive

# Mount Google Drive under the working directory so files kept on Drive are
# reachable from the notebook.
drive.mount("/content/sample_data/googledrive")

In [None]:
# Clone the project repository into the current directory; the target folder
# is spelled out but matches git's default (the repository name).
!git clone https://github.com/pjd-code/millipede-microplastic.git millipede-microplastic

In [None]:
# Run the QIIME 2 setup script from the repository cloned by this notebook.
# NOTE(review): the path previously pointed at a `hudson-valley-tick` checkout
# that this notebook never creates; confirm setup_qiime2.py ships in the
# millipede-microplastic repository.
%run /content/sample_data/millipede-microplastic/setup_qiime2.py

In [None]:
# Fetch the BaseSpace CLI binary (`bs`) straight into /bin and mark it
# executable for the current user.
%cd /bin
!wget -O /bin/bs "https://launch.basespace.illumina.com/CLI/latest/amd64-linux/bs"
!chmod u+x /bin/bs

In [None]:
# Authenticate the BaseSpace CLI, then print the logged-in account to confirm
# the authentication worked.
!bs auth
!bs whoami

In [None]:
# Download the fastq.gz files of BaseSpace project 347968211 into the
# sequence working directory.
!bs project download \
  --id 347968211 \
  --extension=fastq.gz \
  -o /content/sample_data/sequence

In [None]:
# Work from the download directory; the cells below use paths relative to it.
%cd /content/sample_data/sequence

In [None]:
# Consolidate every downloaded fastq.gz file into a single `samples` folder.
# -p keeps the cell re-runnable when `samples` already exists.
!mkdir -p samples
# Prune ./samples from the search so mv is never asked to move a file onto itself.
!find . -path ./samples -prune -o -type f -name "*.gz" -exec mv -t samples "{}" +
# Remove the download folders that are now empty, leaving `samples` alone.
!find . -mindepth 1 -type d -empty -not -path ./samples -delete

In [None]:
# Move the Greengenes V4 150 bp reference sequences and classifier next to the
# reads so later cells can refer to them by file name.
# NOTE(review): the source path previously pointed at a `hudson-valley-tick`
# checkout that this notebook never creates; confirm both .qza files ship in
# the millipede-microplastic repository.
!mv \
  /content/sample_data/millipede-microplastic/v4-150bp-se-ref-seqs-gg.qza \
  /content/sample_data/millipede-microplastic/v4-150bp-classifier-gg.qza \
  /content/sample_data/sequence

In [None]:
# Import the consolidated fastq.gz files (Casava 1.8 single-lane-per-sample
# layout) as a paired-end QIIME 2 artifact.
!qiime tools import \
  --input-path /content/sample_data/sequence/samples \
  --input-format CasavaOneEightSingleLanePerSampleDirFmt \
  --type 'SampleData[PairedEndSequencesWithQuality]' \
  --output-path demux-paired-end.qza

In [None]:
# Sanity check: print the metadata of the imported artifact.
!qiime tools peek demux-paired-end.qza

In [None]:
# Build the demultiplexing summary visualization for the imported reads.
!qiime demux summarize \
  --o-visualization demux-paired-end.qzv \
  --i-data demux-paired-end.qza

In [None]:
# Denoise with DADA2 in single-end mode: reads are truncated to 150 bp with no
# trimming at the start, then dereplicated and chimera-filtered.
# NOTE(review): the input artifact was imported as paired-end; confirm that
# single-end denoising is the intended treatment.

!qiime dada2 denoise-single \
  --i-demultiplexed-seqs demux-paired-end.qza \
  --p-trunc-len 150 \
  --p-trim-left 0 \
  --o-table se-table-dada2.qza \
  --o-representative-sequences se-rep-seqs-dada2.qza \
  --o-denoising-stats se-stats-dada2.qza

In [None]:
# Summarize the DADA2 feature table and tabulate its representative sequences.
# The table summary gets its own output name: both commands used to write
# se-rep-seqs-dada2.qzv, so the second one replaced the first.
# The metadata path now includes the `sequence` directory used by every other
# cell, and is quoted so the shell does not treat [17346] as a glob pattern.
!qiime feature-table summarize \
  --i-table se-table-dada2.qza \
  --o-visualization se-table-dada2.qzv \
  --m-sample-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt'

!qiime feature-table tabulate-seqs \
  --i-data se-rep-seqs-dada2.qza \
  --o-visualization se-rep-seqs-dada2.qzv

In [None]:
# Open-reference clustering at 99% identity, using Greengenes as the reference.
!qiime vsearch cluster-features-open-reference \
  --i-table se-table-dada2.qza \
  --i-sequences se-rep-seqs-dada2.qza \
  --i-reference-sequences v4-150bp-se-ref-seqs-gg.qza \
  --p-perc-identity .99 \
  --p-threads 0 \
  --o-clustered-table se-vs_clst99-table.qza \
  --o-clustered-sequences se-vs_clst99-seq.qza \
  --o-new-reference-sequences tick_refseq99_gg.qza

In [None]:
# Visualize the clustered feature table, the clustered sequences, and the
# DADA2 denoising statistics.
# The metadata path is quoted so the shell does not treat [17346] as a glob pattern.
!qiime feature-table summarize \
  --i-table se-vs_clst99-table.qza \
  --o-visualization se-vs_clst99-table.qzv \
  --m-sample-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt'

!qiime feature-table tabulate-seqs \
  --i-data se-vs_clst99-seq.qza \
  --o-visualization se-vs_clst99-seq.qzv

!qiime metadata tabulate \
  --m-input-file se-stats-dada2.qza \
  --o-visualization se-stats-dada2.qzv

In [None]:
# Align the clustered sequences and build unrooted and rooted trees with the
# MAFFT + FastTree pipeline.
!qiime phylogeny align-to-tree-mafft-fasttree \
  --i-sequences se-vs_clst99-seq.qza \
  --o-rooted-tree rooted-tree.qza \
  --o-tree unrooted-tree.qza \
  --o-masked-alignment masked-aligned-rep-seqs.qza \
  --o-alignment aligned-rep-seqs.qza

In [None]:
# Assign taxonomy to the clustered sequences with the Greengenes classifier.
!qiime feature-classifier classify-sklearn \
  --i-reads se-vs_clst99-seq.qza \
  --i-classifier v4-150bp-classifier-gg.qza \
  --o-classification taxonomy.qza

In [None]:
# Taxonomic bar plots of the clustered table, annotated with sample metadata.
# The metadata path is quoted so the shell does not treat [17346] as a glob pattern.
!qiime taxa barplot \
  --i-table se-vs_clst99-table.qza \
  --i-taxonomy taxonomy.qza \
  --m-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt' \
  --o-visualization taxa-bar-plots.qzv

In [None]:
# Compute the core phylogenetic diversity metrics at a sampling depth of 5800
# reads per sample; results go to core-metrics-results3/.
# Downstream significance cells must read from this same directory.
# The metadata path is quoted so the shell does not treat [17346] as a glob pattern.
!qiime diversity core-metrics-phylogenetic \
  --i-phylogeny rooted-tree.qza \
  --i-table se-vs_clst99-table.qza \
  --p-sampling-depth 5800 \
  --m-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt' \
  --output-dir core-metrics-results3

In [None]:
# Test alpha diversity (Faith's PD and evenness) for differences between
# metadata groups.
# Inputs and outputs use core-metrics-results3/, the directory written by the
# core-metrics step; the previous core-metrics-results2/ and
# core-metrics-results/ paths are never created by this notebook.
# The metadata path is quoted so the shell does not treat [17346] as a glob pattern.
!qiime diversity alpha-group-significance \
  --i-alpha-diversity core-metrics-results3/faith_pd_vector.qza \
  --m-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt' \
  --o-visualization core-metrics-results3/faith-pd-group-significance.qzv

!qiime diversity alpha-group-significance \
  --i-alpha-diversity core-metrics-results3/evenness_vector.qza \
  --m-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt' \
  --o-visualization core-metrics-results3/evenness-group-significance.qzv

In [None]:
# Pairwise group-significance test on unweighted UniFrac distances, grouped by
# the `category` metadata column.
# Paths use core-metrics-results3/, the directory written by the core-metrics
# step; the previous core-metrics-results/ path is never created by this notebook.
# The metadata path is quoted so the shell does not treat [17346] as a glob pattern.
!qiime diversity beta-group-significance \
  --i-distance-matrix core-metrics-results3/unweighted_unifrac_distance_matrix.qza \
  --m-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt' \
  --m-metadata-column category \
  --o-visualization core-metrics-results3/unweighted-unifrac-group-significance.qzv \
  --p-pairwise

In [None]:
# Install the DEICODE plugin from conda-forge.
# -y answers the confirmation prompt so the cell does not stall under Run All.
!conda install -y -c conda-forge deicode

In [None]:
# Robust PCA ordination with DEICODE, an Emperor biplot of the result, and a
# PERMANOVA test on the resulting distance matrix grouped by `category`.
# Metadata paths are quoted so the shell does not treat [17346] as a glob pattern.
!qiime deicode rpca \
  --i-table se-vs_clst99-table.qza \
  --p-min-feature-count 10 \
  --p-min-sample-count 500 \
  --o-biplot ordination.qza \
  --o-distance-matrix distance.qza

!qiime emperor biplot \
  --i-biplot ordination.qza \
  --m-sample-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt' \
  --m-feature-metadata-file /content/sample_data/sequence/taxonomy.qza \
  --o-visualization biplot.qzv \
  --p-number-of-features 10

!qiime diversity beta-group-significance \
  --i-distance-matrix distance.qza \
  --m-metadata-file '/content/sample_data/sequence/Milli_Pooh02_220421_illumia_run[17346].txt' \
  --m-metadata-column category \
  --p-method permanova \
  --o-visualization status_significance.qzv