To do:
*  [x] Filtering
    * [x] Filter rep seqs
    * [x] Filter tables
*  [x] Build Phylogenetic Tree
    - [x] Build tree for PICRUST & Downstream Analysis
    - [x] Build Tree (with genomes)
* [x] Alpha rarefaction
* [ ] Calculate and explore diversity metrics
* [x] Assign taxonomy

In [9]:
import os
import pandas as pd

from qiime2 import Visualization

In [136]:
metadata = pd.read_csv('../data/metadata/sample-metadata.tsv', sep='\t')
metadata = metadata[metadata.Sample.isin(['P5', 'P8', 'P9'])]
df = metadata.loc[:,'#SampleID'].to_frame()
df = df.rename(columns={'#SampleID':'SampleID'})
df.to_csv('../data/metadata/samples-to-keep.tsv',sep='\t', index=False)
df

Unnamed: 0,SampleID
50,P5-rep1
51,P5-rep2
52,P5-rep3
53,P5-rep4
54,P5-rep5
55,P8-rep1
56,P8-rep2
57,P8-rep3
58,P8-rep4
59,P8-rep5


In [169]:
loc = []
for i in ['P5', 'P8', 'P9']:
    for x in range(5):
        loc.append(i)
df['Location'] = loc
df.to_csv('../data/metadata/filtered_sample.tsv',sep='\t', index=False)
df

Unnamed: 0,SampleID,Location
50,P5-rep1,P5
51,P5-rep2,P5
52,P5-rep3,P5
53,P5-rep4,P5
54,P5-rep5,P5
55,P8-rep1,P8
56,P8-rep2,P8
57,P8-rep3,P8
58,P8-rep4,P8
59,P8-rep5,P8


In [127]:
! mkdir ../data/qiime2/filtered

! qiime feature-table filter-samples \
    --i-table ../data/qiime2/table.qza \
    --m-metadata-file ../data/metadata/samples-to-keep.tsv \
    --o-filtered-table ../data/qiime2/filtered/table.qza

! qiime feature-table filter-seqs \
    --i-data ../data/qiime2/rep-seqs.qza \
    --i-table ../data/qiime2/filtered/table.qza \
    --o-filtered-data ../data/qiime2/filtered/rep-seqs.qza

! qiime feature-table summarize \
    --i-table ../data/qiime2/filtered/table.qza \
    --o-visualization ../data/qiime2/filtered/table.qzv

! qiime feature-table tabulate-seqs \
    --i-data ../data/qiime2/filtered/rep-seqs.qza \
    --o-visualization ../data/qiime2/filtered/rep-seqs.qzv

mkdir: cannot create directory ‘../data/qiime2/filtered’: File exists
[32mSaved FeatureTable[Frequency] to: ../data/qiime2/filtered/table.qza[0m
[32mSaved FeatureData[Sequence] to: ../data/qiime2/filtered/rep-seqs.qza[0m
[32mSaved Visualization to: ../data/qiime2/filtered/table.qzv[0m
[32mSaved Visualization to: ../data/qiime2/filtered/rep-seqs.qzv[0m


In [128]:
#carry out a multiple seqeunce alignment using Mafft
! mkdir ../data/qiime2/filtered/tree

! qiime alignment mafft \
  --i-sequences ../data/qiime2/filtered/rep-seqs.qza \
  --o-alignment ../data/qiime2/filtered/tree/aligned-rep-seqs.qza

#mask (or filter) the alignment to remove positions that are highly variable. These positions are generally considered to add noise to a resulting phylogenetic tree.
! qiime alignment mask \
  --i-alignment ../data/qiime2/filtered/tree/aligned-rep-seqs.qza \
  --o-masked-alignment ../data/qiime2/filtered/tree/masked-aligned-rep-seqs.qza

#create the tree using the Fasttree program
! qiime phylogeny fasttree \
  --i-alignment ../data/qiime2/filtered/tree/masked-aligned-rep-seqs.qza \
  --o-tree ../data/qiime2/filtered/tree/unrooted-tree.qza

#root the tree using the longest root
! qiime phylogeny midpoint-root \
  --i-tree ../data/qiime2/filtered/tree/unrooted-tree.qza \
  --o-rooted-tree ../data/qiime2/filtered/tree/rooted-tree.qza

[32mSaved FeatureData[AlignedSequence] to: ../data/qiime2/filtered/tree/aligned-rep-seqs.qza[0m
[32mSaved FeatureData[AlignedSequence] to: ../data/qiime2/filtered/tree/masked-aligned-rep-seqs.qza[0m
[32mSaved Phylogeny[Unrooted] to: ../data/qiime2/filtered/tree/unrooted-tree.qza[0m
[32mSaved Phylogeny[Rooted] to: ../data/qiime2/filtered/tree/rooted-tree.qza[0m


In [138]:
Visualization.load('../data/qiime2/filtered/rep-seqs.qzv')

In [139]:
! qiime tools import \
  --input-path ../data/genomes/16sgenome.fasta \
  --output-path ../data/genomes/16sgenome.qza \
  --type 'FeatureData[Sequence]'

[32mImported ../data/genomes/16sgenome.fasta as DNASequencesDirectoryFormat to ../data/genomes/16sgenome.qza[0m


In [143]:
! qiime feature-table merge-seqs \
    --i-data ../data/genomes/16sgenome.qza ../data/qiime2/filtered/rep-seqs.qza \
    --o-merged-data ../data/qiime2/filtered/rep-seqs-with-genome.qza

[32mSaved FeatureData[Sequence] to: ../data/qiime2/filtered/rep-seqs-with-genome.qza[0m


In [149]:
#carry out a multiple seqeunce alignment using Mafft
! mkdir ../data/qiime2/filtered/tree_with_genomes

! qiime alignment mafft \
  --i-sequences ../data/qiime2/filtered/rep-seqs-with-genome.qza \
  --o-alignment ../data/qiime2/filtered/tree_with_genomes/aligned-rep-seqs.qza

#mask (or filter) the alignment to remove positions that are highly variable. These positions are generally considered to add noise to a resulting phylogenetic tree.
! qiime alignment mask \
  --i-alignment ../data/qiime2/filtered/tree_with_genomes/aligned-rep-seqs.qza \
  --o-masked-alignment ../data/qiime2/filtered/tree_with_genomes/masked-aligned-rep-seqs.qza

#create the tree using the Fasttree program
! qiime phylogeny fasttree \
  --i-alignment ../data/qiime2/filtered/tree_with_genomes/masked-aligned-rep-seqs.qza \
  --o-tree ../data/qiime2/filtered/tree_with_genomes/unrooted-tree.qza

#root the tree using the longest root
! qiime phylogeny midpoint-root \
  --i-tree ../data/qiime2/filtered/tree_with_genomes/unrooted-tree.qza \
  --o-rooted-tree ../data/qiime2/filtered/tree_with_genomes/rooted-tree.qza

mkdir: cannot create directory ‘../data/qiime2/filtered/tree_with_genomes’: File exists
[32mSaved FeatureData[AlignedSequence] to: ../data/qiime2/filtered/tree_with_genomes/aligned-rep-seqs.qza[0m
[32mSaved FeatureData[AlignedSequence] to: ../data/qiime2/filtered/tree_with_genomes/masked-aligned-rep-seqs.qza[0m
[32mSaved Phylogeny[Unrooted] to: ../data/qiime2/filtered/tree_with_genomes/unrooted-tree.qza[0m
[32mSaved Phylogeny[Rooted] to: ../data/qiime2/filtered/tree_with_genomes/rooted-tree.qza[0m


In [150]:
! qiime tools export \
    --input-path ../data/qiime2/filtered/tree_with_genomes/rooted-tree.qza \
    --output-path ../data/qiime2/filtered/tree_with_genomes/rooted-tree

[32mExported ../data/qiime2/filtered/tree_with_genomes/rooted-tree.qza as NewickDirectoryFormat to directory ../data/qiime2/filtered/tree_with_genomes/rooted-tree[0m


In [151]:
! qiime tools export \
    --input-path ../data/qiime2/filtered/tree_with_genomes/masked-aligned-rep-seqs.qza \
    --output-path ../data/qiime2/filtered/tree_with_genomes/masked-aligned-rep-seqs

[32mExported ../data/qiime2/filtered/tree_with_genomes/masked-aligned-rep-seqs.qza as AlignedDNASequencesDirectoryFormat to directory ../data/qiime2/filtered/tree_with_genomes/masked-aligned-rep-seqs[0m


In [152]:
! qiime tools export \
    --input-path ../data/qiime2/filtered/tree_with_genomes/unrooted-tree.qza \
    --output-path ../data/qiime2/filtered/tree_with_genomes/unrooted-tree

[32mExported ../data/qiime2/filtered/tree_with_genomes/unrooted-tree.qza as NewickDirectoryFormat to directory ../data/qiime2/filtered/tree_with_genomes/unrooted-tree[0m


# Alpha Rarefaction

In [174]:
Visualization.load('../data/qiime2/filtered/table.qzv')

In [178]:
! qiime diversity alpha-rarefaction \
  --i-table ../data/qiime2/filtered/table.qza \
  --i-phylogeny ../data/qiime2/filtered/tree/rooted-tree.qza \
  --p-max-depth 1000 \
  --m-metadata-file ../data/metadata/filtered_sample.tsv \
  --o-visualization ../data/qiime2/filtered/alpha-rarefaction.qzv

[32mSaved Visualization to: ../data/qiime2/filtered/alpha-rarefaction.qzv[0m


In [179]:
Visualization.load('../data/qiime2/filtered/alpha-rarefaction.qzv')

# Calculate and explore diversity metrics

In [180]:
! qiime diversity core-metrics-phylogenetic \
  --i-phylogeny ../data/qiime2/filtered/tree/rooted-tree.qza \
  --i-table ../data/qiime2/filtered/table.qza \
  --p-sampling-depth 1000 \
  --m-metadata-file ../data/metadata/filtered_sample.tsv \
  --output-dir ../data/qiime2/filtered/core-metrics-results

[32mSaved FeatureTable[Frequency] to: ../data/qiime2/filtered/core-metrics-results/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] % Properties('phylogenetic') to: ../data/qiime2/filtered/core-metrics-results/faith_pd_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: ../data/qiime2/filtered/core-metrics-results/observed_otus_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: ../data/qiime2/filtered/core-metrics-results/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: ../data/qiime2/filtered/core-metrics-results/evenness_vector.qza[0m
[32mSaved DistanceMatrix % Properties('phylogenetic') to: ../data/qiime2/filtered/core-metrics-results/unweighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix % Properties('phylogenetic') to: ../data/qiime2/filtered/core-metrics-results/weighted_unifrac_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: ../data/qiime2/filtered/core-metrics-results/jaccard_distance_matrix.qza[0m
[32mSaved Distan

In [181]:
! qiime diversity alpha-group-significance \
  --i-alpha-diversity ../data/qiime2/filtered/core-metrics-results/faith_pd_vector.qza \
  --m-metadata-file ../data/metadata/filtered_sample.tsv \
  --o-visualization ../data/qiime2/filtered/core-metrics-results/faith-pd-group-significance.qzv

! qiime diversity alpha-group-significance \
  --i-alpha-diversity ../data/qiime2/filtered/core-metrics-results/evenness_vector.qza \
  --m-metadata-file ../data/metadata/filtered_sample.tsv \
  --o-visualization ../data/qiime2/filtered/core-metrics-results/evenness-group-significance.qzv

! qiime diversity alpha-group-significance \
  --i-alpha-diversity ../data/qiime2/filtered/core-metrics-results/shannon_vector.qza \
  --m-metadata-file ../data/metadata/filtered_sample.tsv \
  --o-visualization ../data/qiime2/filtered/core-metrics-results/shannon_group-significance.qzv

[32mSaved Visualization to: ../data/qiime2/filtered/core-metrics-results/faith-pd-group-significance.qzv[0m
[32mSaved Visualization to: ../data/qiime2/filtered/core-metrics-results/evenness-group-significance.qzv[0m
[32mSaved Visualization to: ../data/qiime2/filtered/core-metrics-results/shannon_group-significance.qzv[0m


In [183]:
Visualization.load('../data/qiime2/filtered/core-metrics-results/evenness-group-significance.qzv')

# Assign Taxonomy

In [157]:
! wget -O "../data/qiime2/gg-13-8-99-515-806-nb-classifier.qza" "https://data.qiime2.org/2019.10/common/gg-13-8-99-515-806-nb-classifier.qza"
#! qiime feature-classifier --version

--2021-03-21 07:51:03--  https://data.qiime2.org/2019.10/common/gg-13-8-99-515-806-nb-classifier.qza
Resolving data.qiime2.org (data.qiime2.org)... 54.200.1.12
Connecting to data.qiime2.org (data.qiime2.org)|54.200.1.12|:443... connected.
HTTP request sent, awaiting response... 302 FOUND
Location: https://s3-us-west-2.amazonaws.com/qiime2-data/2019.10/common/gg-13-8-99-515-806-nb-classifier.qza [following]
--2021-03-21 07:51:04--  https://s3-us-west-2.amazonaws.com/qiime2-data/2019.10/common/gg-13-8-99-515-806-nb-classifier.qza
Resolving s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)... 52.218.204.216
Connecting to s3-us-west-2.amazonaws.com (s3-us-west-2.amazonaws.com)|52.218.204.216|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 28373760 (27M) [binary/octet-stream]
Saving to: ‘../data/qiime2/gg-13-8-99-515-806-nb-classifier.qza’


2021-03-21 07:51:10 (8.21 MB/s) - ‘../data/qiime2/gg-13-8-99-515-806-nb-classifier.qza’ saved [28373760/28373760]



In [161]:
! qiime feature-classifier classify-sklearn \
    --p-n-jobs 6 \
    --i-classifier ../data/qiime2/gg-13-8-99-515-806-nb-classifier.qza \
    --i-reads ../data/qiime2/filtered/rep-seqs.qza \
    --o-classification ../data/qiime2/filtered/taxonomy.qza \
    --verbose

[32mSaved FeatureData[Taxonomy] to: ../data/qiime2/filtered/taxonomy.qza[0m


In [172]:
! qiime metadata tabulate \
  --m-input-file ../data/qiime2/filtered/taxonomy.qza \
  --o-visualization ../data/qiime2/filtered/taxonomy.qzv

! qiime taxa barplot \
  --i-table ../data/qiime2/filtered/table.qza \
  --i-taxonomy ../data/qiime2/filtered/taxonomy.qza \
  --m-metadata-file ../data/metadata/filtered_sample.tsv \
  --o-visualization ../data/qiime2/filtered/taxa-bar-plots.qzv
'''
#first, export your data as a .biom
! qiime tools export \
  ../data/qiime2/filtered/feature-table.qza \
  --output-dir ../data/qiime2/filtered/exported-feature-table

#then export taxonomy info
! qiime tools export \
  ../data/qiime2/filtered/taxonomy.qza \
  --output-dir ../data/qiime2/filtered/exported-feature-table

#then combine the two using the biome package (dependence loaded as part of QIIME2 install)
'''

[32mSaved Visualization to: ../data/qiime2/filtered/taxonomy.qzv[0m
[32mSaved Visualization to: ../data/qiime2/filtered/taxa-bar-plots.qzv[0m


'\n#first, export your data as a .biom\n! qiime tools export   ../data/qiime2/filtered/feature-table.qza   --output-dir ../data/qiime2/filtered/exported-feature-table\n\n#then export taxonomy info\n! qiime tools export   ../data/qiime2/filtered/taxonomy.qza   --output-dir ../data/qiime2/filtered/exported-feature-table\n\n#then combine the two using the biome package (dependence loaded as part of QIIME2 install)\n'

In [173]:
Visualization.load('../data/qiime2/filtered/taxa-bar-plots.qzv')

In [2]:
#first, export your data as a .biom
! qiime tools export \
  --input-path ../data/qiime2/filtered/table.qza \
  --output-path ../data/qiime2/filtered/exported-feature-table

#then export taxonomy info
! qiime tools export \
  --input-path ../data/qiime2/filtered/taxonomy.qza \
  --output-path ../data/qiime2/filtered/exported-feature-table

[32mExported ../data/qiime2/filtered/table.qza as BIOMV210DirFmt to directory ../data/qiime2/filtered/exported-feature-table[0m
[32mExported ../data/qiime2/filtered/taxonomy.qza as TSVTaxonomyDirectoryFormat to directory ../data/qiime2/filtered/exported-feature-table[0m


In [7]:
! biom convert -i ../data/qiime2/filtered/exported-feature-table/feature-table.biom \
    -o ../data/qiime2/filtered/exported-feature-table/feature-table.tsv \
    --to-tsv

In [28]:
df_biom = pd.read_csv('../data/qiime2/filtered/exported-feature-table/feature-table.txt', sep='\t', skiprows=1, index_col=0)
df_biom.columns = [i.replace('-','_') for i in df_biom.columns.values]
df_biom.index.name = 'asv'
df_biom

Unnamed: 0_level_0,P5_rep1,P5_rep2,P5_rep3,P5_rep4,P5_rep5,P8_rep1,P8_rep2,P8_rep3,P8_rep4,P8_rep5,P9_rep1,P9_rep2,P9_rep3,P9_rep4,P9_rep5
asv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0010861f41d656beec7cf63fe0dbad15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0
001234c4ae948797dac11b95df511a13,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
001dd75457b196d7102586b8624ddbc3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,0.0
002c52fa5c956fc05b69474b3de0bdbe,6.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00342672701d10d40f879d4de14090dc,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ff50bc5a98ccf879e4bae091b504436a,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
ff7e1290218e7e38098328f36e206ad5,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ff8401c9e2a024f4fd2fe2964fff69ab,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0
ffce01c8099c14e264ae88e0bff064bc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0


In [32]:
df_tax = pd.read_csv('../data/qiime2/filtered/exported-feature-table/taxonomy.tsv', sep='\t', index_col=0)

In [36]:
df_tax = df_tax.drop(columns=['Confidence'])

In [39]:
df_tax.Taxon.split(';')

AttributeError: 'Series' object has no attribute 'split'