The following conda environment for Qiime 2 was used for these analyses:

>qiime2-2020.2

The following commands were run using the command line interface (CLI) of Qiime 2.

# Import data

In [None]:
# Import the HMS decontaminated fungal count table (declared as BIOMV100Format)
# as a FeatureTable[Frequency] artifact.
# The semantic type is quoted so the shell cannot treat [Frequency] as a glob
# pattern (zsh aborts with "no matches found" when it is left unquoted).
qiime tools import \
    --type 'FeatureTable[Frequency]' \
    --input-path ./Qiime_input_data/rep200_HiSeq_Fungi_Decontam_HMS_PT.biom \
    --input-format BIOMV100Format \
    --output-path rep200_HiSeq_Fungi_Decontam_HMS_PT.qza

In [None]:
# Import the BCM decontaminated fungal count table (declared as BIOMV100Format)
# as a FeatureTable[Frequency] artifact.
# The semantic type is quoted so the shell cannot treat [Frequency] as a glob
# pattern (zsh aborts with "no matches found" when it is left unquoted).
qiime tools import \
    --type 'FeatureTable[Frequency]' \
    --input-path ./Qiime_input_data/rep200_HiSeq_Fungi_Decontam_BCM_PT.biom \
    --input-format BIOMV100Format \
    --output-path rep200_HiSeq_Fungi_Decontam_BCM_PT.qza

In [None]:
# Import the MDA decontaminated fungal count table (declared as BIOMV100Format)
# as a FeatureTable[Frequency] artifact.
# The semantic type is quoted so the shell cannot treat [Frequency] as a glob
# pattern (zsh aborts with "no matches found" when it is left unquoted).
qiime tools import \
    --type 'FeatureTable[Frequency]' \
    --input-path ./Qiime_input_data/rep200_HiSeq_Fungi_Decontam_MDA_PT.biom \
    --input-format BIOMV100Format \
    --output-path rep200_HiSeq_Fungi_Decontam_MDA_PT.qza

In [None]:
# Import the WashU decontaminated fungal count table (declared as BIOMV100Format)
# as a FeatureTable[Frequency] artifact.
# The semantic type is quoted so the shell cannot treat [Frequency] as a glob
# pattern (zsh aborts with "no matches found" when it is left unquoted).
qiime tools import \
    --type 'FeatureTable[Frequency]' \
    --input-path ./Qiime_input_data/rep200_HiSeq_Fungi_Decontam_WashU_PT.biom \
    --input-format BIOMV100Format \
    --output-path rep200_HiSeq_Fungi_Decontam_WashU_PT.qza

In [None]:
# Import the Broad WGS decontaminated fungal count table (declared as
# BIOMV100Format) as a FeatureTable[Frequency] artifact.
# The semantic type is quoted so the shell cannot treat [Frequency] as a glob
# pattern (zsh aborts with "no matches found" when it is left unquoted).
qiime tools import \
    --type 'FeatureTable[Frequency]' \
    --input-path ./Qiime_input_data/rep200_HiSeq_Fungi_Decontam_Broad_WGS_PT.biom \
    --input-format BIOMV100Format \
    --output-path rep200_HiSeq_Fungi_Decontam_Broad_WGS_PT.qza

In [None]:
# Import the rep200 taxonomy table as a FeatureData[Taxonomy] artifact.
# No --input-format is passed, so QIIME 2 falls back to its default for this type.
qiime tools import \
    --input-path ./Qiime_input_data/rep200_taxa_for_qiime.txt \
    --output-path rep200_taxonomy_fungi.qza \
    --type "FeatureData[Taxonomy]"

# Deicode (beta diversity that does not require rarefaction)

Martino et al. 2019. mSystems. See the following links:
- https://journals.asm.org/doi/10.1128/mSystems.00016-19
- https://forum.qiime2.org/t/robust-aitchison-pca-beta-diversity-with-deicode/8333

### Harvard Medical School (HMS)

In [None]:
# Robust Aitchison PCA (DEICODE) on the HMS table. The two --p-min-* options are
# DEICODE's total-count filters (features: 10, samples: 500).
# This is the first command that writes into ./Deicode_outputs; q2cli refuses to
# save into a directory that does not exist, so create it first (idempotent).
mkdir -p ./Deicode_outputs
qiime deicode rpca \
    --i-table rep200_HiSeq_Fungi_Decontam_HMS_PT.qza \
    --p-min-feature-count 10 \
    --p-min-sample-count 500 \
    --o-biplot ./Deicode_outputs/deicode_ordination_HMS.qza \
    --o-distance-matrix ./Deicode_outputs/deicode_distance_HMS.qza

In [None]:
# Emperor biplot of the HMS RPCA ordination, annotated with the HMS sample
# metadata and the rep200 taxonomy as feature metadata (one feature shown).
qiime emperor biplot \
  --p-number-of-features 1 \
  --i-biplot ./Deicode_outputs/deicode_ordination_HMS.qza \
  --m-sample-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_HMS_PT.txt \
  --m-feature-metadata-file rep200_taxonomy_fungi.qza \
  --o-visualization ./Deicode_outputs/deicode_biplot_HMS.qzv

In [None]:
# PERMANOVA on the HMS RPCA distance matrix, grouping samples by the
# "investigation" metadata column.
qiime diversity beta-group-significance \
  --p-method permanova \
  --i-distance-matrix ./Deicode_outputs/deicode_distance_HMS.qza \
  --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_HMS_PT.txt \
  --m-metadata-column investigation \
  --o-visualization ./Deicode_outputs/investigation_significance_wgs_pt_HMS.qzv

In [None]:
# adonis test of the HMS RPCA distances against the formula "investigation",
# run with 16 parallel jobs.
qiime diversity adonis \
    --p-n-jobs 16 \
    --p-formula 'investigation' \
    --i-distance-matrix ./Deicode_outputs/deicode_distance_HMS.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_HMS_PT.txt \
    --o-visualization ./Deicode_outputs/deicode_adonis_HMS.qzv

### Baylor College of Medicine (BCM)

In [None]:
# Robust Aitchison PCA (DEICODE) on the BCM table. The two --p-min-* options are
# DEICODE's total-count filters (features: 10, samples: 500).
# q2cli refuses to save into a directory that does not exist, so make sure
# ./Deicode_outputs is present before writing to it (idempotent).
mkdir -p ./Deicode_outputs
qiime deicode rpca \
    --i-table rep200_HiSeq_Fungi_Decontam_BCM_PT.qza \
    --p-min-feature-count 10 \
    --p-min-sample-count 500 \
    --o-biplot ./Deicode_outputs/deicode_ordination_BCM.qza \
    --o-distance-matrix ./Deicode_outputs/deicode_distance_BCM.qza

In [None]:
# Emperor biplot of the BCM RPCA ordination, annotated with the BCM sample
# metadata and the rep200 taxonomy as feature metadata (one feature shown).
qiime emperor biplot \
  --p-number-of-features 1 \
  --i-biplot ./Deicode_outputs/deicode_ordination_BCM.qza \
  --m-sample-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_BCM_PT.txt \
  --m-feature-metadata-file rep200_taxonomy_fungi.qza \
  --o-visualization ./Deicode_outputs/deicode_biplot_BCM.qzv

In [None]:
# PERMANOVA on the BCM RPCA distance matrix, grouping samples by the
# "investigation" metadata column.
qiime diversity beta-group-significance \
  --p-method permanova \
  --i-distance-matrix ./Deicode_outputs/deicode_distance_BCM.qza \
  --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_BCM_PT.txt \
  --m-metadata-column investigation \
  --o-visualization ./Deicode_outputs/investigation_significance_wgs_pt_BCM.qzv

In [None]:
# adonis test of the BCM RPCA distances against the formula "investigation",
# run with 16 parallel jobs.
qiime diversity adonis \
    --p-n-jobs 16 \
    --p-formula 'investigation' \
    --i-distance-matrix ./Deicode_outputs/deicode_distance_BCM.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_BCM_PT.txt \
    --o-visualization ./Deicode_outputs/deicode_adonis_BCM.qzv

### MD Anderson (MDA)

In [None]:
# Robust Aitchison PCA (DEICODE) on the MDA table. The two --p-min-* options are
# DEICODE's total-count filters (features: 10, samples: 500).
# q2cli refuses to save into a directory that does not exist, so make sure
# ./Deicode_outputs is present before writing to it (idempotent).
mkdir -p ./Deicode_outputs
qiime deicode rpca \
    --i-table rep200_HiSeq_Fungi_Decontam_MDA_PT.qza \
    --p-min-feature-count 10 \
    --p-min-sample-count 500 \
    --o-biplot ./Deicode_outputs/deicode_ordination_MDA.qza \
    --o-distance-matrix ./Deicode_outputs/deicode_distance_MDA.qza

In [None]:
# Emperor biplot of the MDA RPCA ordination, annotated with the MDA sample
# metadata and the rep200 taxonomy as feature metadata (one feature shown).
qiime emperor biplot \
  --p-number-of-features 1 \
  --i-biplot ./Deicode_outputs/deicode_ordination_MDA.qza \
  --m-sample-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_MDA_PT.txt \
  --m-feature-metadata-file rep200_taxonomy_fungi.qza \
  --o-visualization ./Deicode_outputs/deicode_biplot_MDA.qzv

In [None]:
# PERMANOVA on the MDA RPCA distance matrix, grouping samples by the
# "investigation" metadata column.
qiime diversity beta-group-significance \
  --p-method permanova \
  --i-distance-matrix ./Deicode_outputs/deicode_distance_MDA.qza \
  --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_MDA_PT.txt \
  --m-metadata-column investigation \
  --o-visualization ./Deicode_outputs/investigation_significance_wgs_pt_MDA.qzv

In [None]:
# adonis test of the MDA RPCA distances against the formula "investigation",
# run with 16 parallel jobs.
qiime diversity adonis \
    --p-n-jobs 16 \
    --p-formula 'investigation' \
    --i-distance-matrix ./Deicode_outputs/deicode_distance_MDA.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_MDA_PT.txt \
    --o-visualization ./Deicode_outputs/deicode_adonis_MDA.qzv

### Washington University (WashU)

In [None]:
# Robust Aitchison PCA (DEICODE) on the WashU table. The two --p-min-* options
# are DEICODE's total-count filters (features: 10, samples: 500).
# q2cli refuses to save into a directory that does not exist, so make sure
# ./Deicode_outputs is present before writing to it (idempotent).
mkdir -p ./Deicode_outputs
qiime deicode rpca \
    --i-table rep200_HiSeq_Fungi_Decontam_WashU_PT.qza \
    --p-min-feature-count 10 \
    --p-min-sample-count 500 \
    --o-biplot ./Deicode_outputs/deicode_ordination_WashU.qza \
    --o-distance-matrix ./Deicode_outputs/deicode_distance_WashU.qza

In [None]:
# Emperor biplot of the WashU RPCA ordination, annotated with the WashU sample
# metadata and the rep200 taxonomy as feature metadata (one feature shown).
qiime emperor biplot \
  --p-number-of-features 1 \
  --i-biplot ./Deicode_outputs/deicode_ordination_WashU.qza \
  --m-sample-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_WashU_PT.txt \
  --m-feature-metadata-file rep200_taxonomy_fungi.qza \
  --o-visualization ./Deicode_outputs/deicode_biplot_WashU.qzv

In [None]:
# PERMANOVA on the WashU RPCA distance matrix, grouping samples by the
# "investigation" metadata column.
qiime diversity beta-group-significance \
  --p-method permanova \
  --i-distance-matrix ./Deicode_outputs/deicode_distance_WashU.qza \
  --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_WashU_PT.txt \
  --m-metadata-column investigation \
  --o-visualization ./Deicode_outputs/investigation_significance_wgs_pt_WashU.qzv

In [None]:
# adonis test of the WashU RPCA distances against the formula "investigation",
# run with 16 parallel jobs.
qiime diversity adonis \
    --p-n-jobs 16 \
    --p-formula 'investigation' \
    --i-distance-matrix ./Deicode_outputs/deicode_distance_WashU.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_WashU_PT.txt \
    --o-visualization ./Deicode_outputs/deicode_adonis_WashU.qzv

### Broad Institute WGS only (Broad WGS)

In [None]:
# Robust Aitchison PCA (DEICODE) on the Broad WGS table. The two --p-min-*
# options are DEICODE's total-count filters (features: 10, samples: 500).
# q2cli refuses to save into a directory that does not exist, so make sure
# ./Deicode_outputs is present before writing to it (idempotent).
mkdir -p ./Deicode_outputs
qiime deicode rpca \
    --i-table rep200_HiSeq_Fungi_Decontam_Broad_WGS_PT.qza \
    --p-min-feature-count 10 \
    --p-min-sample-count 500 \
    --o-biplot ./Deicode_outputs/deicode_ordination_Broad_WGS.qza \
    --o-distance-matrix ./Deicode_outputs/deicode_distance_Broad_WGS.qza

In [None]:
# Emperor biplot of the Broad WGS RPCA ordination, annotated with the Broad WGS
# sample metadata and the rep200 taxonomy as feature metadata (one feature shown).
qiime emperor biplot \
  --p-number-of-features 1 \
  --i-biplot ./Deicode_outputs/deicode_ordination_Broad_WGS.qza \
  --m-sample-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_Broad_WGS_PT.txt \
  --m-feature-metadata-file rep200_taxonomy_fungi.qza \
  --o-visualization ./Deicode_outputs/deicode_biplot_Broad_WGS.qzv

In [None]:
# PERMANOVA on the Broad WGS RPCA distance matrix, grouping samples by the
# "investigation" metadata column.
qiime diversity beta-group-significance \
  --p-method permanova \
  --i-distance-matrix ./Deicode_outputs/deicode_distance_Broad_WGS.qza \
  --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_Broad_WGS_PT.txt \
  --m-metadata-column investigation \
  --o-visualization ./Deicode_outputs/investigation_significance_wgs_pt_Broad_WGS.qzv

In [None]:
# adonis test of the Broad WGS RPCA distances against the formula
# "investigation", run with 16 parallel jobs.
qiime diversity adonis \
    --p-n-jobs 16 \
    --p-formula 'investigation' \
    --i-distance-matrix ./Deicode_outputs/deicode_distance_Broad_WGS.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_Broad_WGS_PT.txt \
    --o-visualization ./Deicode_outputs/deicode_adonis_Broad_WGS.qzv

# Qiime 2 Core Metrics
Note: Since beta diversity was already calculated above using DEICODE, we are mostly interested in the alpha diversity results. Also note that, based on the per-sample read distribution (shown below for each sequencing center), a common rarefaction depth of 5000 reads/sample was selected (often near the 1st quartile of the sample read distribution) for all sequencing centers except the Broad Institute, which had many fewer reads/sample on average (in that case, 2000 reads/sample were used for rarefaction).

### Harvard Medical School (HMS)
Note the following sample read distribution from R:
```
> summary(rowSums(rep200_HiSeq_Fungi_Decontam_HMS_PT))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
     15    8641   15306   21824   21885  926983 
```

In [None]:
# Core diversity metrics for the HMS table at a sampling depth of 5000;
# all results are written to ./core_metrics_hms_5k/.
qiime diversity core-metrics \
    --p-sampling-depth 5000 \
    --i-table rep200_HiSeq_Fungi_Decontam_HMS_PT.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_HMS_PT.txt \
    --output-dir ./core_metrics_hms_5k/

In [None]:
# Alpha-diversity group significance for HMS, first on the observed-OTUs vector
# (its visualization is saved under an "observed_features" name) ...
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_HMS_PT.txt \
    --i-alpha-diversity ./core_metrics_hms_5k/observed_otus_vector.qza \
    --o-visualization ./core_metrics_hms_5k/observed_features_vector_significance.qzv

# ... then on the Shannon vector.
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_HMS_PT.txt \
    --i-alpha-diversity ./core_metrics_hms_5k/shannon_vector.qza \
    --o-visualization ./core_metrics_hms_5k/shannon_vector_significance.qzv

In [None]:
# Beta-diversity group significance for HMS by the "investigation" column,
# using the Jaccard distance matrix from core-metrics ...
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_HMS_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_hms_5k/jaccard_distance_matrix.qza \
    --o-visualization ./core_metrics_hms_5k/jaccard_distance_matrix_significance.qzv

# ... and the Bray-Curtis distance matrix.
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_HMS_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_hms_5k/bray_curtis_distance_matrix.qza \
    --o-visualization ./core_metrics_hms_5k/bray_curtis_distance_matrix_significance.qzv

### Baylor College of Medicine (BCM)
Note the following sample read distribution from R:
```
> summary(rowSums(rep200_HiSeq_Fungi_Decontam_BCM_PT))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    810    4980    8796   27543   15332  519524  
```

In [None]:
# Core diversity metrics for the BCM table at a sampling depth of 5000;
# all results are written to ./core_metrics_bcm_5k/.
qiime diversity core-metrics \
    --p-sampling-depth 5000 \
    --i-table rep200_HiSeq_Fungi_Decontam_BCM_PT.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_BCM_PT.txt \
    --output-dir ./core_metrics_bcm_5k/

In [None]:
# Alpha-diversity group significance for BCM, first on the observed-OTUs vector
# (its visualization is saved under an "observed_features" name) ...
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_BCM_PT.txt \
    --i-alpha-diversity ./core_metrics_bcm_5k/observed_otus_vector.qza \
    --o-visualization ./core_metrics_bcm_5k/observed_features_vector_significance.qzv

# ... then on the Shannon vector.
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_BCM_PT.txt \
    --i-alpha-diversity ./core_metrics_bcm_5k/shannon_vector.qza \
    --o-visualization ./core_metrics_bcm_5k/shannon_vector_significance.qzv

In [None]:
# Beta-diversity group significance for BCM by the "investigation" column,
# using the Jaccard distance matrix from core-metrics ...
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_BCM_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_bcm_5k/jaccard_distance_matrix.qza \
    --o-visualization ./core_metrics_bcm_5k/jaccard_distance_matrix_significance.qzv

# ... and the Bray-Curtis distance matrix.
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_BCM_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_bcm_5k/bray_curtis_distance_matrix.qza \
    --o-visualization ./core_metrics_bcm_5k/bray_curtis_distance_matrix_significance.qzv

### MD Anderson (MDA)
Note the following sample read distribution from R:
```
> summary(rowSums(rep200_HiSeq_Fungi_Decontam_MDA_PT))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1392    6367   12795   14727   18058  243830 
```

In [None]:
# Core diversity metrics for the MDA table at a sampling depth of 5000;
# all results are written to ./core_metrics_mda_5k/.
qiime diversity core-metrics \
    --p-sampling-depth 5000 \
    --i-table rep200_HiSeq_Fungi_Decontam_MDA_PT.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_MDA_PT.txt \
    --output-dir ./core_metrics_mda_5k/

In [None]:
# Alpha-diversity group significance for MDA, first on the observed-OTUs vector
# (its visualization is saved under an "observed_features" name) ...
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_MDA_PT.txt \
    --i-alpha-diversity ./core_metrics_mda_5k/observed_otus_vector.qza \
    --o-visualization ./core_metrics_mda_5k/observed_features_vector_significance.qzv

# ... then on the Shannon vector.
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_MDA_PT.txt \
    --i-alpha-diversity ./core_metrics_mda_5k/shannon_vector.qza \
    --o-visualization ./core_metrics_mda_5k/shannon_vector_significance.qzv

In [None]:
# Beta-diversity group significance for MDA by the "investigation" column,
# using the Jaccard distance matrix from core-metrics ...
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_MDA_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_mda_5k/jaccard_distance_matrix.qza \
    --o-visualization ./core_metrics_mda_5k/jaccard_distance_matrix_significance.qzv

# ... and the Bray-Curtis distance matrix.
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_MDA_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_mda_5k/bray_curtis_distance_matrix.qza \
    --o-visualization ./core_metrics_mda_5k/bray_curtis_distance_matrix_significance.qzv

### Washington University (WashU)
Note the following sample read distribution from R:
```
> summary(rowSums(rep200_HiSeq_Fungi_Decontam_WashU_PT))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    774    6144    9663   23565   16375  545278 
```

In [None]:
# Core diversity metrics for the WashU table at a sampling depth of 5000;
# all results are written to ./core_metrics_washU_5k/.
qiime diversity core-metrics \
    --p-sampling-depth 5000 \
    --i-table rep200_HiSeq_Fungi_Decontam_WashU_PT.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_WashU_PT.txt \
    --output-dir ./core_metrics_washU_5k/

In [None]:
# Alpha-diversity group significance for WashU, first on the observed-OTUs vector
# (its visualization is saved under an "observed_features" name) ...
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_WashU_PT.txt \
    --i-alpha-diversity ./core_metrics_washU_5k/observed_otus_vector.qza \
    --o-visualization ./core_metrics_washU_5k/observed_features_vector_significance.qzv

# ... then on the Shannon vector.
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_WashU_PT.txt \
    --i-alpha-diversity ./core_metrics_washU_5k/shannon_vector.qza \
    --o-visualization ./core_metrics_washU_5k/shannon_vector_significance.qzv

In [None]:
# Beta-diversity group significance for WashU by the "investigation" column,
# using the Jaccard distance matrix from core-metrics ...
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_WashU_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_washU_5k/jaccard_distance_matrix.qza \
    --o-visualization ./core_metrics_washU_5k/jaccard_distance_matrix_significance.qzv

# ... and the Bray-Curtis distance matrix.
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_WashU_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_washU_5k/bray_curtis_distance_matrix.qza \
    --o-visualization ./core_metrics_washU_5k/bray_curtis_distance_matrix_significance.qzv

### Broad Institute WGS only (Broad WGS)
Note the following sample read distribution from R (it is lower than the other seq centers):
```
> summary(rowSums(rep200_HiSeq_Fungi_Decontam_Broad_WGS_PT))
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
      2    1898    3958    6354    9411  150942 
```

In [None]:
# Core diversity metrics for the Broad WGS table at a sampling depth of 2000
# (lower than the other centers); results go to ./core_metrics_broad_WGS_2k/.
qiime diversity core-metrics \
    --p-sampling-depth 2000 \
    --i-table rep200_HiSeq_Fungi_Decontam_Broad_WGS_PT.qza \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_Broad_WGS_PT.txt \
    --output-dir ./core_metrics_broad_WGS_2k/

In [None]:
# Alpha-diversity group significance for Broad WGS, first on the observed-OTUs
# vector (its visualization is saved under an "observed_features" name) ...
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_Broad_WGS_PT.txt \
    --i-alpha-diversity ./core_metrics_broad_WGS_2k/observed_otus_vector.qza \
    --o-visualization ./core_metrics_broad_WGS_2k/observed_features_vector_significance.qzv

# ... then on the Shannon vector.
qiime diversity alpha-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_Broad_WGS_PT.txt \
    --i-alpha-diversity ./core_metrics_broad_WGS_2k/shannon_vector.qza \
    --o-visualization ./core_metrics_broad_WGS_2k/shannon_vector_significance.qzv

In [None]:
# Beta-diversity group significance for Broad WGS by the "investigation" column,
# using the Jaccard distance matrix from core-metrics ...
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_Broad_WGS_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_broad_WGS_2k/jaccard_distance_matrix.qza \
    --o-visualization ./core_metrics_broad_WGS_2k/jaccard_distance_matrix_significance.qzv

# ... and the Bray-Curtis distance matrix.
qiime diversity beta-group-significance \
    --m-metadata-file ./Qiime_input_data/metaQiitaCombined_Nonzero_Broad_WGS_PT.txt \
    --m-metadata-column investigation \
    --i-distance-matrix ./core_metrics_broad_WGS_2k/bray_curtis_distance_matrix.qza \
    --o-visualization ./core_metrics_broad_WGS_2k/bray_curtis_distance_matrix_significance.qzv

# Figures for the paper

- Alpha diversity data (downloaded through uploading the alpha diversity .qzv files onto https://view.qiime2.org/ and downloading the raw data) will be plotted using R (see `04-Prepare-TCGA-data-for-Qiime-and-plot-alpha-diversity.R` script)
- Emperor (https://biocore.github.io/emperor/) will be used to view the beta diversity PCoA plots (again using https://view.qiime2.org/) and capture screenshots for the PCoA figures

## Alpha and beta rarefaction template code (not done for now)

In [158]:
# Template (not part of the current analysis): alpha rarefaction curves for an
# HMS table, 10 steps between depths 50 and 50000.
# NOTE(review): the input file names come from an earlier naming scheme; swap in
# the current table/metadata paths before reuse.
# The result is written with --o-visualization so that the .qzv name is an actual
# visualization file; passing that name to --output-dir instead creates a
# directory called "*.qzv" with visualization.qzv inside it.
qiime diversity alpha-rarefaction \
    --i-table qiita_count_fungi_WGS_PT_HMS.qza \
    --m-metadata-file qiime_meta_fungi_WGS_PT_HMS.txt \
    --p-min-depth 50 \
    --p-max-depth 50000 \
    --p-steps 10 \
    --o-visualization alpha-rarefaction-curves-hms-10-50k.qzv \
    --verbose

Saved Visualization to: alpha-rarefaction-curves-20-40000.qzv/visualization.qzv


In [162]:
# Template (not part of the current analysis): beta rarefaction with the
# Bray-Curtis metric and neighbor-joining clustering at a sampling depth of 430.
# Written as a plain CLI command like every other cell here; the IPython "!"
# prefix triggers history expansion when pasted into an interactive bash shell.
# NOTE(review): the inputs (tree, table, metadata) appear to come from a different
# dataset, and braycurtis should not need --i-phylogeny; confirm before reuse.
qiime diversity beta-rarefaction \
    --i-phylogeny wol_tree.qza \
    --i-table table_noBlanks.qza \
    --m-metadata-file q2_metadata_keemei.tsv \
    --p-metric 'braycurtis' \
    --p-clustering-method 'nj' \
    --p-sampling-depth 430 \
    --output-dir beta-rarefaction-braycurtis-430

Saved Visualization to: beta-rarefaction-braycurtis-430/visualization.qzv
