# onsite data

In [None]:
# Print the QIIME 2 command-line (q2cli) version used for this analysis.
!qiime --version

# q2cli version 2019.7.0
# Run `qiime info` for more version details.


In [None]:
from metaBarTools import metaBar_PreX
import os

In [None]:
# Instantiate the helper imported from metaBarTools. The cells below call its
# manifest builder; its copy / sub-directory helpers appear only in commented-out cells.
metaBar = metaBar_PreX()

In [None]:
# Source directory of the on-site reads. It is only referenced by the
# commented-out metaBar_Copy(readspath, onsite_copies) call further down.
readspath = "/Reads_metaData/Reads_All_Projects/reads_2018/MicrocosmoSoil_2018/microcosm_onsite/onsite_samples_microcosmo_11012018"

In [None]:
# Create the local directory that receives the working copies of the reads.
# exist_ok=True makes the cell safe to re-run and removes the check-then-create race
# of the former os.path.exists() guard.
os.makedirs("reads_copies_onsite", exist_ok=True)

# Keep an absolute path so it stays valid after the later os.chdir() calls.
onsite_copies = os.path.abspath("reads_copies_onsite")

In [None]:
# metaBar.metaBar_Copy(readspath, onsite_copies)

In [None]:
# path_16s, path_ITS = metaBar.metaBar_makeSubDir("Analysis/OnSite", ["16S_result", "ITS2_result"])
# Hard-coded result directories for the two markers, used instead of the
# metaBar_makeSubDir call above. Both must already exist: later cells os.chdir() into them.
# NOTE(review): the paths contain "micocosm" — confirm this matches the directory on disk.
path_16s = "/running_project/2019/soil_project/micocosm/Microcosm/Analysis/OnSite/16S_result"

path_ITS = "/running_project/2019/soil_project/micocosm/Microcosm/Analysis/OnSite/ITS2_result"

In [None]:
# Show both result directories, one per line.
print("\n".join((path_16s, path_ITS)))

## metadata and platesetup

In [None]:
# Absolute path to the plate-setup spreadsheet that is handed to the manifest builder below.
platesetup = os.path.abspath("./metadata_onSite/onsite_soil_microcosmo.xlsx")
# Echo the resolved path as a sanity check.
print(platesetup)

### manifest file

In [None]:
# Build the QIIME 2 manifest from the copied reads and the plate-setup sheet.
# colranges / colnames describe the spreadsheet columns handed to the helper;
# paired=True and matchby="index" are forwarded unchanged.
# NOTE(review): presumably this writes the per-primer *_manifest.csv files that the
# following cells move into manifest_onSite/ — confirm against metaBarTools.
manifest = metaBar.metaBar_Qiime2_Manifest(
    onsite_copies,
    platesetup,
    colranges=[0, 8],
    colnames=[
        'Plate',
        'Sample_ID',
        'PCR_Conc',
        'nmol_per_sample',
        'Amount_of_Sample',
        'Amount_of_Water',
        'Well_No',
        'Primer_set',
    ],
    paired=True,
    matchby="index",
)

In [None]:
# Create the folder that will hold the manifest files. os.makedirs(exist_ok=True)
# replaces the plain `mkdir`, which errors out when the cell is re-run, and matches
# how the other directories in this notebook are created.
os.makedirs("manifest_onSite", exist_ok=True)

In [None]:
# Move the two per-primer manifest files from the working directory into manifest_onSite/.
# Re-running this cell after a successful move fails because the source files are gone.
!mv 16SF@16SR_manifest.csv ITS86@ITS4_manifest.csv manifest_onSite/

In [None]:
# Absolute manifest paths, resolved before any os.chdir() so that $man_16s / $man_its
# remain valid when the import cells run from inside the result directories.
man_16s = os.path.abspath("./manifest_onSite/16SF@16SR_manifest.csv")
man_its = os.path.abspath("./manifest_onSite/ITS86@ITS4_manifest.csv")

## 16s

In [None]:
# Switch into the 16S result directory: every relative path in the 16S cells below
# (feature-tables/, taxonomy/, ../../../metadata_onSite/ ...) resolves against it.
os.chdir(path_16s)

In [None]:
# Display the current working directory to confirm the chdir above.
os.getcwd()

In [None]:
# Import the paired-end FASTQ files listed in the 16S manifest ($man_16s, Phred33
# manifest format) into the artifact field_16s_seq.qza.
!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path $man_16s \
--output-path field_16s_seq.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarise the imported 16S reads into a visualization (field_16s_seq.qzv).
!qiime demux summarize \
--i-data field_16s_seq.qza \
--o-visualization field_16s_seq.qzv

In [None]:
# Lengths of the 16S forward and reverse primers. DADA2 receives them below as the
# number of leading bases to trim from each read (--p-trim-left-f / --p-trim-left-r).
mcm_16s_F, mcm_16S_R = map(len, ("CCTACGGGNGGCWGCAG", "GGACTACHVGGGTATCTAATCC"))

In [None]:
# Output folders for the DADA2 feature table and denoising statistics.
# exist_ok=True keeps the cell re-runnable and avoids the separate existence check.
os.makedirs("feature-tables", exist_ok=True)
os.makedirs("dada2_stats", exist_ok=True)

In [None]:
# Truncation lengths noted for this run (forward / reverse) — presumably successive trials:
#   289 / 267
#   289 / 247
#   289 / 227   <- the pair passed to --p-trunc-len-f / --p-trunc-len-r below
# The primer lengths computed earlier are used as the left trim for each read.
# Output names are given without ".qza"; later cells read them back as
# feature-tables/table_field_16s.qza and rep_field_16s.qza.
!qiime dada2 denoise-paired \
--i-demultiplexed-seqs field_16s_seq.qza \
--o-table feature-tables/table_field_16s \
--o-representative-sequences rep_field_16s \
--p-trim-left-f $mcm_16s_F \
--p-trim-left-r $mcm_16S_R \
--p-trunc-len-f 289 \
--p-trunc-len-r 227 \
--o-denoising-stats dada2_stats/dada2_stats_field_16s.qza \
--p-n-threads 12

In [None]:
# Render the DADA2 denoising statistics as a viewable table (stats-dada2.qzv).
!qiime metadata tabulate \
--m-input-file dada2_stats/dada2_stats_field_16s.qza \
--o-visualization dada2_stats/stats-dada2.qzv

In [None]:
# Summarise the 16S count table together with the sample metadata.
!qiime feature-table summarize \
--i-table feature-tables/table_field_16s.qza \
--o-visualization feature-tables/table_field_16s.qzv \
--m-sample-metadata-file ../../../metadata_onSite/metadata_onSite_16s_tab.csv

## phylogenetics (make tree)

In [None]:
# Export the 16S representative sequences from the artifact into rep_field_16s_seq/.
!qiime tools export \
--input-path rep_field_16s.qza \
--output-path rep_field_16s_seq

In [None]:
# Tree step 1/4: align the representative sequences with the `alignment mafft` action.
!qiime alignment mafft \
--i-sequences rep_field_16s.qza \
--o-alignment aligned_rep_seqs.qza

In [None]:
# Tree step 2/4: run `alignment mask` on the alignment (default parameters).
!qiime alignment mask \
--i-alignment aligned_rep_seqs.qza \
--o-masked-alignment masked_aligned_rep_seqs.qza

In [None]:
# Tree step 3/4: build an unrooted tree from the masked alignment with `phylogeny fasttree`.
!qiime phylogeny fasttree \
--i-alignment masked_aligned_rep_seqs.qza \
--o-tree unrooted_tree.qza

In [None]:
# Tree step 4/4: midpoint-root the tree; rooted_tree.qza is used later for the
# export to R and for alpha rarefaction.
!qiime phylogeny midpoint-root \
--i-tree unrooted_tree.qza \
--o-rooted-tree rooted_tree.qza

In [None]:
# SILVA 132 reference files (99% rep set, 16S only): sequences and the matching
# 7-level taxonomy. Both are imported into QIIME 2 artifacts below.
db_seq_path = "/Database/silva_132_release_08102019/SILVA_132_QIIME_release/rep_set/rep_set_16S_only/99/silva_132_99_16S.fna"
tax_path = "/Database/silva_132_release_08102019/SILVA_132_QIIME_release/taxonomy/16S_only/99/trimmed_taxonomy_7_levels.txt"

In [None]:
# Output folder for the scikit-learn naive Bayes classifier artifacts built below.
# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs("classifier", exist_ok=True)

In [None]:
# Import the SILVA reference sequences ($db_seq_path) as FeatureData[Sequence].
!qiime tools import \
--type 'FeatureData[Sequence]' \
--input-path $db_seq_path \
--output-path ./classifier/silva_132_99.qza

In [None]:
# Import the SILVA taxonomy ($tax_path, headerless TSV) as FeatureData[Taxonomy].
!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $tax_path \
--output-path ./classifier/taxonomy_silva_132_99.qza

In [None]:
# Extract reference reads from the SILVA sequences with the same forward / reverse
# primers whose lengths are trimmed in the DADA2 step; min / max length set to 100 / 460.
!qiime feature-classifier extract-reads \
--i-sequences ./classifier/silva_132_99.qza \
--p-f-primer CCTACGGGNGGCWGCAG \
--p-r-primer GGACTACHVGGGTATCTAATCC \
--p-min-length 100 \
--p-max-length 460 \
--o-reads ./classifier/ref_silva_132_99.qza

In [None]:
# Fit the naive Bayes classifier on the extracted reference reads and SILVA taxonomy.
!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/ref_silva_132_99.qza \
--i-reference-taxonomy ./classifier/taxonomy_silva_132_99.qza \
--o-classifier ./classifier/classifier_silva_132.qza

In [None]:
# Output folder for the taxonomy assignments and their visualizations.
# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs("taxonomy", exist_ok=True)

In [None]:
# Assign taxonomy to the 16S representative sequences with the SILVA classifier.
!qiime feature-classifier classify-sklearn \
--i-classifier ./classifier/classifier_silva_132.qza \
--i-reads rep_field_16s.qza \
--o-classification ./taxonomy/taxonomy_field_16s.qza

In [None]:
# Render the 16S taxonomy assignments as a viewable table.
!qiime metadata tabulate \
--m-input-file taxonomy/taxonomy_field_16s.qza \
--o-visualization taxonomy/taxonomy_field_16s.qzv

In [None]:
# Taxonomic bar plot of the unfiltered 16S table (controls still included).
!qiime taxa barplot \
--i-table feature-tables/table_field_16s.qza \
--i-taxonomy taxonomy/taxonomy_field_16s.qza \
--m-metadata-file ../../../metadata_onSite/metadata_onSite_16s_tab.csv \
--o-visualization taxonomy/barplot_field_16s.qzv

## export for decontam

In [None]:
# !qiime tools export \
# --input-path feature-tables/table_field_16s.qza \
# --output-path exported

In [None]:
# !qiime tools export \
# --input-path taxonomy/taxonomy_field_16s.qza \
# --output-path exported/

In [None]:
# !cp exported/taxonomy.tsv exported/biom-taxonomy.tsv

In [None]:
# # change header
# !sed -i '1 s/Feature ID/#OTUID/g; s/Taxon/taxonomy/g; s/Confidence/confidence/g' exported/biom-taxonomy.tsv

In [None]:
# !biom add-metadata \
# -i exported/feature-table.biom \
# -o exported/feature-table-tax.biom \
# --observation-metadata-fp exported/biom-taxonomy.tsv \
# --sample-metadata-fp ../../../metadata_onSite/metadata_onSite_16s_tab.csv \
# --sc-separated taxonomy

In [None]:
# !biom convert \
# -i exported/feature-table-tax.biom \
# -o exported/feature-table.tsv \
# --to-tsv

> The decontam process will be done in R (decontam_mcm_16s.R).

In [None]:
# !qiime feature-table filter-features \
# --i-table feature-tables/table_field_16s.qza \
# --m-metadata-file exported/contam_by_freq.txt \
# --p-exclude-ids True \
# --o-filtered-table exported/decontam_filtered_table.qza

In [None]:
# !qiime feature-table summarize \
# --i-table exported/decontam_filtered_table.qza \
# --o-visualization exported/decontam_filtered_table.qzv \
# --m-sample-metadata-file ../../../metadata_onSite/metadata_onSite_16s_tab.csv

In [None]:
# !qiime taxa filter-table \
# --i-table exported/decontam_filtered_table.qza \
# --i-taxonomy taxonomy/taxonomy_field_16s.qza \
# --p-exclude mitochondria,chloroplast \
# --o-filtered-table feature-tables/nochloroplast_table.qza

> remove controls and mitochondria and chloroplasts

In [None]:
# Drop features whose taxonomy matches "mitochondria" or "chloroplast".
# Input is the raw DADA2 table: the decontam-based filtering above is commented out.
!qiime taxa filter-table \
--i-table feature-tables/table_field_16s.qza \
--i-taxonomy taxonomy/taxonomy_field_16s.qza \
--p-exclude mitochondria,chloroplast \
--o-filtered-table feature-tables/nochloroplast_table.qza

In [None]:
# Remove the control samples: --p-where selects rows with sample_or_control='control'
# and --p-exclude-ids turns that selection into an exclusion.
!qiime feature-table filter-samples \
--i-table feature-tables/nochloroplast_table.qza \
--m-metadata-file ../../../metadata_onSite/metadata_onSite_16s_tab.csv \
--p-where "sample_or_control='control'" \
--p-exclude-ids \
--o-filtered-table feature-tables/filtered_table_nocontrols.qza

In [None]:
# Summarise the filtered 16S table (no controls, no mitochondria / chloroplast).
!qiime feature-table summarize \
--i-table feature-tables/filtered_table_nocontrols.qza \
--m-sample-metadata-file ../../../metadata_onSite/metadata_onSite_16s_tab.csv \
--o-visualization feature-tables/filtered_table_nocontrols.qzv

> get control samples

In [None]:
# Keep only the control samples (same --p-where as above, without --p-exclude-ids).
# Starts from the raw table, so mitochondria / chloroplast features are retained here.
!qiime feature-table filter-samples \
--i-table feature-tables/table_field_16s.qza \
--m-metadata-file ../../../metadata_onSite/metadata_onSite_16s_tab.csv \
--p-where "sample_or_control='control'" \
--o-filtered-table feature-tables/feature_table_controls.qza

In [None]:
# Taxonomic bar plot restricted to the 16S control samples.
!qiime taxa barplot \
--i-table feature-tables/feature_table_controls.qza \
--m-metadata-file ../../../metadata_onSite/metadata_onSite_16s_tab.csv \
--i-taxonomy taxonomy/taxonomy_field_16s.qza \
--o-visualization taxonomy/barplot_controls.qzv

## export for R

In [None]:
# Export the filtered 16S table into R_process/ (read back below as feature-table.biom).
!qiime tools export \
--input-path feature-tables/filtered_table_nocontrols.qza \
--output-path R_process/

In [None]:
# Export the 16S taxonomy into R_process/ (read back below as taxonomy.tsv).
!qiime tools export \
--input-path taxonomy/taxonomy_field_16s.qza \
--output-path R_process/

In [None]:
# Work on a copy so the exported taxonomy.tsv keeps its original header.
!cp R_process/taxonomy.tsv R_process/biom-taxonomy.tsv

In [None]:
# change header
!sed -i '1 s/Feature ID/#OTUID/g; s/Taxon/taxonomy/g; s/Confidence/confidence/g' R_process/biom-taxonomy.tsv

In [None]:
# Attach taxonomy (observation metadata) and sample metadata to the exported BIOM table.
# NOTE(review): this cell uses metadata_field_16s_official.v2.tsv, whereas the QIIME
# cells above use metadata_onSite_16s_tab.csv — confirm the two files are equivalent.
!biom add-metadata \
-i R_process/feature-table.biom \
-o R_process/feature-table-tax.biom \
--observation-metadata-fp R_process/biom-taxonomy.tsv \
--sample-metadata-fp ../../../metadata_onSite/metadata_field_16s_official.v2.tsv \
--sc-separated taxonomy

In [None]:
# Convert the annotated BIOM table to a tab-separated text file for R.
!biom convert \
-i R_process/feature-table-tax.biom \
-o R_process/feature-table.tsv \
--to-tsv

In [None]:
# Export the rooted 16S tree into R_process/ alongside the table and taxonomy.
!qiime tools export \
--input-path rooted_tree.qza \
--output-path R_process/

### rarefaction curve

In [None]:
# Alpha-rarefaction curves for 16S: 100 steps up to a maximum depth of 10000.
# NOTE(review): this uses the unfiltered table (controls and organelle features
# included) rather than filtered_table_nocontrols.qza — confirm that is intended.
!qiime diversity alpha-rarefaction \
--i-table feature-tables/table_field_16s.qza \
--i-phylogeny rooted_tree.qza \
--m-metadata-file ../../../metadata_onSite/metadata_field_16s_official.v2.tsv \
--p-steps 100 \
--p-max-depth 10000 \
--o-visualization alpha-rarecurve_fields_16s

-------------
## ITS

In [None]:
# Switch into the ITS result directory: the relative paths in the ITS cells below
# resolve against it, and the ITS outputs reuse several file names from the 16S run
# (e.g. rooted_tree.qza, feature-tables/filtered_table_nocontrols.qza).
os.chdir(path_ITS)

In [None]:
# Display the current working directory to confirm the chdir above.
os.getcwd()

In [None]:
# Absolute path to the ITS sample metadata, resolved relative to the current
# (ITS result) directory; shell cells below receive it as $metadatapath.
metadatapath = os.path.abspath("../../../metadata_onSite/metadata_field_its_official.v2.tsv")

In [None]:
# Import the paired-end FASTQ files listed in the ITS manifest ($man_its, Phred33
# manifest format) into the artifact field_its_seq.qza.
!qiime tools import \
--type 'SampleData[PairedEndSequencesWithQuality]' \
--input-path $man_its \
--output-path field_its_seq.qza \
--input-format PairedEndFastqManifestPhred33

In [None]:
# Summarise the imported ITS reads into a visualization (field_its_seq.qzv).
!qiime demux summarize \
--i-data field_its_seq.qza \
--o-visualization field_its_seq.qzv

In [None]:
# Lengths of the ITS forward and reverse primers. DADA2 receives them below as the
# number of leading bases to trim from each read (--p-trim-left-f / --p-trim-left-r).
ITS_F, ITS_R = map(len, ("GTGAATCATCGAATCTTTGAA", "TCCTCCGCTTATTGATATGC"))

In [None]:
# Output folders for the ITS DADA2 feature table and denoising statistics.
# exist_ok=True keeps the cell re-runnable and avoids the separate existence check.
os.makedirs("feature-tables", exist_ok=True)
os.makedirs("dada2_stats", exist_ok=True)

In [None]:
# Truncation lengths noted for this run (forward / reverse) — presumably successive trials:
#   299 / 223
#   290 / 219   <- the pair passed to --p-trunc-len-f / --p-trunc-len-r below
# The ITS primer lengths computed earlier are used as the left trim for each read.
# NOTE(review): --output-dir dada2 is given although every output is also named
# explicitly, and the 16S run omits it — confirm it is needed.


!qiime dada2 denoise-paired \
--i-demultiplexed-seqs field_its_seq.qza \
--output-dir dada2 \
--o-table feature-tables/table_field_its \
--o-representative-sequences rep_field_its \
--p-trim-left-f $ITS_F \
--p-trim-left-r $ITS_R \
--p-trunc-len-f 290 \
--p-trunc-len-r 219 \
--o-denoising-stats dada2_stats/dada2_stats.qza \
--p-n-threads 12

In [None]:
# Render the ITS DADA2 denoising statistics as a viewable table (stats-dada2.qzv).
!qiime metadata tabulate \
--m-input-file dada2_stats/dada2_stats.qza \
--o-visualization dada2_stats/stats-dada2.qzv

In [None]:
# Summarise the ITS count table together with the sample metadata.
!qiime feature-table summarize \
--i-table feature-tables/table_field_its.qza \
--o-visualization feature-tables/table_field_its.qzv \
--m-sample-metadata-file $metadatapath

### rooted tree

In [None]:
# Export the ITS representative sequences from the artifact into rep_field_its_seq/.
!qiime tools export \
--input-path rep_field_its.qza \
--output-path rep_field_its_seq

In [None]:
# Tree step 1/4: align the ITS representative sequences with `alignment mafft`.
!qiime alignment mafft \
--i-sequences rep_field_its.qza \
--o-alignment aligned_rep_seqs.qza

In [None]:
# Tree step 2/4: run `alignment mask` on the alignment (default parameters).
!qiime alignment mask \
--i-alignment aligned_rep_seqs.qza \
--o-masked-alignment masked_aligned_rep_seqs.qza

In [None]:
# Tree step 3/4: build an unrooted tree from the masked alignment with `phylogeny fasttree`.
!qiime phylogeny fasttree \
--i-alignment masked_aligned_rep_seqs.qza \
--o-tree unrooted_tree.qza

In [None]:
# Tree step 4/4: midpoint-root the tree; rooted_tree.qza is used later for the
# export to R and for alpha rarefaction.
!qiime phylogeny midpoint-root \
--i-tree unrooted_tree.qza \
--o-rooted-tree rooted_tree.qza

# taxonomy

In [None]:
# UNITE "dynamic" reference files (QIIME release ver8, dated 02.02.2019):
# sequences and the matching taxonomy. Imported as artifacts below.
db_seq_dyam_path = r"/Database/UNITE_INSD_ITS/UNITE_release/UNITE_UPDATE_2018-11-18/version1/sh_refs_qiime_ver8_dynamic_02.02.2019.fasta"
db_tax_dyam_path = r"/Database/UNITE_INSD_ITS/UNITE_release/UNITE_UPDATE_2018-11-18/version1/sh_taxonomy_qiime_ver8_dynamic_02.02.2019.txt"

In [None]:
# UNITE "99_s" reference files from the same release (ver8, dated 02.02.2019):
# sequences and the matching taxonomy, used for the second classifier below.
db_seq_99 = r"/Database/UNITE_INSD_ITS/UNITE_release/UNITE_UPDATE_2018-11-18/version2/sh_refs_qiime_ver8_99_s_02.02.2019.fasta"
db_tax_99 = r"/Database/UNITE_INSD_ITS/UNITE_release/UNITE_UPDATE_2018-11-18/version2/sh_taxonomy_qiime_ver8_99_s_02.02.2019.txt"

In [None]:
# Output folder for the UNITE reference artifacts and trained classifiers.
# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs("classifier", exist_ok=True)

In [None]:
# Output folder for the ITS taxonomy assignments and their visualizations.
# exist_ok=True keeps the cell re-runnable without a separate existence check.
os.makedirs("taxonomy", exist_ok=True)

---

In [None]:
# Import the UNITE "dynamic" reference sequences as FeatureData[Sequence].
!qiime tools import \
--type "FeatureData[Sequence]" \
--input-path $db_seq_dyam_path \
--output-path ./classifier/UNITED_2018_dyam.qza

In [None]:
# Import the UNITE "dynamic" taxonomy (headerless TSV) as FeatureData[Taxonomy].
!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $db_tax_dyam_path \
--output-path ./classifier/UNITED_TAX_2018_dyam.qza

In [None]:
# Fit a naive Bayes classifier directly on the imported UNITE "dynamic" references
# (unlike the 16S workflow, no extract-reads step is applied first).
!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/UNITED_2018_dyam.qza \
--i-reference-taxonomy ./classifier/UNITED_TAX_2018_dyam.qza \
--o-classifier ./classifier/classifier_UNITE_2018_dyam.qza

In [None]:
# Assign taxonomy to the ITS representative sequences with the "dynamic" classifier.
!qiime feature-classifier classify-sklearn \
--i-reads rep_field_its.qza \
--i-classifier ./classifier/classifier_UNITE_2018_dyam.qza \
--o-classification ./taxonomy/taxonomy_field_its_dym.qza

In [None]:
!qiime metadata tabulate \
--m-input-file ./taxonomy/taxonomy_field_its.qza \
--o-visualization ./taxonomy/taxonomy_field_its_dym.qzv

In [None]:
# Taxonomic bar plot of the unfiltered ITS table using the "dynamic" assignments.
!qiime taxa barplot \
--i-table feature-tables/table_field_its.qza \
--i-taxonomy ./taxonomy/taxonomy_field_its_dym.qza \
--m-metadata-file $metadatapath \
--o-visualization ./taxonomy/barplot_tax_field_its_dym.qzv

---
99 cluster (classifier built from the UNITE `ver8_99_s` reference files)

In [None]:
# Import the UNITE "99_s" reference sequences as FeatureData[Sequence].
# NOTE(review): the artifact is named "2020" although $db_seq_99 points at a file
# dated 02.02.2019 — confirm the naming.
!qiime tools import \
--type "FeatureData[Sequence]" \
--input-path $db_seq_99 \
--output-path ./classifier/UNITED_2020_99.qza

In [None]:
# Import the UNITE "99_s" taxonomy (headerless TSV) as FeatureData[Taxonomy].
!qiime tools import \
--type 'FeatureData[Taxonomy]' \
--input-format HeaderlessTSVTaxonomyFormat \
--input-path $db_tax_99 \
--output-path ./classifier/UNITED_2020_99_tax.qza

In [None]:
# Fit a second naive Bayes classifier on the UNITE "99_s" references.
!qiime feature-classifier fit-classifier-naive-bayes \
--i-reference-reads ./classifier/UNITED_2020_99.qza \
--i-reference-taxonomy ./classifier/UNITED_2020_99_tax.qza \
--o-classifier ./classifier/classifier_UNITE_2020_99.qza

In [None]:
# Assign taxonomy to the ITS representative sequences with the "99_s" classifier.
# These assignments are the ones used by the control bar plot and the export to R below.
!qiime feature-classifier classify-sklearn \
--i-reads rep_field_its.qza \
--i-classifier ./classifier/classifier_UNITE_2020_99.qza \
--o-classification ./taxonomy/taxonomy_field_unite202099.qza

In [None]:
# Render the "99_s" taxonomy assignments as a viewable table.
!qiime metadata tabulate \
--m-input-file ./taxonomy/taxonomy_field_unite202099.qza \
--o-visualization ./taxonomy/taxonomy_field_unite202099.qzv

In [None]:
# Taxonomic bar plot of the unfiltered ITS table using the "99_s" assignments.
!qiime taxa barplot \
--i-table feature-tables/table_field_its.qza \
--i-taxonomy ./taxonomy/taxonomy_field_unite202099.qza \
--m-metadata-file $metadatapath \
--o-visualization ./taxonomy/barplot_tax_field_unite202099.qzv

### export for decontam

In [None]:
# !qiime tools export \
# --input-path feature-tables/table_field_its.qza \
# --output-path exported

# !qiime tools export \
# --input-path taxonomy/taxonomy_field_unite202099.qza \
# --output-path exported/

In [None]:
# !cp exported/taxonomy.tsv exported/biom-taxonomy.tsv

In [None]:
# # change header
# !sed -i '1 s/Feature ID/#OTUID/g; s/Taxon/taxonomy/g; s/Confidence/confidence/g' exported/biom-taxonomy.tsv

# !biom add-metadata \
# -i exported/feature-table.biom \
# -o exported/feature-table-tax.biom \
# --observation-metadata-fp exported/biom-taxonomy.tsv \
# --sample-metadata-fp $metadatapath \
# --sc-separated taxonomy

# !biom convert \
# -i exported/feature-table-tax.biom \
# -o exported/feature-table.tsv \
# --to-tsv

> The decontam process will be done in R (decontam_mcm_its.R).

In [None]:
# !qiime feature-table filter-features \
# --i-table feature-tables/table_field_its.qza \
# --m-metadata-file exported/contam_by_freq.txt \
# --p-exclude-ids True \
# --o-filtered-table exported/decontam_filtered_table.qza

In [None]:
# !qiime feature-table summarize \
# --i-table exported/decontam_filtered_table.qza \
# --o-visualization exported/decontam_filtered_table.qzv \
# --m-sample-metadata-file $metadatapath

> remove controls

In [None]:
# Remove the control samples: --p-where selects rows with sample_or_control='control'
# and --p-exclude-ids turns that selection into an exclusion.
!qiime feature-table filter-samples \
--i-table feature-tables/table_field_its.qza \
--m-metadata-file $metadatapath \
--p-where "sample_or_control='control'" \
--p-exclude-ids \
--o-filtered-table feature-tables/filtered_table_nocontrols.qza

In [None]:
# Summarise the ITS table after removing the controls.
!qiime feature-table summarize \
--i-table feature-tables/filtered_table_nocontrols.qza \
--m-sample-metadata-file $metadatapath \
--o-visualization feature-tables/filtered_table_nocontrols.qzv

> get control samples

In [None]:
# Keep only the control samples (same --p-where as above, without --p-exclude-ids).
!qiime feature-table filter-samples \
--i-table feature-tables/table_field_its.qza \
--m-metadata-file $metadatapath \
--p-where "sample_or_control='control'" \
--o-filtered-table feature-tables/feature_table_controls.qza

In [None]:
# Taxonomic bar plot restricted to the ITS control samples ("99_s" assignments).
!qiime taxa barplot \
--i-table feature-tables/feature_table_controls.qza \
--m-metadata-file $metadatapath \
--i-taxonomy taxonomy/taxonomy_field_unite202099.qza \
--o-visualization taxonomy/barplot_controls.qzv

## export for R

In [None]:
# Export the control-free ITS table into R_process/ (read back below as feature-table.biom).
!qiime tools export \
--input-path feature-tables/filtered_table_nocontrols.qza \
--output-path R_process/

In [None]:
# Export the "99_s" ITS taxonomy into R_process/ (read back below as taxonomy.tsv).
!qiime tools export \
--input-path taxonomy/taxonomy_field_unite202099.qza \
--output-path R_process/

In [None]:
# Work on a copy so the exported taxonomy.tsv keeps its original header.
!cp R_process/taxonomy.tsv R_process/biom-taxonomy.tsv

In [None]:
# change header
!sed -i '1 s/Feature ID/#OTUID/g; s/Taxon/taxonomy/g; s/Confidence/confidence/g' R_process/biom-taxonomy.tsv

In [None]:
# Attach taxonomy (observation metadata) and sample metadata to the exported BIOM table.
# The sample metadata file is the same one that metadatapath points to, written here
# as a path relative to the ITS result directory.
!biom add-metadata \
-i R_process/feature-table.biom \
-o R_process/feature-table-tax.biom \
--observation-metadata-fp R_process/biom-taxonomy.tsv \
--sample-metadata-fp ../../../metadata_onSite/metadata_field_its_official.v2.tsv \
--sc-separated taxonomy

In [None]:
# Convert the annotated BIOM table to a tab-separated text file for R.
!biom convert \
-i R_process/feature-table-tax.biom \
-o R_process/feature-table.tsv \
--to-tsv

In [None]:
# Export the rooted ITS tree into R_process/ alongside the table and taxonomy.
!qiime tools export \
--input-path rooted_tree.qza \
--output-path R_process/

### rarefaction curves

In [None]:
# Alpha-rarefaction curves for ITS: 100 steps up to a maximum depth of 10000.
# NOTE(review): this uses the unfiltered table (controls included) rather than
# filtered_table_nocontrols.qza — confirm that is intended.
!qiime diversity alpha-rarefaction \
--i-table feature-tables/table_field_its.qza \
--i-phylogeny rooted_tree.qza \
--m-metadata-file $metadatapath \
--p-steps 100 \
--p-max-depth 10000 \
--o-visualization alpha-rarecurve