```bash
# We need to combine a few datasets:
#
# * ICU (work with Paul Wischmeyer).
# * Infant time series (work with Ruth Ley).
# * Fecal Microbiome Transplant (work with Mike Sadowsky).
# * American Gut Data.
#
#
# While seemingly trivial, this requires some finesse to properly combine
# the data.
# 1) We need to start by deblurring the ICU and FMT data, so first we need
#    to grab the data from Qiita (HDF5 formatted sequences) and import them
#    to QIIME2.
#
# Paths are defined inside this step so that it can be run on its own.
project_dir=/home/yovazquezbaeza/research/ag-animation
qiita_dir=/projects/qiita_data/preprocessed_data
fmt_icu_dir="${project_dir}/deblur-fmt-and-icu"

# Create the directory at the same absolute location the copies below write
# to, so that this step does not depend on the current working directory.
mkdir -p "${project_dir}/demux-files"
cp "${qiita_dir}/175_seqs.demux" "${project_dir}/demux-files/fmt-seqs.demux"
cp "${qiita_dir}/615_seqs.demux" "${project_dir}/demux-files/icu-seqs.demux"
#
# For each study, run the local make_importable.sh helper on the demux file
# and then import the directory it was pointed at as a QIIME2 artifact.
for study in fmt icu; do
  ~/bin/make_importable.sh \
    "${project_dir}/demux-files/${study}-seqs.demux" \
    "${project_dir}/${study}-sequences-qiime2" 1
  qiime tools import \
    --input-path "${project_dir}/${study}-sequences-qiime2/" \
    --type 'SampleData[SequencesWithQuality]' \
    --output-path "${project_dir}/${study}-seqs.qza"
done
#
# 1.1) Deblur the sequences and select trimming at 125 NT.
#
# The --output-dir directories are deliberately NOT created beforehand;
# they are left for QIIME2 to create.
for study in fmt icu; do
  qiime deblur denoise-16S \
    --output-dir "${project_dir}/deblur-${study}" \
    --i-demultiplexed-seqs "${project_dir}/${study}-seqs.qza" \
    --p-trim-length 125 \
    --p-no-hashed-feature-ids \
    --verbose \
    --p-jobs-to-start 16
done
#
# 1.2) Merge the FMT and ICU feature tables
#
# No earlier command creates the directory for the merged table, so make
# sure it exists before QIIME2 tries to write into it.
mkdir -p "${fmt_icu_dir}"
qiime feature-table merge \
  --i-table1 "${project_dir}/deblur-fmt/table.qza" \
  --i-table2 "${project_dir}/deblur-icu/table.qza" \
  --o-merged-table "${fmt_icu_dir}/table.qza"
#
# 1.3) Remove blooms
# This script will remove the bloom sequences from the table and
# from the representative sequences.
# NOTE(review): the script is invoked relative to the current directory, so
# this step has to be launched from the directory that holds
# remove-blooms.py. Presumably it is what writes
# representative-sequences.upper.fna (imported below) and table.noblooms.qza
# (used when merging with the American Gut data) -- confirm.
./remove-blooms.py
qiime tools import \
  --input-path "${fmt_icu_dir}/representative-sequences.upper.fna" \
  --output-path "${fmt_icu_dir}/representative_sequences.qza" \
  --type 'FeatureData[Sequence]'
#
# 2) Grab the ITS (infant time series) sequences
#
# Paths are defined inside this step so that it can be run on its own.
project_dir=/home/yovazquezbaeza/research/ag-animation
gg_dir=/home/yovazquezbaeza/research/gg_13_8_otus
its_seqs_dir="${project_dir}/sequence-files"

# Create the directory at the same absolute location the copy below writes
# to, so that this step does not depend on the current working directory.
mkdir -p "${its_seqs_dir}"
cp /projects/qiita_data/preprocessed_data/157_seqs.fna "${its_seqs_dir}/its-seqs.fna"
#
# 2.1) trim to 125 NTs
trim_fasta.py \
  -i "${its_seqs_dir}/its-seqs.fna" \
  -o "${its_seqs_dir}/its.seqs.125nt.fna" \
  -l 125
#
# 2.2) pick OTUs at 99%.
# This is a long-running job; on the cluster it was submitted like:
# qsub -l mem=128gb,nodes=1:ppn=32 -l walltime=120:00:00 -e sortmerna.e -o sortmerna.o -N closed commands.sh
pick_closed_reference_otus.py \
  -i "${its_seqs_dir}/its.seqs.125nt.fna" \
  -o "${project_dir}/closed-ref-its" \
  -p "${project_dir}/sortmerna-params.txt" \
  -r "${gg_dir}/rep_set/99_otus.fasta" \
  -t "${gg_dir}/taxonomy/99_otu_taxonomy.txt"
#
# 3) Combine all the Illumina data (FMT, ICU and AGP).
#
# Paths are defined inside this step so that it can be run on its own.
project_dir=/home/yovazquezbaeza/research/ag-animation
gg_dir=/home/yovazquezbaeza/research/gg_13_8_otus
ag_dir="${project_dir}/deblur-ag"
fmt_icu_dir="${project_dir}/deblur-fmt-and-icu"
combined_dir="${project_dir}/deblur-ag-fmt-icu"

# 3.1) Get American gut data
#  Justine sent me an e-mail with a spreadsheet pointing to this
#  path: /home/mcdonadt/ag-April-26-2017/otu_table_no_blooms_125nt_with_tax_min1250.biom
#
# Create the directory at the same absolute location the imports below write
# to, so that this step does not depend on the current working directory.
mkdir -p "${ag_dir}"
qiime tools import \
  --input-path /home/mcdonadt/ag-April-26-2017/otu_table_no_blooms_125nt_with_tax_min1250.biom \
  --output-path "${ag_dir}/table.qza" \
  --type 'FeatureTable[Frequency]'
# python make-representative-sequences.py
# NOTE(review): no active command in this step creates
# representative-sequences.upper.fna; presumably the commented-out script
# above generates it and has to be run once beforehand -- confirm.
qiime tools import \
  --input-path "${ag_dir}/representative-sequences.upper.fna" \
  --output-path "${ag_dir}/representative_sequences.qza" \
  --type 'FeatureData[Sequence]'
#
# 3.2) Merge illumina data
#
# No earlier command creates the directory for the merged outputs, so make
# sure it exists before QIIME2 tries to write into it.
mkdir -p "${combined_dir}"
qiime feature-table merge \
  --i-table1 "${fmt_icu_dir}/table.noblooms.qza" \
  --i-table2 "${ag_dir}/table.qza" \
  --o-merged-table "${combined_dir}/table.qza"
#
qiime feature-table merge-seq-data \
  --i-data1 "${ag_dir}/representative_sequences.qza" \
  --i-data2 "${fmt_icu_dir}/representative_sequences.qza" \
  --o-merged-data "${combined_dir}/representative_sequences.qza"
#
# 3.3) Re-pick OTUs from the deblurred sequences.
#
# Export the merged representative sequences; the OTU picking below reads
# dna-sequences.fasta from the export directory.
qiime tools export "${combined_dir}/representative_sequences.qza" \
  --output-dir "${combined_dir}/"
#
# submit this like:
# qsub -l mem=128gb,nodes=1:ppn=32 -l walltime=120:00:00 -e sortmerna.e -o sortmerna.o -N closed commands.2.sh
pick_closed_reference_otus.py \
  -i "${combined_dir}/dna-sequences.fasta" \
  -o "${combined_dir}/closed-ref/" \
  -p "${project_dir}/sortmerna-params.txt" \
  -r "${gg_dir}/rep_set/99_otus.fasta" \
  -t "${gg_dir}/taxonomy/99_otu_taxonomy.txt"
#
#
# 3.4) Re-map into an OTU table using Daniel's script
#
# Arguments: merged table, the OTU map written by the picking step above,
# and the output artifact.
"${project_dir}/expand.py" \
  "${combined_dir}/table.qza" \
  "${combined_dir}/closed-ref/sortmerna_picked_otus/dna-sequences_otus.txt" \
  "${combined_dir}/expanded-otu-table.qza"
#
# 3.5) Import GG tree
qiime tools import \
  --input-path "${gg_dir}/trees/99_otus.tree" \
  --output-path "${combined_dir}/closed-ref/greengenes.99.qza" \
  --type 'Phylogeny[Rooted]'
#
# 4) Combine re-mapped OTU table and ITS OTU table.
#
# Paths are defined inside this step so that it can be run on its own.
project_dir=/home/yovazquezbaeza/research/ag-animation
its_otus_dir="${project_dir}/closed-ref-its"
final_dir="${project_dir}/remapped-ag-fmt-icu-its"

# 4.1) Import ITS table into QIIME2
#
qiime tools import \
  --input-path "${its_otus_dir}/otu_table.biom" \
  --output-path "${its_otus_dir}/table.qza" \
  --type 'FeatureTable[Frequency]'
#
# 4.2) merge OTU tables
# Create the directory at the same absolute location the outputs below are
# written to, so that this step does not depend on the current working
# directory.
mkdir -p "${final_dir}"
qiime feature-table merge \
  --i-table1 "${project_dir}/deblur-ag-fmt-icu/expanded-otu-table.qza" \
  --i-table2 "${its_otus_dir}/table.qza" \
  --o-merged-table "${final_dir}/table.qza"
# Rarefy the merged table to an even depth of 1250 sequences per sample.
qiime feature-table rarefy \
  --i-table "${final_dir}/table.qza" \
  --p-sampling-depth 1250 \
  --o-rarefied-table "${final_dir}/table.even1250.qza"
#
#
# 5) Use Greengenes 99% to compute UniFrac
#
# Paths are defined inside this step so that it can be run on its own.
project_dir=/home/yovazquezbaeza/research/ag-animation
final_dir="${project_dir}/remapped-ag-fmt-icu-its"
unifrac_dm="${final_dir}/unweighted-unifrac.even1250.qza"

# 5.1) compute UniFrac
# On the cluster this was submitted like (10 cores, matching --p-threads):
# qsub -l mem=64gb,nodes=1:ppn=10 -l walltime=120:00:00 -o state-unifrac.o -e state-unifrac.e -N state -M yoshiki89@gmail.com commands.2.sh
qiime state-unifrac unweighted \
  --i-table "${final_dir}/table.even1250.qza" \
  --i-phylogeny "${project_dir}/deblur-ag-fmt-icu/closed-ref/greengenes.99.qza" \
  --p-threads 10 \
  --o-distance-matrix "${unifrac_dm}"
#
# 5.2) Ordinate distance matrix
qiime diversity pcoa \
  --i-distance-matrix "${unifrac_dm}" \
  --o-pcoa "${final_dir}/pcoa.unweighted-unifrac.even1250.qza"
```