In [None]:
#!/bin/bash
#SBATCH --job-name=classifier_test
#SBATCH --time=04:00:00
#SBATCH --mem-per-cpu=8GB
#SBATCH --cpus-per-task=8
#SBATCH --output=taxonomy_%j.log

# Slurm batch job that downloads the UNITE reference data with RESCRIPt,
# cleans it up and trains a naive-Bayes feature classifier from it.
#
# Environment setup: load conda's shell hook, activate the QIIME 2 environment
# and switch to the user's scratch directory. Every setup command is checked
# so the job stops immediately instead of running the pipeline without the
# expected tools or in the wrong directory.

source ~/miniconda3/etc/profile.d/conda.sh \
    || { echo "ERROR: could not source ~/miniconda3/etc/profile.d/conda.sh" >&2; exit 1; }
conda activate qiime2-moshpit-2025.7 \
    || { echo "ERROR: could not activate conda env qiime2-moshpit-2025.7" >&2; exit 1; }

# Strict mode is switched on only after activation: conda's activation hooks
# may reference unset variables or return non-zero internally, which would
# trip `set -u` / `set -e` for reasons unrelated to this job.
set -euo pipefail

# ${USER:?} aborts with a message if USER is unset, rather than silently
# resolving to /cluster/scratch/.
cd "/cluster/scratch/${USER:?USER is not set}" \
    || { echo "ERROR: could not cd to /cluster/scratch/${USER}" >&2; exit 1; }

# Step 1: download the UNITE reference database into ./uniteDB.
# Requested release: version 2025-02-19, eukaryotes taxon group, dynamic
# clustering threshold, singletons excluded.
# TODO: the original note here said the DB "needs to be updated, with 9 just
# to check" - confirm that '2025-02-19' is the release intended for production.
# NOTE(review): qiime is expected to refuse an --output-dir that already
# exists, so a re-run may need the old uniteDB directory removed - confirm.
echo "Step 1: Loading DB..."
if ! qiime rescript get-unite-data \
    --p-version '2025-02-19' \
    --p-taxon-group eukaryotes \
    --p-cluster-id dynamic \
    --p-no-singletons \
    --verbose \
    --output-dir uniteDB
then
    # Stop here: every later step reads artifacts from uniteDB/.
    echo "ERROR: Step 1 (get-unite-data) failed" >&2
    exit 1
fi

# Step 2: remove reference sequences with unhelpful taxonomy.
# Sequences whose taxonomy matches any of the comma-separated --p-exclude
# terms (Fungi_sp, mycota_sp, mycetes_sp) are dropped. Only the sequences are
# filtered; the taxonomy artifact itself is left unchanged by this step.
echo "Step 2: Filtering sequences with unhelpful taxonomy..."
if ! qiime taxa filter-seqs \
    --p-exclude Fungi_sp,mycota_sp,mycetes_sp \
    --i-taxonomy uniteDB/taxonomy.qza \
    --i-sequences uniteDB/sequences.qza \
    --o-filtered-sequences uniteDB/sequences-filtered.qza
then
    # Abort so the classifier is never trained on a missing or stale
    # sequences-filtered.qza.
    echo "ERROR: Step 2 (taxa filter-seqs) failed" >&2
    exit 1
fi

# Step 3: strip the trailing ';sh__...' component from every taxonomy string.
# With --p-use-regex the pattern ';sh__.*' matches from ';sh__' to the end of
# the string, and the empty replacement deletes that match.
# NOTE(review): 'sh__' is presumably the UNITE species-hypothesis rank - confirm.
echo "Step 3: Edit taxonomy..."
if ! qiime rescript edit-taxonomy \
    --i-taxonomy uniteDB/taxonomy.qza \
    --o-edited-taxonomy uniteDB/taxonomy-no-SH.qza \
    --p-search-strings ';sh__.*' \
    --p-replacement-strings '' \
    --p-use-regex
then
    # Abort so the classifier is never trained on a missing or stale
    # taxonomy-no-SH.qza.
    echo "ERROR: Step 3 (edit-taxonomy) failed" >&2
    exit 1
fi

# Step 4: train the naive-Bayes classifier from the filtered reference reads
# and the edited taxonomy, writing it to uniteDB/classifier.qza.
echo "Step 4: Fit classifier..."
if ! qiime feature-classifier fit-classifier-naive-bayes \
    --i-reference-reads uniteDB/sequences-filtered.qza \
    --i-reference-taxonomy uniteDB/taxonomy-no-SH.qza \
    --o-classifier uniteDB/classifier.qza
then
    echo "ERROR: Step 4 (fit-classifier-naive-bayes) failed" >&2
    exit 1
fi

# Verify success: the classifier artifact must exist and be non-empty before
# the job is reported as complete.
if [[ ! -s uniteDB/classifier.qza ]]; then
    echo "ERROR: uniteDB/classifier.qza is missing or empty" >&2
    exit 1
fi
echo "Completed whole Code on $(date)"