Code for training a UNITE naive Bayes taxonomy classifier with QIIME 2 on the Euler cluster

In [None]:
import os
import pandas as pd
from qiime2 import Visualization
import matplotlib.pyplot as plt
import numpy as np

import qiime2 as q2

%matplotlib inline

# Directory that holds this week's data and every result this notebook produces.
# Keep it as a path relative to your working directory.
data_dir = "Data"

In [None]:
# Type these straight into the terminal session you land in after SSH-ing in;
# nothing else is needed to get to the right place first.

# Load the shell configuration and switch to the QIIME 2 conda environment.
source ~/.bashrc
conda activate qiime2-moshpit-2025.7

# Create (if missing) and enter the folder that will hold the reference database.
mkdir -p ~/unite_database
cd ~/unite_database

# Fetch the UNITE reference data with RESCRIPt; the resulting artifacts are
# written into the uniteDB/ sub-directory.
qiime rescript get-unite-data \
  --p-taxon-group eukaryotes \
  --p-version 2025-02-19 \
  --p-cluster-id dynamic \
  --p-no-singletons \
  --output-dir uniteDB \
  --verbose

In [None]:
#!/bin/bash
#SBATCH --job-name=train_unite_classifier
#SBATCH --time=04:00:00
#SBATCH --mem-per-cpu=32G
#SBATCH --cpus-per-task=4
#SBATCH --output=train_%j.out
#SBATCH --error=train_%j.err
#
# Train classifier: Slurm batch script that filters the UNITE reference
# sequences, strips the trailing ";sh__..." part from the taxonomy strings and
# fits a naive Bayes classifier on the result.
#
# Save this cell's content to a file and submit it with `sbatch <file>`.
# The `#!/bin/bash` line has to stay the very first line of that file (sbatch
# refuses scripts that do not start with `#!`), and the #SBATCH directives
# have to stay above the first executable command.
#
# Inputs  (relative to ~/unite_database): uniteDB/sequences.qza, uniteDB/taxonomy.qza
# Outputs (relative to ~/unite_database): uniteDB/sequences-filtered.qza,
#                                         uniteDB/taxonomy-no-SH.qza,
#                                         uniteDB/classifier.qza
#
# NOTE(review): --mem-per-cpu=32G with --cpus-per-task=4 requests 128G in
# total; confirm the training step actually needs that many cores/memory.

# Exit on error
set -e

# Activate QIIME2 environment
# NOTE(review): `set -e` is already active here, so a failing command inside
# ~/.bashrc would abort the job before any QIIME step runs — confirm the
# file sources cleanly in a non-interactive shell.
source ~/.bashrc
conda activate qiime2-moshpit-2025.7

# Navigate to your database directory
cd ~/unite_database

# Step 1: drop reference sequences whose taxonomy matches one of the
# uninformative labels listed in --p-exclude.
echo "Step 1: Filtering sequences with unhelpful taxonomy..."
qiime taxa filter-seqs \
  --p-exclude Fungi_sp,mycota_sp,mycetes_sp \
  --i-taxonomy uniteDB/taxonomy.qza \
  --i-sequences uniteDB/sequences.qza \
  --o-filtered-sequences uniteDB/sequences-filtered.qza

# Step 2: regex replacement that removes everything from ";sh__" to the end
# of each taxonomy string.
echo "Step 2: Removing accession numbers from taxonomy..."
qiime rescript edit-taxonomy \
  --i-taxonomy uniteDB/taxonomy.qza \
  --o-edited-taxonomy uniteDB/taxonomy-no-SH.qza \
  --p-search-strings ';sh__.*' \
  --p-replacement-strings '' \
  --p-use-regex

# Step 3: fit the naive Bayes classifier on the filtered reads and the
# edited taxonomy.
echo "Step 3: Training the classifier (this may take a while)..."
qiime feature-classifier fit-classifier-naive-bayes \
  --i-reference-reads uniteDB/sequences-filtered.qza \
  --i-reference-taxonomy uniteDB/taxonomy-no-SH.qza \
  --o-classifier uniteDB/classifier.qza \
  --verbose

echo "Pipeline completed successfully!"
echo "Classifier saved to: ~/unite_database/uniteDB/classifier.qza"

In [None]:
# Commands to run in a terminal on Euler to download the data.
# Run them from the notebook's working directory.
#
# The target directory must match `data_dir = 'Data'` from the notebook's
# setup cell; a literal `data_dir` here would create a folder named
# "data_dir" that the notebook never reads from.
data_dir=Data

mkdir -p "$data_dir"
wget "https://polybox.ethz.ch/index.php/s/bbPNGZPpGwBHBct/download" -O polybox_files.zip
# Remove the archive only after it was unpacked successfully, so a failed
# extraction does not force a second download.
unzip polybox_files.zip -d "$data_dir" && rm polybox_files.zip