## Inputs
- `{subject}_combined_otu_qiime2.tsv` (one combined OTU table per subject)
- AGORA reference sequences (ref seq)

In [12]:
import os
import subprocess
from pathlib import Path

# Import the tqdm *callable*. `import tqdm as tqdm` binds the module itself,
# so the `tqdm(otu_files, ...)` call in the processing loop would fail with
# "TypeError: 'module' object is not callable".
from tqdm import tqdm

# Paths
# Directory the processing loop reads the per-subject OTU tables from.
final_output_dir = "../data/combined_meta_and_otu_outputs/"
# Representative-sequence artifact passed to the classifier as --i-reads.
rep_seq_path = "../data/uclust_casey_rep_set.qza"
# Classifier artifact passed to classify-sklearn as --i-classifier.
classifier_path = "../data/silva-138-99-nb-classifier.qza"
# Directory that receives every BIOM / QIIME2 / taxonomy output file.
qiime_output_dir = "../data/qiime_outputs/"
# Create the output directory up front; a no-op when it already exists.
Path(qiime_output_dir).mkdir(parents=True, exist_ok=True)

In [None]:

# Loop through each subject's OTU table.
#
# Reads : {final_output_dir}/<subject>_combined_otu_qiime2.tsv
# Writes: {qiime_output_dir}/<subject>_combined_otu.biom
#         {qiime_output_dir}/<subject>_feature_table.qza
#         {qiime_output_dir}/<subject>_taxonomy.qza
#         {qiime_output_dir}/<subject>_taxonomy.tsv
# Raises: subprocess.CalledProcessError as soon as any external command exits
#         non-zero (every call uses check=True), which stops the loop.

# Discover the input tables here so the cell runs on a fresh kernel; the list
# was previously expected to exist already but is not defined anywhere above.
# Sorting keeps the processing order deterministic across runs.
otu_files = sorted(
    name for name in os.listdir(final_output_dir)
    if name.endswith("_combined_otu_qiime2.tsv")
)

for otu_file in tqdm(otu_files, desc="Processing OTU Tables", unit="file"):
    subject_id = otu_file.split("_")[0]  # Extract subject ID (e.g., "F01")
    print(f"Processing SILVA taxonomy mapping for subject {subject_id}...")

    # Define paths
    input_tsv = os.path.join(final_output_dir, otu_file)
    biom_file = os.path.join(qiime_output_dir, f"{subject_id}_combined_otu.biom")
    feature_table = os.path.join(qiime_output_dir, f"{subject_id}_feature_table.qza")
    taxonomy_output = os.path.join(qiime_output_dir, f"{subject_id}_taxonomy.qza")
    taxonomy_export = os.path.join(qiime_output_dir, f"{subject_id}_taxonomy.tsv")

    # Step 1: Convert TSV to BIOM
    print(f"Converting {input_tsv} to BIOM format...")
    subprocess.run([
        "biom", "convert",
        "-i", input_tsv,               # Input TSV file
        "-o", biom_file,               # Output BIOM file
        "--table-type", "OTU table",   # Specify table type
        "--to-hdf5"                    # Use HDF5 format for BIOM
    ], check=True)

    # Step 2: Import BIOM file as a QIIME2 artifact
    print(f"Importing BIOM file {biom_file} into QIIME2...")
    subprocess.run([
        "qiime", "tools", "import",
        "--type", "FeatureTable[Frequency]",  # QIIME2 type
        "--input-path", biom_file,            # Path to the BIOM file
        "--output-path", feature_table        # Output QIIME2 artifact
    ], check=True)

    # Step 3: Classify OTUs using SILVA
    # NOTE(review): the classifier and the reads are the same for every
    # subject, so each iteration repeats an identical classification and only
    # the output file name differs. Confirm whether one shared run would do.
    print(f"Classifying OTUs for subject {subject_id} using SILVA...")
    subprocess.run([
        "qiime", "feature-classifier", "classify-sklearn",
        "--i-classifier", classifier_path,    # SILVA classifier
        "--i-reads", rep_seq_path,            # rep seq QIIME2 artifact
        "--o-classification", taxonomy_output  # Output taxonomy
    ], check=True)

    # Step 4: Export taxonomy to TSV
    print(f"Exporting taxonomy for subject {subject_id}...")
    subprocess.run([
        "qiime", "tools", "export",
        "--input-path", taxonomy_output,  # QIIME2 taxonomy artifact
        "--output-path", qiime_output_dir # Export directory
    ], check=True)

    # The export writes a fixed file name (taxonomy.tsv) into the export
    # directory. Rename it to the per-subject path so the file announced below
    # really exists and the next subject's export does not overwrite it.
    os.replace(os.path.join(qiime_output_dir, "taxonomy.tsv"), taxonomy_export)

    print(f"Taxonomy mapping for subject {subject_id} saved to {taxonomy_export}.")

In [25]:
# Summarize subject F01's feature table artifact (.qza) and save the result
# as a QIIME2 visualization (.qzv) next to it.
!qiime feature-table summarize \
    --i-table ../data/qiime_outputs/F01_feature_table.qza \
    --o-visualization ../data/qiime_outputs/F01_feature_table.qzv

[32mSaved Visualization to: ../data/qiime_outputs/F01_feature_table.qzv[0m
[0m

## Running the pipeline as a wrapped script with a command-line argument parser

In [38]:
# Run the script version of the taxonomy-mapping pipeline.
# Flag meanings below are presumed from the values passed; verify against the
# script's argument parser:
#   -i  directory holding the per-subject combined OTU tables
#   -r  representative sequences in FASTA form (rep_set.fna)
#   -c  SILVA classifier artifact
#   -o  directory that receives the QIIME2 outputs
!python ../scripts/silva_taxonomy_mapping.py \
    -i ../data/combined_meta_and_otu_outputs/ \
    -r ../data/qiime_outputs/rep_set.fna \
    -c ../data/silva-138-99-nb-classifier.qza \
    -o ../data/qiime_outputs/

Converting representative sequences to QIIME2 artifact...
[32mImported ../data/qiime_outputs/rep_set.fna as DNASequencesDirectoryFormat to ../data/qiime_outputs/rep_set.qza[0m
Processing OTU Tables:   0%|                            | 0/3 [00:00<?, ?file/s]
Processing SILVA taxonomy mapping for subject F01...
Converting ../data/combined_meta_and_otu_outputs/F01_combined_otu_qiime2.tsv to BIOM format...
Importing BIOM file ../data/qiime_outputs/F01_combined_otu.biom into QIIME2...
[32mImported ../data/qiime_outputs/F01_combined_otu.biom as BIOMV210DirFmt to ../data/qiime_outputs/F01_feature_table.qza[0m
[0mClassifying OTUs for subject F01 using SILVA...
[32mSaved FeatureData[Taxonomy] to: ../data/qiime_outputs/F01_taxonomy.qza[0m
[0mExporting taxonomy for subject F01...
[32mExported ../data/qiime_outputs/F01_taxonomy.qza as TSVTaxonomyDirectoryFormat to directory ../data/qiime_outputs/[0m
[0mTaxonomy mapping for subject F01 saved to ../data/qiime_outputs/F01_taxonomy.tsv.
Proc

In [36]:
# Convert subject F01's BIOM table to a TSV file (--to-tsv) in the same
# output directory.
!biom convert \
    -i ../data/qiime_outputs/F01_combined_otu.biom \
    -o ../data/qiime_outputs/F01_combined_otu.tsv \
    --to-tsv