In [1]:
import zipfile
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Paths to QIIME 2 artifacts
taxonomy_qza = "taxonomy.qza"
feature_table_qza = "dada2_table.qza"
extract_dir_tax = "taxonomy_extracted"
extract_dir_table = "table_extracted"

# Extract taxonomy.qza
with zipfile.ZipFile(taxonomy_qza, 'r') as zip_ref:
    zip_ref.extractall(extract_dir_tax)

# Extract feature-table.qza
with zipfile.ZipFile(feature_table_qza, 'r') as zip_ref:
    zip_ref.extractall(extract_dir_table)

# Load taxonomy.tsv
taxonomy_fp = os.path.join(extract_dir_tax, "data", "taxonomy.tsv")
taxonomy_df = pd.read_csv(taxonomy_fp, sep='\t')

# Load feature table biom (converting to TSV first)
biom_fp = os.path.join(extract_dir_table, "data", "feature-table.tsv")
feature_table_df = pd.read_csv(biom_fp, sep='\t', skiprows=1, index_col=0)

# Parse taxonomy strings
def split_taxonomy(tax_str):
    levels = tax_str.split(';')
    return {level[:2]: level[3:] if len(level) > 3 else 'Unassigned' for level in levels}

taxonomy_split_df = taxonomy_df['Taxon'].apply(split_taxonomy).apply(pd.Series)
taxonomy_split_df['Feature ID'] = taxonomy_df['Feature ID']
taxonomy_split_df.set_index('Feature ID', inplace=True)

# Merge feature table with taxonomy
merged_df = feature_table_df.T.merge(taxonomy_split_df, left_index=True, right_index=True, how='left')

# Function to plot stacked bar chart
def plot_stacked_bar(df, level, top_n=10):
    level_df = df.groupby(level).sum()
    level_df = level_df.div(level_df.sum(axis=0), axis=1)  # Convert to relative abundance
    top_taxa = level_df.sum(axis=1).nlargest(top_n).index  # Get top taxa
    level_df = level_df.loc[top_taxa]
    
    level_df.T.plot(kind='bar', stacked=True, figsize=(12, 6), colormap='tab20')
    plt.title(f"Relative Abundance at {level} Level")
    plt.ylabel("Relative Abundance")
    plt.xlabel("Samples")
    plt.legend(title=level, bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()

# Plot stacked bar charts for each taxonomic level
for lvl in ['k', 'p', 'c', 'o', 'f', 'g']:  # Kingdom to Genus
    plot_stacked_bar(merged_df, lvl)


FileNotFoundError: [Errno 2] No such file or directory: 'taxonomy_extracted/data/taxonomy.tsv'

In [3]:
import zipfile
import os

# Paths
taxonomy_qza = "taxonomy.qza"
feature_table_qza = "dada2_table.qza"

# Function to list contents
def list_qza_contents(qza_path):
    with zipfile.ZipFile(qza_path, 'r') as zip_ref:
        file_list = zip_ref.namelist()
    return file_list

# List contents
taxonomy_files = list_qza_contents(taxonomy_qza)
feature_table_files = list_qza_contents(feature_table_qza)

print("Files in taxonomy.qza:\n", taxonomy_files)
print("\nFiles in feature-table.qza:\n", feature_table_files)


Files in taxonomy.qza:
 ['32001b3e-f4d7-4f22-bfe8-57021eac6ee1/VERSION', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/checksums.md5', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/metadata.yaml', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/VERSION', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/citations.bib', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/metadata.yaml', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/artifacts/3d95f079-b580-4bce-8c28-8b8a437be0d6/VERSION', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/artifacts/3d95f079-b580-4bce-8c28-8b8a437be0d6/citations.bib', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/artifacts/3d95f079-b580-4bce-8c28-8b8a437be0d6/metadata.yaml', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/artifacts/3d95f079-b580-4bce-8c28-8b8a437be0d6/action/action.yaml', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/artifacts/9fb9a8c4-703f-4578-bae4-6d546b2b9eae/VERSION', '32001b3e-f4d7-4f22-bfe8-57021eac6ee1/provenance/artifacts/9fb9a8c4-703f-4578