In [None]:
import os

# Create the pipeline's working directories (no-op for any that already exist)
for folder in (
    "profile_o",
    "sorted_reads",
    "seq_reads",
    "summary_o",
    "classifier",
    "classifier_training_data",
):
    os.makedirs(folder, exist_ok=True)

#downstream analysis: https://www.youtube.com/watch?v=9AmM-BHYxJo

In [7]:
!pip install biopython



In [1]:
## SORT RAW SKINOME READS ##

import os
import shutil
import pandas as pd
import glob

# Define paths
current_dir = os.getcwd()
sorted_reads = os.path.join(current_dir, 'sorted_reads')
seq_reads = os.path.join(current_dir, 'seq_reads')
master_manifest = os.path.join(current_dir, 'DataFrame_2_Pruned.tsv')

# Load the master manifest; rows without a submission ID cannot be sorted
df = pd.read_csv(master_manifest, sep='\t', encoding='latin1')
df = df.dropna(subset=['Submission'])
submissions = df['Submission'].unique().tolist()

# For each supported library layout: the word used in log messages and the
# file suffixes that must all be present in seq_reads for a run to be moved.
layout_files = {
    'PAIRED': ('paired', ('_1.fastq.gz', '_2.fastq.gz')),
    'SINGLE': ('single', ('_1.fastq.gz',)),
}

# Create a separate folder for each submission, with one sub-folder per run
for sub in submissions:
    new_direc = os.path.join(sorted_reads, sub)
    os.makedirs(new_direc, exist_ok=True)
    pruned_df = df[df['Submission'] == sub]

    # The first layout listed for the submission decides how all of its runs are handled
    p_s = pruned_df['Library_Layout'].unique().tolist()[0]
    if p_s not in layout_files:
        # Unknown layout: nothing is moved for this submission
        continue
    layout_label, suffixes = layout_files[p_s]

    for run in pruned_df['Run'].tolist():
        sample_direc = os.path.join(new_direc, run)
        sample_paths = [os.path.join(seq_reads, run + suffix) for suffix in suffixes]

        # Report every missing file (forward AND reverse) and skip the run so that
        # a half-complete pair is never moved.
        missing_paths = [path for path in sample_paths if not os.path.exists(path)]
        if missing_paths:
            for path in missing_paths:
                print('file path ' + path + ' was not found for ' + layout_label + ' sample ' + run)
            continue

        os.makedirs(sample_direc, exist_ok=True)
        for path in sample_paths:
            shutil.move(path, sample_direc)

In [1]:
## Run Qiime2 ##

import os
import shutil
import pandas as pd
import glob
import subprocess
import re
import qiime2
from qiime2.plugins.dada2.methods import denoise_single, denoise_paired
from qiime2.plugins.feature_table.visualizers import summarize
from qiime2.plugins.feature_classifier.methods import classify_sklearn
from qiime2.plugins.taxa.visualizers import barplot
from Bio import SeqIO
from qiime2.plugins.feature_table.methods import relative_frequency
from Bio.Blast import NCBIWWW, NCBIXML
from qiime2 import Metadata


def qiime2_profile(sub_num, sub_path, master_manifest_path, profile_o_path, summary_o_folder, classifier_map, trim_left_f=13, trim_left_r=13, trunc_len_f=150, trunc_len_r=150, n_threads=8):
    """
    Calculates microbiome profiles from a submission of reads.

    The per-submission manifest, denoising artifacts, summary, taxonomy and
    barplot are saved under ``summary_o_folder/<sub_num>/``; the final
    per-taxon relative-frequency table is written to
    ``profile_o_path/<sub_num>-profile.csv``.

    Parameters
    ----------
    sub_num : str
        ID of the submission containing the reads.
    sub_path : str
        path to the submission folder. It must contain one sub-folder per
        sample, named after the sample id, holding that sample's read file(s).
    master_manifest_path : str
        path to the master meta data file (should be a .tsv file). Manifest must
        have the following columns: Run or id, Library_Layout, Region_16S,
        Submission
    profile_o_path : str
        path to the output folder for microbiome profile
    summary_o_folder : str
        path to the output folder for read summary tables; a sub-folder named
        after ``sub_num`` is created inside it
    classifier_map : dict
        map of 16s rRNA region to the path of the classifier built to classify
        that region
    trim_left_f : int
        number of nucleotides to trim from the 5' end of the forward read before denoising
    trim_left_r : int
        number of nucleotides to trim from the 5' end of the reverse read before denoising
    trunc_len_f : int
        specifies the length at which the forward read will be truncated
    trunc_len_r : int
        specifies the length at which the reverse read will be truncated
    n_threads : int
        number of threads passed to the DADA2 denoising step

    For single-end submissions only ``trim_left_f`` and ``trunc_len_f`` are used.

    Returns
    -------
    none

    Raises
    ------
    TypeError
        if ``sub_num`` is not a string or ``classifier_map`` is not a dict
    FileNotFoundError
        if the submission path, the master manifest or a classifier path does not exist
    ValueError
        if the pruned manifest is empty, the submission mixes layouts or 16s
        regions, the layout is not PAIRED/SINGLE, or a sample folder holds the
        wrong number of files
    KeyError
        if the submission's 16s region has no entry in ``classifier_map``
    """

    #validate inputs (done before any path is built so the error names the real problem)
    if not isinstance(sub_num, str):
        raise TypeError("sub_num must be a string")
    if not os.path.exists(sub_path):
        raise FileNotFoundError(f"{sub_num} submission path is invalid")
    if not os.path.exists(master_manifest_path):
        raise FileNotFoundError("master manifest path is invalid")
    if not isinstance(classifier_map, dict):
        raise TypeError("classifier_map must be a dict")
    for classifier_path in classifier_map.values():
        if not os.path.exists(classifier_path):
            raise FileNotFoundError(f"classifier map contains invalid paths. {classifier_path} is not a valid path")

    #define output locations
    summary_o_path = os.path.join(summary_o_folder, sub_num)
    manifest_path = os.path.join(summary_o_path, f"{sub_num}-manifest.tsv")

    #if output folders do not exist, create them (makedirs also creates summary_o_folder)
    os.makedirs(profile_o_path, exist_ok=True)
    os.makedirs(summary_o_path, exist_ok=True)

    #generate manifest
    manifest = pd.read_csv(master_manifest_path, sep='\t', encoding='latin1')

    #Rename "Run" to 'id' to support compatibility with qiime2
    if 'Run' in manifest.columns:
        manifest = manifest.rename(columns={'Run': 'id'})

    #prune manifest to this submission, then to the samples present on disk
    manifest = manifest[manifest['Submission'] == sub_num]
    sample_dirs = os.listdir(sub_path)
    len_manifest = len(manifest)
    len_reads = len(sample_dirs)
    #copy so that the path columns added below go onto an independent frame, not a slice
    manifest = manifest[manifest['id'].isin(sample_dirs)].copy()
    len_filtered_manifest = len(manifest)

    #check that names and number of samples match on manifest and in folder
    if manifest.empty:
        raise ValueError("Pruned manifest is empty. Submission number may not be in manifest or reads may be improperly named.")
    if len_manifest > len_reads:
        print("Warning: more entries in manifest than in folder")
    if len_manifest < len_reads:
        print("Warning: more entries in folder than in manifest")
    if len_manifest == len_reads and len_filtered_manifest < len_reads:
        print("Warning: mismatch between some names on manifest and in folder")

    #check that the submission does not have mixed single and paired reads
    unique_layouts = manifest['Library_Layout'].unique().tolist()
    if len(unique_layouts) > 1:
        raise ValueError("Multiple layouts detected. Submission may contain single and paired reads.")
    #string representing whether reads are paired or single
    p_s = unique_layouts[0]

    #check that the submission does not have samples with different 16s regions
    unique_16s = manifest['Region_16S'].unique().tolist()
    if len(unique_16s) > 1:
        raise ValueError("Multiple regions detected. Submission may contain reads from different 16s regions.")
    #string representing what 16s region was used
    region_16s = unique_16s[0]

    #fail before the expensive denoising step if no classifier exists for this region
    if region_16s not in classifier_map:
        raise KeyError(f"no classifier in classifier_map for 16s region {region_16s!r}")

    #add file paths to manifest
    if p_s == 'PAIRED':
        forward_paths = []
        reverse_paths = []
        for sample in manifest['id'].tolist():
            sample_path = os.path.join(sub_path, sample)
            #os.listdir order is arbitrary; sorting puts <run>_1 (forward) before <run>_2 (reverse)
            read_files = sorted(os.listdir(sample_path))
            #check for correct number of reads
            if len(read_files) != 2:
                raise ValueError(f"Incorrect number of files for {sample}. Should be 2. Got {len(read_files)}.")
            forward_paths.append(os.path.join(sample_path, read_files[0]))
            reverse_paths.append(os.path.join(sample_path, read_files[1]))
        manifest['forward-absolute-filepath'] = forward_paths
        manifest['reverse-absolute-filepath'] = reverse_paths
    elif p_s == 'SINGLE':
        single_paths = []
        for sample in manifest['id'].tolist():
            sample_path = os.path.join(sub_path, sample)
            read_files = os.listdir(sample_path)
            #check for correct number of reads
            if len(read_files) != 1:
                raise ValueError(f"incorrect number of files for {sample}. Should be 1. Got {len(read_files)}.")
            single_paths.append(os.path.join(sample_path, read_files[0]))
        manifest['absolute-filepath'] = single_paths
    else:
        raise ValueError("invalid value for library layout")

    #save the manifest
    manifest.to_csv(manifest_path, sep='\t', index=False)

    ## execute qiime2 pipeline ##

    print(f"Processing {sub_num}")

    # Execute import command (p_s is guaranteed to be PAIRED or SINGLE at this point)
    if p_s == 'PAIRED':
        demux = qiime2.Artifact.import_data(
            'SampleData[PairedEndSequencesWithQuality]',
            manifest_path,
            view_type='PairedEndFastqManifestPhred33V2'
        )
    else:
        demux = qiime2.Artifact.import_data(
            'SampleData[SequencesWithQuality]',
            manifest_path,
            view_type='SingleEndFastqManifestPhred33V2'
        )

    # Execute denoise command
    if p_s == 'PAIRED':
        denoise_results = denoise_paired(
            demultiplexed_seqs=demux,
            trim_left_f=trim_left_f,
            trim_left_r=trim_left_r,
            trunc_len_f=trunc_len_f,
            trunc_len_r=trunc_len_r,
            n_threads=n_threads
        )
    else:
        #single-end reads only have a forward read, so the forward settings are used
        denoise_results = denoise_single(
            demultiplexed_seqs=demux,
            trim_left=trim_left_f,
            trunc_len=trunc_len_f,
            n_threads=n_threads
        )

    #save the denoising outputs
    denoising_stats_output = os.path.join(summary_o_path, f"{sub_num}-denoise-stats.qza")
    denoise_results.denoising_stats.save(denoising_stats_output)
    #temporary? Keep for longer?
    denoise_results.representative_sequences.save(os.path.join(summary_o_path, f"{sub_num}-rep_seqs"))
    denoise_results.table.save(os.path.join(summary_o_path, f"{sub_num}-freq_table"))
    print(f"{sub_num} denoising complete")

    # Execute summary command (the same metadata is reused for the barplot below)
    visualization_output = os.path.join(summary_o_path, f"{sub_num}-summary")
    metadata = qiime2.Metadata.load(manifest_path)

    summary_result = summarize(
        table=denoise_results.table,
        sample_metadata=metadata
    )
    summary_result.visualization.save(visualization_output)
    print(f"{sub_num} summary table complete")

    # Execute classify command
    classifier = qiime2.Artifact.load(classifier_map[region_16s])
    taxonomy_output = os.path.join(summary_o_path, f"{sub_num}-taxonomy")

    taxonomy = classify_sklearn(
        reads=denoise_results.representative_sequences,
        classifier=classifier,
        n_jobs=-1,
    )
    taxonomy.classification.save(taxonomy_output)

    print(f"{sub_num} taxonomy classification complete")

    #Generate Relative Frequencies
    rel_freq = relative_frequency(denoise_results.table)[0]
    print(f"{sub_num} relitive frequency table complete")

    #Generate Taxonomy Bar Graph
    barplot_path = os.path.join(summary_o_path, f"{sub_num}-barplot")

    barplot_vis = barplot(
        table=denoise_results.table,
        taxonomy=taxonomy.classification,
        metadata=metadata
    )
    barplot_vis.visualization.save(barplot_path)
    print(f"{sub_num} barplot complete")

    #generate final output data
    #transpose so that rows share an index with the taxonomy table for the merge below
    rel_freq_df = rel_freq.view(pd.DataFrame).transpose()
    taxonomy_df = taxonomy.classification.view(pd.DataFrame)

    merged_df = pd.merge(rel_freq_df, taxonomy_df, left_index=True, right_index=True, how='inner')
    merged_df = merged_df.drop(columns=['Confidence'])
    #sum across multiple features that were assigned the same taxon
    merged_df = merged_df.groupby('Taxon').sum()

    #save final output data
    profile_output = os.path.join(profile_o_path, f"{sub_num}-profile.csv")
    merged_df.to_csv(profile_output, index=True)
    print(f"==================== {sub_num} processing complete ====================")


#TODO: double check sequence assignments with blast
def display_table(table):
    """Render the first rows of ``table`` and print its row and column counts."""
    preview = table.head()
    display(preview)
    n_rows, n_cols = table.shape
    for label, count in (("Number of rows:", n_rows), ("Number of columns:", n_cols)):
        print(label, count)
    


In [2]:
#Run Pipeline
#generate file paths
current_dir = os.getcwd()
classifier_folder = os.path.join(current_dir, "classifier")
master_manifest_path = os.path.join(current_dir, "DataFrame_2_Pruned.tsv")
profile_o_path = os.path.join(current_dir, "profile_o")
summary_o_path = os.path.join(current_dir, "summary_o")
sorted_reads_path = os.path.join(current_dir, "sorted_reads")

#classifiers
gg_nb = os.path.join(classifier_folder, "gg-13-8-99-nb-classifier.qza")

#only run the pipeline on unprocessed reads
#a submission is any non-hidden folder in sorted_reads; stray files are ignored
sub_nums = {
    entry for entry in os.listdir(sorted_reads_path)
    if not entry.startswith(".") and os.path.isdir(os.path.join(sorted_reads_path, entry))
}
#a submission is complete once its "<sub_num>-profile.csv" exists; other files in
#profile_o are ignored instead of having their names blindly truncated
profile_suffix = "-profile.csv"
completed = {
    name[:-len(profile_suffix)] for name in os.listdir(profile_o_path)
    if name.endswith(profile_suffix)
}
sub_nums.difference_update(completed)


#every region currently uses the same classifier
classifier_map = {
    "V4":gg_nb,
    "V1-V3":gg_nb,
    "V1-V2":gg_nb,
    "V3-V4":gg_nb,
    "V1-V2 and v4":gg_nb,
    "V4-V5":gg_nb,
    "V1-V4":gg_nb
}

#BUG: wrong set being used to join taxon df and freq df

print(f"Processing submissions {sub_nums}\n")
#sorted so that repeated runs process submissions in the same order
for sub_num in sorted(sub_nums):
    sub_path = os.path.join(sorted_reads_path, sub_num)
    try:
        qiime2_profile(sub_num, sub_path, master_manifest_path, profile_o_path, summary_o_path, classifier_map, trim_left_f=0, trim_left_r=0, trunc_len_f=0, trunc_len_r=0)
    except Exception as e:
        #best-effort batch run: report the failure and continue with the next submission
        print(f"Error processing {sub_num}: {e}")
        print(f"==================== {sub_num} processing failed ====================")

Processing submissions {'ERA1111396', 'ERA292152', 'SRA578693', 'SRA1038019', 'SRA298410', 'SRA860003', 'SRA575681', 'SRA996410'}

Processing ERA1111396
Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada_single.R /tmp/qiime2-archive-fz0pn7ls/2d82f39b-0744-4caa-8965-e42a868b27f3/data /tmp/tmp6_w64ewo/output.tsv.biom /tmp/tmp6_w64ewo/track.tsv /tmp/tmp6_w64ewo 0 0 2.0 2 Inf independent consensus 1.0 8 1000000 NULL 16

ERA1111396 denoising complete


  os.path.join(output_dir, 'sample-frequency-detail.csv'))
  os.path.join(output_dir, 'feature-frequency-detail.csv'))


ERA1111396 summary table complete
ERA1111396 taxonomy classification complete
ERA1111396 relitive frequency table complete
ERA1111396 barplot complete


Unnamed: 0_level_0,ERR2162452,ERR2162453,ERR2162454,ERR2162455,ERR2162456,ERR2162457,ERR2162458,ERR2162459,ERR2162460,ERR2162461,...,ERR2163209,ERR2163210,ERR2163211,ERR2163212,ERR2163213,ERR2163214,ERR2163215,ERR2163216,ERR2163217,ERR2163218
Taxon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Unassigned,0.0,0.000261,0.052964,0.0,0.0,0.0,0.0,0.001969,0.000192,0.0,...,0.0,0.0,0.029153,0.000133,8.1e-05,3.4e-05,0.0,0.0,0.0,0.0
k__Archaea,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
k__Archaea; p__Crenarchaeota; c__Thaumarchaeota; o__Cenarchaeales; f__Cenarchaeaceae; g__Nitrosopumilus; s__,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
k__Archaea; p__Crenarchaeota; c__Thaumarchaeota; o__Nitrososphaerales; f__Nitrososphaeraceae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
k__Archaea; p__Crenarchaeota; c__Thaumarchaeota; o__Nitrososphaerales; f__Nitrososphaeraceae; g__Candidatus Nitrososphaera; s__,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Number of rows: 2181
Number of columns: 761
Processing ERA292152
Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada_single.R /tmp/qiime2-archive-3ffhmlxu/b1dea680-b371-4f96-9a85-3c2f07afbf8f/data /tmp/tmpcgfdg1sd/output.tsv.biom /tmp/tmpcgfdg1sd/track.tsv /tmp/tmpcgfdg1sd 0 0 2.0 2 Inf independent consensus 1.0 8 1000000 NULL 16

Error processing ERA292152: An error was encountered while running DADA2 in R (return code 1), please inspect stdout and stderr to learn more.
Error processing SRA578693: Pruned manifest is empty. Submission number may not be in manifest or reads may be improperly named.
Processing SRA1038019
Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run a

  os.path.join(output_dir, 'sample-frequency-detail.csv'))
  os.path.join(output_dir, 'feature-frequency-detail.csv'))


SRA1038019 summary table complete
SRA1038019 taxonomy classification complete
SRA1038019 relitive frequency table complete
SRA1038019 barplot complete


Unnamed: 0_level_0,SRR11027332,SRR11027333,SRR11027334,SRR11027335,SRR11027336,SRR11027337,SRR11027338,SRR11027339,SRR11027340,SRR11027341,...,SRR11027344,SRR11027345,SRR11027346,SRR11027347,SRR11027348,SRR11027349,SRR11027350,SRR11027351,SRR11027352,SRR11027353
Taxon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
k__Bacteria,0.0,0.0,0.0,0.0,0.901408,0.34127,0.0,0.0,0.0,0.0,...,0.111842,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0
k__Bacteria; p__Bacteroidetes; c__Bacteroidia; o__Bacteroidales; f__Porphyromonadaceae; g__Porphyromonas; s__,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.412214,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
k__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.916667,1.0,0.0,0.0,0.0,0.0
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__Clostridiaceae; g__Clostridium; s__,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.156489,...,0.0,0.0,0.425,0.0,0.0,0.0,0.0,0.0,0.0,0.0
k__Bacteria; p__Firmicutes; c__Clostridia; o__Clostridiales; f__[Tissierellaceae]; g__Anaerococcus; s__,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.526316,0.0,0.148855,...,0.888158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Number of rows: 15
Number of columns: 22
Processing SRA298410
Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada_paired.R /tmp/tmpscm0387u/forward /tmp/tmpscm0387u/reverse /tmp/tmpscm0387u/output.tsv.biom /tmp/tmpscm0387u/track.tsv /tmp/tmpscm0387u/filt_f /tmp/tmpscm0387u/filt_r 0 0 0 0 2.0 2.0 2 independent consensus 1.0 8 1000000

Error processing SRA298410: An error was encountered while running DADA2 in R (return code -11), please inspect stdout and stderr to learn more.
Processing SRA860003
Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada_paired.R /tmp/tmprx5v08qh/forward /tmp/tmprx5v

  os.path.join(output_dir, 'sample-frequency-detail.csv'))
  os.path.join(output_dir, 'feature-frequency-detail.csv'))


SRA860003 summary table complete
SRA860003 taxonomy classification complete
SRA860003 relitive frequency table complete
SRA860003 barplot complete


Unnamed: 0_level_0,SRR8728265
Taxon,Unnamed: 1_level_1
k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Corynebacteriaceae; g__Corynebacterium; s__,0.082248
k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Dermabacteraceae; g__Brachybacterium,0.020562
k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Dietziaceae; g__Dietzia; s__,0.002443
k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Micrococcaceae,0.00285
k__Bacteria; p__Actinobacteria; c__Actinobacteria; o__Actinomycetales; f__Micrococcaceae; g__Kocuria; s__palustris,0.036849


Number of rows: 49
Number of columns: 1
Error processing SRA575681: Pruned manifest is empty. Submission number may not be in manifest or reads may be improperly named.
Processing SRA996410
Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada_single.R /tmp/qiime2-archive-uoy8c3cn/d6fc8df8-2285-46bb-a7e6-d4210da98c38/data /tmp/tmp9eye8jzr/output.tsv.biom /tmp/tmp9eye8jzr/track.tsv /tmp/tmp9eye8jzr 0 0 2.0 2 Inf independent consensus 1.0 8 1000000 NULL 16

SRA996410 denoising complete


  os.path.join(output_dir, 'sample-frequency-detail.csv'))
  os.path.join(output_dir, 'feature-frequency-detail.csv'))


SRA996410 summary table complete
SRA996410 taxonomy classification complete
SRA996410 relitive frequency table complete
SRA996410 barplot complete


Unnamed: 0_level_0,SRR10447626,SRR10447627,SRR10447628,SRR10447629,SRR10447630,SRR10447631,SRR10447632,SRR10447633,SRR10447634,SRR10447635,...,SRR10447734,SRR10447735,SRR10447736,SRR10447737,SRR10447738,SRR10447739,SRR10447740,SRR10447741,SRR10447742,SRR10447743
Taxon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
k__Archaea; p__Crenarchaeota; c__Thaumarchaeota; o__Nitrososphaerales; f__Nitrososphaeraceae; g__Candidatus Nitrososphaera; s__SCA1170,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
k__Bacteria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.006458,0.0,0.003238,0.0,0.0,0.0,0.0,0.0
k__Bacteria; p__; c__; o__; f__; g__; s__,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
k__Bacteria; p__Acidobacteria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
k__Bacteria; p__Acidobacteria; c__Acidobacteria-5; o__; f__; g__; s__,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Number of rows: 994
Number of columns: 118


In [9]:
#visualize the feature-table summary saved for one submission
from qiime2 import Visualization
from qiime2 import Artifact
from qiime2.plugins import demux, dada2, feature_classifier, feature_table
from qiime2 import Metadata
import os
from qiime2.plugins.taxa.visualizers import barplot

# Submission whose summary is shown; the file is looked up under summary_o/<sub_num>/
sub_num = "ERA1522711"
current_dir = os.getcwd()
summary_path = os.path.join(current_dir, "summary_o", sub_num, f"{sub_num}-summary.qzv")

# Load the summary visualization
summary_viz = Visualization.load(summary_path)

# Display the summary (the cell's last expression is shown as its output)
summary_viz


In [8]:
#visualize the taxonomy bar plot saved for one submission
from qiime2 import Visualization
from qiime2 import Artifact
from qiime2.plugins import demux, dada2, feature_classifier, feature_table
from qiime2 import Metadata
import os
from qiime2.plugins.taxa.visualizers import barplot

# Submission whose barplot is shown; the file is looked up under summary_o/<sub_num>/
sub_num = "ERA1522711"
current_dir = os.getcwd()
barplot_path = os.path.join(current_dir, "summary_o", sub_num, f"{sub_num}-barplot.qzv")

# Load the barplot visualization
barplot_viz = Visualization.load(barplot_path)

# Display the barplot (the cell's last expression is shown as its output)
barplot_viz



Licensing and Citations:
QIIME2 overall:


QIIME2 plugins:


classifiers:

Silva:
Michael S Robeson II, Devon R O’Rourke, Benjamin D Kaehler, Michal Ziemski, Matthew R Dillon, Jeffrey T Foster, Nicholas A Bokulich. RESCRIPt: Reproducible sequence taxonomy reference database management for the masses. bioRxiv 2020.10.05.326504; doi: https://doi.org/10.1101/2020.10.05.326504

See the SILVA website for the latest citation information for SILVA. https://www.arb-silva.de/

GTDB:
UUID: 5d5461cc-6a51-434b-90ab-040f388e4221
SHA256: 07aadcf7472d9cc6f853f6b4615348619f1a3eceb56c1fb1b6d8dbb20554765f
Sklearn Version: 1.4.2
Date Trained: 2024-05-30
Citations: Parks et al. (2021), Parks et al. (2020), Parks et al. (2018), Rinke et al. (2021)