In [2]:
from qiime2 import Artifact
from qiime2 import Metadata
import qiime2.plugins.dada2.actions as dada2_actions
import qiime2.plugins.demux.actions as demux_actions
import qiime2.plugins.feature_classifier.actions as feature_classifier_actions
import qiime2.plugins.feature_table.actions as feature_table_actions
import qiime2.plugins.longitudinal.actions as longitudinal_actions
import qiime2.plugins.taxa.actions as taxa_actions

In [3]:
from urllib import request

# Fetch the tutorial's sample-metadata TSV into the working directory, then
# load that local copy as a Metadata object for the cells that follow.
url = 'https://docs.qiime2.org/2021.11/data/tutorials/moving-pictures-usage/sample-metadata.tsv'
fn = 'sample-metadata.tsv'
request.urlretrieve(url=url, filename=fn)
sample_metadata_md = Metadata.load(fn)

In [4]:
import zipfile

# Download the zipped sequence bundle, then unpack every member into a
# sibling directory named after the archive (without the .zip suffix).
url = 'https://docs.qiime2.org/2021.11/data/tutorials/moving-pictures-usage/emp-single-end-sequences.zip'
fn = 'emp-single-end-sequences.zip'
request.urlretrieve(url=url, filename=fn)
with zipfile.ZipFile(file=fn) as zf:
    zf.extractall(path='emp-single-end-sequences')

In [5]:
# Import the unpacked 'emp-single-end-sequences' directory as an artifact of
# type 'EMPSingleEndSequences'.
per_sample_sequences_0 = Artifact.import_data(
    type='EMPSingleEndSequences',
    view='emp-single-end-sequences',
)

In [6]:
# Run demux's emp_single on the imported sequences, using the
# 'barcode-sequence' metadata column as the barcodes.
# NOTE(review): the first result is bound back to `per_sample_sequences_0`,
# the same name that is passed in as `seqs`, so re-running this cell on its
# own feeds the previous output back in as the input.
barcodes_sequence_mdc = sample_metadata_md.get_column('barcode-sequence')
emp_single_options = dict(
    golay_error_correction=True,
    rev_comp_barcodes=False,
    rev_comp_mapping_barcodes=True,
)
per_sample_sequences_0, _ = demux_actions.emp_single(
    seqs=per_sample_sequences_0,
    barcodes=barcodes_sequence_mdc,
    **emp_single_options,
)
# Run dada2's denoise_single on the sequences produced above. Every option is
# spelled out explicitly; the third result is discarded.
denoise_single_options = dict(
    trunc_len=120,
    trim_left=0,
    max_ee=2.0,
    trunc_q=2,
    pooling_method='independent',
    chimera_method='consensus',
    min_fold_parent_over_abundance=1.0,
    n_threads=0,
    n_reads_learn=1000000,
    hashed_feature_ids=True,
)
table_0, representative_sequences_0, _ = dada2_actions.denoise_single(
    demultiplexed_seqs=per_sample_sequences_0,
    **denoise_single_options,
)

Running external command line application(s). This may print messages to stdout and/or stderr.
The command(s) being run are below. These commands cannot be manually re-run as they will depend on temporary files that no longer exist.

Command: run_dada_single.R /tmp/qiime2-archive-yfblax17/a934768e-91cb-4092-a7ff-b0ee724d1289/data /tmp/tmp39vi1zn3/output.tsv.biom /tmp/tmp39vi1zn3/track.tsv /tmp/tmp39vi1zn3 120 0 2.0 2 Inf independent consensus 1.0 0 1000000 NULL 16

R version 4.1.1 (2021-08-10) 


Loading required package: Rcpp


DADA2: 1.22.0 / Rcpp: 1.0.7 / RcppParallel: 5.1.4 
1) Filtering .
2) Learning Error Rates
3960 total bases in 33 reads from 1 samples will be used for learning the error rates.
3) Denoise samples .
4) Remove chimeras (method = consensus)
5) Report read numbers through the pipeline
6) Write output


In [7]:
import pandas as pd

# Convert the sample metadata to a DataFrame; the bare name on the last line
# is what the notebook renders as this cell's output.
sample_metadata_df = sample_metadata_md.to_dataframe()
sample_metadata_df

Unnamed: 0_level_0,barcode-sequence,body-site,year,month,day,subject,reported-antibiotic-usage,days-since-experiment-start
sample-id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
L1S8,AGCTGACTAGTC,gut,2008.0,10.0,28.0,subject-1,Yes,0.0
L1S57,ACACACTATGGC,gut,2009.0,1.0,20.0,subject-1,No,84.0
L1S76,ACTACGTGTGGT,gut,2009.0,2.0,17.0,subject-1,No,112.0
L1S105,AGTGCGATGCGT,gut,2009.0,3.0,17.0,subject-1,No,140.0
L2S155,ACGATGCGACCA,left palm,2009.0,1.0,20.0,subject-1,No,84.0
L2S175,AGCTATCCACGA,left palm,2009.0,2.0,17.0,subject-1,No,112.0
L2S204,ATGCAGCTCAGT,left palm,2009.0,3.0,17.0,subject-1,No,140.0
L2S222,CACGTGACATGT,left palm,2009.0,4.0,14.0,subject-1,No,168.0
L3S242,ACAGTTGCGCGA,right palm,2008.0,10.0,28.0,subject-1,Yes,0.0
L3S294,CACGACAGGCTA,right palm,2009.0,1.0,20.0,subject-1,No,84.0


In [8]:
# Pass 1: frequency-based filter on the denoised table (no metadata involved).
(filtered_table_0,) = feature_table_actions.filter_samples(
    table=table_0,
    min_frequency=1000,
    min_features=0,
    exclude_ids=False,
    filter_empty_features=True,
)

# Passes 2 and 3 differ only in their input table and `where` clause, so the
# remaining keyword arguments are shared. Both run with exclude_ids=True
# (per the filter_samples documentation, this excludes the samples that
# `where` selects rather than retaining them).
metadata_filter_options = dict(
    min_frequency=0,
    min_features=0,
    metadata=sample_metadata_md,
    exclude_ids=True,
    filter_empty_features=True,
)

# Pass 2: filter on the "body-site" column.
(filtered_table_1,) = feature_table_actions.filter_samples(
    table=filtered_table_0,
    where='"body-site"="gut"',
    **metadata_filter_options,
)

# Pass 3: filter on the "subject" column.
(filtered_table_2,) = feature_table_actions.filter_samples(
    table=filtered_table_1,
    where='"subject"="subject-2"',
    **metadata_filter_options,
)

In [9]:
# Write the final filtered table to disk; the value returned by save() is the
# cell's displayed output.
# NOTE(review): the destination is an absolute, machine-specific path.
output_fp = '/home/chris/src/provenance_py/provenance_lib/tests/data/ns_collisions2.qza'
filtered_table_2.save(output_fp)

'/home/chris/src/provenance_py/provenance_lib/tests/data/ns_collisions2.qza'