In [1]:
import pkg_resources

from qiime2.plugins import ArtifactAPIUsage
from q2cli.core.usage import CLIUsage

from qiime2.sdk import PluginManager
plugin_manager = PluginManager()

In [2]:
local = %pwd
def get_test_data_path(filename):
    return f'{local}/{filename}'

In [37]:
def emp_seqs_from_disk_factory():
    """
    Returns an EMPSingleEndDirFmt object, by building FastqGzFormat objects for barcodes and seqs
    Generally speaking, we must first make a QIIME 2 format object that represents the data, and return that
    
    In order to handle imports, we'll need:
    - the import format
    - its parent package
    - the FileFormats that comprise it if any
    - their parent package
    - data getters to handle
      - data on disk (fp)
      - remote data (urllib.request)
      - primitives (qiime2.plugin.util.transform(myprimitive, to_type=ItsFormat)?)
    
    """
    import os
    import tempfile
        
    from q2_demux._format import EMPSingleEndDirFmt
    from q2_types.per_sample_sequences import FastqGzFormat
    
    dir_fp = get_test_data_path('emp-single-end-sequences/')
    bc_fp = dir_fp + 'barcodes.fastq.gz'
    seqs_fp = dir_fp + 'sequences.fastq.gz'
    
    print(bc_fp)
    print(seqs_fp)
    
    fmt = EMPSingleEndDirFmt(mode='w')
    fmt.barcodes.write_data(bc_fp, FastqGzFormat)
    fmt.sequences.write_data(seqs_fp, FastqGzFormat)
    
    fmt.validate()
    return fmt

def min_emp_seqs_from_disk_factory():
    """
    MINIMALLY:
    Returns an EMPSingleEndDirFmt object. 
    In order to "handle" imports minimally, we need only make a QIIME 2 format object and return that
    """
    from q2_demux._format import EMPSingleEndDirFmt
    return EMPSingleEndDirFmt(mode='w')

def crappy_emp_seqs_from_disk_factory():
    return EMPSingleEndDirFmt(mode='w')

def empty_emp_seqs_from_disk_factory():
    """
    It looks like we can actually get away with this where we need factories, without sacrificing render-ability.
    As such, it may be more readable to use an empty lambda so an xref isn't necessary.
    """
    return

emp_seqs_from_disk_factory()

/home/chris/src/provenance_py/provenance_lib/notebooks/emp-single-end-sequences/barcodes.fastq.gz
/home/chris/src/provenance_py/provenance_lib/notebooks/emp-single-end-sequences/sequences.fastq.gz


<q2_demux._format.EMPSingleEndDirFmt at 0x7fb6251ca400>

In [20]:
# Factory to get sample metadata
def md_factory():
    from urllib import request
    from qiime2 import Metadata
    fp, _ = request.urlretrieve('https://data.qiime2.org/2022.2/tutorials/moving-pictures/sample_metadata.tsv')
    return Metadata.load(fp)


In [25]:
## Once that's done, we can import the data into QIIME 2, assigning it a semantic type (e.g EMPSingleEndSequences)
uses = [CLIUsage(), ArtifactAPIUsage()]

for use in uses:
    raw_emp_se_seqs = use.init_format('raw_emp_se_seqs', emp_seqs_from_disk_factory, ext='.fastq.gz')
    print(raw_emp_se_seqs)
    seqs = use.import_from_format('emp_se_seqs', semantic_type = 'EMPSingleEndSequences', fmt_variable=raw_emp_se_seqs,
                                  view_type='EMPSingleEndDirFmt')

    sample_metadata = use.init_metadata('sample_metadata', md_factory)  
    
    barcode_sequence = use.get_metadata_column(
       'barcode_sequence', 'barcode-sequence', sample_metadata)

    
    demux, demux_details = use.action(
       use.UsageAction(plugin_id='demux', action_id='emp_single'),
       use.UsageInputs(
           seqs=seqs,
           barcodes=barcode_sequence,),
       use.UsageOutputNames(
           per_sample_sequences='demux',
           error_correction_details='demux_details',),
     )
    
    print(use.render(), "\n")

<CLIUsageVariable name='raw_emp_se_seqs..fastq.gz', var_type='format'>
qiime tools import \
  --type 'EMPSingleEndSequences' \
  --input-format EMPSingleEndDirFmt \
  --input-path raw_emp_se_seqs..fastq.gz \
  --output-path emp-se-seqs.qza
qiime demux emp-single \
  --i-seqs emp-se-seqs.qza \
  --m-barcodes-file sample-metadata.tsv \
  --m-barcodes-column barcode-sequence \
  --o-per-sample-sequences demux.qza \
  --o-error-correction-details demux-details.qza 

<ArtifactAPIUsageVariable name='raw_emp_se_seqs..fastq.gz', var_type='format'>
from qiime2 import Artifact
import qiime2.plugins.demux.actions as demux_actions

emp_se_seqs = Artifact.import_data(
    'EMPSingleEndSequences',
    'raw_emp_se_seqs..fastq.gz',
    'EMPSingleEndDirFmt',
)
barcode_sequence_mdc = sample_metadata_md.get_column('barcode-sequence')
demux, demux_details = demux_actions.emp_single(
    seqs=emp_se_seqs,
    barcodes=barcode_sequence_mdc,
) 



In [39]:
## Minimal version of the above
uses = [CLIUsage(), ArtifactAPIUsage()]

for use in uses:
    raw_emp_se_seqs = use.init_format('raw_emp_se_seqs', lambda: None, ext='.fastq.gz')
    seqs = use.import_from_format('emp_se_seqs', semantic_type = 'EMPSingleEndSequences', fmt_variable=raw_emp_se_seqs,
                                  view_type='EMPSingleEndDirFmt')

    sample_metadata = use.init_metadata('sample_metadata', lambda: None)  
    
    barcode_sequence = use.get_metadata_column(
       'barcode_sequence', 'barcode-sequence', sample_metadata)
    
    demux, demux_details = use.action(
       use.UsageAction(plugin_id='demux', action_id='emp_single'),
       use.UsageInputs(
           seqs=seqs,
           barcodes=barcode_sequence,),
       use.UsageOutputNames(
           per_sample_sequences='demux',
           error_correction_details='demux_details',),
     )
    
    print(use.render(), "\n")

qiime tools import \
  --type 'EMPSingleEndSequences' \
  --input-format EMPSingleEndDirFmt \
  --input-path raw_emp_se_seqs..fastq.gz \
  --output-path emp-se-seqs.qza
qiime demux emp-single \
  --i-seqs emp-se-seqs.qza \
  --m-barcodes-file sample-metadata.tsv \
  --m-barcodes-column barcode-sequence \
  --o-per-sample-sequences demux.qza \
  --o-error-correction-details demux-details.qza 

from qiime2 import Artifact
import qiime2.plugins.demux.actions as demux_actions

emp_se_seqs = Artifact.import_data(
    'EMPSingleEndSequences',
    'raw_emp_se_seqs..fastq.gz',
    'EMPSingleEndDirFmt',
)
barcode_sequence_mdc = sample_metadata_md.get_column('barcode-sequence')
demux, demux_details = demux_actions.emp_single(
    seqs=emp_se_seqs,
    barcodes=barcode_sequence_mdc,
) 

