diff --git a/README.md b/README.md index c51fcd0c..594e11aa 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ git clone https://github.com/biocore/mg-scripts.git Create a Python3 Conda environment in which to run the notebook: ```bash -conda create -n sp_pipeline 'python==3.9' numpy pandas click scipy matplotlib fastq-pair +conda create --yes -n spp python='python=3.9' scikit-learn pandas numpy nose pep8 flake8 matplotlib jupyter notebook 'seaborn>=0.7.1' pip openpyxl 'seqtk>=1.4' click scipy fastq-pair ``` Activate the Conda environment: @@ -62,3 +62,9 @@ Please note that the setting 'minimap2_databases' is expected to be a list of pa For NuQCJob, minimap2_databases is expected to be the path to a directory containing two subdirectories: 'metagenomic' and 'metatranscriptomic'. Each directory should contain or symlink to the appropriate .mmi files needed for that Assay type. + +Additional TellSeq-related notes: +'spades-cloudspades-0.1', 'tellread-release-novaseqX' or similar directories must be placed in a location available to SPP. +Their paths should be made known to SPP in the configuration files. (See examples for details). +Additional scripts found in sequence_processing_pipeline/contrib were contributed by Daniel and Omar and can be similarly located and configured. + diff --git a/sequence_processing_pipeline/Commands.py b/sequence_processing_pipeline/Commands.py index b2cd5e41..ae971fc9 100644 --- a/sequence_processing_pipeline/Commands.py +++ b/sequence_processing_pipeline/Commands.py @@ -22,8 +22,14 @@ def split_similar_size_bins(data_location_path, max_file_list_size_in_gb, # is now the following: # add one more level to account for project_names nested under ConvertJob # dir. + # this will ignore the _I1_ reads that appear in the integrated result. fastq_paths = glob.glob(data_location_path + '*/*/*.fastq.gz') + # case-specific filter for TellSeq output directories that also contain + # _I1_ files. Ensure paths are still sorted afterwards. + fastq_paths = [x for x in fastq_paths if '_I1_001.fastq.gz' not in x] + fastq_paths = sorted(fastq_paths) + # convert from GB and halve as we sum R1 max_size = (int(max_file_list_size_in_gb) * (2 ** 30) / 2) diff --git a/sequence_processing_pipeline/ConvertJob.py b/sequence_processing_pipeline/ConvertJob.py index 122a4987..dc9b36aa 100644 --- a/sequence_processing_pipeline/ConvertJob.py +++ b/sequence_processing_pipeline/ConvertJob.py @@ -156,7 +156,13 @@ def run(self, callback=None): exec_from=self.log_path, callback=callback) - self.copy_controls_between_projects() + # ConvertJob() is used to process Amplicon as well as Meta*Omic + # runs. Amplicon runs use a dummy sample-sheet generated by + # Pipeline(). For these types of sheets we can't copy controls + # between projects because demuxing is not performed here. + _, sheet_name = split(self.sample_sheet_path) + if sheet_name != 'dummy_sample_sheet.csv': + self.copy_controls_between_projects() except JobFailedError as e: # When a job has failed, parse the logs generated by this specific @@ -169,6 +175,7 @@ def run(self, callback=None): logging.info(f'Successful job: {job_info}') def parse_logs(self): + # overrides Job.parse_logs() w/tailored parse for specific logs. 
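# ConvertJob keeps its logs under a capitalized 'Logs' directory with a
# dedicated 'Errors.log', rather than the lowercase 'logs/*' files scanned
# by the default Job.parse_logs() further down.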
log_path = join(self.output_path, 'Logs') errors = join(log_path, 'Errors.log') diff --git a/sequence_processing_pipeline/FastQCJob.py b/sequence_processing_pipeline/FastQCJob.py index 5e0bf4fc..8db0440b 100644 --- a/sequence_processing_pipeline/FastQCJob.py +++ b/sequence_processing_pipeline/FastQCJob.py @@ -6,7 +6,6 @@ from functools import partial from json import dumps import logging -import glob class FastQCJob(Job): @@ -305,19 +304,3 @@ def _generate_job_script(self): with open(sh_details_fp, 'w') as f: f.write('\n'.join(self.commands)) - - def parse_logs(self): - log_path = join(self.output_path, 'logs') - files = sorted(glob.glob(join(log_path, '*.out'))) - msgs = [] - - for some_file in files: - with open(some_file, 'r') as f: - msgs += [line for line in f.readlines() - # note 'error' is not same - # requirement as found in QCJob. - # ('error:'). This is a very - # generalized filter. - if 'error' in line.lower()] - - return [msg.strip() for msg in msgs] diff --git a/sequence_processing_pipeline/Job.py b/sequence_processing_pipeline/Job.py index b650543e..4121bd7f 100644 --- a/sequence_processing_pipeline/Job.py +++ b/sequence_processing_pipeline/Job.py @@ -1,3 +1,6 @@ +from jinja2 import BaseLoader, TemplateNotFound +from os.path import getmtime +import pathlib from itertools import zip_longest from os import makedirs, walk from os.path import basename, exists, split, join @@ -9,6 +12,26 @@ import logging from inspect import stack import re +from collections import Counter +from glob import glob + + +# taken from https://jinja.palletsprojects.com/en/3.0.x/api/#jinja2.BaseLoader +class KISSLoader(BaseLoader): + def __init__(self, path): + # pin the path for loader to the location sequence_processing_pipeline + # (the location of this file), along w/the relative path to the + # templates directory. + self.path = join(pathlib.Path(__file__).parent.resolve(), path) + + def get_source(self, environment, template): + path = join(self.path, template) + if not exists(path): + raise TemplateNotFound(template) + mtime = getmtime(path) + with open(path) as f: + source = f.read() + return source, path, lambda: mtime == getmtime(path) class Job: @@ -32,6 +55,7 @@ class Job: slurm_status_running) polling_interval_in_seconds = 60 + squeue_retry_in_seconds = 10 def __init__(self, root_dir, output_path, job_name, executable_paths, max_array_length, modules_to_load=None): @@ -103,7 +127,17 @@ def run(self): raise PipelineError("Base class run() method not implemented.") def parse_logs(self): - raise PipelineError("Base class parse_logs() method not implemented.") + # by default, look for anything to parse in the logs directory. + log_path = join(self.output_path, 'logs') + files = sorted(glob(join(log_path, '*'))) + msgs = [] + + for some_file in files: + with open(some_file, 'r') as f: + msgs += [line for line in f.readlines() + if 'error:' in line.lower()] + + return [msg.strip() for msg in msgs] def _which(self, file_path, modules_to_load=None): """ @@ -212,6 +246,41 @@ def _system_call(self, cmd, allow_return_codes=[], callback=None): return {'stdout': stdout, 'stderr': stderr, 'return_code': return_code} + def _query_slurm(self, job_ids): + # query_slurm encapsulates the handling of squeue. + count = 0 + while True: + result = self._system_call("squeue -t all -j " + f"{','.join(job_ids)} " + "-o '%i,%T'") + + if result['return_code'] == 0: + # there was no issue w/squeue, break this loop and + # continue. + break + else: + # there was likely an intermittent issue w/squeue. 
Pause + # and wait before trying a few more times. If the problem + # persists then report the error and exit. + count += 1 + + if count > 3: + raise ExecFailedError(result['stderr']) + + sleep(Job.squeue_retry_in_seconds) + + lines = result['stdout'].split('\n') + lines.pop(0) # remove header + lines = [x.split(',') for x in lines if x != ''] + + jobs = {} + for job_id, state in lines: + # ensure unique_id is of type string for downstream use. + job_id = str(job_id) + jobs[job_id] = state + + return jobs + def wait_on_job_ids(self, job_ids, callback=None): ''' Wait for the given job-ids to finish running before returning. @@ -229,63 +298,27 @@ def wait_on_job_ids(self, job_ids, callback=None): # ensure all ids are strings to ensure proper working w/join(). job_ids = [str(x) for x in job_ids] - def query_slurm(job_ids): - # internal function query_slurm encapsulates the handling of - # squeue. - count = 0 - while True: - result = self._system_call("squeue -t all -j " - f"{','.join(job_ids)} " - "-o '%F,%A,%T'") - - if result['return_code'] == 0: - # there was no issue w/squeue, break this loop and - # continue. - break - else: - # there was a likely intermittent issue w/squeue. Pause - # and wait before trying a few more times. If the problem - # persists then report the error and exit. - count += 1 - - if count > 3: - raise ExecFailedError(result['stderr']) - - sleep(60) - - lines = result['stdout'].split('\n') - lines.pop(0) # remove header - lines = [x.split(',') for x in lines if x != ''] - - jobs = {} - child_jobs = {} - for job_id, unique_id, state in lines: - jobs[unique_id] = state - - if unique_id != job_id: - child_jobs[unique_id] = job_id # job is a child job - - return jobs, child_jobs - while True: - jobs, child_jobs = query_slurm(job_ids) - - for jid in job_ids: - logging.debug("JOB %s: %s" % (jid, jobs[jid])) - if callback is not None: - callback(jid=jid, status=jobs[jid]) - - children = [x for x in child_jobs if child_jobs[x] == jid] - if len(children) == 0: - logging.debug("\tNO CHILDREN") - for cid in children: - logging.debug("\tCHILD JOB %s: %s" % (cid, jobs[cid])) - status = [jobs[x] in Job.slurm_status_not_running for x in job_ids] - - if set(status) == {True}: - # all jobs either completed successfully or terminated. + # Because query_slurm only returns state on the job-ids we specify, + # the wait process is a simple check to see whether any of the + # states are 'running' states or not. + jobs = self._query_slurm(job_ids) + + # jobs will be a dict of job-ids or array-ids for jobs that + # are array-jobs. the value of jobs[id] will be a state e.g.: + # 'RUNNING', 'FAILED', 'COMPLETED'. + states = [jobs[x] in Job.slurm_status_not_running for x in jobs] + + if set(states) == {True}: + # if all the states are either FAILED or COMPLETED + # then the set of those states no matter how many + # array-jobs there were will ultimately be the set of + # {True}. If not then that means there are still jobs + # that are running. break + logging.debug(f"sleeping {Job.polling_interval_in_seconds} " + "seconds...") sleep(Job.polling_interval_in_seconds) return jobs @@ -345,16 +378,48 @@ def submit_job(self, script_path, job_parameters=None, # to the user. 
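A minimal standalone sketch of the squeue parsing and state tally described here (the sample stdout is assumed, not taken from a real run):

```python
from collections import Counter

# assumed sample stdout from: squeue -t all -j 1234567 -o '%i,%T'
stdout = ("JOBID,STATE\n"
          "1234567_1,COMPLETED\n"
          "1234567_2,COMPLETED\n"
          "1234567_3,FAILED\n")

lines = stdout.split('\n')
lines.pop(0)  # remove header
lines = [x.split(',') for x in lines if x != '']
jobs = {str(job_id): state for job_id, state in lines}

# array jobs are summarized as a tally of terminal states rather than a
# single state string.
counts = Counter(jobs.values())
print(dict(counts))  # {'COMPLETED': 2, 'FAILED': 1}
```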
results = self.wait_on_job_ids([job_id], callback=callback) - job_result = {'job_id': job_id, 'job_state': results[job_id]} + if job_id in results: + # job is a non-array job + job_result = {'job_id': job_id, 'job_state': results[job_id]} + else: + # job is an array job + # assume all array jobs in this case will be associated w/job_id. + counts = Counter() + for array_id in results: + counts[results[array_id]] += 1 + + # for array jobs we won't be returning a string representing the + # state of a single job. Instead we're returning a dictionary of + # the number of unique states the set of array-jobs ended up in and + # the number for each one. + job_result = {'job_id': job_id, 'job_state': dict(counts)} if callback is not None: - callback(jid=job_id, status=job_result['job_state']) + if isinstance(job_result['job_state'], dict): + # this is an array job + states = [] + for key in counts: + states.append(f"{key}: {counts[key]}") - if job_result['job_state'] == 'COMPLETED': - return job_result + callback(jid=job_id, status=", ".join(states)) + + else: + # this is a standard job + callback(jid=job_id, status=job_result['job_state']) + + if isinstance(job_result['job_state'], dict): + states = list(job_result['job_state'].keys()) + if states == ['COMPLETED']: + return job_result + else: + raise JobFailedError(f"job {job_id} exited with jobs in the " + f"following states: {', '.join(states)}") else: - raise JobFailedError(f"job {job_id} exited with status " - f"{job_result['job_state']}") + if job_result['job_state'] == 'COMPLETED': + return job_result + else: + raise JobFailedError(f"job {job_id} exited with status " + f"{job_result['job_state']}") def _group_commands(self, cmds): # break list of commands into chunks of max_array_length (Typically diff --git a/sequence_processing_pipeline/NuQCJob.py b/sequence_processing_pipeline/NuQCJob.py index b2ffd3a9..0e05b41d 100644 --- a/sequence_processing_pipeline/NuQCJob.py +++ b/sequence_processing_pipeline/NuQCJob.py @@ -1,8 +1,7 @@ -from jinja2 import BaseLoader, TemplateNotFound from metapool import load_sample_sheet from os import stat, makedirs, rename -from os.path import join, basename, dirname, exists, abspath, getmtime -from sequence_processing_pipeline.Job import Job +from os.path import join, basename, dirname, exists, abspath, split +from sequence_processing_pipeline.Job import Job, KISSLoader from sequence_processing_pipeline.PipelineError import (PipelineError, JobFailedError) from sequence_processing_pipeline.Pipeline import Pipeline @@ -11,28 +10,9 @@ from sequence_processing_pipeline.Commands import split_similar_size_bins from sequence_processing_pipeline.util import iter_paired_files from jinja2 import Environment -import glob +from glob import glob import re from sys import executable -import pathlib - - -# taken from https://jinja.palletsprojects.com/en/3.0.x/api/#jinja2.BaseLoader -class KISSLoader(BaseLoader): - def __init__(self, path): - # pin the path for loader to the location sequence_processing_pipeline - # (the location of this file), along w/the relative path to the - # templates directory. 
- self.path = join(pathlib.Path(__file__).parent.resolve(), path) - - def get_source(self, environment, template): - path = join(self.path, template) - if not exists(path): - raise TemplateNotFound(template) - mtime = getmtime(path) - with open(path) as f: - source = f.read() - return source, path, lambda: mtime == getmtime(path) logging.basicConfig(level=logging.DEBUG) @@ -128,6 +108,9 @@ def __init__(self, fastq_root_dir, output_path, sample_sheet_path, self.minimum_bytes = 3100 self.fastq_regex = re.compile(r'^(.*)_S\d{1,4}_L\d{3}_R\d_\d{3}' r'\.fastq\.gz$') + self.interleave_fastq_regex = re.compile(r'^(.*)_S\d{1,4}_L\d{3}_R\d' + r'_\d{3}\.interleave\.fastq' + r'\.gz$') self.html_regex = re.compile(r'^(.*)_S\d{1,4}_L\d{3}_R\d_\d{3}\.html$') self.json_regex = re.compile(r'^(.*)_S\d{1,4}_L\d{3}_R\d_\d{3}\.json$') @@ -167,7 +150,7 @@ def _filter_empty_fastq_files(self, filtered_directory, ''' empty_list = [] - files = glob.glob(join(filtered_directory, f'*.{self.suffix}')) + files = glob(join(filtered_directory, f'*.{self.suffix}')) for r1, r2 in iter_paired_files(files): full_path = join(filtered_directory, r1) @@ -194,7 +177,7 @@ def _move_helper(self, completed_files, regex, samples_in_project, dst): substr = regex.search(file_name) if substr is None: raise ValueError(f"{file_name} does not follow naming " - " pattern.") + "pattern.") else: # check if found substring is a member of this # project. Note sample-name != sample-id @@ -214,8 +197,7 @@ def _move_helper(self, completed_files, regex, samples_in_project, dst): for fp in files_to_move: move(fp, dst) - @staticmethod - def _move_trimmed_files(project_name, output_path): + def _move_trimmed_files(self, project_name, output_path): ''' Given output_path, move all fastqs to a new subdir named project_name. :param project_name: The name of the new folder to be created. @@ -229,8 +211,16 @@ def _move_trimmed_files(project_name, output_path): # this directory shouldn't already exist. makedirs(join(output_path, project_name), exist_ok=False) - for trimmed_file in list(glob.glob(pattern)): - move(trimmed_file, join(output_path, project_name)) + sample_ids = [x[0] for x in self.sample_ids + if x[1] == project_name] + + for trimmed_file in list(glob(pattern)): + file_name = split(trimmed_file)[1] + substr = self.interleave_fastq_regex.search(file_name) + if substr is not None: + # only move the sample_ids in this project. + if substr[1] in sample_ids: + move(trimmed_file, join(output_path, project_name)) else: raise ValueError(f"'{output_path}' does not exist") @@ -282,10 +272,9 @@ def run(self, callback=None): for project in self.project_data: project_name = project['Sample_Project'] needs_human_filtering = project['HumanFiltering'] - source_dir = join(self.output_path, project_name) pattern = f"{source_dir}/*.fastq.gz" - completed_files = list(glob.glob(pattern)) + completed_files = list(glob(pattern)) # if the 'only-adapter-filtered' directory exists, move the files # into a unique location so that files from multiple projects @@ -294,7 +283,7 @@ def run(self, callback=None): 'only-adapter-filtered') if exists(trimmed_only_path): - NuQCJob._move_trimmed_files(project_name, trimmed_only_path) + self._move_trimmed_files(project_name, trimmed_only_path) if needs_human_filtering is True: filtered_directory = join(source_dir, 'filtered_sequences') @@ -330,7 +319,7 @@ def run(self, callback=None): # move all html files underneath the subdirectory for this project. 
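A small sketch of the prefix matching used to scope files to a project, shown here with the new interleave regex; the html/json moves just below use the same idea (the file name is hypothetical, patterned after the Tissue_1_... examples in the comments):

```python
import re

interleave_fastq_regex = re.compile(r'^(.*)_S\d{1,4}_L\d{3}_R\d'
                                    r'_\d{3}\.interleave\.fastq\.gz$')

# hypothetical trimmed-file name
file_name = 'Tissue_1_Super_Trizol_S19_L001_R1_001.interleave.fastq.gz'
substr = interleave_fastq_regex.search(file_name)
print(substr[1])  # 'Tissue_1_Super_Trizol' -> checked against sample_ids
```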
pattern = f"{old_html_path}/*.html" - completed_htmls = list(glob.glob(pattern)) + completed_htmls = list(glob(pattern)) self._move_helper(completed_htmls, # Tissue_1_Super_Trizol_S19_L001_R1_001.html self.html_regex, @@ -339,7 +328,7 @@ def run(self, callback=None): # move all json files underneath the subdirectory for this project. pattern = f"{old_json_path}/*.json" - completed_jsons = list(glob.glob(pattern)) + completed_jsons = list(glob(pattern)) self._move_helper(completed_jsons, # Tissue_1_Super_Trizol_S19_L001_R1_001.json self.json_regex, @@ -357,7 +346,7 @@ def _confirm_job_completed(self): # since NuQCJob processes across all projects in a run, there isn't # a need to iterate by project_name and job_id. pattern = f"{self.output_path}/hds-{self.qiita_job_id}.*.completed" - completed_files = list(glob.glob(pattern)) + completed_files = list(glob(pattern)) if completed_files: return True @@ -510,16 +499,3 @@ def _generate_job_script(self, max_bucket_size): pmls_path=self.pmls_path)) return job_script_path - - def parse_logs(self): - log_path = join(self.output_path, 'logs') - # sorted lists give predictable results - files = sorted(glob.glob(join(log_path, '*.out'))) - msgs = [] - - for some_file in files: - with open(some_file, 'r') as f: - msgs += [line for line in f.readlines() - if 'error:' in line.lower()] - - return [msg.strip() for msg in msgs] diff --git a/sequence_processing_pipeline/Pipeline.py b/sequence_processing_pipeline/Pipeline.py index 21b460c2..9a30c2a0 100644 --- a/sequence_processing_pipeline/Pipeline.py +++ b/sequence_processing_pipeline/Pipeline.py @@ -173,25 +173,20 @@ def get_qiita_id_from_sif_fp(fp): hacky_name_pieces_dict = parse_project_name(temp_name) return hacky_name_pieces_dict[QIITA_ID_KEY] - def __init__(self, configuration_file_path, run_id, sample_sheet_path, - mapping_file_path, output_path, qiita_job_id, pipeline_type): + def __init__(self, configuration_file_path, run_id, input_file_path, + output_path, qiita_job_id, pipeline_type, lane_number=None): """ Initialize Pipeline object w/configuration information. :param configuration_file_path: Path to configuration.json file. :param run_id: Used w/search_paths to locate input run_directory. - :param sample_sheet_path: Path to sample-sheet. - :param mapping_file_path: Path to mapping file. + :param input_file_path: Path to sample-sheet or pre-prep file. :param output_path: Path where all pipeline-generated files live. :param qiita_job_id: Qiita Job ID creating this Pipeline. :param pipeline_type: Pipeline type ('Amplicon', 'Metagenomic', etc.) + :param lane_number: (Optional) overwrite lane_number in input_file. 
""" - if sample_sheet_path is not None and mapping_file_path is not None: - raise PipelineError("sample_sheet_path or mapping_file_path " - "must be defined, but not both.") - - if sample_sheet_path is None and mapping_file_path is None: - raise PipelineError("sample_sheet_path or mapping_file_path " - "must be defined, but not both.") + if input_file_path is None: + raise PipelineError("user_input_file_path cannot be None") if pipeline_type not in Pipeline.pipeline_types: raise PipelineError(f"'{type}' is not a valid pipeline type.") @@ -235,22 +230,36 @@ def __init__(self, configuration_file_path, run_id, sample_sheet_path, self.run_id = run_id self.qiita_job_id = qiita_job_id self.pipeline = [] + self.assay_type = None - if sample_sheet_path: - self.search_paths = self.configuration['search_paths'] - self.sample_sheet = self._validate_sample_sheet(sample_sheet_path) - self.mapping_file = None - else: + # this method will catch a run directory as well as its products + # directory, which also has the same name. Hence, return the + # shortest matching path as that will at least return the right + # path between the two. + results = [] + + if pipeline_type == Pipeline.AMPLICON_PTYPE: self.search_paths = self.configuration['amplicon_search_paths'] - self.mapping_file = self._validate_mapping_file(mapping_file_path) - # unlike _validate_sample_sheet() which returns a SampleSheet - # object that stores the path to the file it was created from, - # _validate_mapping_file() just returns a DataFrame. Store the - # path to the original mapping file itself as well. - self.mapping_file_path = mapping_file_path - self.sample_sheet = None + self.assay_type = Pipeline.AMPLICON_ATYPE + else: + self.search_paths = self.configuration['search_paths'] + + for search_path in self.search_paths: + logging.debug(f'Searching {search_path} for {self.run_id}') + for entry in listdir(search_path): + some_path = join(search_path, entry) + # ensure some_path never ends in '/' + some_path = some_path.rstrip('/') + if isdir(some_path) and some_path.endswith(self.run_id): + logging.debug(f'Found {some_path}') + results.append(some_path) - self.run_dir = self._search_for_run_dir() + if results: + results.sort(key=lambda s: len(s)) + self.run_dir = results[0] + else: + raise PipelineError(f"A run-dir for '{self.run_id}' could not be " + "found") # required files for successful operation # both RTAComplete.txt and RunInfo.xml should reside in the root of @@ -268,14 +277,78 @@ def __init__(self, configuration_file_path, run_id, sample_sheet_path, except PermissionError: raise PipelineError('RunInfo.xml is present, but not readable') - if self.mapping_file is not None: + self.input_file_path = input_file_path + + if pipeline_type == Pipeline.AMPLICON_PTYPE: + # assume input_file_path references a pre-prep (mapping) file. + + self.mapping_file = self._validate_mapping_file(input_file_path) + # unlike _validate_sample_sheet() which returns a SampleSheet + # object that stores the path to the file it was created from, + # _validate_mapping_file() just returns a DataFrame. Store the + # path to the original mapping file itself as well. + # create dummy sample-sheet output_fp = join(output_path, 'dummy_sample_sheet.csv') self.generate_dummy_sample_sheet(self.run_dir, output_fp) - self.sample_sheet = output_fp + self.dummy_sheet_path = output_fp + + # Optional lane_number parameter is ignored for Amplicon + # runs, as the only valid value is 1. 
+ else: + if lane_number is not None: + # confirm that the lane_number is a reasonable value. + lane_number = int(lane_number) + if lane_number < 1 or lane_number > 8: + raise ValueError(f"'{lane_number}' is not a valid name" + " number") + + # overwrite sample-sheet w/DFSheets processed version + # with overwritten Lane number. + sheet = load_sample_sheet(input_file_path) + with open(input_file_path, 'w') as f: + sheet.write(f, lane=lane_number) + + # assume user_input_file_path references a sample-sheet. + self.sample_sheet = self._validate_sample_sheet(input_file_path) + self.mapping_file = None + + if self.assay_type is None: + # set self.assay_type for non-amplicon types. + assay_type = self.sample_sheet.Header['Assay'] + if assay_type not in Pipeline.assay_types: + raise ValueError(f"'{assay_type} is not a valid Assay type") + self.assay_type = assay_type self._configure_profile() + def get_sample_sheet_path(self): + """ + Returns path to a sample-sheet or dummy sample-sheet for amplicon runs. + """ + if self.assay_type == Pipeline.AMPLICON_ATYPE: + # assume self.dummy_sheet_path has been created for amplicon runs. + return self.dummy_sheet_path + else: + # assume input_file_path is a sample-sheet for non-amplicon runs. + return self.input_file_path + + def get_software_configuration(self, software): + if software is None or software == "": + raise ValueError(f"'{software}' is not a valid value") + + key_order = ['profile', 'configuration', software] + + config = self.config_profile + + for key in key_order: + if key in config: + config = config[key] + else: + raise PipelineError(f"'{key}' is not defined in configuration") + + return config + def identify_reserved_words(self, words): ''' Returns a list of words that should not appear as column names in any @@ -294,7 +367,7 @@ def identify_reserved_words(self, words): # specifically how the proper set of prep-info file columns are # generated. For now the functionality will be defined here as this # area of metapool is currently in flux. - if self.mapping_file is not None: + if self.pipeline_type == Pipeline.AMPLICON_PTYPE: reserved = PREP_MF_COLUMNS else: # results will be dependent on SheetType and SheetVersion of @@ -313,15 +386,6 @@ def _configure_profile(self): # from self.sample_sheet (or self.mapping_file). instr_type = InstrumentUtils.get_instrument_type(self.run_dir) - if isinstance(self.sample_sheet, str): - # if self.sample_sheet is a file instead of a KLSampleSheet() - # type, then this is an Amplicon run. - assay_type = Pipeline.AMPLICON_ATYPE - else: - assay_type = self.sample_sheet.Header['Assay'] - if assay_type not in Pipeline.assay_types: - raise ValueError(f"'{assay_type} is not a valid Assay type") - # open the configuration profiles directory as specified by # profiles_path in the configuration.json file. parse each json into # a nested dictionary keyed by (instrument-type, assay-type) as @@ -381,40 +445,17 @@ def _configure_profile(self): i_type = profile['profile']['instrument_type'] a_type = profile['profile']['assay_type'] - if i_type == instr_type and a_type == assay_type: + if i_type == instr_type and a_type == self.assay_type: selected_profile = profile break if selected_profile is None: - raise ValueError(f"a matching profile ({instr_type}, {assay_type}" - ") was not found. Please notify an administrator") + raise ValueError(f"a matching profile ({instr_type}, " + f"{self.assay_type}) was not found. 
Please notify" + " an administrator") self.config_profile = selected_profile - def _search_for_run_dir(self): - # this method will catch a run directory as well as its products - # directory, which also has the same name. Hence, return the - # shortest matching path as that will at least return the right - # path between the two. - results = [] - - for search_path in self.search_paths: - logging.debug(f'Searching {search_path} for {self.run_id}') - for entry in listdir(search_path): - some_path = join(search_path, entry) - # ensure some_path never ends in '/' - some_path = some_path.rstrip('/') - if isdir(some_path) and some_path.endswith(self.run_id): - logging.debug(f'Found {some_path}') - results.append(some_path) - - if results: - results.sort(key=lambda s: len(s)) - return results[0] - - raise PipelineError(f"A run-dir for '{self.run_id}' could not be " - "found") - def _directory_check(self, directory_path, create=False): if exists(directory_path): logging.debug("directory '%s' exists." % directory_path) @@ -591,7 +632,7 @@ def generate_sample_info_files(self, addl_info=None): :param addl_info: A df of (sample-name, project-name) pairs. :return: A list of paths to sample-information-files. """ - if self.mapping_file is not None: + if self.pipeline_type == Pipeline.AMPLICON_PTYPE: # Generate a list of BLANKs for each project. temp_df = self.mapping_file[[SAMPLE_NAME_KEY, _PROJECT_NAME_KEY]] temp_df_as_dicts_list = temp_df.to_dict(orient='records') @@ -670,7 +711,7 @@ def get_sample_ids(self): # test for self.mapping_file, since self.sample_sheet will be # defined in both cases. - if self.mapping_file is not None: + if self.pipeline_type == Pipeline.AMPLICON_PTYPE: results = list(self.mapping_file.sample_name) else: results = [x.Sample_ID for x in self.sample_sheet.samples] @@ -685,7 +726,7 @@ def get_sample_names(self, project_name=None): ''' # test for self.mapping_file, since self.sample_sheet will be # defined in both cases. - if self.mapping_file is not None: + if self.pipeline_type == Pipeline.AMPLICON_PTYPE: return self._get_sample_names_from_mapping_file(project_name) else: return self._get_sample_names_from_sample_sheet(project_name) @@ -775,12 +816,9 @@ def _parse_project_name(self, project_name, short_names): return proj_info[PROJECT_SHORT_NAME_KEY], proj_info[QIITA_ID_KEY] def get_project_info(self, short_names=False): - # test for self.mapping_file, since self.sample_sheet will be - # defined in both cases. 
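A minimal pandas sketch of the per-project contains_replicates lookup added further down (project names are hypothetical and CONTAINS_REPLICATES_KEY is assumed to resolve to 'contains_replicates'):

```python
import pandas as pd

# hypothetical [Bioinformatics] section of a sample-sheet
bi_df = pd.DataFrame([
    {'Sample_Project': 'Project_A_12345', 'contains_replicates': True},
    {'Sample_Project': 'Project_B_67890', 'contains_replicates': False},
])

# subselect the single row matching a project and pull out the flag
df = bi_df.loc[bi_df['Sample_Project'] == 'Project_A_12345']
print(df.iloc[0].to_dict()['contains_replicates'])  # True
```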
results = [] - contains_replicates = None - if self.mapping_file is not None: + if self.pipeline_type == Pipeline.AMPLICON_PTYPE: if CONTAINS_REPLICATES_KEY in self.mapping_file: contains_replicates = True else: @@ -792,25 +830,35 @@ def get_project_info(self, short_names=False): {p: parse_project_name(p) for p in sample_project_map} else: projects_info = self.sample_sheet.get_projects_details() - # endif mapping_file if short_names: proj_name_key = PROJECT_SHORT_NAME_KEY else: proj_name_key = PROJECT_FULL_NAME_KEY - # endif + for curr_project_info in projects_info.values(): curr_dict = { _PROJECT_NAME_KEY: curr_project_info[proj_name_key], QIITA_ID_KEY: curr_project_info[QIITA_ID_KEY] } - if contains_replicates is not None: + if self.pipeline_type == Pipeline.AMPLICON_PTYPE: + # this is a mapping file: curr_contains_reps = contains_replicates else: - curr_contains_reps = \ - curr_project_info.get(CONTAINS_REPLICATES_KEY, False) - # endif + bi_df = self.sample_sheet.Bioinformatics + if CONTAINS_REPLICATES_KEY in bi_df.columns.tolist(): + # subselect rows in [Bioinformatics] based on whether they + # match the project name. + df = bi_df.loc[bi_df['Sample_Project'] == + curr_project_info[proj_name_key]] + # since only one project can match by definition, convert + # to dict and extract the needed value. + curr_contains_reps = df.iloc[0].to_dict()[ + CONTAINS_REPLICATES_KEY] + else: + curr_contains_reps = False + curr_dict[CONTAINS_REPLICATES_KEY] = curr_contains_reps results.append(curr_dict) # next project diff --git a/sequence_processing_pipeline/SeqCountsJob.py b/sequence_processing_pipeline/SeqCountsJob.py new file mode 100644 index 00000000..f080bd00 --- /dev/null +++ b/sequence_processing_pipeline/SeqCountsJob.py @@ -0,0 +1,147 @@ +from os.path import join, split +from .Job import Job, KISSLoader +from .PipelineError import JobFailedError +import logging +from jinja2 import Environment +from os import walk +from json import dumps +from glob import glob + + +logging.basicConfig(level=logging.DEBUG) + + +class SeqCountsJob(Job): + def __init__(self, run_dir, output_path, queue_name, + node_count, wall_time_limit, jmem, modules_to_load, + qiita_job_id, max_array_length, files_to_count_path, + cores_per_task=4): + """ + ConvertJob provides a convenient way to run bcl-convert or bcl2fastq + on a directory BCL files to generate Fastq files. + :param run_dir: The 'run' directory that contains BCL files. + :param output_path: Path where all pipeline-generated files live. + :param queue_name: The name of the Torque queue to use for processing. + :param node_count: The number of nodes to request. + :param wall_time_limit: A hard time limit (in min) to bound processing. + :param jmem: String representing total memory limit for entire job. + :param modules_to_load: A list of Linux module names to load + :param qiita_job_id: identify Torque jobs using qiita_job_id + :param max_array_length: A hard-limit for array-sizes + :param files_to_count_path: A path to a list of file-paths to count. + :param cores_per_task: (Optional) # of CPU cores per node to request. + """ + super().__init__(run_dir, + output_path, + 'SeqCountsJob', + [], + max_array_length, + modules_to_load=modules_to_load) + + self.queue_name = queue_name + self.node_count = node_count + self.wall_time_limit = wall_time_limit + self.cores_per_task = cores_per_task + + # raise an Error if jmem is not a valid floating point value. 
+ self.jmem = str(int(jmem)) + self.qiita_job_id = qiita_job_id + self.jinja_env = Environment(loader=KISSLoader('templates')) + + self.job_name = (f"seq_counts_{self.qiita_job_id}") + self.files_to_count_path = files_to_count_path + + with open(self.files_to_count_path, 'r') as f: + lines = f.readlines() + lines = [x.strip() for x in lines] + lines = [x for x in lines if x != ''] + self.file_count = len(lines) + + def run(self, callback=None): + job_script_path = self._generate_job_script() + params = ['--parsable', + f'-J {self.job_name}', + f'--array 1-{self.sample_count}'] + try: + self.job_info = self.submit_job(job_script_path, + job_parameters=' '.join(params), + exec_from=None, + callback=callback) + + logging.debug(f'SeqCountsJob Job Info: {self.job_info}') + except JobFailedError as e: + # When a job has failed, parse the logs generated by this specific + # job to return a more descriptive message to the user. + info = self.parse_logs() + # prepend just the message component of the Error. + info.insert(0, str(e)) + raise JobFailedError('\n'.join(info)) + + self._aggregate_counts() + + logging.debug(f'SeqCountJob {self.job_info["job_id"]} completed') + + def _generate_job_script(self): + job_script_path = join(self.output_path, "seq_counts.sbatch") + template = self.jinja_env.get_template("seq_counts.sbatch") + + # got to make files_to_count.txt and put it in the output directory + + with open(job_script_path, mode="w", encoding="utf-8") as f: + f.write(template.render({ + "job_name": "seq_counts", + "wall_time_limit": self.wall_time_limit, + "mem_in_gb": self.jmem, + "node_count": self.node_count, + "cores_per_task": self.cores_per_task, + "queue_name": self.queue_name, + "file_count": self.file_count, + "output_path": self.output_path + })) + + return job_script_path + + def parse_logs(self): + # overrides Job.parse_logs() w/tailored parse for specific logs. 
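# only lines tagged '[E::stk_size]' in the per-task *.err logs
# (presumably emitted by 'seqtk size' while counting) are surfaced
# to the user.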
+ files = sorted(glob(join(self.log_path, '*.err'))) + msgs = [] + + for some_file in files: + with open(some_file, 'r') as f: + msgs += [line for line in f.readlines() + if line.startswith("[E::stk_size]")] + + return [msg.strip() for msg in msgs] + + def _aggregate_counts(self): + def extract_metadata(fp): + with open(fp, 'r') as f: + lines = f.readlines() + lines = [x.strip() for x in lines] + if len(lines) != 2: + raise ValueError("error processing %s" % fp) + _dir, _file = split(lines[0]) + seq_counts, base_pairs = lines[1].split('\t') + return _dir, _file, int(seq_counts), int(base_pairs) + + results = {} + + for root, dirs, files in walk(self.log_path): + for _file in files: + if _file.endswith('.out'): + log_output_file = join(root, _file) + _dir, _file, seq_counts, base_pairs = \ + extract_metadata(log_output_file) + + if _dir not in results: + results[_dir] = {} + + results[_dir][_file] = {'seq_counts': seq_counts, + 'base_pairs': base_pairs} + + results_path = join(self.output_path, 'aggregate_counts.json') + + with open(results_path, 'w') as f: + print(dumps(results, indent=2), file=f) + + return results_path diff --git a/sequence_processing_pipeline/TRIntegrateJob.py b/sequence_processing_pipeline/TRIntegrateJob.py new file mode 100644 index 00000000..7b8740b4 --- /dev/null +++ b/sequence_processing_pipeline/TRIntegrateJob.py @@ -0,0 +1,163 @@ +from os.path import join +from .Job import Job, KISSLoader +from .PipelineError import JobFailedError +import logging +from jinja2 import Environment +from .Pipeline import Pipeline +from .PipelineError import PipelineError +from metapool import load_sample_sheet +from os import makedirs +from shutil import copyfile + + +logging.basicConfig(level=logging.DEBUG) + + +class TRIntegrateJob(Job): + def __init__(self, run_dir, output_path, sample_sheet_path, queue_name, + node_count, wall_time_limit, jmem, modules_to_load, + qiita_job_id, integrate_script_path, sil_path, raw_fastq_dir, + reference_base, reference_map, cores_per_task): + """ + ConvertJob provides a convenient way to run bcl-convert or bcl2fastq + on a directory BCL files to generate Fastq files. + :param run_dir: The 'run' directory that contains BCL files. + :param output_path: Path where all pipeline-generated files live. + :param sample_sheet_path: The path to a sample-sheet. + :param queue_name: The name of the Torque queue to use for processing. + :param node_count: The number of nodes to request. + :param wall_time_limit: A hard time limit (in min) to bound processing. + :param jmem: String representing total memory limit for entire job. + :param modules_to_load: A list of Linux module names to load + :param qiita_job_id: identify Torque jobs using qiita_job_id + :param integrate_script_path: None + :param sil_path: A path to a confidential file mapping C5xx, adapters. + :param reference_base: None + :param reference_map: None + :param cores_per_task: # of CPU cores per node to request. + """ + super().__init__(run_dir, + output_path, + 'TRIntegrateJob', + [], + # max_array_length and self.max_array_length are + # not used by TRIntegrateJob. 
+ -1, + modules_to_load=modules_to_load) + + self.sample_sheet_path = sample_sheet_path + self._file_check(self.sample_sheet_path) + metadata = self._process_sample_sheet() + self.sample_ids = metadata['sample_ids'] + self.queue_name = queue_name + self.node_count = node_count + self.wall_time_limit = wall_time_limit + self.cores_per_task = cores_per_task + self.integrate_script_path = integrate_script_path + self.sil_path = sil_path + self.raw_fastq_dir = raw_fastq_dir + self.tmp_dir = join(self.output_path, 'tmp') + + self.reference_base = reference_base + self.reference_map = reference_map + + # raise an Error if jmem is not a valid floating point value. + self.jmem = str(int(jmem)) + self.qiita_job_id = qiita_job_id + self.sample_count = len(self.sample_ids) + self.jinja_env = Environment(loader=KISSLoader('templates')) + self.job_name = (f"integrate_{self.qiita_job_id}") + + with open(self.sil_path, 'r') as f: + # obtain the number of unique barcode_ids as determined by + # TellReadJob() in order to set up an array job of the + # proper length. + lines = f.readlines() + lines = [x.strip() for x in lines] + lines = [x for x in lines if x != ''] + self.barcode_id_count = len(lines) + + def run(self, callback=None): + job_script_path = self._generate_job_script() + + # copy sil_path to TRIntegrate working directory and rename to a + # predictable name. + copyfile(self.sil_path, + join(self.output_path, 'sample_index_list.txt')) + + # generate the tailored subset of adapter to barcode_id based on + # the proprietary lists owned by the manufacturer and supplied by + # the caller, and the barcode ids found in the sample-sheet. + self._generate_sample_index_list() + + makedirs(self.tmp_dir) + + params = ['--parsable', + f'-J {self.job_name}', + f'--array 1-{self.sample_count}'] + try: + self.job_info = self.submit_job(job_script_path, + job_parameters=' '.join(params), + exec_from=None, + callback=callback) + + logging.debug(f'TRIntegrateJob Job Info: {self.job_info}') + except JobFailedError as e: + # When a job has failed, parse the logs generated by this specific + # job to return a more descriptive message to the user. + info = self.parse_logs() + # prepend just the message component of the Error. + info.insert(0, str(e)) + raise JobFailedError('\n'.join(info)) + + logging.debug(f'TRIntegrateJob {self.job_info["job_id"]} completed') + + def _process_sample_sheet(self): + sheet = load_sample_sheet(self.sample_sheet_path) + + if not sheet.validate_and_scrub_sample_sheet(): + s = "Sample sheet %s is not valid." % self.sample_sheet_path + raise PipelineError(s) + + header = sheet.Header + chemistry = header['chemistry'] + + if header['Assay'] not in Pipeline.assay_types: + s = "Assay value '%s' is not recognized." % header['Assay'] + raise PipelineError(s) + + sample_ids = [] + for sample in sheet.samples: + sample_ids.append((sample['Sample_ID'], sample['Sample_Project'])) + + bioinformatics = sheet.Bioinformatics + + # reorganize the data into a list of dictionaries, one for each row. + # the ordering of the rows will be preserved in the order of the list. + lst = bioinformatics.to_dict('records') + + # human-filtering jobs are scoped by project. Each job requires + # particular knowledge of the project. 
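# in TRIntegrateJob the sample_ids feed self.sample_count, which sizes
# the slurm array submitted in run().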
+ return {'chemistry': chemistry, + 'projects': lst, + 'sample_ids': sample_ids} + + def _generate_job_script(self): + job_script_path = join(self.output_path, 'integrate_test.sbatch') + template = self.jinja_env.get_template("integrate.sbatch") + + with open(job_script_path, mode="w", encoding="utf-8") as f: + f.write(template.render({ + "job_name": "integrate", + "wall_time_limit": self.wall_time_limit, + "mem_in_gb": self.jmem, + "node_count": self.node_count, + "cores_per_task": self.cores_per_task, + "integrate_script_path": self.integrate_script_path, + "queue_name": self.queue_name, + "barcode_id_count": self.barcode_id_count, + "raw_fastq_dir": self.raw_fastq_dir, + "tmp_dir": self.tmp_dir, + "output_dir": self.output_path})) + + return job_script_path diff --git a/sequence_processing_pipeline/TellReadJob.py b/sequence_processing_pipeline/TellReadJob.py new file mode 100644 index 00000000..3d68d4c8 --- /dev/null +++ b/sequence_processing_pipeline/TellReadJob.py @@ -0,0 +1,174 @@ +from os.path import join +from .Job import Job, KISSLoader +from .PipelineError import JobFailedError +import logging +from jinja2 import Environment +from .Pipeline import Pipeline +from .PipelineError import PipelineError +from metapool import load_sample_sheet + + +logging.basicConfig(level=logging.DEBUG) + + +class TellReadJob(Job): + def __init__(self, run_dir, output_path, sample_sheet_path, queue_name, + node_count, wall_time_limit, jmem, modules_to_load, + qiita_job_id, reference_base, + reference_map, sing_script_path, cores_per_task): + """ + ConvertJob provides a convenient way to run bcl-convert or bcl2fastq + on a directory BCL files to generate Fastq files. + :param run_dir: The 'run' directory that contains BCL files. + :param output_path: Path where all pipeline-generated files live. + :param sample_sheet_path: The path to a sample-sheet. + :param queue_name: The name of the Torque queue to use for processing. + :param node_count: The number of nodes to request. + :param wall_time_limit: A hard time limit (in min) to bound processing. + :param jmem: String representing total memory limit for entire job. + :param modules_to_load: A list of Linux module names to load + :param qiita_job_id: identify Torque jobs using qiita_job_id + :param reference_base: None + :param reference_map: None + :param cores_per_task: (Optional) # of CPU cores per node to request. + """ + super().__init__(run_dir, + output_path, + 'TellReadJob', + [], + 1, + modules_to_load=modules_to_load) + + self.sample_sheet_path = sample_sheet_path + self._file_check(self.sample_sheet_path) + metadata = self._process_sample_sheet() + self.sample_ids = metadata['sample_ids'] + self.queue_name = queue_name + self.node_count = node_count + self.wall_time_limit = wall_time_limit + self.cores_per_task = cores_per_task + + self.reference_base = reference_base + self.reference_map = reference_map + + # raise an Error if jmem is not a valid floating point value. + self.jmem = str(int(jmem)) + self.qiita_job_id = qiita_job_id + self.jinja_env = Environment(loader=KISSLoader('templates')) + self.sing_script_path = sing_script_path + + sheet = load_sample_sheet(self.sample_sheet_path) + lane = sheet.samples[0].Lane + + # force self.lane_number to be int. raise an Error if it's not. 
+ tmp = int(lane) + if tmp < 1 or tmp > 8: + raise ValueError(f"'{tmp}' is not a valid lane number") + self.lane_number = tmp + + self.job_name = (f"{self.qiita_job_id}-tellread") + + def run(self, callback=None): + job_script_path = self._generate_job_script() + + # everything is in the job script so there are no additional params. + params = [] + + try: + self.job_info = self.submit_job(job_script_path, + job_parameters=' '.join(params), + exec_from=None, + callback=callback) + + logging.debug(f'TellReadJob Job Info: {self.job_info}') + except JobFailedError as e: + # When a job has failed, parse the logs generated by this specific + # job to return a more descriptive message to the user. + # TODO: We need more examples of failed jobs before we can create + # a parser for the logs. + # info = self.parse_logs() + # prepend just the message component of the Error. + # info.insert(0, str(e)) + info = str(e) + raise JobFailedError('\n'.join(info)) + + logging.debug(f'TellReadJob {self.job_info["job_id"]} completed') + + def _process_sample_sheet(self): + sheet = load_sample_sheet(self.sample_sheet_path) + + if not sheet.validate_and_scrub_sample_sheet(): + s = "Sample sheet %s is not valid." % self.sample_sheet_path + raise PipelineError(s) + + header = sheet.Header + chemistry = header['chemistry'] + + if header['Assay'] not in Pipeline.assay_types: + s = "Assay value '%s' is not recognized." % header['Assay'] + raise PipelineError(s) + + sample_ids = [] + for sample in sheet.samples: + sample_ids.append((sample['Sample_ID'], + sample['Sample_Project'], + sample['barcode_id'])) + + bioinformatics = sheet.Bioinformatics + + # reorganize the data into a list of dictionaries, one for each row. + # the ordering of the rows will be preserved in the order of the list. + lst = bioinformatics.to_dict('records') + + # human-filtering jobs are scoped by project. Each job requires + # particular knowledge of the project. + return {'chemistry': chemistry, + 'projects': lst, + 'sample_ids': sample_ids} + + def _generate_job_script(self): + job_script_path = join(self.output_path, 'tellread_test.sbatch') + template = self.jinja_env.get_template("tellread.sbatch") + + # generate a comma separated list of sample-ids from the tuples stored + # in self.sample_ids. + + # NB: Proposed sample-sheets will have traditional Sample_ID and + # Sample_Name columns as well as a new value named barcode_id. It's + # this column that will contain the 'C50n' values needed to be + # supplied to tellread. Later we will use this mapping to rename the + # files from C50n...fastq.gz to sample-name...fastq.gz. + samples = ','.join([id[2] for id in self.sample_ids]) + + # since we haven't included support for reference_map yet, whenever a + # reference is not included, the mapping against the list of sample_ids + # is ['NONE', 'NONE', ..., 'NONE']. + refs = ','.join(['NONE' for _ in self.sample_ids]) + + extra = "" + + with open(job_script_path, mode="w", encoding="utf-8") as f: + f.write(template.render({ + "job_name": "tellread", + "wall_time_limit": self.wall_time_limit, + "mem_in_gb": self.jmem, + "node_count": self.node_count, + "cores_per_task": self.cores_per_task, + "queue_name": self.queue_name, + "sing_script_path": self.sing_script_path, + "modules_to_load": ' '.join(self.modules_to_load), + "lane": f"s_{self.lane_number}", + # NB: Note that we no longer create a sub-directory under the + # working directory for TellRead to create all its output + # folders and files. 
This means it is creating folders and + # files in the same directory that has our sbatch script and + # logs directory. Currently there are no name collisions, + # however. + "output": self.output_path, + "rundir_path": self.root_dir, + "samples": samples, + "refs": refs, + "extra": extra + })) + + return job_script_path diff --git a/sequence_processing_pipeline/aggregate_counts.py b/sequence_processing_pipeline/aggregate_counts.py new file mode 100644 index 00000000..ace90212 --- /dev/null +++ b/sequence_processing_pipeline/aggregate_counts.py @@ -0,0 +1,40 @@ +from os import walk +from sys import argv +from os.path import join, split +from json import dumps + + +def extract_metadata(log_output_file_path): + with open(log_output_file_path, 'r') as f: + lines = f.readlines() + lines = [x.strip() for x in lines] + if len(lines) != 2: + raise ValueError("error processing %s" % log_output_file_path) + _dir, _file = split(lines[0]) + seq_counts, base_pairs = lines[1].split('\t') + return _dir, _file, int(seq_counts), int(base_pairs) + + +def aggregate_counts(fp): + results = {} + + for root, dirs, files in walk(fp): + for _file in files: + if _file.endswith('.out'): + log_output_file = join(root, _file) + _dir, _file, seq_counts, base_pairs = \ + extract_metadata(log_output_file) + + if _dir not in results: + results[_dir] = {} + + results[_dir][_file] = {'seq_counts': seq_counts, + 'base_pairs': base_pairs} + + return results + + +if __name__ == '__main__': + results = aggregate_counts(argv[1]) + with open(argv[2], 'w') as f: + print(dumps(results, indent=2), file=f) diff --git a/sequence_processing_pipeline/contrib/create_picklist.py b/sequence_processing_pipeline/contrib/create_picklist.py new file mode 100644 index 00000000..a1d6a1d0 --- /dev/null +++ b/sequence_processing_pipeline/contrib/create_picklist.py @@ -0,0 +1,72 @@ +import os +# from metapool.metapool import * +from sys import argv +import pandas as pd +import matplotlib.pyplot as plt +from metapool.metapool import (read_survival, make_2D_array, + calculate_iseqnorm_pooling_volumes, + format_pooling_echo_pick_list) +import seaborn as sns + +input_sheet_filename = argv[1] + +plate_df_w_reads = pd.read_csv(input_sheet_filename, sep='\t') +plate_df_w_reads['Blank'] = [True if 'blank' in s.lower() else False + for s in plate_df_w_reads['Sample_Name']] +reads_column = 'read_counts' + +well_col = 'Sample_Well' +assert reads_column in plate_df_w_reads.columns + +f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(8, 8)) +# evenness plot +rmax = int(round(plate_df_w_reads[reads_column].max(), -2)) + +foo = read_survival(plate_df_w_reads.loc[plate_df_w_reads['Blank'] is True, + reads_column], + label='Blanks', + rmax=rmax) + +bar = read_survival(plate_df_w_reads.loc[plate_df_w_reads['Blank'] is False, + reads_column], + label='Samples', + rmax=rmax) + +survival_df = pd.concat([foo, bar]) + +ax3.set_xlabel(reads_column) +ax3.set_ylabel('Samples') +survival_df.plot(color=['coral', 'steelblue'], ax=ax1) +ax1.set_xlabel(reads_column) +ax1.set_ylabel('Samples') + +# Histogram +sns.histplot(plate_df_w_reads[reads_column], ax=ax3) + +# Boxplot +sns.boxplot(x="Blank", y=reads_column, data=plate_df_w_reads, ax=ax4) +sns.stripplot(x="Blank", y=reads_column, data=plate_df_w_reads, ax=ax4, + size=3, color='black', alpha=0.5) + +plt.tight_layout() +plt.savefig(input_sheet_filename + '.comboplot.pdf') + +pdfn = calculate_iseqnorm_pooling_volumes(plate_df_w_reads, + dynamic_range=20, + normalization_column=reads_column) 
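# the pooling-volume normalization plot (drawn on the current figure,
# presumably by the call above) is saved separately from the combo plot.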
+plt.savefig(input_sheet_filename + '.normalizedplot.pdf') + +vols = make_2D_array(pdfn, + data_col='iSeq normpool volume', + well_col=well_col).astype(float) + +# Write the picklist as .csv +picklist_fp = input_sheet_filename + '.picklist.csv' + +if os.path.isfile(picklist_fp): + print("Warning! This file exists already.") + +picklist = format_pooling_echo_pick_list(vols, max_vol_per_well=30000) + +with open(picklist_fp, 'w') as f: + f.write(picklist) diff --git a/sequence_processing_pipeline/contrib/integrate-indices-np.py b/sequence_processing_pipeline/contrib/integrate-indices-np.py new file mode 100644 index 00000000..b1be83a6 --- /dev/null +++ b/sequence_processing_pipeline/contrib/integrate-indices-np.py @@ -0,0 +1,332 @@ +# Why +# 1) cloudspades requires the index reads be inline in the record header +# 2) Ariadne requires the data are sorted by the barcodes +# +# Inlining is easy. Sorting is complex as the amount of data is large, and +# the ordering stems is determined external to the data being sorted. To +# determine order, all barcodes must be read in to gather the complete +# barcode <-> record association; if only partial data is read then +# associations to barcodes may be missed, and we cannot perform an insertion +# sort efficiently as we're writing to disk. Once we know an order for the +# records, we (currently) read in the entirety of the subsequent data (R1 then +# R2), reorder, and write. Performing this in blocks to minimize memory may be +# possible, but we have to assume access is random as a grouping barcode +# may be with any record along the file. +# +# A variety of approaches were considered, including: +# - indexing portions in a hashtable, reading inputs multiple times, and +# writing in blocks. This was tested in both rust and python. The amount of +# memory was large, and keeping it under control would be many many many +# passes over data on disk or in memory +# - using pandas to do the grouping, which possibly avoids the memory burden +# of a hashmap. it didn't +# - using mmap files. No go, these are large and we have to walk over them +# a lot. +# +# Parsing this stuff adds a lot of overhead in Python. It will add some, if not +# a lot, in rust as well -- our test data had 65M sequences. So the current +# approach operates in the raw file data itself, using regex's to parse +# individual records. We use numpy for sorting and getting record orders. +# This is memory expensive but so far much less than the other approaches tried +# and it does not require multiple passes over files. We bottleneck on write +# IO, so to mitigate that, we are using a parallel gzip (pgzip), which still +# bottlenecks but gets better throughput. +# +# There probably are smarter ways to do this to reduce the memory burden. +# Right now, it's O(N) where N is the number of records. We load R1 and R2 +# separately though so we at least halve the memory use. As for doing it +# faster, at the moment we appear to saturate time on gzip. Easiest solution +# would be to increase the number of threads, but then again, this process +# is expected to run in an array, and filesystem can only take so much. +# +# In addition to the inline tests, md5 checks to verify all record IDs are +# present in both R1 / R2, and relative to original input. Spot checks on +# an arbitrary set of records were performed on R1 / R2 to verify no apparent +# unusual modification. And spot checks were performed to verify that correct +# barcodes are incorporating as expected in output. 
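#
# As a small worked example of the ordering step (toy data in the style
# of test_gather_order() below; relative order of equal barcodes may
# vary with the sort):
#   barcodes = np.array([b'ATGC', b'TTGG', b'ATGC', b'TTTT'])
#   order = barcodes.argsort()            # -> [0, 2, 1, 3]
#   barcodes = barcodes[order]            # -> ATGC, ATGC, TTGG, TTTT
#   unique, bounds = np.unique(barcodes, return_index=True)
#   # unique -> [ATGC, TTGG, TTTT]; bounds -> [0, 2, 3]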
+# +# author: Daniel McDonald (d3mcdonald@eng.ucsd.edu) +import numpy as np +import click +import re +import io +import pgzip +import gzip + + +RECORD = re.compile(rb'@\S+\n[ATGCN]+\n\+\n\S+\n') +BARCODE = re.compile(rb'@\S+\n([ATGCN]+)\n\+\n\S+\n') + + +def gather_order(i1_in_fp): + """Determine record order + + This is a fancy way of saying: get all the barcodes, and sort them. + + We return the order of the sorted records, the unique barcodes, + and the bounds for what barcode associated with what record + """ + # determine barcode length + _ = i1_in_fp.readline() + b = i1_in_fp.readline() + rec_len = len(b.strip()) + i1_in_fp.seek(0) + + # we need larger data in memory later anyway... + i1 = i1_in_fp.read() + start = 0 + end = len(i1) + + # get the number of records. we completely assume non-multiline fastq here + newlines = i1.count(b'\n') + assert newlines % 4 == 0 + barcodes = np.empty(newlines // 4, dtype='|S%d' % rec_len) + + # walk all index records + # grab each barcode + idx = 0 + while start < end: + barcode_result = BARCODE.search(i1, pos=start) + barcode = barcode_result.groups()[0] + assert len(barcode) == rec_len # get angry if the barcode is weird + + barcodes[idx] = barcode + idx += 1 + start = barcode_result.end() + + # we no longer need the raw data so let's toss it + del i1 + + # determine the record order of a lexicographic sort + # gather the unique barcodes so we can use them later, and the bounding + # points in the sorted set + record_order = barcodes.argsort() + barcodes = barcodes[record_order] + unique_barcodes, barcode_bounds = np.unique(barcodes, return_index=True) + + return record_order, unique_barcodes, barcode_bounds + + +def test_gather_order(): + i1data = [b'@foo', b'ATGC', b'+', b'!!!!', + b'@bar', b'TTGG', b'+', b'!!!!', + b'@baz', b'ATGC', b'+', b'!!!!', + b'@oof', b'TTTT', b'+', b'!!!!', + b'@rab', b'TTGG', b'+', b'!!!!', + b'@zab', b'TTTT', b'+', b'!!!!', + b'@ofo', b'TTTT', b'+', b'!!!!', b''] + + i1 = io.BytesIO(b'\n'.join(i1data)) + order, unique, bounds = gather_order(i1) + + exp_order = np.array([0, 2, 1, 4, 3, 5, 6]) + exp_unique = np.array([b'ATGC', b'TTGG', b'TTTT']) + exp_bounds = np.array([0, 2, 4]) + + assert (order == exp_order).all() + assert (unique == exp_unique).all() + assert (bounds == exp_bounds).all() + + +def troll_and_write(order, unique, bounds, in_, out_): + """Walk over the raw data, spit out barcode amended records in order + + - read all data + - get index boundaries for each record + - pull out each record in order according to the barcode data + - associate the barcode + - write + """ + + data = in_.read() + boundaries = np.empty([order.size, 2], dtype=np.uint64) + + stop = 0 + for idx in range(order.size): + rec = RECORD.search(data, pos=stop) + start, stop = rec.span() + boundaries[idx] = np.array([start, stop], dtype=np.uint64) + + current_barcode_idx = 0 + current_barcode = unique[current_barcode_idx] + current_barcode_bound_end = bounds[current_barcode_idx + 1] + + for order_idx, record_idx in enumerate(order): + if order_idx >= current_barcode_bound_end: + current_barcode_idx += 1 + + if current_barcode_idx >= bounds.size: + raise ValueError("should not happen?") + current_barcode = unique[current_barcode_idx] + + if current_barcode_idx + 1 >= bounds.size: + # run to the end + current_barcode_bound_end = order.size + else: + current_barcode_bound_end = bounds[current_barcode_idx + 1] + + start, stop = boundaries[record_idx] + record = data[start:stop] + + # in a one-off, these might pass by chance. 
It would be real weird + # for them to always pass for all records in a large file. + # n.b., b'foo'[0] is int, because yay, so we use a slice to maintain + # a human readable character to test against as most mortals haven't + # memorized the ascii table + assert record[:1] == b'@' + assert record[-1:] == b'\n' + + with_barcode = insert_barcode(record, current_barcode) + out_.write(with_barcode) + + +def test_troll_and_write(): + i1data = [b'@foo', b'ATGC', b'+', b'!!!!', + b'@bar', b'TTGG', b'+', b'!!!!', + b'@baz', b'ATGC', b'+', b'!!!!', + b'@oof', b'TTTT', b'+', b'!!!!', + b'@rab', b'TTGG', b'+', b'!!!!', + b'@zab', b'TTTT', b'+', b'!!!!', + b'@ofo', b'TTTT', b'+', b'!!!!', b''] + + i1 = io.BytesIO(b'\n'.join(i1data)) + order, unique, bounds = gather_order(i1) + + # we assume records are in the same order, as that has previously been + # observed w/ tellread and is the normal expectation + r1data = [b'@foo', b'AATGC', b'+', b'!!!!!', + b'@bar', b'ATTGG', b'+', b'!!!!!', + b'@baz', b'AATGC', b'+', b'!!!!!', + b'@oof', b'ATTTT', b'+', b'!!!!!', + b'@rab', b'ATTGG', b'+', b'!!!!!', + b'@zab', b'ATTTT', b'+', b'!!!!!', + b'@ofo', b'ATTTT', b'+', b'!!!!!', b''] + r1 = io.BytesIO(b'\n'.join(r1data)) + r1out = io.BytesIO() + troll_and_write(order, unique, bounds, r1, r1out) + r1out.seek(0) + + r1exp = [b'@foo BX:Z:ATGC-1', b'AATGC', b'+', b'!!!!!', + b'@baz BX:Z:ATGC-1', b'AATGC', b'+', b'!!!!!', + b'@bar BX:Z:TTGG-1', b'ATTGG', b'+', b'!!!!!', + b'@rab BX:Z:TTGG-1', b'ATTGG', b'+', b'!!!!!', + b'@oof BX:Z:TTTT-1', b'ATTTT', b'+', b'!!!!!', + b'@zab BX:Z:TTTT-1', b'ATTTT', b'+', b'!!!!!', + b'@ofo BX:Z:TTTT-1', b'ATTTT', b'+', b'!!!!!', + b''] + r1exp = b'\n'.join(r1exp) + assert r1exp == r1out.read() + + +def create_tag(t): + return b'BX:Z:%s-1' % t + + +def create_tag_no_suffix(t): + return b'BX:Z:%s' % t + + +def insert_barcode(record, barcode): + """Get the current ID, smash the needed tag in""" + # @foo\nATGC\n+\n!!!!\n + id_, remainder = record.split(b'\n', 1) + tag = create_tag(barcode) + return b'%s %s\n%s' % (id_, tag, remainder) + + +def readfq(fp): + if fp.mode == 'rb': + strip = bytes.strip + else: + strip = str.strip + + id_ = iter(fp) + seq = iter(fp) + dumb = iter(fp) + qual = iter(fp) + for rec in zip(id_, seq, dumb, qual): + yield list(map(strip, rec)) + + +def writefq(rec, out): + for item in rec: + out.write(item) + out.write(b'\n') + + +@click.group() +def cli(): + pass + + +@cli.command() +def tests(): + test_gather_order() + test_troll_and_write() + + +@cli.command() +@click.option('--r1-in', type=click.Path(exists=True), required=True) +@click.option('--r2-in', type=click.Path(exists=True), required=True) +@click.option('--i1-in', type=click.Path(exists=True), required=True) +@click.option('--r1-out', type=click.Path(exists=False), required=True) +@click.option('--r2-out', type=click.Path(exists=False), required=True) +@click.option('--threads', type=int, required=False, default=1) +@click.option('--no-sort', is_flag=True, default=False) +def integrate(r1_in, r2_in, i1_in, r1_out, r2_out, threads, no_sort): + r1_in_fp = open(r1_in, 'rb') + r2_in_fp = open(r2_in, 'rb') + i1_in_fp = open(i1_in, 'rb') + + if no_sort: + r1_out_fp = gzip.open(r1_out, mode='wb') + r2_out_fp = gzip.open(r2_out, mode='wb') + + r1_sniff = r1_in_fp.readline().strip() + r2_sniff = r2_in_fp.readline().strip() + r1_in_fp.seek(0) + r2_in_fp.seek(0) + + # outputs from tellread don't seem to have orientation information + # some downstream programs hate this, so let's add if needed. 
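The branch that follows appends `/1`/`/2` only when the sniffed headers lack an orientation suffix, then glues on the `BX:Z:` tag built by `create_tag_no_suffix()`. A toy illustration of the resulting header (the read ID and barcode here are invented):

```python
# illustration only; mirrors r1[0] = b"%s%s %s" % (r1[0], orient_r1, tag)
read_id = b'@record123'
barcode = b'ATGCATGCAT'            # sequence of the matching I1 record

def tag_header(read_id, orient, barcode):
    return b'%s%s BX:Z:%s' % (read_id, orient, barcode)

print(tag_header(read_id, b'/1', barcode))
# b'@record123/1 BX:Z:ATGCATGCAT'
```

One detail worth double-checking in the branch below: when the inputs already end in `/1`, `orient_r1` and `orient_r2` are set to the str `''` rather than `b''`; since `%s` in bytes formatting rejects str arguments, that path would likely raise a TypeError if it were ever exercised.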
+ if r1_sniff.endswith(b'/1'): + if not r2_sniff.endswith(b'/2'): + raise ValueError('unexpected endings: ' + f'{r1_sniff.decode("utf-8")} ' + f'{r2_sniff.decode("utf-8")}') + orient_r1 = '' + orient_r2 = '' + else: + assert b'/1' not in r1_sniff + + orient_r1 = b'/1' + orient_r2 = b'/2' + + for (r1, r2, i1) in zip(*map(readfq, [r1_in_fp, r2_in_fp, i1_in_fp])): + assert r1[0] == r2[0] + assert r1[0] == i1[0] + + tag = create_tag_no_suffix(i1[1]) + r1[0] = b"%s%s %s" % (r1[0], orient_r1, tag) + r2[0] = b"%s%s %s" % (r2[0], orient_r2, tag) + writefq(r1, r1_out_fp) + writefq(r2, r2_out_fp) + r1_out_fp.close() + r2_out_fp.close() + else: + # 200MB is what they use in their readme... + r1_out_fp = pgzip.open(r1_out, mode='wb', thread=threads, + blocksize=2*10**8) + r2_out_fp = pgzip.open(r2_out, mode='wb', thread=threads, + blocksize=2*10**8) + + order, unique, bounds = gather_order(i1_in_fp) + + for in_, out_ in zip([r1_in_fp, r2_in_fp], [r1_out_fp, r2_out_fp]): + troll_and_write(order, unique, bounds, in_, out_) + in_.close() + out_.close() + + +if __name__ == '__main__': + cli() diff --git a/sequence_processing_pipeline/contrib/plot_counts.py b/sequence_processing_pipeline/contrib/plot_counts.py new file mode 100644 index 00000000..ecab9e49 --- /dev/null +++ b/sequence_processing_pipeline/contrib/plot_counts.py @@ -0,0 +1,27 @@ +import matplotlib.pyplot as plt +import re +import sys +import os +import pandas as pd + +ex = re.compile(r'_I1_(C5\d\d).fastq.gz.corrected.err_barcode_removed.fastq') + +# remove total line from wc +data = [x.strip().split(' ') for x in open(sys.argv[1])][:-1] +plotdata = [(ex.search(i).groups()[0], int(v) / 4) for v, i in data] +sheetdata = dict(plotdata) + +ordered = sorted(plotdata, key=lambda x: x[1]) +f = plt.figure(figsize=(16, 8)) +plt.bar([i for i, _ in ordered], [v for _, v in ordered]) +plt.ylabel('I1 reads') +plt.xticks(list(range(len(ordered))), [i for i, _ in ordered], rotation=90) +plt.savefig(sys.argv[3] + '/counts.pdf') + +sheet = pd.read_csv(sys.argv[2], dtype=str) +sheet = sheet[~sheet['Lane'].isnull()] +sheet['read_counts'] = [sheetdata[i] for i in sheet['Barcode_ID']] +name = os.path.basename(sys.argv[2]).rsplit('.', 1)[0] +newname = name + '.read_counts.tsv' + +sheet.to_csv(sys.argv[3] + '/' + newname, sep='\t', index=False, header=True) diff --git a/sequence_processing_pipeline/scripts/fake_squeue.py b/sequence_processing_pipeline/scripts/fake_squeue.py new file mode 100755 index 00000000..6c8511ce --- /dev/null +++ b/sequence_processing_pipeline/scripts/fake_squeue.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +from json import load, dumps +from os.path import exists, join +from sys import argv +from random import randint, choice + + +def print_state(state): + # Note that %i will appear w/column name 'JOBID' in actual squeue output. + # this is because %i shows the array-id if it's an array job and what we + # consider the regular job-id if it's not an array job. 
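On the consuming side, output in this `JOBID,STATE` shape is what `Job._query_slurm()` reads back from `squeue -o '%i,%T'`. A minimal sketch of turning that text into the `{job_or_array_id: state}` mapping the tests further below rely on (the real method may differ in details such as retries and quoting):

```python
def parse_squeue_csv(stdout):
    # sketch only; assumes one "id,state" pair per line after a header row
    jobs = {}
    for line in stdout.strip().splitlines():
        line = line.strip().strip("'")
        if not line or line.upper().startswith('JOBID'):
            continue                       # skip the JOBID,STATE header
        job_id, state = line.split(',', 1)
        jobs[job_id] = state
    return jobs


example = "JOBID,STATE\n1234567_1,RUNNING\n1234567_2,COMPLETED\n1234568,FAILED\n"
assert parse_squeue_csv(example) == {'1234567_1': 'RUNNING',
                                     '1234567_2': 'COMPLETED',
                                     '1234568': 'FAILED'}
```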
+ print("JOBID,STATE") + for job_id in state: + if 'array_ids' in state[job_id]: + # this is an array job + for array_id in state[job_id]['array_ids']: + if state[job_id]['array_ids'][array_id] <= 0: + end_state = state[job_id]['endgame'][array_id] + else: + end_state = 'RUNNING' + + print(f"{array_id},{end_state}") + else: + # this is a non-array job + if state[job_id]['countdown'] <= 0: + end_state = state[job_id]['endgame'] + else: + end_state = 'RUNNING' + + print(f"{job_id},{end_state}") + + +def generate_output(job_ids): + results = {} + + for job_id in job_ids: + is_successful = choice([True, False]) + is_array_job = choice([True, False]) + + if is_array_job: + result = {'job_id': job_id} + result['array_ids'] = {} + result['endgame'] = {} + + for i in range(0, randint(5, 15)): + array_id = "%s_%d" % (job_id, i) + result['array_ids'][array_id] = randint(3, 7) + result['array_ids'][array_id] = randint(3, 7) + if is_successful: + # all array jobs must be successful + result['endgame'][array_id] = "COMPLETED" + else: + # some jobs may succeed but some may fail + result['endgame'][array_id] = choice( + ['COMPLETED', 'FAILED']) + results[job_id] = result + else: + result = {'job_id': job_id} + result['countdown'] = randint(3, 7) + result['endgame'] = choice(['COMPLETED', 'FAILED']) + results[job_id] = result + + return results + + +def save_state(state, file_path): + with open(file_path, 'w') as f: + print(dumps(state, indent=2), file=f) + + +def load_state(file_path): + with open(file_path, 'r') as f: + return load(f) + + +if __name__ == "__main__": + # "squeue -t all -j " f"{','.join(job_ids)} " "-o '%i,%T'" + job_ids = argv[4].split(',') + + state_file_path = join("sequence_processing_pipeline", "scripts", + "my_state.json") + + state = generate_output(job_ids) + + if exists(state_file_path): + state = load_state(state_file_path) + else: + state = generate_output(job_ids) + + print_state(state) + + for job_id in state: + if 'array_ids' in state[job_id]: + # this is an array job. + for array_id in state[job_id]['array_ids']: + state[job_id]['array_ids'][array_id] -= 1 + else: + # this is a standard job. + state[job_id]['countdown'] -= 1 + + save_state(state, state_file_path) diff --git a/sequence_processing_pipeline/templates/integrate.sbatch b/sequence_processing_pipeline/templates/integrate.sbatch new file mode 100644 index 00000000..68ebce5e --- /dev/null +++ b/sequence_processing_pipeline/templates/integrate.sbatch @@ -0,0 +1,67 @@ +#!/bin/bash -l +#SBATCH -J {{job_name}} +#SBATCH --time {{wall_time_limit}} +#SBATCH --mem {{mem_in_gb}}G +#SBATCH -N {{node_count}} +#SBATCH -c {{cores_per_task}} +#SBATCH -p {{queue_name}} +#SBATCH --array=1-{{barcode_id_count}} + +#SBATCH --output {{output_dir}}/logs/integrate_%x_%A_%a.out +#SBATCH --error {{output_dir}}/logs/integrate_%x_%A_%a.err + +set -x +set -e + +samples=($(cat {{output_dir}}/sample_index_list.txt | cut -f 2)) +sample=${samples[$((${SLURM_ARRAY_TASK_ID} - 1))]} + +export TMPDIR={{tmp_dir}} + +# get list of samples and determine which sample this array instance will work +# on. +samples=($(cat {{output_dir}}/sample_index_list.txt | cut -f 2)) +sample=${samples[$((${SLURM_ARRAY_TASK_ID} - 1))]} + +echo "Processing sample ${sample}..." + +# make temp directory +export TMPDIR={{tmp_dir}} +mkdir -p $TMPDIR + + +# TODO: All three input files must be non-zero in length. +# If possible, do this check as part of normal FSR operation. +# Previously this was done right here BEFORE integrating, rather +# than after. 
+ +# NB: non-zero file-length check removed for now. This should be performed +# by FSR after processing is done. +# TODO: Make sure raw_fastq_dir is TellReadJob/Full +r1_in={{raw_fastq_dir}}/TellReadJob_R1_${sample}.fastq.gz.corrected.err_barcode_removed.fastq +r2_in={{raw_fastq_dir}}/TellReadJob_R2_${sample}.fastq.gz.corrected.err_barcode_removed.fastq +i1_in={{raw_fastq_dir}}/TellReadJob_I1_${sample}.fastq.gz.corrected.err_barcode_removed.fastq + +# create output directory +mkdir -p {{output_dir}}/integrated + +# generate output file names +r1_out={{output_dir}}/integrated/${sample}.R1.fastq.gz +r2_out={{output_dir}}/integrated/${sample}.R2.fastq.gz +i1_out={{output_dir}}/integrated/${sample}.I1.fastq.gz + +# generate 'integrated' I1 fastq.gz file. We do this as part of each array so +# they're done in parallel. +gzip -c ${i1_in} > ${i1_out} + +# generate integrated R1 and R2 fastq.gz files. +conda activate qp-knight-lab-processing-2022.03 + +python {{integrate_script_path}} integrate \ +--no-sort \ +--r1-in ${r1_in} \ +--r2-in ${r2_in} \ +--i1-in ${i1_in} \ +--r1-out ${r1_out} \ +--r2-out ${r2_out} \ +--threads {{cores_per_task}} diff --git a/sequence_processing_pipeline/templates/seq_counts.sbatch b/sequence_processing_pipeline/templates/seq_counts.sbatch new file mode 100644 index 00000000..f44bd5b9 --- /dev/null +++ b/sequence_processing_pipeline/templates/seq_counts.sbatch @@ -0,0 +1,25 @@ +#!/bin/bash -l +#SBATCH -J {{job_name}} +#SBATCH --time {{wall_time_limit}} +#SBATCH --mem {{mem_in_gb}}G +#SBATCH -N {{node_count}} +#SBATCH -c {{cores_per_task}} +#SBATCH -p {{queue_name}} +#SBATCH --array=1-{{file_count}} + +#SBATCH --output {{output_path}}/logs/%x_%A_%a.out +#SBATCH --error {{output_path}}/logs/%x_%A_%a.err + +set -x +set -e + +mkdir -p {{output_path}}/logs + +files=($(cat {{output_path}}/files_to_count.txt)) +my_file=${files[$((${SLURM_ARRAY_TASK_ID} - 1))]} + +echo "${my_file}" + +conda activate qp-knight-lab-processing-2022.03 + +seqtk size ${my_file} diff --git a/sequence_processing_pipeline/templates/tellread-cleanup.sbatch b/sequence_processing_pipeline/templates/tellread-cleanup.sbatch new file mode 100644 index 00000000..3c31219d --- /dev/null +++ b/sequence_processing_pipeline/templates/tellread-cleanup.sbatch @@ -0,0 +1,13 @@ +#!/bin/bash -l +#SBATCH -J {{job_name}} # cleanup +#SBATCH --time {{wall_time_limit}} # 24:00:00 +#SBATCH --mem {{mem_in_gb}}G # 8G +#SBATCH -N {{node_count}} # 1 +#SBATCH -c {{cores_per_task}} # 1 +#SBATCH -p {{queue_name}} # qiita + +#SBATCH --output {{output}}/logs/cleanup_%x-%A.out +#SBATCH --error {{output}}/logs/cleanup_%x-%A.err + +# remove unused large outputs +rm -rf {{OUTPUT}}/biosample_format {{OUTPUT}}/1_demult {{OUTPUT}}/Full diff --git a/sequence_processing_pipeline/templates/tellread.sbatch b/sequence_processing_pipeline/templates/tellread.sbatch new file mode 100644 index 00000000..f038a568 --- /dev/null +++ b/sequence_processing_pipeline/templates/tellread.sbatch @@ -0,0 +1,48 @@ +#!/bin/bash -l +#SBATCH -J {{job_name}} +#SBATCH -p {{queue_name}} +#SBATCH -N {{node_count}} +#SBATCH -c {{cores_per_task}} +#SBATCH --mem {{mem_in_gb}}G +#SBATCH --time {{wall_time_limit}} + +#SBATCH --output {{output}}/logs/tellread_%x-%A.out +#SBATCH --error {{output}}/logs/tellread_%x-%A.err + +set -x + +module load {{modules_to_load}} +{{sing_script_path}} \ + -i {{rundir_path}} \ + -o {{output}} \ + -s $(echo {{samples}} | tr -d '"') \ + -g $(echo {{refs}} | tr -d '"') \ + -j ${SLURM_JOB_CPUS_PER_NODE} {{extra}} \ + -l {{lane}} + +# get the 
timestamp for the most recently changed file in directory '.' + +# hard-limit for wait time set to ~ 8 hours. +# (4 checks per hour, for 8 hours equals 32 iterations) +for i in $(seq 1 32); +do + before="$(find {{output}}/Full -type f -printf '%T@\n' | sort -n | tail -1)" + # assume TellReadJob is finished if ctime hasn't changed in 15 minutes + # for any fastq file in the directory. + sleep 900 + after="$(find {{output}}/Full -type f -printf '%T@\n' | sort -n | tail -1)" + + echo "$before $after" + + if [[ "$before" == "$after" ]]; then + echo "DONE" + exit 0 + else + echo "NOT DONE" + fi +done + +# if we've reached this point then we've exceeded our hard-limit for waiting. +# return w/an error. +exit 1 + diff --git a/sequence_processing_pipeline/tests/data/20230906_FS10001773_68_BTR67708-1611.csv b/sequence_processing_pipeline/tests/data/20230906_FS10001773_68_BTR67708-1611.csv new file mode 100644 index 00000000..f696f0c9 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/20230906_FS10001773_68_BTR67708-1611.csv @@ -0,0 +1,41 @@ +Sample_ID,Sample_Name,Sample_Plate,Sample_Well,Barcode_96_Well_Position,Barcode_ID,Sample_Project,Well_description,Lane +Person.A.TELLSEQ.R20.microbe,Person.A.TELLSEQ.R20.microbe,TellSeq3_15196_P3,A1,A4,C525,TellSeq3_15196_P3,Person.A.TELLSEQ.R20.microbe,1 +Person.B.TELLSEQ.R24.microbe,Person.B.TELLSEQ.R24.microbe,TellSeq3_15196_P3,B1,B4,C526,TellSeq3_15196_P3,Person.B.TELLSEQ.R24.microbe,1 +Person.C.TELLSEQ.R21.microbe,Person.C.TELLSEQ.R21.microbe,TellSeq3_15196_P3,C1,C4,C527,TellSeq3_15196_P3,Person.C.TELLSEQ.R21.microbe,1 +Person.D.TELLSEQ.R26.microbe,Person.D.TELLSEQ.R26.microbe,TellSeq3_15196_P3,D1,D4,C528,TellSeq3_15196_P3,Person.D.TELLSEQ.R26.microbe,1 +Person.E.TELLSEQ.R19.microbe,Person.E.TELLSEQ.R19.microbe,TellSeq3_15196_P3,E1,E4,C529,TellSeq3_15196_P3,Person.E.TELLSEQ.R19.microbe,1 +Pet.C.TELLSEQ.R23.microbe,Pet.C.TELLSEQ.R23.microbe,TellSeq3_15196_P3,F1,F4,C530,TellSeq3_15196_P3,Pet.C.TELLSEQ.R23.microbe,1 +BLANK.TELLSEQ.3.12.H.microbe,BLANK.TELLSEQ.3.12.H.microbe,TellSeq3_15196_P3,G1,G4,C531,TellSeq3_15196_P3,BLANK.TELLSEQ.3.12.H.microbe,1 +Isolate.115.R1.microbe,Isolate.115.R1.microbe,TellSeq3_15196_P1,H1,H4,C532,TellSeq3_15196_P3,Isolate.115.R1.microbe,1 +Zymo.Mock.Community.R1.microbe,Zymo.Mock.Community.R1.microbe,TellSeq3_15196_P1,A2,A5,C533,TellSeq3_15196_P3,Zymo.Mock.Community.R1.microbe,1 +E.coli.QC.DNA.R1.microbe,E.coli.QC.DNA.R1.microbe,TellSeq3_15196_P1,B2,B5,C534,TellSeq3_15196_P3,E.coli.QC.DNA.R1.microbe,1 +Person.A.TELLSEQ.R20.purified.microbe,Person.A.TELLSEQ.R20.purified.microbe,TellSeq3_15196_P3,C2,C5,C535,TellSeq3_15196_P3,Person.A.TELLSEQ.R20.purified.microbe,1 +Person.B.TELLSEQ.R24.purified.microbe,Person.B.TELLSEQ.R24.purified.microbe,TellSeq3_15196_P3,D2,D5,C536,TellSeq3_15196_P3,Person.B.TELLSEQ.R24.purified.microbe,1 +Person.C.TELLSEQ.R21.purified.microbe,Person.C.TELLSEQ.R21.purified.microbe,TellSeq3_15196_P3,E2,E5,C537,TellSeq3_15196_P3,Person.C.TELLSEQ.R21.purified.microbe,1 +Person.D.TELLSEQ.R26.purified.microbe,Person.D.TELLSEQ.R26.purified.microbe,TellSeq3_15196_P3,F2,F5,C538,TellSeq3_15196_P3,Person.D.TELLSEQ.R26.purified.microbe,1 +Person.E.TELLSEQ.R19.purified.microbe,Person.E.TELLSEQ.R19.purified.microbe,TellSeq3_15196_P3,G2,G5,C539,TellSeq3_15196_P3,Person.E.TELLSEQ.R19.purified.microbe,1 +Pet.C.TELLSEQ.R23.purified.microbe,Pet.C.TELLSEQ.R23.purified.microbe,TellSeq3_15196_P3,H2,H5,C540,TellSeq3_15196_P3,Pet.C.TELLSEQ.R23.purified.microbe,1 
+BLANK.TELLSEQ.3.12.H.purified.microbe,BLANK.TELLSEQ.3.12.H.purified.microbe,TellSeq3_15196_P3,A3,A6,C541,TellSeq3_15196_P3,BLANK.TELLSEQ.3.12.H.purified.microbe,1 +Isolate.115.R2.microbe,Isolate.115.R2.microbe,TellSeq3_15196_P1,B3,B6,C542,TellSeq3_15196_P3,Isolate.115.R2.microbe,1 +Zymo.Mock.Community.R2.microbe,Zymo.Mock.Community.R2.microbe,TellSeq3_15196_P1,C3,C6,C543,TellSeq3_15196_P3,Zymo.Mock.Community.R2.microbe,1 +E.coli.QC.DNA.R2.microbe,E.coli.QC.DNA.R2.microbe,TellSeq3_15196_P1,D3,D6,C544,TellSeq3_15196_P3,E.coli.QC.DNA.R2.microbe,1 +Person.A.TELLSEQ.R20.std,Person.A.TELLSEQ.R20.std,TellSeq3_15196_P3,A1,A1,C501,TellSeq3_15196,Person.A.TELLSEQ.R20.std,1 +Person.B.TELLSEQ.R24.std,Person.B.TELLSEQ.R24.std,TellSeq3_15196_P3,B1,B1,C502,TellSeq3_15196,Person.B.TELLSEQ.R24.std,1 +Person.C.TELLSEQ.R21.std,Person.C.TELLSEQ.R21.std,TellSeq3_15196_P3,C1,C1,C503,TellSeq3_15196,Person.C.TELLSEQ.R21.std,1 +Person.D.TELLSEQ.R26.std,Person.D.TELLSEQ.R26.std,TellSeq3_15196_P3,D1,D1,C504,TellSeq3_15196,Person.D.TELLSEQ.R26.std,1 +Person.E.TELLSEQ.R19.std,Person.E.TELLSEQ.R19.std,TellSeq3_15196_P3,E1,E1,C505,TellSeq3_15196,Person.E.TELLSEQ.R19.std,1 +Pet.C.TELLSEQ.R23.std,Pet.C.TELLSEQ.R23.std,TellSeq3_15196_P3,F1,F1,C506,TellSeq3_15196,Pet.C.TELLSEQ.R23.std,1 +BLANK.TELLSEQ.3.12.H.std,BLANK.TELLSEQ.3.12.H.std,TellSeq3_15196_P3,G1,G1,C507,TellSeq3_15196,BLANK.TELLSEQ.3.12.H.std,1 +Isolate.115.R1.std,Isolate.115.R1.std,TellSeq3_15196_P1,H1,H1,C508,TellSeq3_15196,Isolate.115.R1.std,1 +Zymo.Mock.Community.R1.std,Zymo.Mock.Community.R1.std,TellSeq3_15196_P1,A2,A2,C509,TellSeq3_15196,Zymo.Mock.Community.R1.std,1 +E.coli.QC.DNA.R1.std,E.coli.QC.DNA.R1.std,TellSeq3_15196_P1,B2,B2,C510,TellSeq3_15196,E.coli.QC.DNA.R1.std,1 +Person.A.TELLSEQ.R20.purified.std,Person.A.TELLSEQ.R20.purified.std,TellSeq3_15196_P3,C2,C2,C511,TellSeq3_15196,Person.A.TELLSEQ.R20.purified.std,1 +Person.B.TELLSEQ.R24.purified.std,Person.B.TELLSEQ.R24.purified.std,TellSeq3_15196_P3,D2,D2,C512,TellSeq3_15196,Person.B.TELLSEQ.R24.purified.std,1 +Person.C.TELLSEQ.R21.purified.std,Person.C.TELLSEQ.R21.purified.std,TellSeq3_15196_P3,E2,E2,C513,TellSeq3_15196,Person.C.TELLSEQ.R21.purified.std,1 +Person.D.TELLSEQ.R26.purified.std,Person.D.TELLSEQ.R26.purified.std,TellSeq3_15196_P3,F2,F2,C514,TellSeq3_15196,Person.D.TELLSEQ.R26.purified.std,1 +Person.E.TELLSEQ.R19.purified.std,Person.E.TELLSEQ.R19.purified.std,TellSeq3_15196_P3,G2,G2,C515,TellSeq3_15196,Person.E.TELLSEQ.R19.purified.std,1 +Pet.C.TELLSEQ.R23.purified.std,Pet.C.TELLSEQ.R23.purified.std,TellSeq3_15196_P3,H2,H2,C516,TellSeq3_15196,Pet.C.TELLSEQ.R23.purified.std,1 +BLANK.TELLSEQ.3.12.H.purified.std,BLANK.TELLSEQ.3.12.H.purified.std,TellSeq3_15196_P3,A3,A3,C517,TellSeq3_15196,BLANK.TELLSEQ.3.12.H.purified.std,1 +Isolate.115.R2.std,Isolate.115.R2.std,TellSeq3_15196_P1,B3,B3,C518,TellSeq3_15196,Isolate.115.R2.std,1 +Zymo.Mock.Community.R2.std,Zymo.Mock.Community.R2.std,TellSeq3_15196_P1,C3,C3,C519,TellSeq3_15196,Zymo.Mock.Community.R2.std,1 +E.coli.QC.DNA.R2.std,E.coli.QC.DNA.R2.std,TellSeq3_15196_P1,D3,D3,C520,TellSeq3_15196,E.coli.QC.DNA.R2.std,1 diff --git a/sequence_processing_pipeline/tests/data/aggregate_counts_results.json b/sequence_processing_pipeline/tests/data/aggregate_counts_results.json new file mode 100644 index 00000000..1cae0f05 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/aggregate_counts_results.json @@ -0,0 +1,36 @@ +{ + "REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full": { + "TellReadJob_I1_C520.fastq.gz.erroneous.fastq": { + 
"seq_counts": 2139633, + "base_pairs": 38513394 + }, + "TellReadJob_R1_C519.fastq.gz.corrected.err_barcode_removed.fastq": { + "seq_counts": 64464162, + "base_pairs": 8345327641 + }, + "TellReadJob_R1_C520.fastq.gz.corrected.err_barcode_removed.fastq": { + "seq_counts": 70399028, + "base_pairs": 9293296513 + }, + "TellReadJob_I1_C519.fastq.gz.erroneous.fastq": { + "seq_counts": 1932116, + "base_pairs": 34778088 + }, + "TellReadJob_I1_C519.fastq.gz.corrected.err_barcode_removed.fastq": { + "seq_counts": 64464162, + "base_pairs": 1160354916 + }, + "TellReadJob_R2_C519.fastq.gz.corrected.err_barcode_removed.fastq": { + "seq_counts": 64464162, + "base_pairs": 8370238082 + }, + "TellReadJob_R2_C520.fastq.gz.corrected.err_barcode_removed.fastq": { + "seq_counts": 70399028, + "base_pairs": 9317943166 + }, + "TellReadJob_I1_C520.fastq.gz.corrected.err_barcode_removed.fastq": { + "seq_counts": 70399028, + "base_pairs": 1267182504 + } + } +} diff --git a/sequence_processing_pipeline/tests/data/configuration_profiles/iseq_metagenomic.json b/sequence_processing_pipeline/tests/data/configuration_profiles/iseq_metagenomic.json index 089e82f1..c82c76b0 100644 --- a/sequence_processing_pipeline/tests/data/configuration_profiles/iseq_metagenomic.json +++ b/sequence_processing_pipeline/tests/data/configuration_profiles/iseq_metagenomic.json @@ -3,6 +3,25 @@ "instrument_type": "iseq", "assay_type": "Metagenomic", "configuration": { + "tell-seq": { + "label": "my_label", + "reference_base": "", + "reference_map": "", + "sing_script_path": "/my_path/tellread-release-novaseqX/run_tellread_sing.sh", + "nodes": 1, + "lane": 1, + "sample_index_list": "/my_path/sample_index_list_1.txt", + "queue": "qiita", + "wallclock_time_in_minutes": 1440, + "modules_to_load": ["singularity_3.6.4"], + "integrate_script_path": "/my_path/integrate-indices-np.py", + "tellread_mem_limit": "16", + "tellread_cores": "4", + "normcount_cores": "1", + "integrate_cores": "1", + "normcount_mem_limit": "8", + "integrate_mem_limit": "8" + }, "bcl2fastq": { "nodes": 1, "nprocs": 16, diff --git a/sequence_processing_pipeline/tests/data/fake_sample_index_list.txt b/sequence_processing_pipeline/tests/data/fake_sample_index_list.txt new file mode 100644 index 00000000..1c4345ef --- /dev/null +++ b/sequence_processing_pipeline/tests/data/fake_sample_index_list.txt @@ -0,0 +1,96 @@ +CCCCCACCAA C501 NONE PE +AACCCCCACA C502 NONE PE +CCAACACACC C503 NONE PE +AACACCCCCA C504 NONE PE +CAAAACCCCC C505 NONE PE +ACACACCACC C506 NONE PE +AACCCACACC C507 NONE PE +CAAAAAAAAA C508 NONE PE +AAACCACCCC C509 NONE PE +ACACCCCCCC C510 NONE PE +AAACACCACA C511 NONE PE +CAAAACCCCA C512 NONE PE +ACCCCAACCC C513 NONE PE +CAACCAACAC C514 NONE PE +CCCCCACCCA C515 NONE PE +CCAACCCCCA C516 NONE PE +CAAAACACCC C517 NONE PE +ACACACCAAA C518 NONE PE +CCACCAAAAA C519 NONE PE +AAACCCCCCC C520 NONE PE +AACAACCCCA C521 NONE PE +CAACAACAAC C522 NONE PE +CACCACCAAA C523 NONE PE +CACAACAAAC C524 NONE PE +AACACCCACC C525 NONE PE +CAAAACACAA C526 NONE PE +AAAAAAAAAA C527 NONE PE +CCAACCCCCA C528 NONE PE +CAACCCCAAA C529 NONE PE +ACCCAACCCA C530 NONE PE +CACACCAAAC C531 NONE PE +CAACAAAAAC C532 NONE PE +CCAAAAAAAC C533 NONE PE +ACCAACACAC C534 NONE PE +CCAAAACACC C535 NONE PE +CACCCAAACC C536 NONE PE +CAAACCAAAC C537 NONE PE +CACAAACACA C538 NONE PE +ACCCACCCCC C539 NONE PE +AACCACACAC C540 NONE PE +CACCCCCACA C541 NONE PE +CACAACCACC C542 NONE PE +AAAAACAAAA C543 NONE PE +CCACACAAAC C544 NONE PE +AAACAAACAC C545 NONE PE +ACCCCCACCC C546 NONE PE +ACACCCCAAA C547 NONE PE +CAAACCAAAC 
C548 NONE PE +AAACCACCAA C549 NONE PE +CAACCAAAAC C550 NONE PE +ACACCCCCCC C551 NONE PE +CACCCACCAC C552 NONE PE +ACCCCAACCC C553 NONE PE +AAACCCAACA C554 NONE PE +ACCACACAAA C555 NONE PE +ACCCACCACC C556 NONE PE +CCCCAACCAA C557 NONE PE +CAAAAACACC C558 NONE PE +ACCACAAAAC C559 NONE PE +ACCCCCCCAA C560 NONE PE +CCACAAAACA C561 NONE PE +CAAACCCACC C562 NONE PE +ACACCACAAA C563 NONE PE +ACCAACAAAA C564 NONE PE +CCCAACAAAA C565 NONE PE +CACCCCCCCA C566 NONE PE +AAACCCACCA C567 NONE PE +CACACCACAA C568 NONE PE +CCAAACCCCA C569 NONE PE +CACCCCACCC C570 NONE PE +AAACCCCCAA C571 NONE PE +ACACCACACC C572 NONE PE +ACAAAACACC C573 NONE PE +CACAAACCAC C574 NONE PE +ACCCCCACAA C575 NONE PE +CCCCAAACCC C576 NONE PE +AAAACACAAC C577 NONE PE +AACCCAACCA C578 NONE PE +AAACACACAC C579 NONE PE +AAACACCACC C580 NONE PE +AACCCCAACA C581 NONE PE +CCACCCAAAC C582 NONE PE +CCAAAACAAC C583 NONE PE +ACCAACAAAC C584 NONE PE +AAACACCACC C585 NONE PE +AACCACACAC C586 NONE PE +CACAACAAAA C587 NONE PE +AACCAAAAAC C588 NONE PE +ACCAAACCAA C589 NONE PE +ACAAACACAC C590 NONE PE +ACCACACCAA C591 NONE PE +AAAAACAACC C592 NONE PE +CACACAACAC C593 NONE PE +CCCCCAACCC C594 NONE PE +ACACAAAACC C595 NONE PE +CCACCACACC C596 NONE PE diff --git a/sequence_processing_pipeline/tests/data/files_to_count.txt b/sequence_processing_pipeline/tests/data/files_to_count.txt new file mode 100644 index 00000000..8d7ce4b1 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/files_to_count.txt @@ -0,0 +1,8 @@ +/ddn_scratch/qiita_t/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_I1_C519.fastq.gz.corrected.err_barcode_removed.fastq +/ddn_scratch/qiita_t/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_I1_C519.fastq.gz.erroneous.fastq +/ddn_scratch/qiita_t/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_I1_C520.fastq.gz.corrected.err_barcode_removed.fastq +/ddn_scratch/qiita_t/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_I1_C520.fastq.gz.erroneous.fastq +/ddn_scratch/qiita_t/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C519.fastq.gz.corrected.err_barcode_removed.fastq +/ddn_scratch/qiita_t/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C520.fastq.gz.corrected.err_barcode_removed.fastq +/ddn_scratch/qiita_t/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R2_C519.fastq.gz.corrected.err_barcode_removed.fastq +/ddn_scratch/qiita_t/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R2_C520.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts.sbatch b/sequence_processing_pipeline/tests/data/seq_counts.sbatch new file mode 100644 index 00000000..cc73187c --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts.sbatch @@ -0,0 +1,25 @@ +#!/bin/bash -l +#SBATCH -J seq_counts +#SBATCH --time 1440 +#SBATCH --mem 8G +#SBATCH -N 1 +#SBATCH -c 1 +#SBATCH -p qiita +#SBATCH --array=1-8 + +#SBATCH --output sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/SeqCountsJob/logs/%x_%A_%a.out +#SBATCH --error sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/SeqCountsJob/logs/%x_%A_%a.err + +set -x +set -e + +mkdir -p sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/SeqCountsJob/logs + +files=($(cat 
sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/SeqCountsJob/files_to_count.txt)) +my_file=${files[$((${SLURM_ARRAY_TASK_ID} - 1))]} + +echo "${my_file}" + +conda activate qp-knight-lab-processing-2022.03 + +seqtk size ${my_file} diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_1.err b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_1.err new file mode 100644 index 00000000..47c59651 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_1.err @@ -0,0 +1,3 @@ +This is an example .err file produced by seq_counts.sbatch. +Additional details removed. ++ seqtk size REMOVED/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C519.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_1.out b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_1.out new file mode 100644 index 00000000..50a46674 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_1.out @@ -0,0 +1,2 @@ +REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C519.fastq.gz.corrected.err_barcode_removed.fastq +64464162 8345327641 diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_2.err b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_2.err new file mode 100644 index 00000000..47c59651 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_2.err @@ -0,0 +1,3 @@ +This is an example .err file produced by seq_counts.sbatch. +Additional details removed. ++ seqtk size REMOVED/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C519.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_2.out b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_2.out new file mode 100644 index 00000000..87ad9f55 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_2.out @@ -0,0 +1,2 @@ +REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C520.fastq.gz.corrected.err_barcode_removed.fastq +70399028 9293296513 diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_3.err b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_3.err new file mode 100644 index 00000000..e9c0cf9d --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_3.err @@ -0,0 +1,3 @@ +This is an example .err file produced by seq_counts.sbatch. +Additional details removed. 
++ seqtk size REMOVED/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C520.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_3.out b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_3.out new file mode 100644 index 00000000..a22d9f8d --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_3.out @@ -0,0 +1,2 @@ +REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_I1_C519.fastq.gz.erroneous.fastq +1932116 34778088 diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_4.err b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_4.err new file mode 100644 index 00000000..e9c0cf9d --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_4.err @@ -0,0 +1,3 @@ +This is an example .err file produced by seq_counts.sbatch. +Additional details removed. ++ seqtk size REMOVED/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C520.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_4.out b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_4.out new file mode 100644 index 00000000..0b35614a --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_4.out @@ -0,0 +1,2 @@ +REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R2_C520.fastq.gz.corrected.err_barcode_removed.fastq +70399028 9317943166 diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_5.err b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_5.err new file mode 100644 index 00000000..47c59651 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_5.err @@ -0,0 +1,3 @@ +This is an example .err file produced by seq_counts.sbatch. +Additional details removed. ++ seqtk size REMOVED/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C519.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_5.out b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_5.out new file mode 100644 index 00000000..887522ae --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_5.out @@ -0,0 +1,2 @@ +REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_I1_C520.fastq.gz.corrected.err_barcode_removed.fastq +70399028 1267182504 diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_6.err b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_6.err new file mode 100644 index 00000000..e9c0cf9d --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_6.err @@ -0,0 +1,3 @@ +This is an example .err file produced by seq_counts.sbatch. +Additional details removed. 
++ seqtk size REMOVED/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C520.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_6.out b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_6.out new file mode 100644 index 00000000..a4fbd555 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_6.out @@ -0,0 +1,2 @@ +REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R2_C519.fastq.gz.corrected.err_barcode_removed.fastq +64464162 8370238082 diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_7.err b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_7.err new file mode 100644 index 00000000..47c59651 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_7.err @@ -0,0 +1,3 @@ +This is an example .err file produced by seq_counts.sbatch. +Additional details removed. ++ seqtk size REMOVED/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C519.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_7.out b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_7.out new file mode 100644 index 00000000..6c6a9c06 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_7.out @@ -0,0 +1,2 @@ +REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_I1_C519.fastq.gz.corrected.err_barcode_removed.fastq +64464162 1160354916 diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_8.err b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_8.err new file mode 100644 index 00000000..e9c0cf9d --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_8.err @@ -0,0 +1,3 @@ +This is an example .err file produced by seq_counts.sbatch. +Additional details removed. 
++ seqtk size REMOVED/working_dir/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_R1_C520.fastq.gz.corrected.err_barcode_removed.fastq diff --git a/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_8.out b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_8.out new file mode 100644 index 00000000..9be52329 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/seq_counts_logs/seq_count_2679966_8.out @@ -0,0 +1,2 @@ +REMOVED/8edbdee2-da52-4278-af40-267185bbcd7e/TellReadJob/Full/TellReadJob_I1_C520.fastq.gz.erroneous.fastq +2139633 38513394 diff --git a/sequence_processing_pipeline/tests/data/tellseq_metag_dummy_sample_sheet.csv b/sequence_processing_pipeline/tests/data/tellseq_metag_dummy_sample_sheet.csv new file mode 100644 index 00000000..105330fd --- /dev/null +++ b/sequence_processing_pipeline/tests/data/tellseq_metag_dummy_sample_sheet.csv @@ -0,0 +1,135 @@ +[Header],,,,,,,, +IEMFileVersion,1,,,,,,, +SheetType,tellseq_metag,,,,,,, +SheetVersion,10,,,,,,, +Investigator Name,Knight,,,,,,, +Experiment Name,RKL0151,,,,,,, +Date,5/6/24,,,,,,, +Workflow,GenerateFASTQ,,,,,,, +Application,FASTQ Only,,,,,,, +Assay,Metagenomic,,,,,,, +Description,,,,,,,, +Chemistry,Default,,,,,,, +,,,,,,,, +[Reads],,,,,,,, +151,,,,,,,, +151,,,,,,,, +,,,,,,,, +[Settings],,,,,,,, +ReverseComplement,0,,,,,,, +,,,,,,,, +[Data],,,,,,,, +Sample_ID,Sample_Name,Sample_Plate,well_id_384,barcode_id,Sample_Project,Well_description,Lane, +LS_8_10_2013_SRE,LS.8.10.2013.SRE,LS_Donor_SS_Samples_P1,A1,C501,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.10.2013.SRE,1, +LS_12_17_2014_SRE,LS.12.17.2014.SRE,LS_Donor_SS_Samples_P1,B1,C509,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.17.2014.SRE,1, +LS_4_4_2015_SRE,LS.4.4.2015.SRE,LS_Donor_SS_Samples_P1,C1,C502,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.4.2015.SRE,1, +LS_2_23_2015_SRE,LS.2.23.2015.SRE,LS_Donor_SS_Samples_P1,D1,C510,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.23.2015.SRE,1, +LS_9_28_2014_SRE,LS.9.28.2014.SRE,LS_Donor_SS_Samples_P1,E1,C503,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.9.28.2014.SRE,1, +LS_12_14_2013_SRE,LS.12.14.2013.SRE,LS_Donor_SS_Samples_P1,F1,C511,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.14.2013.SRE,1, +LS_4_7_2013_SRE,LS.4.7.2013.SRE,LS_Donor_SS_Samples_P1,G1,C504,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.7.2013.SRE,1, +LS_7_14_2013_SRE,LS.7.14.2013.SRE,LS_Donor_SS_Samples_P1,H1,C512,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.14.2013.SRE,1, +LS_10_27_2013_SRE,LS.10.27.2013.SRE,LS_Donor_SS_Samples_P1,I1,C505,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.10.27.2013.SRE,1, +LS_1_19_2014_SRE,LS.1.19.2014.SRE,LS_Donor_SS_Samples_P1,J1,C513,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.19.2014.SRE,1, +LS_9_3_2013_SRE,LS.9.3.2013.SRE,LS_Donor_SS_Samples_P1,K1,C506,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.9.3.2013.SRE,1, +LS_2_25_2013_SRE,LS.2.25.2013.SRE,LS_Donor_SS_Samples_P1,L1,C514,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.25.2013.SRE,1, +LS_7_26_2015_SRE,LS.7.26.2015.SRE,LS_Donor_SS_Samples_P1,M1,C507,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.26.2015.SRE,1, +LS_2_17_2014_SRE,LS.2.17.2014.SRE,LS_Donor_SS_Samples_P1,N1,C515,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.17.2014.SRE,1, +LS_6_29_2015_SRE,LS.6.29.2015.SRE,LS_Donor_SS_Samples_P1,O1,C508,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.29.2015.SRE,1, 
+LS_3_24_2015_SRE,LS.3.24.2015.SRE,LS_Donor_SS_Samples_P1,P1,C516,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.24.2015.SRE,1, +LS_1_6_2015_SRE,LS.1.6.2015.SRE,LS_Donor_SS_Samples_P1,A2,C517,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.6.2015.SRE,1, +T_LS_7_15_15B_SRE,T.LS.7.15.15B.SRE,LS_Donor_SS_Samples_P1,B2,C525,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.15.15B.SRE,1, +LS_6_9_2013_SRE,LS.6.9.2013.SRE,LS_Donor_SS_Samples_P1,C2,C518,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.9.2013.SRE,1, +Person A_SRE,Person A.SRE,LS_Donor_SS_Samples_P1,D2,C526,Tellseq_Shortread_Metagenomic_Analysis_10283,Person A.SRE,1, +LS_8_22_2014_R2_SRE,LS.8.22.2014.R2.SRE,LS_Donor_SS_Samples_P1,E2,C519,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.22.2014.R2.SRE,1, +Person B_SRE,Person B.SRE,LS_Donor_SS_Samples_P1,F2,C527,Tellseq_Shortread_Metagenomic_Analysis_10283,Person B.SRE,1, +LS_8_22_2014_R1_SRE,LS.8.22.2014.R1.SRE,LS_Donor_SS_Samples_P1,G2,C520,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.22.2014.R1.SRE,1, +Person C_SRE,Person C.SRE,LS_Donor_SS_Samples_P1,H2,C528,Tellseq_Shortread_Metagenomic_Analysis_10283,Person C.SRE,1, +LS_12_28_2011_SRE,LS.12.28.2011.SRE,LS_Donor_SS_Samples_P1,I2,C521,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.12.28.2011.SRE,1, +Person D_SRE,Person D.SRE,LS_Donor_SS_Samples_P1,J2,C529,Tellseq_Shortread_Metagenomic_Analysis_10283,Person D.SRE,1, +LS_5_4_2014_SRE,LS.5.4.2014.SRE,LS_Donor_SS_Samples_P1,K2,C522,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.5.4.2014.SRE,1, +45208_1_1,45208.1.1,UROBIOME_TEST_MF_SAMPLES_P2,L2,C530,Tellseq_Shortread_Metagenomic_Analysis_10283,45208.1.1,1, +LS_11_6_2012_SRE,LS.11.6.2012.SRE,LS_Donor_SS_Samples_P1,M2,C523,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.6.2012.SRE,1, +45248_2_2,45248.2.2,UROBIOME_TEST_MF_SAMPLES_P2,N2,C531,Tellseq_Shortread_Metagenomic_Analysis_10283,45248.2.2,1, +LS_4_3_2012_SRE,LS.4.3.2012.SRE,LS_Donor_SS_Samples_P1,O2,C524,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.3.2012.SRE,1, +45261_2_1,45261.2.1,UROBIOME_TEST_MF_SAMPLES_P2,P2,C532,Tellseq_Shortread_Metagenomic_Analysis_10283,45261.2.1,1, +45272_11_2,45272.11.2,UROBIOME_TEST_MF_SAMPLES_P2,A3,C533,Tellseq_Shortread_Metagenomic_Analysis_10283,45272.11.2,1, +T_LS_7_12_15A,T.LS.7.12.15A,Larry_Smarr_Plus_Donor_Samples_P3,B3,C541,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.12.15A,1, +45316_8_1,45316.8.1,UROBIOME_TEST_MF_SAMPLES_P2,C3,C534,Tellseq_Shortread_Metagenomic_Analysis_10283,45316.8.1,1, +T_LS_7_8_15A,T.LS.7.8.15A,Larry_Smarr_Plus_Donor_Samples_P3,D3,C542,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.8.15A,1, +45327_7_2,45327.7.2,UROBIOME_TEST_MF_SAMPLES_P2,E3,C535,Tellseq_Shortread_Metagenomic_Analysis_10283,45327.7.2,1, +LS_8_10_2013,LS.8.10.2013,LS_Time_Series_ABSQ_P4,F3,C543,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.10.2013,1, +45272_1_swab_2,45272.1.swab.2,UROBIOME_TEST_MF_SAMPLES_P2,G3,C536,Tellseq_Shortread_Metagenomic_Analysis_10283,45272.1.swab.2,1, +LS_6_29_2015,LS.6.29.2015,LS_Time_Series_ABSQ_P4,H3,C544,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.29.2015,1, +45326_1_swab_2,45326.1.swab.2,UROBIOME_TEST_MF_SAMPLES_P2,I3,C537,Tellseq_Shortread_Metagenomic_Analysis_10283,45326.1.swab.2,1, +LS_3_8_2015,LS.3.8.2015,LS_Time_Series_ABSQ_P4,J3,C545,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.8.2015,1, +T_LS_7_19_15A,T.LS.7.19.15A,Larry_Smarr_Plus_Donor_Samples_P3,K3,C538,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.19.15A,1, 
+LS_4_29_2013,LS.4.29.2013,LS_Time_Series_ABSQ_P4,L3,C546,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.4.29.2013,1, +T_LS_7_15_15B,T.LS.7.15.15B,Larry_Smarr_Plus_Donor_Samples_P3,M3,C539,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.15.15B,1, +LS_11_16_2014,LS.11.16.2014,LS_Time_Series_ABSQ_P4,N3,C547,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.16.2014,1, +T_LS_7_19_15B,T.LS.7.19.15B,Larry_Smarr_Plus_Donor_Samples_P3,O3,C540,Tellseq_Shortread_Metagenomic_Analysis_10283,T.LS.7.19.15B,1, +LS_1_19_2014,LS.1.19.2014,LS_Time_Series_ABSQ_P4,P3,C548,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.19.2014,1, +LS_3_24_2015,LS.3.24.2015,LS_Time_Series_ABSQ_P4,A4,C549,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.24.2015,1, +LS_2_8_2013,LS.2.8.2013,LS_Time_Series_ABSQ_P4,B4,C557,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.2.8.2013,1, +LS_11_10_2013,LS.11.10.2013,LS_Time_Series_ABSQ_P4,C4,C550,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.11.10.2013,1, +Marine_Sediment_0_2cm_R1,Marine.Sediment.0.2cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,D4,C558,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.0.2cm.R1,1, +LS_3_23_2014,LS.3.23.2014,LS_Time_Series_ABSQ_P4,E4,C551,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.3.23.2014,1, +Marine_Sediment_5_7cm_R1,Marine.Sediment.5.7cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,F4,C559,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.5.7cm.R1,1, +LS_1_14_2015,LS.1.14.2015,LS_Time_Series_ABSQ_P4,G4,C552,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.14.2015,1, +Marine_Sediment_10_12cm_R2,Marine.Sediment.10.12cm.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,H4,C560,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.10.12cm.R2,1, +LS_8_25_2014,LS.8.25.2014,LS_Time_Series_ABSQ_P4,I4,C553,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.8.25.2014,1, +Marine_Sediment_15_17cm_R1,Marine.Sediment.15.17cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,J4,C561,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.15.17cm.R1,1, +LS_1_26_2013,LS.1.26.2013,LS_Time_Series_ABSQ_P4,K4,C554,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.1.26.2013,1, +Marine_Sediment_20_22cm_R1,Marine.Sediment.20.22cm.R1,MarineSediment_Donor_LarrySmarr_NoProK_P5,L4,C562,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.20.22cm.R1,1, +LS_6_16_2014,LS.6.16.2014,LS_Time_Series_ABSQ_P4,M4,C555,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.6.16.2014,1, +Marine_Sediment_25_27cm_R2,Marine.Sediment.25.27cm.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,N4,C563,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.25.27cm.R2,1, +LS_7_27_2014,LS.7.27.2014,LS_Time_Series_ABSQ_P4,O4,C556,Tellseq_Shortread_Metagenomic_Analysis_10283,LS.7.27.2014,1, +Marine_Sediment_30_32cm_R3,Marine.Sediment.30.32cm.R3,MarineSediment_Donor_LarrySmarr_NoProK_P5,P4,C564,Tellseq_Shortread_Metagenomic_Analysis_10283,Marine.Sediment.30.32cm.R3,1, +Person_A_R3,Person.A.R3,MarineSediment_Donor_LarrySmarr_NoProK_P5,A5,C565,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.A.R3,1, +Soil_SynCom_T4_2_Tube5,Soil.SynCom.T4.2.Tube5,16_member_community_native_soil_P6,B5,C573,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T4.2.Tube5,1, +Person_B_R2,Person.B.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,C5,C566,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.B.R2,1, +A21,A21,Tumor_Community_P7,D5,C574,Tellseq_Shortread_Metagenomic_Analysis_10283,A21,1, 
+Person_C_R4,Person.C.R4,MarineSediment_Donor_LarrySmarr_NoProK_P5,E5,C567,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.C.R4,1, +A23,A23,Tumor_Community_P7,F5,C575,Tellseq_Shortread_Metagenomic_Analysis_10283,A23,1, +Person_D_R2,Person.D.R2,MarineSediment_Donor_LarrySmarr_NoProK_P5,G5,C568,Tellseq_Shortread_Metagenomic_Analysis_10283,Person.D.R2,1, +A27,A27,Tumor_Community_P7,H5,C576,Tellseq_Shortread_Metagenomic_Analysis_10283,A27,1, +Soil_SynCom_T1_2_Tube1,Soil.SynCom.T1.2.Tube1,16_member_community_native_soil_P6,I5,C569,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T1.2.Tube1,1, +A30,A30,Tumor_Community_P7,J5,C577,Tellseq_Shortread_Metagenomic_Analysis_10283,A30,1, +Soil _SynCom_T2_2_Tube2,Soil .SynCom.T2.2.Tube2,16_member_community_native_soil_P6,K5,C570,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil .SynCom.T2.2.Tube2,1, +A31,A31,Tumor_Community_P7,L5,C578,Tellseq_Shortread_Metagenomic_Analysis_10283,A31,1, +Soil_SynCom_T3_2_Tube3,Soil.SynCom.T3.2.Tube3,16_member_community_native_soil_P6,M5,C571,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T3.2.Tube3,1, +S1_T1_A,S1.T1.A,Tumor_Community_P7,N5,C579,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.A,1, +Soil_SynCom_T4_1_Tube4,Soil.SynCom.T4.1.Tube4,16_member_community_native_soil_P6,O5,C572,Tellseq_Shortread_Metagenomic_Analysis_10283,Soil.SynCom.T4.1.Tube4,1, +S2_T1_B_A,S2.T1.B.A,Tumor_Community_P7,P5,C580,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.B.A,1, +S2_T1_01BH1_Y_A,S2.T1.01BH1.Y.A,Tumor_Community_P7,A6,C581,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.01BH1.Y.A,1, +S1_T1_1CIM_A,S1.T1.1CIM.A,Tumor_Community_P7,B6,C589,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.1CIM.A,1, +S2_MT1_1HBI_Y_A,S2.MT1.1HBI.Y.A,Tumor_Community_P7,C6,C582,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.MT1.1HBI.Y.A,1, +S1_M1_B_1CIM_A,S1.M1.B.1CIM.A,Tumor_Community_P7,D6,C590,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.M1.B.1CIM.A,1, +S1_T1_B_LBM_A,S1.T1.B.LBM.A,Tumor_Community_P7,E6,C583,Tellseq_Shortread_Metagenomic_Analysis_10283,S1.T1.B.LBM.A,1, +BLANK_K15_cancer_patient,BLANK.K15.cancer.patient,Tumor_Community_P7,F6,C591,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.K15.cancer.patient,1, +S2_MT1_LBM_A,S2.MT1.LBM.A,Tumor_Community_P7,G6,C584,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.MT1.LBM.A,1, +BLANK_M15_cancer_patient,BLANK.M15.cancer.patient,Tumor_Community_P7,H6,C592,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.M15.cancer.patient,1, +S2_T1_A,S2.T1.A,Tumor_Community_P7,I6,C585,Tellseq_Shortread_Metagenomic_Analysis_10283,S2.T1.A,1, +BLANK_O15_cancer_patient,BLANK.O15.cancer.patient,Tumor_Community_P7,J6,C593,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.O15.cancer.patient,1, +1CIM_M_CNTL_A,1CIM.M.CNTL.A,Tumor_Community_P7,K6,C586,Tellseq_Shortread_Metagenomic_Analysis_10283,1CIM.M.CNTL.A,1, +BLANK_A17_cancer_patient,BLANK.A17.cancer.patient,Tumor_Community_P7,L6,C594,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.A17.cancer.patient,1, +1CIM_G_CNTL_A,1CIM.G.CNTL.A,Tumor_Community_P7,M6,C587,Tellseq_Shortread_Metagenomic_Analysis_10283,1CIM.G.CNTL.A,1, +BLANK_C17_cancer_patient,BLANK.C17.cancer.patient,Tumor_Community_P7,N6,C595,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.C17.cancer.patient,1, +GC_1HCOM_A,GC.1HCOM.A,Tumor_Community_P7,O6,C588,Tellseq_Shortread_Metagenomic_Analysis_10283,GC.1HCOM.A,1, +BLANK_E17_cancer_patient,BLANK.E17.cancer.patient,Tumor_Community_P7,P6,C596,Tellseq_Shortread_Metagenomic_Analysis_10283,BLANK.E17.cancer.patient,1, +,,,,,,,, 
+[Bioinformatics],,,,,,,, +Sample_Project,QiitaID,BarcodesAreRC,ForwardAdapter,ReverseAdapter,HumanFiltering,library_construction_protocol,experiment_design_description,contains_replicates +Tellseq_Shortread_Metagenomic_Analysis_10283,10283,TRUE,GATCGGAAGAGCACACGTCTGAACTCCAGTCAC,GATCGGAAGAGCGTCGTGTAGGGAAAGGAGTGT,TRUE,tellseq,tellseq metagenomics,FALSE +,,,,,,,, +[Contact],,,,,,,, +Sample_Project,Email,,,,,,, +Tellseq_Shortread_Metagenomic_Analysis_10283,cbrenchy@gmail.com,,,,,,, +,,,,,,,, +[SampleContext],,,,,,,, +sample_name,sample_type,primary_qiita_study,secondary_qiita_studies,,,,, +BLANK.K15.cancer.patient,control blank,10283,,,,,, +BLANK.M15.cancer.patient,control blank,10283,,,,,, +BLANK.O15.cancer.patient,control blank,10283,,,,,, +BLANK.A17.cancer.patient,control blank,10283,,,,,, +BLANK.C17.cancer.patient,control blank,10283,,,,,, +BLANK.E17.cancer.patient,control blank,10283,,,,,, \ No newline at end of file diff --git a/sequence_processing_pipeline/tests/data/tellseq_output/integrate_test.sbatch b/sequence_processing_pipeline/tests/data/tellseq_output/integrate_test.sbatch new file mode 100644 index 00000000..f7a53198 --- /dev/null +++ b/sequence_processing_pipeline/tests/data/tellseq_output/integrate_test.sbatch @@ -0,0 +1,67 @@ +#!/bin/bash -l +#SBATCH -J integrate +#SBATCH --time 96:00:00 +#SBATCH --mem 16G +#SBATCH -N 1 +#SBATCH -c 4 +#SBATCH -p qiita +#SBATCH --array=1-96 + +#SBATCH --output sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/logs/integrate_%x_%A_%a.out +#SBATCH --error sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/logs/integrate_%x_%A_%a.err + +set -x +set -e + +samples=($(cat sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/sample_index_list.txt | cut -f 2)) +sample=${samples[$((${SLURM_ARRAY_TASK_ID} - 1))]} + +export TMPDIR=sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/tmp + +# get list of samples and determine which sample this array instance will work +# on. +samples=($(cat sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/sample_index_list.txt | cut -f 2)) +sample=${samples[$((${SLURM_ARRAY_TASK_ID} - 1))]} + +echo "Processing sample ${sample}..." + +# make temp directory +export TMPDIR=sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/tmp +mkdir -p $TMPDIR + + +# TODO: All three input files must be non-zero in length. +# If possible, do this check as part of normal FSR operation. +# Previously this was done right here BEFORE integrating, rather +# than after. + +# NB: non-zero file-length check removed for now. This should be performed +# by FSR after processing is done. 
+# TODO: Make sure raw_fastq_dir is TellReadJob/Full +r1_in=sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TellReadJob/Full/TellReadJob_R1_${sample}.fastq.gz.corrected.err_barcode_removed.fastq +r2_in=sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TellReadJob/Full/TellReadJob_R2_${sample}.fastq.gz.corrected.err_barcode_removed.fastq +i1_in=sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TellReadJob/Full/TellReadJob_I1_${sample}.fastq.gz.corrected.err_barcode_removed.fastq + +# create output directory +mkdir -p sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/integrated + +# generate output file names +r1_out=sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/integrated/${sample}.R1.fastq.gz +r2_out=sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/integrated/${sample}.R2.fastq.gz +i1_out=sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TRIntegrateJob/integrated/${sample}.I1.fastq.gz + +# generate 'integrated' I1 fastq.gz file. We do this as part of each array so +# they're done in parallel. +gzip -c ${i1_in} > ${i1_out} + +# generate integrated R1 and R2 fastq.gz files. +conda activate qp-knight-lab-processing-2022.03 + +python sequence_processing_pipeline/contrib/integrate-indices-np.py integrate \ +--no-sort \ +--r1-in ${r1_in} \ +--r2-in ${r2_in} \ +--i1-in ${i1_in} \ +--r1-out ${r1_out} \ +--r2-out ${r2_out} \ +--threads 4 \ No newline at end of file diff --git a/sequence_processing_pipeline/tests/data/tellseq_output/tellread_test.sbatch b/sequence_processing_pipeline/tests/data/tellseq_output/tellread_test.sbatch new file mode 100644 index 00000000..9dc3ccff --- /dev/null +++ b/sequence_processing_pipeline/tests/data/tellseq_output/tellread_test.sbatch @@ -0,0 +1,47 @@ +#!/bin/bash -l +#SBATCH -J tellread +#SBATCH -p qiita +#SBATCH -N 1 +#SBATCH -c 4 +#SBATCH --mem 16G +#SBATCH --time 96:00:00 + +#SBATCH --output sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TellReadJob/logs/tellread_%x-%A.out +#SBATCH --error sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TellReadJob/logs/tellread_%x-%A.err + +set -x + +module load singularity_3.6.4 +$HOME/qiita-spots/tellread-release-novaseqX/run_tellread_sing.sh \ + -i sequence_processing_pipeline/tests/data/sample_run_directories/150629_SN1001_0511_AH5L7GBCXX \ + -o sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TellReadJob \ + -s $(echo C501,C509,C502,C510,C503,C511,C504,C512,C505,C513,C506,C514,C507,C515,C508,C516,C517,C525,C518,C526,C519,C527,C520,C528,C521,C529,C522,C530,C523,C531,C524,C532,C533,C541,C534,C542,C535,C543,C536,C544,C537,C545,C538,C546,C539,C547,C540,C548,C549,C557,C550,C558,C551,C559,C552,C560,C553,C561,C554,C562,C555,C563,C556,C564,C565,C573,C566,C574,C567,C575,C568,C576,C569,C577,C570,C578,C571,C579,C572,C580,C581,C589,C582,C590,C583,C591,C584,C592,C585,C593,C586,C594,C587,C595,C588,C596 | tr -d '"') \ + -g $(echo 
NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE,NONE | tr -d '"') \ + -j ${SLURM_JOB_CPUS_PER_NODE} \ + -l s_1 + +# get the timestamp for the most recently changed file in directory '.' + +# hard-limit for wait time set to ~ 8 hours. +# (4 checks per hour, for 8 hours equals 32 iterations) +for i in $(seq 1 32); +do + before="$(find sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TellReadJob/Full -type f -printf '%T@\n' | sort -n | tail -1)" + # assume TellReadJob is finished if ctime hasn't changed in 15 minutes + # for any fastq file in the directory. + sleep 900 + after="$(find sequence_processing_pipeline/tests/2caa8226-cf69-45a3-bd40-1e90ec3d18d0/TellReadJob/Full -type f -printf '%T@\n' | sort -n | tail -1)" + + echo "$before $after" + + if [[ "$before" == "$after" ]]; then + echo "DONE" + exit 0 + else + echo "NOT DONE" + fi +done + +# if we've reached this point then we've exceeded our hard-limit for waiting. +# return w/an error. +exit 1 diff --git a/sequence_processing_pipeline/tests/test_ConvertJob.py b/sequence_processing_pipeline/tests/test_ConvertJob.py index e8879864..f394cfb8 100644 --- a/sequence_processing_pipeline/tests/test_ConvertJob.py +++ b/sequence_processing_pipeline/tests/test_ConvertJob.py @@ -955,7 +955,7 @@ def test_error_msg_from_logs(self): # an internal method to force submit_job() to raise a JobFailedError # instead of submitting the job w/sbatch and waiting for a failed - # job w/sacct. + # job w/squeue. self.assertTrue(job._toggle_force_job_fail()) error_msg = ("This job died.\n2024-01-01T12:12:12Z thread 99999 ERROR:" diff --git a/sequence_processing_pipeline/tests/test_FastQCJob.py b/sequence_processing_pipeline/tests/test_FastQCJob.py index 28fe52cb..a2291296 100644 --- a/sequence_processing_pipeline/tests/test_FastQCJob.py +++ b/sequence_processing_pipeline/tests/test_FastQCJob.py @@ -1121,7 +1121,7 @@ def test_error_msg_from_logs(self): # an internal method to force submit_job() to raise a JobFailedError # instead of submitting the job w/sbatch and waiting for a failed - # job w/sacct. + # job w/squeue. 
self.assertTrue(job._toggle_force_job_fail()) try: diff --git a/sequence_processing_pipeline/tests/test_Job.py b/sequence_processing_pipeline/tests/test_Job.py index 7aa5889a..192709b6 100644 --- a/sequence_processing_pipeline/tests/test_Job.py +++ b/sequence_processing_pipeline/tests/test_Job.py @@ -1,10 +1,10 @@ import unittest from sequence_processing_pipeline.Job import Job from sequence_processing_pipeline.PipelineError import PipelineError -from os.path import abspath, join, dirname -from os import makedirs +from os.path import abspath, join, dirname, split, isdir +from os import makedirs, chmod, remove from functools import partial -from shutil import rmtree +from shutil import rmtree, copyfile import re @@ -14,7 +14,10 @@ def setUp(self): def tearDown(self): for some_path in self.remove_these: - rmtree(some_path) + if isdir(some_path): + rmtree(some_path) + else: + remove(some_path) def test_system_call(self): package_root = abspath('./sequence_processing_pipeline') @@ -123,6 +126,168 @@ def test_extract_project_names_from_fastq_dir(self): obs = job.extract_project_names_from_fastq_dir(tmp) self.assertEqual(obs, ['NPH_15288']) + def test_query_slurm(self): + package_root = abspath('./sequence_processing_pipeline') + base_path = partial(join, package_root, 'tests', 'data') + + # set up a fake job so that we can test the query_jobs() method. + # it doesn't matter what the parameters are so long as the job + # passes initialization. + job = Job(base_path('211021_A00000_0000_SAMPLE'), + base_path('7b9d7d9c-2cd4-4d54-94ac-40e07a713585'), + '200nnn_xnnnnn_nnnn_xxxxxxxxxx', ['ls'], 2, None) + + # locate python binary path + # we have a python script called fake_squeue.py that can simulate + # repeated calls to squeue. It does this by generating a fake random + # set of array job ids for each job id passed to it and records their + # state in my_state.json. Each array job is set to change state from + # RUNNING to either COMPLETED or FAILED between three to seven squeue + # calls. The choice of which job-ids will succeed or fail, as is which + # individual array-ids will succeed or fail is random. + python_path = split(job._which('python'))[0] + squeue_path = join(python_path, 'squeue') + foo = join(package_root, 'scripts', 'fake_squeue.py') + + # place the fake squeue file in a place that's known to be in the + # PATH. Make sure this file is removed after this test is complete. + # Also make sure the saved state file is removed. + copyfile(foo, squeue_path) + chmod(squeue_path, 0o755) + self.remove_these.append(squeue_path) + self.remove_these.append(join(package_root, 'scripts', + 'my_state.json')) + + job_ids = ['1234567', '1234568', '1234569', '1234570'] + jobs = job._query_slurm(job_ids) + + # jobs is a dictionary of unique array_ids and/or job-ids for non- + # array jobs. The faked squeue reports anywhere between five and + # fifteen array jobs for a given job-id. After the first invocation + # all processes should be in the 'RUNNING' state. + # e.g.: "1234567_1": "RUNNING" + + for j in jobs: + self.assertEqual(jobs[j], 'RUNNING') + if '_' in j: + jid, aid = j.split('_') + else: + jid = j + aid = None + + # assert the job id component of the array-id is a valid job id. + self.assertIn(jid, job_ids) + + if aid: + # assert the array-id component of the array-id is between 0 + # and 15 as defined in the fake squeue script. 
+ aid = int(aid) + self.assertLess(aid, 15) + self.assertGreaterEqual(aid, 0) + + def test_query_slurm_single_job(self): + # perform test_query_slurm() but with a single job only. + package_root = abspath('./sequence_processing_pipeline') + base_path = partial(join, package_root, 'tests', 'data') + + # set up a fake job so that we can test the query_jobs() method. + # it doesn't matter what the parameters are so long as the job + # passes initialization. + job = Job(base_path('211021_A00000_0000_SAMPLE'), + base_path('7b9d7d9c-2cd4-4d54-94ac-40e07a713585'), + '200nnn_xnnnnn_nnnn_xxxxxxxxxx', ['ls'], 2, None) + + # locate python binary path + # we have a python script called fake_squeue.py that can simulate + # repeated calls to squeue. It does this by generating a fake random + # set of array job ids for each job id passed to it and records their + # state in my_state.json. Each array job is set to change state from + # RUNNING to either COMPLETED or FAILED between three to seven squeue + # calls. The choice of which job-ids will succeed or fail, as is which + # individual array-ids will succeed or fail is random. + python_path = split(job._which('python'))[0] + squeue_path = join(python_path, 'squeue') + foo = join(package_root, 'scripts', 'fake_squeue.py') + + # place the fake squeue file in a place that's known to be in the + # PATH. Make sure this file is removed after this test is complete. + # Also make sure the saved state file is removed. + copyfile(foo, squeue_path) + chmod(squeue_path, 0o755) + self.remove_these.append(squeue_path) + self.remove_these.append(join(package_root, 'scripts', + 'my_state.json')) + + job_ids = ['1234567'] + jobs = job._query_slurm(job_ids) + + # jobs is a dictionary of unique array_ids and/or job-ids for non- + # array jobs. The faked squeue reports anywhere between five and + # fifteen array jobs for a given job-id. After the first invocation + # all processes should be in the 'RUNNING' state. + # e.g.: "1234567_1": "RUNNING" + + for j in jobs: + self.assertEqual(jobs[j], 'RUNNING') + if '_' in j: + jid, aid = j.split('_') + else: + jid = j + aid = None + + # assert the job id component of the array-id is a valid job id. + self.assertIn(jid, job_ids) + + if aid: + # assert the array-id component of the array-id is between 0 + # and 15 as defined in the fake squeue script. + aid = int(aid) + self.assertLess(aid, 15) + self.assertGreaterEqual(aid, 0) + + def test_wait_on_job_ids(self): + package_root = abspath('./sequence_processing_pipeline') + base_path = partial(join, package_root, 'tests', 'data') + + job = Job(base_path('211021_A00000_0000_SAMPLE'), + base_path('7b9d7d9c-2cd4-4d54-94ac-40e07a713585'), + '200nnn_xnnnnn_nnnn_xxxxxxxxxx', ['ls'], 2, None) + + python_path = split(job._which('python'))[0] + squeue_path = join(python_path, 'squeue') + foo = join(package_root, 'scripts', 'fake_squeue.py') + copyfile(foo, squeue_path) + chmod(squeue_path, 0o755) + self.remove_these.append(squeue_path) + self.remove_these.append(join(package_root, 'scripts', + 'my_state.json')) + + job_ids = ['1', '2', '3', '4'] + + # to shorten the test time, set polling_interval_in_seconds to be + # lower than one minute. + Job.polling_interval_in_seconds = 10 + results = job.wait_on_job_ids(job_ids) + + # calling query_slurm one more time after wait_on_job_ids() is called + # will technically advance the counter one more, which means that this + # doesn't confirm that wait_on_job_ids() doesn't return before EVERY + # single job is either COMPLETED or FAILED. 
However it does confirm + # that wait_on_job_ids() doesn't return once the FIRST completed array + # job is either COMPLETED or FAILED while others are still RUNNING. + # This was previously an issue. + obs = job._query_slurm(job_ids) + + for array_id in obs: + state = obs[array_id] + # w/out relying on states defined in Job, simply confirm all are + # either COMPLETED or FAILED. + self.assertIn(state, ['COMPLETED', 'FAILED']) + + # since wait_on_job_ids() now returns the same data structure as + # query_slurm(), they should be equal. + self.assertDictEqual(obs, results) + if __name__ == '__main__': unittest.main() diff --git a/sequence_processing_pipeline/tests/test_NuQCJob.py b/sequence_processing_pipeline/tests/test_NuQCJob.py index a3cfdada..8737552b 100644 --- a/sequence_processing_pipeline/tests/test_NuQCJob.py +++ b/sequence_processing_pipeline/tests/test_NuQCJob.py @@ -9,7 +9,7 @@ ) from os import makedirs, remove from metapool import load_sample_sheet -import glob +from os import walk class TestNuQCJob(unittest.TestCase): @@ -996,7 +996,7 @@ def test_error_msg_from_logs(self): # an internal method to force submit_job() to raise a JobFailedError # instead of submitting the job w/sbatch and waiting for a failed - # job w/sacct. + # job w/squeue. self.assertTrue(job._toggle_force_job_fail()) try: @@ -2208,10 +2208,56 @@ def test_generate_mmi_filter_cmds_w_annotate_fastq(self): self.assertEqual(obs, exp) def test_move_trimmed(self): - # Note: this test does not make use of the output_dir that other - # tests use. + # create a NuQCJob() object, but do not call run(). + # instead we will manually create some files to test with. + double_db_paths = ["db_path/mmi_1.db", "db_path/mmi_2.db"] + job = NuQCJob( + self.fastq_root_path, + self.output_path, + self.good_sample_sheet_path, + double_db_paths, + "queue_name", + 1, + 1440, + "8", + "fastp", + "minimap2", + "samtools", + [], + self.qiita_job_id, + 1000, + "", + self.movi_path, + self.gres_value, + self.pmls_path, + ['BX'] + ) - for dummy_fp in SAMPLE_DIR: + sample_dir = [ + "NuQCJob/only-adapter-filtered/EP890158A02_S58_L001_R1_001." + "interleave.fastq.gz", + "NuQCJob/only-adapter-filtered/EP890158A02_S58_L001_R2_001." + "interleave.fastq.gz", + "NuQCJob/only-adapter-filtered/EP023801B04_S27_L001_R1_001." + "interleave.fastq.gz", + "NuQCJob/only-adapter-filtered/EP023801B04_S27_L001_R2_001." + "interleave.fastq.gz", + "NuQCJob/NPH_15288/fastp_reports_dir/html/EP890158A02_S58_L001_" + "R1_001.html", + "NuQCJob/NPH_15288/fastp_reports_dir/json/EP023801B04_S27_L001_" + "R1_001.json", + "NuQCJob/process_all_fastq_files.sh", + "NuQCJob/hds-a439513a-5fcc-4f29-a1e5-902ee5c1309d.1897981." + "completed", + "NuQCJob/logs/slurm-1897981_1.out", + "NuQCJob/tmp/hds-a439513a-5fcc-4f29-a1e5-902ee5c1309d-1", + 'NuQCJob/only-adapter-filtered/CDPH-SAL_' + 'Salmonella_Typhi_MDL-150__S36_L001_R1_001.interleave.fastq.gz', + 'NuQCJob/only-adapter-filtered/CDPH-SAL_' + 'Salmonella_Typhi_MDL-150__S36_L001_R2_001.interleave.fastq.gz', + ] + + for dummy_fp in sample_dir: dummy_fp = self.path(dummy_fp) dummy_path = dirname(dummy_fp) makedirs(dummy_path, exist_ok=True) @@ -2220,38 +2266,33 @@ def test_move_trimmed(self): trimmed_only_path = self.path("NuQCJob", "only-adapter-filtered") - NuQCJob._move_trimmed_files("NPH_15288", trimmed_only_path) - - new_path = join(trimmed_only_path, "NPH_15288") - pattern = f"{new_path}/*.fastq.gz" - - exp = [ - ( - "only-adapter-filtered/NPH_15288/359180345_S58_L001_R1_001." 
- "fastq.gz" - ), - ( - "only-adapter-filtered/NPH_15288/359180337_S27_L001_R1_001." - "fastq.gz" - ), - ( - "only-adapter-filtered/NPH_15288/359180338_S51_L001_R2_001." - "fastq.gz" - ), - ( - "only-adapter-filtered/NPH_15288/359180338_S51_L001_R1_001." - "fastq.gz" - ), - ( - "only-adapter-filtered/NPH_15288/359180337_S27_L001_R2_001." - "fastq.gz" - ), - ] - - for trimmed_file in list(glob.glob(pattern)): - trimmed_file = trimmed_file.split("NuQCJob/")[-1] - if trimmed_file not in exp: - self.assertIn(trimmed_file, exp) + # test _move_trimmed_files() by verifying that only the interleave + # fastq files from the NYU project are moved. + job._move_trimmed_files("NYU_BMS_Melanoma_13059", trimmed_only_path) + + new_path = join(trimmed_only_path, "NYU_BMS_Melanoma_13059") + + exp = { + 'NuQCJob/only-adapter-filtered/NYU_BMS_Melanoma_13059/EP890158A02' + '_S58_L001_R1_001.interleave.fastq.gz', + 'NuQCJob/only-adapter-filtered/NYU_BMS_Melanoma_13059/EP023801B04' + '_S27_L001_R1_001.interleave.fastq.gz', + 'NuQCJob/only-adapter-filtered/NYU_BMS_Melanoma_13059/EP890158A02' + '_S58_L001_R2_001.interleave.fastq.gz', + 'NuQCJob/only-adapter-filtered/NYU_BMS_Melanoma_13059/EP023801B04' + '_S27_L001_R2_001.interleave.fastq.gz' + } + + obs = [] + for root, dirs, files in walk(new_path): + for some_file in files: + some_path = join(root, some_file) + some_path = some_path.replace(self.path(""), "") + obs.append(some_path) + + # confirm that only the samples in NYU_BMS_Melanoma_13059 were + # moved. + self.assertEqual(set(obs), exp) def _helper(self, regex, good_names, bad_names): for good_name in good_names: @@ -2263,27 +2304,5 @@ def _helper(self, regex, good_names, bad_names): self.assertIsNone(substr, msg=f"Regex failed on {bad_name}") -SAMPLE_DIR = [ - "NuQCJob/only-adapter-filtered/359180345_S58_L001_R1_001.fastq.gz", - "NuQCJob/only-adapter-filtered/359180337_S27_L001_R1_001.fastq.gz", - "NuQCJob/only-adapter-filtered/359180338_S51_L001_R2_001.fastq.gz", - "NuQCJob/only-adapter-filtered/359180338_S51_L001_R1_001.fastq.gz", - "NuQCJob/only-adapter-filtered/359180337_S27_L001_R2_001.fastq.gz", - "NuQCJob/NPH_15288/fastp_reports_dir/html/359180354_S22_L001_R1_001.html", - "NuQCJob/NPH_15288/fastp_reports_dir/html/359180338_S51_L001_R1_001.html", - "NuQCJob/NPH_15288/fastp_reports_dir/html/359180345_S58_L001_R1_001.html", - "NuQCJob/NPH_15288/fastp_reports_dir/html/359180337_S27_L001_R1_001.html", - "NuQCJob/NPH_15288/fastp_reports_dir/html/359180353_S17_L001_R1_001.html", - "NuQCJob/NPH_15288/fastp_reports_dir/json/359180353_S17_L001_R1_001.json", - "NuQCJob/NPH_15288/fastp_reports_dir/json/359180337_S27_L001_R1_001.json", - "NuQCJob/NPH_15288/fastp_reports_dir/json/359180345_S58_L001_R1_001.json", - "NuQCJob/NPH_15288/fastp_reports_dir/json/359180338_S51_L001_R1_001.json", - "NuQCJob/NPH_15288/fastp_reports_dir/json/359180354_S22_L001_R1_001.json", - "NuQCJob/process_all_fastq_files.sh", - "NuQCJob/hds-a439513a-5fcc-4f29-a1e5-902ee5c1309d.1897981.completed", - "NuQCJob/logs/slurm-1897981_1.out", - "NuQCJob/tmp/hds-a439513a-5fcc-4f29-a1e5-902ee5c1309d-1", -] - if __name__ == "__main__": unittest.main() diff --git a/sequence_processing_pipeline/tests/test_Pipeline.py b/sequence_processing_pipeline/tests/test_Pipeline.py index 53267c7d..0af31439 100644 --- a/sequence_processing_pipeline/tests/test_Pipeline.py +++ b/sequence_processing_pipeline/tests/test_Pipeline.py @@ -140,7 +140,7 @@ def test_validate_mapping_file_numeric_ids(self): with NamedTemporaryFile() as tmp: 
self._make_mapping_file(tmp.name) exp = ['1.0', '1e-3'] - pipeline = Pipeline(self.good_config_file, self.good_run_id, None, + pipeline = Pipeline(self.good_config_file, self.good_run_id, tmp.name, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -153,7 +153,7 @@ def test_validate_mapping_file_numeric_ids(self): def test_get_sample_names_from_sample_sheet(self): pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.mp_sheet_path, None, + self.mp_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -178,7 +178,7 @@ def test_get_sample_names_from_sample_sheet(self): def test_get_orig_names_from_sheet_with_replicates(self): pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sheet_w_replicates, None, + self.good_sheet_w_replicates, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -198,7 +198,7 @@ def test_required_file_checks(self): with self.assertRaisesRegex(PipelineError, "required file 'RunInfo.xml" "' is not present."): Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -210,7 +210,7 @@ def test_required_file_checks(self): with self.assertRaisesRegex(PipelineError, "required file 'RTAComplete" ".txt' is not present."): Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -222,7 +222,7 @@ def test_required_file_checks(self): with self.assertRaisesRegex(PipelineError, "RunInfo.xml is present, bu" "t not readable"): Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) self.make_runinfo_file_readable() @@ -232,7 +232,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(self.bad_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -249,7 +249,7 @@ def test_creation(self): " valid sample-sheet."): Pipeline(self.good_config_file, self.good_run_id, - self.bad_assay_type_path, None, + self.bad_assay_type_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -257,7 +257,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(self.invalid_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -268,7 +268,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(None, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -279,7 +279,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(self.good_config_file, self.invalid_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -290,7 +290,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(self.good_config_file, None, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -300,7 +300,7 @@ def 
test_creation(self): "not a valid json file"): Pipeline(self.good_sample_sheet_path, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -323,7 +323,7 @@ def test_creation(self): "bad.json'"): Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -345,7 +345,7 @@ def test_creation(self): "bad.json'"): Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -368,7 +368,7 @@ def test_creation(self): "bad.json'"): Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -379,7 +379,7 @@ def test_sample_sheet_validation(self): # contained w/in its 'message' member. try: Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) except PipelineError as e: @@ -389,7 +389,7 @@ def test_sample_sheet_validation(self): # test unsuccessful validation of a bad sample-sheet with self.assertRaises(PipelineError) as e: Pipeline(self.good_config_file, self.good_run_id, - self.bad_sample_sheet_path, None, + self.bad_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) self.assertEqual(str(e.exception), ('Sample-sheet contains errors:\n' @@ -401,7 +401,6 @@ def test_generate_sample_information_files(self): # test sample-information-file generation. pipeline = Pipeline(self.good_config_file, self.good_run_id, self.good_sample_sheet_path, - None, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -511,6 +510,62 @@ def test_generate_sample_information_files(self): exp = exp_last_lines[some_name] self.assertEqual(obs, exp) + def test_generate_sample_information_files_with_additional_meta(self): + # TODO: With recent changes is this needed? + # test sample-information-file generation. + pipeline = Pipeline(self.good_config_file, self.good_run_id, + self.good_sample_sheet_path, + self.output_file_path, self.qiita_id, + Pipeline.METAGENOMIC_PTYPE) + + # create a dataframe with duplicate information to pass to + # generate_sample_information_files(). Confirm that the duplicates + # are dropped. Confirm 'NOTBLANK_999A' is also filtered out. + df = pd.DataFrame(data=[('BLANK999_999A', 'NYU_BMS_Melanoma_13059'), + ('BLANK999_999A', 'NYU_BMS_Melanoma_13059'), + ('NOTBLANK_999A', 'NYU_BMS_Melanoma_13059')], + columns=['sample_name', 'project_name']) + + sif_path = pipeline.generate_sample_info_files(addl_info=df) + + # get the path for the NYU_BMS_Melanoma dataset. 
+ sif_path = [x for x in sif_path if 'NYU_BMS_Melanoma' in x][0] + + exp_first_line = ("BLANK1.1A\t2021-10-21\t193\t" + "Control\tNegative\tSterile water blank\t" + "Sterile water blank\turban biome\t" + "research facility\tsterile water\t" + "misc environment\tUSA:CA:San Diego\t" + "BLANK1.1A\t32.5\t-117.25\tcontrol blank\t" + "metagenome\t256318\tBLANK1.1A\t" + "NYU_BMS_Melanoma\tTRUE\tUCSD\tFALSE") + + exp_last_line = ("BLANK4.4H\t2021-10-21\t193\tControl\tNegative\t" + "Sterile water blank\tSterile water blank\t" + "urban biome\tresearch facility\tsterile water\t" + "misc environment\tUSA:CA:San Diego\tBLANK4.4H\t" + "32.5\t-117.25\tcontrol blank\tmetagenome\t256318\t" + "BLANK4.4H\tNYU_BMS_Melanoma\tTRUE\tUCSD\tFALSE") + + with open(sif_path, 'r') as f: + obs_lines = f.readlines() + + # confirm that each file contains the expected header. + header = obs_lines[0].strip() + self.assertEqual(header, '\t'.join(Pipeline.sif_header)) + + # confirm that the first line of each file is as expected. + obs = obs_lines[1].strip() + exp = exp_first_line + + self.assertEqual(obs, exp) + + # confirm that the last line of each file is as expected. + obs = obs_lines[-1].strip() + exp = exp_last_line + + self.assertEqual(obs, exp) + def test_get_sample_ids(self): exp_sample_ids = ['CDPH-SAL__Salmonella__Typhi__MDL-143', 'CDPH-SAL_Salmonella_Typhi_MDL-144', @@ -980,7 +1035,7 @@ def test_get_sample_ids(self): 'EP400448B04', 'EP479894B04'] # test sample-information-file generation. pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -1456,7 +1511,7 @@ def test_get_sample_names(self): # test sample-information-file generation. pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -1484,7 +1539,7 @@ def test_get_project_info(self): # test sample-information-file generation. pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -1515,7 +1570,7 @@ def test_get_project_info(self): self.assertEqual(sorted(obs_project_names), sorted(exp_project_names)) pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sheet_w_replicates, None, + self.good_sheet_w_replicates, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -1527,7 +1582,7 @@ def test_get_project_info(self): def test_configuration_profiles(self): pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -1556,7 +1611,7 @@ def test_configuration_profiles(self): def test_parse_project_name(self): # test sample-information-file generation. 
pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -1588,7 +1643,7 @@ def test_parse_project_name(self): def test_identify_reserved_words(self): pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_sample_sheet_path, None, + self.good_sample_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -1605,7 +1660,7 @@ def test_identify_reserved_words(self): # create new pipeline using a/legacy (v90) metagenomic sample-sheet. pipeline = Pipeline(self.good_config_file, self.good_run_id, - self.good_legacy_sheet_path, None, + self.good_legacy_sheet_path, self.output_file_path, self.qiita_id, Pipeline.METAGENOMIC_PTYPE) @@ -1698,7 +1753,7 @@ def test_required_file_checks(self): with self.assertRaisesRegex(PipelineError, "required file 'RunInfo.xml" "' is not present."): Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -1710,7 +1765,7 @@ def test_required_file_checks(self): with self.assertRaisesRegex(PipelineError, "required file 'RTAComplete" ".txt' is not present."): Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -1721,7 +1776,7 @@ def test_required_file_checks(self): with self.assertRaisesRegex(PipelineError, "RunInfo.xml is present, " "but not readable"): - Pipeline(self.good_config_file, self.good_run_id, None, + Pipeline(self.good_config_file, self.good_run_id, self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) self.make_runinfo_file_readable() @@ -1731,7 +1786,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(self.bad_config_file, self.good_run_id, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -1746,7 +1801,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(self.invalid_config_file, self.good_run_id, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -1757,7 +1812,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(None, self.good_run_id, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -1768,7 +1823,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(self.good_config_file, self.invalid_run_id, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -1779,7 +1834,7 @@ def test_creation(self): with self.assertRaises(PipelineError) as e: Pipeline(self.good_config_file, None, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -1787,7 +1842,7 @@ def test_mapping_file_validation(self): # test successful validation of a good mapping-file. 
try: Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) except PipelineError as e: @@ -1797,7 +1852,7 @@ def test_mapping_file_validation(self): # test unsuccessful validation of a bad mapping-file. with self.assertRaises(PipelineError) as e: Pipeline(self.good_config_file, self.good_run_id, - None, self.mf_missing_column, + self.mf_missing_column, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) self.assertEqual(str(e.exception), ('Mapping-file is missing ' @@ -1807,7 +1862,7 @@ def test_mapping_file_validation(self): # test unsuccessful validation of a bad mapping-file. with self.assertRaises(PipelineError) as e: Pipeline(self.good_config_file, self.good_run_id, - None, self.mf_duplicate_sample, + self.mf_duplicate_sample, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) self.assertEqual(str(e.exception), ("Mapping-file contains duplicate " @@ -1834,7 +1889,6 @@ def test_is_sample_sheet(self): def test_generate_sample_information_files(self): # test sample-information-file generation. pipeline = Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, self.output_file_path, self.qiita_id, @@ -2056,7 +2110,6 @@ def test_get_sample_ids(self): # test sample-information-file generation. pipeline = Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, self.output_file_path, self.qiita_id, @@ -2184,7 +2237,6 @@ def test_get_sample_names(self): # test sample-information-file generation. pipeline = Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, self.output_file_path, self.qiita_id, @@ -2208,7 +2260,7 @@ def test_get_project_info(self): # test sample-information-file generation. pipeline = Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, + self.good_mapping_file_path, self.output_file_path, self.qiita_id, Pipeline.AMPLICON_PTYPE) @@ -2225,33 +2277,12 @@ def test_get_project_info(self): self.assertDictEqual(obs_d, exp_d) break - def test_additional_constuctor_check(self): - with self.assertRaisesRegex(PipelineError, ("sample_sheet_path or " - "mapping_file_path must " - "be defined, but not " - "both.")): - Pipeline(self.good_config_file, self.good_run_id, - None, None, - self.output_file_path, - self.qiita_id, Pipeline.AMPLICON_PTYPE) - - with self.assertRaisesRegex(PipelineError, ("sample_sheet_path or " - "mapping_file_path must " - "be defined, but not " - "both.")): - Pipeline(self.good_config_file, self.good_run_id, - self.sample_sheet_path, - self.good_mapping_file_path, - self.output_file_path, - self.qiita_id, Pipeline.AMPLICON_PTYPE) - def test_dummy_sheet_generation(self): # generate a RunInfo.xml file w/only one indexed read. 
self.create_runinfo_file(four_reads=False) _ = Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, self.output_file_path, self.qiita_id, @@ -2270,7 +2301,6 @@ def test_dummy_sheet_generation(self): _ = Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, self.output_file_path, self.qiita_id, @@ -2290,7 +2320,6 @@ def test_dummy_sheet_generation(self): def test_process_run_info_file(self): pipeline = Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, self.output_file_path, self.qiita_id, @@ -2330,7 +2359,6 @@ def test_process_run_info_file(self): def test_identify_reserved_words(self): pipeline = Pipeline(self.good_config_file, self.good_run_id, - None, self.good_mapping_file_path, self.output_file_path, self.qiita_id, diff --git a/sequence_processing_pipeline/tests/test_SeqCountsJob.py b/sequence_processing_pipeline/tests/test_SeqCountsJob.py new file mode 100644 index 00000000..d641c3b2 --- /dev/null +++ b/sequence_processing_pipeline/tests/test_SeqCountsJob.py @@ -0,0 +1,74 @@ +from os.path import join +from sequence_processing_pipeline.SeqCountsJob import SeqCountsJob +from functools import partial +import unittest +from json import load as json_load + + +class TestSeqCountsJob(unittest.TestCase): + def setUp(self): + package_root = "sequence_processing_pipeline" + self.path = partial(join, package_root, "tests") + # where 2caa8226-cf69-45a3-bd40-1e90ec3d18d0 is a random qiita job id. + self.exp = self.path('data', 'tellseq_output', 'integrate_test.sbatch') + + # where 150629_SN1001_0511_AH5L7GBCXX is a run-directory that already + # exists. + self.run_dir = self.path('data', 'sample_run_directories', + '150629_SN1001_0511_AH5L7GBCXX') + + self.output_path = self.path('2caa8226-cf69-45a3-bd40-1e90ec3d18d0') + + self.files_to_count_path = self.path("data", "files_to_count.txt") + + self.queue_name = "qiita" + self.node_count = "1" + self.wall_time_limit = "1440" + self.jmem = "8" + self.modules_to_load = [] + self.qiita_job_id = "2caa8226-cf69-45a3-bd40-1e90ec3d18d0" + self.cores_per_task = "1" + self.raw_fastq_dir = join(self.output_path, "TellReadJob", "Full") + self.max_array_length = 100 + self.exp_sbatch_output = self.path("data", "seq_counts.sbatch") + self.exp_results = self.path("data", + "aggregate_counts_results.json") + + def test_creation(self): + def compare_files(obs, exp): + with open(obs, 'r') as f: + obs_lines = f.readlines() + obs_lines = [x.strip() for x in obs_lines] + obs_lines = [x for x in obs_lines if x != ''] + + with open(exp, 'r') as f: + exp_lines = f.readlines() + exp_lines = [x.strip() for x in exp_lines] + exp_lines = [x for x in exp_lines if x != ''] + + for obs_line, exp_line in zip(obs_lines, exp_lines): + self.assertEqual(obs_line, exp_line) + + # test basic good-path + job = SeqCountsJob(self.run_dir, self.output_path, self.queue_name, + self.node_count, self.wall_time_limit, self.jmem, + self.modules_to_load, self.qiita_job_id, + self.max_array_length, self.files_to_count_path, + self.cores_per_task) + + obs = job._generate_job_script() + + compare_files(obs, self.exp_sbatch_output) + + # hack log path so that it points to test data directory rather than + # the output directory for a run we didn't run(). 
+ job.log_path = self.path("data", "seq_counts_logs") + + obs = json_load(open(job._aggregate_counts(), 'r')) + exp = json_load(open(self.exp_results, 'r')) + + self.assertDictEqual(obs, exp) + + +if __name__ == '__main__': + unittest.main() diff --git a/sequence_processing_pipeline/tests/test_TRIntegrateJob.py b/sequence_processing_pipeline/tests/test_TRIntegrateJob.py new file mode 100644 index 00000000..17ded346 --- /dev/null +++ b/sequence_processing_pipeline/tests/test_TRIntegrateJob.py @@ -0,0 +1,72 @@ +from os.path import join +from sequence_processing_pipeline.TRIntegrateJob import TRIntegrateJob +from functools import partial +import unittest + + +class TestTRIntegrateJob(unittest.TestCase): + def setUp(self): + package_root = "sequence_processing_pipeline" + self.path = partial(join, package_root, "tests") + # where 2caa8226-cf69-45a3-bd40-1e90ec3d18d0 is a random qiita job id. + self.obs = self.path('2caa8226-cf69-45a3-bd40-1e90ec3d18d0', + 'TRIntegrateJob', 'integrate_test.sbatch') + self.exp = self.path('data', 'tellseq_output', 'integrate_test.sbatch') + + # where 150629_SN1001_0511_AH5L7GBCXX is a run-directory that already + # exists. + self.run_dir = self.path('data', 'sample_run_directories', + '150629_SN1001_0511_AH5L7GBCXX') + + self.output_path = self.path('2caa8226-cf69-45a3-bd40-1e90ec3d18d0') + + self.sample_sheet_path = self.path('data', + 'tellseq_metag_dummy_sample_' + 'sheet.csv') + + self.queue_name = "qiita" + self.node_count = "1" + self.wall_time_limit = "96:00:00" + self.jmem = "16" + self.modules_to_load = ["singularity_3.6.4"] + self.qiita_job_id = "2caa8226-cf69-45a3-bd40-1e90ec3d18d0" + self.label = "150629_SN1001_0511_AH5L7GBCXX-test" + self.reference_base = "" + self.reference_map = "" + self.tmp1_path = join(self.output_path, "TRIntegrateJob", "output", + "tmp1") + # reflects location of script on host. 
+ self.sing_script_path = ("$HOME/qiita-spots/tellread-release-novaseqX/" + "run_tellread_sing.sh") + self.lane = "1" + self.cores_per_task = "4" + self.integrate_script_path = join(package_root, "contrib", + "integrate-indices-np.py") + self.sil_path = self.path('data', 'fake_sample_index_list.txt') + self.raw_fastq_dir = join(self.output_path, "TellReadJob", "Full") + + def test_creation(self): + # test basic good-path + job = TRIntegrateJob(self.run_dir, self.output_path, + self.sample_sheet_path, self.queue_name, + self.node_count, self.wall_time_limit, + self.jmem, self.modules_to_load, + self.qiita_job_id, self.integrate_script_path, + self.sil_path, self.raw_fastq_dir, + self.reference_base, self.reference_map, + self.cores_per_task) + + job._generate_job_script() + + with open(self.obs, 'r') as f: + obs_lines = f.readlines() + + with open(self.exp, 'r') as f: + exp_lines = f.readlines() + + for obs_line, exp_line in zip(obs_lines, exp_lines): + self.assertEqual(obs_line, exp_line) + + +if __name__ == '__main__': + unittest.main() diff --git a/sequence_processing_pipeline/tests/test_TellReadJob.py b/sequence_processing_pipeline/tests/test_TellReadJob.py new file mode 100644 index 00000000..4c6a75c2 --- /dev/null +++ b/sequence_processing_pipeline/tests/test_TellReadJob.py @@ -0,0 +1,66 @@ +from os.path import join +from sequence_processing_pipeline.TellReadJob import TellReadJob +from functools import partial +import unittest + + +class TestTellReadJob(unittest.TestCase): + def setUp(self): + package_root = "sequence_processing_pipeline" + self.path = partial(join, package_root, "tests") + # where 2caa8226-cf69-45a3-bd40-1e90ec3d18d0 is a random qiita job id. + self.obs = self.path('2caa8226-cf69-45a3-bd40-1e90ec3d18d0', + 'TellReadJob', 'tellread_test.sbatch') + self.exp = self.path('data', 'tellseq_output', 'tellread_test.sbatch') + + # where 150629_SN1001_0511_AH5L7GBCXX is a run-directory that already + # exists. + self.run_dir = self.path('data', 'sample_run_directories', + '150629_SN1001_0511_AH5L7GBCXX') + + self.output_path = self.path('2caa8226-cf69-45a3-bd40-1e90ec3d18d0') + + self.sample_sheet_path = self.path('data', + 'tellseq_metag_dummy_sample_' + 'sheet.csv') + + self.queue_name = "qiita" + self.node_count = "1" + self.wall_time_limit = "96:00:00" + self.jmem = "16" + self.modules_to_load = ["singularity_3.6.4"] + self.qiita_job_id = "2caa8226-cf69-45a3-bd40-1e90ec3d18d0" + self.label = "150629_SN1001_0511_AH5L7GBCXX-test" + self.reference_base = "" + self.reference_map = "" + self.tmp1_path = join(self.output_path, "TellReadJob", "output", + "tmp1") + # reflects location of script on host. 
+ self.sing_script_path = ("$HOME/qiita-spots/tellread-release-novaseqX/" + "run_tellread_sing.sh") + self.lane = "1" + self.cores_per_task = "4" + + def test_creation(self): + # test basic good-path + job = TellReadJob(self.run_dir, self.output_path, + self.sample_sheet_path, self.queue_name, + self.node_count, self.wall_time_limit, + self.jmem, self.modules_to_load, self.qiita_job_id, + self.reference_base, self.reference_map, + self.sing_script_path, self.cores_per_task) + + job._generate_job_script() + + with open(self.obs, 'r') as f: + obs_lines = f.readlines() + + with open(self.exp, 'r') as f: + exp_lines = f.readlines() + + for obs_line, exp_line in zip(obs_lines, exp_lines): + self.assertEqual(obs_line, exp_line) + + +if __name__ == '__main__': + unittest.main() diff --git a/sequence_processing_pipeline/tests/test_commands.py b/sequence_processing_pipeline/tests/test_commands.py index f58bb176..ac8a4bd9 100644 --- a/sequence_processing_pipeline/tests/test_commands.py +++ b/sequence_processing_pipeline/tests/test_commands.py @@ -16,12 +16,12 @@ def test_split_similar_size_bins(self, glob, stat): class MockStat: st_size = 2 ** 28 # 256MB - mockglob = ['/foo/bar/a_R1_.fastq.gz', - '/foo/bar/b_R2_.fastq.gz', - '/foo/bar/a_R2_.fastq.gz', - '/foo/baz/c_R2_.fastq.gz', - '/foo/baz/c_R1_.fastq.gz', - '/foo/bar/b_R1_.fastq.gz'] + mockglob = ['/foo/bar/a_R1_001.fastq.gz', + '/foo/bar/b_R2_001.fastq.gz', + '/foo/bar/a_R2_001.fastq.gz', + '/foo/baz/c_R2_001.fastq.gz', + '/foo/baz/c_R1_001.fastq.gz', + '/foo/bar/b_R1_001.fastq.gz'] with TemporaryDirectory() as tmp: exp = (2, 1073741824) @@ -30,9 +30,12 @@ class MockStat: obs = split_similar_size_bins('foo', 1, tmp + '/prefix') self.assertEqual(obs, exp) - exp_1 = ('/foo/bar/a_R1_.fastq.gz\t/foo/bar/a_R2_.fastq.gz\tbar\n' - '/foo/bar/b_R1_.fastq.gz\t/foo/bar/b_R2_.fastq.gz\tbar\n') - exp_2 = '/foo/baz/c_R1_.fastq.gz\t/foo/baz/c_R2_.fastq.gz\tbaz\n' + exp_1 = ('/foo/bar/a_R1_001.fastq.gz\t/foo/bar/a_R2_001.fastq.gz' + '\tbar\n' + '/foo/bar/b_R1_001.fastq.gz\t/foo/bar/b_R2_001.fastq.gz' + '\tbar\n') + exp_2 = ('/foo/baz/c_R1_001.fastq.gz\t/foo/baz/c_R2_001.fastq.gz' + '\tbaz\n') obs_1 = open(tmp + '/prefix-1').read() self.assertEqual(obs_1, exp_1) @@ -56,10 +59,6 @@ def test_demux(self): '@2::MUX::bing/2', 'ATGC', '+', '!!!!', '']) infile = io.StringIO(infile_data) - exp_data_r1 = '\n'.join(['@baz/1', 'ATGC', '+', '!!!!', - '@bing/1', 'ATGC', '+', '!!!!', '']) - exp_data_r2 = '\n'.join(['@baz/2', 'ATGC', '+', '!!!!', - '@bing/2', 'ATGC', '+', '!!!!', '']) exp_data_r1 = ['@baz/1', 'ATGC', '+', '!!!!', '@bing/1', 'ATGC', '+', '!!!!'] diff --git a/sequence_processing_pipeline/tests/test_util.py b/sequence_processing_pipeline/tests/test_util.py index 136dc9a0..e5073101 100644 --- a/sequence_processing_pipeline/tests/test_util.py +++ b/sequence_processing_pipeline/tests/test_util.py @@ -4,24 +4,18 @@ class TestUtil(unittest.TestCase): def test_iter_paired_files(self): - tests = [(['a_R1_foo', - 'b_R2_bar', - 'a_R2_baz', - 'b_R1_bing'], - [('a_R1_foo', 'a_R2_baz'), - ('b_R1_bing', 'b_R2_bar')]), - (['a.R1.foo', - 'b.R2.bar', - 'a.R2.baz', - 'b.R1.bing'], - [('a.R1.foo', 'a.R2.baz'), - ('b.R1.bing', 'b.R2.bar')]), - (['a.R1.foo', - 'b_R2_bar', - 'a.R2.baz', - 'b_R1_bing'], - [('a.R1.foo', 'a.R2.baz'), - ('b_R1_bing', 'b_R2_bar')])] + # tuples of randomly ordered fastq files and thier expected + # sorted and organized output from iter_paired_files(). + + # underscore filenames updated to require '_001.fastq.gz'. 
+ # legacy dot filenames test remains as-is. + tests = [(['b_R2_001.fastq.gz', 'a_R1_001.fastq.gz', + 'a_R2_001.fastq.gz', 'b_R1_001.fastq.gz'], + [('a_R1_001.fastq.gz', 'a_R2_001.fastq.gz'), + ('b_R1_001.fastq.gz', 'b_R2_001.fastq.gz')]), + (['a.R1.foo', 'b.R2.bar', 'a.R2.baz', 'b.R1.bing'], + [('a.R1.foo', 'a.R2.baz'), ('b.R1.bing', 'b.R2.bar')])] + for files, exp in tests: obs = list(iter_paired_files(files)) self.assertEqual(obs, exp) @@ -42,7 +36,7 @@ def test_iter_paired_files_bad_pair(self): list(iter_paired_files(files)) def test_iter_paired_files_mismatch_prefix(self): - files = ['a_R1_foo', 'ab_R2_foo'] + files = ['a_R1_001.fastq.gz', 'ab_R2_001.fastq.gz'] with self.assertRaisesRegex(ValueError, "Mismatch prefixes"): list(iter_paired_files(files)) diff --git a/sequence_processing_pipeline/util.py b/sequence_processing_pipeline/util.py index d9586f81..e19bf98a 100644 --- a/sequence_processing_pipeline/util.py +++ b/sequence_processing_pipeline/util.py @@ -1,7 +1,18 @@ import re -PAIR_UNDERSCORE = (re.compile(r'_R1_'), '_R1_', '_R2_') +# PAIR_UNDERSCORE = (re.compile(r'_R1_'), '_R1_', '_R2_') +# The above will truncate on the first _R1_ found, which only works when _R1_ +# or _R2_ appears exactly once in a file path. When the wet-lab incorporates +# these same strings in their sample-names as descriptive metadata, this +# assumption is broken. For all raw fastq files being used as input into +# NuQCJob, we can assume they end in the following convention. Per Illumina +# spec, all fastq files end in _001 and we preserve this convention even at +# the cost of renaming output files from TRIntegrateJob. +# PAIR_DOT is kept as is, but may be removed later because for the purposes of +# SPP, no input should ever be named with dots instead of underscores. +PAIR_UNDERSCORE = (re.compile(r'_R1_001.fastq.gz'), + '_R1_001.fastq.gz', '_R2_001.fastq.gz') PAIR_DOT = (re.compile(r'\.R1\.'), '.R1.', '.R2.') PAIR_TESTS = (PAIR_UNDERSCORE, PAIR_DOT)
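
Note on the pairing convention changed in `sequence_processing_pipeline/util.py` above: `iter_paired_files()` now anchors on the full `_R1_001.fastq.gz` / `_R2_001.fastq.gz` suffixes instead of the first `_R1_` substring, so a sample name that itself contains `_R1_` can no longer split the path in the wrong place. The snippet below is a minimal, standalone sketch of that suffix-anchored matching rule, not the pipeline's actual `iter_paired_files()` implementation; `pair_by_suffix` is a hypothetical helper used only for illustration.

```python
from collections import defaultdict


def pair_by_suffix(files, r1_suffix='_R1_001.fastq.gz',
                   r2_suffix='_R2_001.fastq.gz'):
    # Group files by the prefix that precedes the fixed read suffix, so a
    # literal '_R1_' embedded in a sample name does not affect pairing.
    groups = defaultdict(dict)
    for fp in files:
        if fp.endswith(r1_suffix):
            groups[fp[:-len(r1_suffix)]]['R1'] = fp
        elif fp.endswith(r2_suffix):
            groups[fp[:-len(r2_suffix)]]['R2'] = fp
    # Return only complete (R1, R2) pairs.
    return [(pair['R1'], pair['R2']) for pair in groups.values()
            if 'R1' in pair and 'R2' in pair]


# A sample name containing '_R1_' still pairs correctly:
print(pair_by_suffix(['sample_R1_extract_R1_001.fastq.gz',
                      'sample_R1_extract_R2_001.fastq.gz']))
# [('sample_R1_extract_R1_001.fastq.gz', 'sample_R1_extract_R2_001.fastq.gz')]
```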