diff --git a/.travis.yml b/.travis.yml index 6c9aadd0..d1221fc9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,5 +13,6 @@ install: script: pytest branches: only: + - 0.6-rc2 - dev - master diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index f6f0c2b4..d44606ca 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -27,6 +27,7 @@ Changelog - Various small bug fixes and dev improvements. + - Require `setuptools` for installation, and `pandas 0.20.2`. If `numexpr` is installed, version `2.6.2` is required. - **v0.5** (*2017-03-01*): diff --git a/doc/source/conf.py b/doc/source/conf.py index b7f13245..2037083e 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -140,6 +140,7 @@ # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] +html_static_path = [] # it's empty; suppress warning # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied diff --git a/doc/source/config-files.rst b/doc/source/config-files.rst index 95264447..4c5d2d08 100644 --- a/doc/source/config-files.rst +++ b/doc/source/config-files.rst @@ -19,7 +19,7 @@ If you are planning to submit jobs to a cluster, then you need to know about a s That should be all you need to worry about as a pipeline user. If you need to adjust compute resources or want to develop a pipeline or have more advanced project-level control over pipelines, then you'll need to know about a few others: Pipeline developers -***************** +********************** If you want to add a new pipeline to looper, tweak the way looper interacts with a pipeline for a given project, or change the default cluster resources requested by a pipeline, then you need to know about a configuration file that coordinates linking your pipeline in to your looper project. diff --git a/doc/source/define-your-project.rst b/doc/source/define-your-project.rst index af9d543d..4a6e884e 100644 --- a/doc/source/define-your-project.rst +++ b/doc/source/define-your-project.rst @@ -40,8 +40,8 @@ For example, by default, your jobs will run serially on your local computer, whe Let's go through the more advanced details of both annotation sheets and project config files: -.. include:: sample-annotation-sheet.rst +.. include:: sample-annotation-sheet.rst.inc -.. include:: project-config.rst +.. include:: project-config.rst.inc diff --git a/doc/source/inputs.rst b/doc/source/inputs.rst deleted file mode 100644 index 07c26983..00000000 --- a/doc/source/inputs.rst +++ /dev/null @@ -1,3 +0,0 @@ -Required Inputs -============================================= - diff --git a/doc/source/pipeline-interface-mapping.rst b/doc/source/pipeline-interface-mapping.rst.inc similarity index 98% rename from doc/source/pipeline-interface-mapping.rst rename to doc/source/pipeline-interface-mapping.rst.inc index 114fd199..61a2be74 100644 --- a/doc/source/pipeline-interface-mapping.rst +++ b/doc/source/pipeline-interface-mapping.rst.inc @@ -1,4 +1,6 @@ -.. _pipeline-interface-mapping: +:orphan: + +.. 
_pi_mapping: Pipeline interface section: protocol_mapping ******************************************** diff --git a/doc/source/pipeline-interface-pipelines.rst b/doc/source/pipeline-interface-pipelines.rst.inc similarity index 99% rename from doc/source/pipeline-interface-pipelines.rst rename to doc/source/pipeline-interface-pipelines.rst.inc index 4cceeb3f..0e23cc7b 100644 --- a/doc/source/pipeline-interface-pipelines.rst +++ b/doc/source/pipeline-interface-pipelines.rst.inc @@ -1,3 +1,5 @@ +:orphan: + .. _pipeline-interface-pipelines: Pipeline interface section: pipelines diff --git a/doc/source/pipeline-interface.rst b/doc/source/pipeline-interface.rst index f7bae756..0a7888ef 100644 --- a/doc/source/pipeline-interface.rst +++ b/doc/source/pipeline-interface.rst @@ -31,7 +31,7 @@ Let's start with a very simple example. A basic ``pipeline_interface.yaml`` file The first section specifies that samples of protocol ``RRBS`` will be mapped to the pipeline specified by key ``rrbs_pipeline``. The second section describes where the pipeline named ``rrbs_pipeline`` is located and what command-line arguments it requires. Pretty simple. Let's go through each of these sections in more detail: -.. include:: pipeline-interface-mapping.rst +.. include:: pipeline-interface-mapping.rst.inc -.. include:: pipeline-interface-pipelines.rst +.. include:: pipeline-interface-pipelines.rst.inc diff --git a/doc/source/project-config.rst b/doc/source/project-config.rst.inc similarity index 99% rename from doc/source/project-config.rst rename to doc/source/project-config.rst.inc index 10695637..62e76b15 100644 --- a/doc/source/project-config.rst +++ b/doc/source/project-config.rst.inc @@ -1,3 +1,5 @@ +:orphan: + Project config file *************************************************** diff --git a/doc/source/sample-annotation-sheet.rst b/doc/source/sample-annotation-sheet.rst.inc similarity index 99% rename from doc/source/sample-annotation-sheet.rst rename to doc/source/sample-annotation-sheet.rst.inc index f6ea5c5d..a3464595 100644 --- a/doc/source/sample-annotation-sheet.rst +++ b/doc/source/sample-annotation-sheet.rst.inc @@ -1,3 +1,4 @@ +:orphan: Sample annotation sheet ************************************************** diff --git a/doc/source/tutorials.rst b/doc/source/tutorials.rst index 425e1644..0b918829 100644 --- a/doc/source/tutorials.rst +++ b/doc/source/tutorials.rst @@ -41,7 +41,7 @@ Inside there will be two directories: - ``submissions`` [2]_ - which holds yaml representations of the samples and log files of the submited jobs. -The sample-specific output of each pipeline type varies and is described in :doc:`pipelines`. +The sample-specific output of each pipeline type varies. To use pre-made pipelines with your project, all you have to do is :doc:`define your project ` using looper's standard format. To link your own, custom built pipelines, you can :doc:`connect your pipeline to looper with a pipeline interface `. diff --git a/examples/microtest_project_config.yaml b/examples/microtest_project_config.yaml index 1109d790..1cc64b65 100644 --- a/examples/microtest_project_config.yaml +++ b/examples/microtest_project_config.yaml @@ -1,94 +1,35 @@ -# This project config file describes all *project-specific variables* -# Its primary purpose as as input to Looper, which will submit jobs as appropriate -# for each sample in the project. 
-# But it is also read by other tools, including: -# - project sample loop (primary purpose) -# - make_trackhubs scripts to produce web accessible results -# - stats summary scripts -# - analysis scripts requiring pointers to metadata, results, and other options. - metadata: - # output_dir: ABSOLUTE PATH to the parent, shared space where project results go output_dir: /scratch/lab_bock/shared/projects/microtest - # results and submission subdirs are subdirectors directories under parent output_dir - # results: where output sample folders will go - # submission: where cluster submit scripts and log files will go results_subdir: results_pipeline submission_subdir: submission - # pipelines_dir: ABSOLUTE PATH the directory where the Looper will find pipeline - # scripts (and accompanying pipeline config files) for submission. pipelines_dir: $CODEBASE/pipelines - # Elements in this section can be absolute or relative. - # Typically, this project config file is stored with the project metadata, so - # relative paths are considered relative to this project config file. - # sample_annotation: one-row-per-sample metadata sample_annotation: microtest_sample_annotation.csv - # merge_table: input for samples with more than one input file merge_table: microtest_merge_table.csv - # compare_table: comparison pairs or groups, like normalization samples - compare_table: null.csv - -# a list of annotation sheet columns that are "derived" -# the values in these are constructed using a regex-like expression -# of variables (defined in the next section). derived_columns: [data_source] - data_sources: - # specify the ABSOLUTE PATH of input files using variable path expressions - # entries correspond to values in the data_source column in sample_annotation table - # {variable} can be used to replace environment variables or other sample_annotation columns - # If you use {variable} codes, you should quote the field so python can parse it. bsf_samples: "{RAWDATA}{flowcell}/{flowcell}_{lane}_samples/{flowcell}_{lane}#{BSF_name}.bam" microtest: "/data/groups/lab_bock/shared/resources/microtest/{sample_name}.bam" microtest_merge: "/data/groups/lab_bock/shared/resources/microtest/{sample_name}{file_number}.bam" - subprojects: config_test: pipeline_config: wgbs.py: wgbs_ds.yaml - -genomes: - human: hg19 - mouse: mm10 - -transcriptomes: - human: hg19_cdna - mouse: mm10_cdna - +implied_columns: + organism: + human: + genomes: hg19 + transcriptome: hg19_cdna + mouse: + genome: mm10 + transcriptome: mm10_cdna pipeline_config: - # pipeline configuration files used in project. - # Key string must match the _name of the pipeline script_ (including extension) - # Relative paths are relative to this project config file. - # Default (null) means use the generic config for the pipeline. - # wgbs.py: null - # Or you can point to a specific config to be used in this project: - # rrbs.py: rrbs_config.yaml - # wgbs.py: wgbs_config.yaml - # cgps: cpgs_config.yaml - + rrbs.py: rrbs_config.yaml pipeline_args: rnaBitSeq.py: - "-w": 50 - - -trackhubs: - trackhub_dir: /data/groups/lab_bock/public_html/arendeiro/microtest/ - # url: if you include this, the make_trackhubs will produce a link to your track hub in the project folder. 
- url: http://www.whatever.com/ - matrix_x: cell_type - matrix_y: cell_count - sort_order: cell_type=+ - parent_track_name: ews_rrbs - visibility: dense - hub_name: ews_hub - short_label_column: sample_name - email: arendeiro@cemm.oeaw.ac.at - -username: user -email: user@email.com + "-w": 50 \ No newline at end of file diff --git a/looper/__init__.py b/looper/__init__.py index 19e32a81..8be0cf76 100644 --- a/looper/__init__.py +++ b/looper/__init__.py @@ -19,14 +19,18 @@ # Default user logging format is simple DEFAULT_LOGGING_FMT = "%(message)s" # Developer logger format is more information-rich -DEV_LOGGING_FMT = "%(module)s:%(lineno)d [%(levelname)s] > %(message)s " +DEV_LOGGING_FMT = "%(module)s:%(lineno)d (%(funcName)s) [%(levelname)s] > %(message)s " def setup_looper_logger(level, additional_locations=None, devmode=False): """ - Called by test configuration via `pytest`'s `conftest`. - All arguments are optional and have suitable defaults. + Establish a logger for a looper CLI program. + + This configures a logger to provide information about a looper program's + execution. Verbosity, destination(s) for messages, and message text + format are controlled by the arguments' values. This is also used by + looper's test suite. :param int | str level: logging level :param tuple(str | FileIO[str]) additional_locations: supplementary diff --git a/looper/_version.py b/looper/_version.py index 29c83aa3..76d80dda 100644 --- a/looper/_version.py +++ b/looper/_version.py @@ -1 +1 @@ -__version__ = "0.6.0-rc1" +__version__ = "0.6.0-rc2" diff --git a/looper/looper.py b/looper/looper.py index d676d17f..604261bc 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -14,25 +14,19 @@ import pandas as _pd from . import setup_looper_logger, LOGGING_LEVEL, __version__ from .loodels import Project -from .models import COMPUTE_SETTINGS_VARNAME -from .utils import VersionInHelpParser +from .models import Sample, COMPUTE_SETTINGS_VARNAME, SAMPLE_EXECUTION_TOGGLE +from .utils import alpha_cased, VersionInHelpParser try: - from .models import \ - InterfaceManager, PipelineInterface, \ - ProtocolMapper + from .models import PipelineInterface, ProtocolMapper except: sys.path.append(os.path.join(os.path.dirname(__file__), "looper")) - from models import \ - InterfaceManager, PipelineInterface, \ - ProtocolMapper + from models import PipelineInterface, ProtocolMapper from colorama import init init() from colorama import Fore, Style -SAMPLE_EXECUTION_TOGGLE = "toggle" - # Descending by severity for correspondence with logic inversion. # That is, greater verbosity setting corresponds to lower logging level. _LEVEL_BY_VERBOSITY = [logging.ERROR, logging.CRITICAL, logging.WARN, @@ -131,7 +125,7 @@ def parse_arguments(): destroy_subparser, check_subparser, clean_subparser]: subparser.add_argument( "config_file", - help="Project YAML config file.") + help="Project configuration file (YAML).") subparser.add_argument( "--file-checks", action="store_false", @@ -140,11 +134,12 @@ def parse_arguments(): "-d", "--dry-run", action="store_true", - help="Don't actually submit.") + help="Don't actually submit the project/subproject.") subparser.add_argument( "--sp", dest="subproject", - help="Supply subproject") + help="Name of subproject to use, as designated in the " + "project's configuration file") # To enable the loop to pass args directly on to the pipelines... 
args, remaining_args = parser.parse_known_args() @@ -175,7 +170,7 @@ def parse_arguments(): -def run(prj, args, remaining_args, interface_manager): +def run(prj, args, remaining_args): """ Main Looper function: Submit jobs for samples in project. @@ -184,105 +179,126 @@ def run(prj, args, remaining_args, interface_manager): :param Iterable[str] remaining_args: arguments given to this module's parser that were not defined as options it should parse, to be passed on to parser(s) elsewhere - :param InterfaceManager interface_manager: aggregator and manager of - pipeline interfaces and protocol mappings """ - # Easier change later, especially likely for library --> protocol. - _read_type = "read_type" - _protocol = "library" - - _start_counter(len(prj.samples)) - + num_samples = prj.num_samples + _start_counter(num_samples) valid_read_types = ["single", "paired"] # Keep track of how many jobs have been submitted. - submit_count = 0 - job_count = 0 + job_count = 0 # Some job templates will be skipped. + submit_count = 0 # Some jobs won't be submitted. processed_samples = set() # Create a problem list so we can keep track and show them at the end. failures = [] + _LOGGER.info("Building submission bundle(s) for protocol(s): {}". + format(list(prj.protocols))) + submission_bundle_by_protocol = { + alpha_cased(p): prj.build_submission_bundles( + alpha_cased(p)) for p in prj.protocols} + for sample in prj.samples: - _LOGGER.debug(sample) _LOGGER.info(_COUNTER.show(sample.sample_name, sample.library)) - pipeline_outfolder = os.path.join( + sample_output_folder = os.path.join( prj.metadata.results_subdir, sample.sample_name) - _LOGGER.debug("Pipeline output folder: '%s'", pipeline_outfolder) + _LOGGER.debug("Sample output folder: '%s'", sample_output_folder) skip_reasons = [] # Don't submit samples with duplicate names. if sample.sample_name in processed_samples: - skip_reasons.append("Duplicate sample name.") + skip_reasons.append("Duplicate sample name") # Check if sample should be run. - if hasattr(sample, SAMPLE_EXECUTION_TOGGLE): - if sample[SAMPLE_EXECUTION_TOGGLE] != "1": - skip_reasons.append("Column '{}' deselected.".format(SAMPLE_EXECUTION_TOGGLE)) - - # Check if single_or_paired value is recognized. - if hasattr(sample, _read_type): - # Drop "-end", "_end", or just "end" from end of the column value. - sample.read_type = re.sub( - '[_\\-]?end$', '', str(sample.read_type)).lower() - if sample.read_type not in valid_read_types: - skip_reasons.append("{} must be in {}.".\ - format(_read_type, valid_read_types)) + if sample.is_dormant(): + skip_reasons.append("Inactive status (via {})". 
+ format(SAMPLE_EXECUTION_TOGGLE)) # Get the base protocol-to-pipeline mappings - if hasattr(sample, _protocol): - protocol = sample.library.upper() - pipelines = interface_manager.build_pipelines(protocol) - if len(pipelines) == 0: - skip_reasons.append( - "No pipeline found for protocol {}.".format(protocol)) + try: + protocol = alpha_cased(sample.library) + except AttributeError: + skip_reasons.append("Missing 'library' attribute") else: - skip_reasons.append("Missing '{}' attribute.".format(_protocol)) - + protocol = protocol.upper() + _LOGGER.debug("Fetching submission bundle") + try: + _LOGGER.debug("Using '%s' as protocol key", protocol) + submission_bundles = submission_bundle_by_protocol[protocol] + except KeyError: + skip_reasons.append("No pipeline found for protocol") + if not submission_bundles: + skip_reasons.append("No submission bundle for protocol") if skip_reasons: _LOGGER.warn("> Not submitted: {}".format(skip_reasons)) failures.append([skip_reasons, sample.sample_name]) continue + # TODO: determine what to do with subtype(s) here. # Processing preconditions have been met. processed_samples.add(sample.sample_name) - sample.to_yaml() + + # At this point, we have a generic Sample; write that to disk + # for reuse in case of many jobs (pipelines) using base Sample. + # Do a single overwrite here, then any subsequent Sample can be sure + # that the file is fresh, with respect to this run of looper. + sample.to_yaml(subs_folder_path=prj.metadata.submission_subdir) + + # Store the base Sample data for reuse in creating subtype(s). + sample_data = sample.as_series() # Go through all pipelines to submit for this protocol. # Note: control flow doesn't reach this point if variable "pipelines" # cannot be assigned (library/protocol missing). - for pipeline_interface, pipeline_key, pipeline_job in pipelines: + # pipeline_key (previously pl_id) is no longer necessarily + # script name, it's more flexible. + for pipeline_interface, sample_subtype, pipeline_key, pipeline_job \ + in submission_bundles: + job_count += 1 + + _LOGGER.debug("Creating %s instance: '%s'", + sample_subtype.__name__, sample.sample_name) + sample = sample_subtype(sample_data) - # pipeline_key (previously pl_id) is no longer necessarily script name, it's more flexible. # The current sample is active. # For each pipeline submission consideration, start fresh. skip_reasons = [] - _LOGGER.debug("Setting pipeline attributes for job '{}' (PL_ID: '{}')". - format(pipeline_job, pipeline_key)) - + _LOGGER.debug("Setting pipeline attributes for job '{}' " + "(PL_ID: '{}')".format(pipeline_job, pipeline_key)) try: # Add pipeline-specific attributes. sample.set_pipeline_attributes( pipeline_interface, pipeline_name=pipeline_key) except AttributeError: # TODO: inform about WHICH missing attribute(s). - fail_message = "Pipeline required attribute(s) missing." + fail_message = "Pipeline required attribute(s) missing" _LOGGER.warn("> Not submitted: %s", fail_message) skip_reasons.append(fail_message) - try: - # Check for any required inputs before submitting. - _LOGGER.debug("Confirming required inputs") - sample.confirm_required_inputs() - except IOError: - # TODO: inform about WHICH missing file(s). - fail_message = "Required input file(s) not found." - _LOGGER.warn("> Not submitted: %s", fail_message) - skip_reasons.append(fail_message) + # Check for any missing requirements before submitting. 
+ _LOGGER.debug("Determining missing requirements") + error_type, missing_reqs_msg = \ + sample.determine_missing_requirements() + if missing_reqs_msg: + if prj.permissive: + _LOGGER.warn(missing_reqs_msg) + else: + raise error_type(missing_reqs_msg) + _LOGGER.warn("> Not submitted: %s", missing_reqs_msg) + skip_reasons.append(missing_reqs_msg) + + # Check if single_or_paired value is recognized. + if hasattr(sample, "read_type"): + # Drop "-end", "_end", or "end" from end of the column value. + sample.read_type = re.sub( + '[_\\-]?end$', '', str(sample.read_type)).lower() + if sample.read_type not in valid_read_types: + skip_reasons.append("read_type must be in {}". + format(valid_read_types)) # Identify cluster resources required for this submission. submit_settings = pipeline_interface.choose_resource_package( @@ -303,14 +319,17 @@ def run(prj, args, remaining_args, interface_manager): # Append arguments for this pipeline # Sample-level arguments are handled by the pipeline interface. try: - argstring = pipeline_interface.get_arg_string(pipeline_key, sample) - argstring += " " + argstring = pipeline_interface.get_arg_string( + pipeline_name=pipeline_key, sample=sample, + submission_folder_path=prj.metadata.submission_subdir) except AttributeError: # TODO: inform about which missing attribute(s). fail_message = "Required attribute(s) missing " \ - "for pipeline arguments string." + "for pipeline arguments string" _LOGGER.warn("> Not submitted: %s", fail_message) skip_reasons.append(fail_message) + else: + argstring += " " if skip_reasons: # Sample is active, but we've at least 1 pipeline skip reason. @@ -335,7 +354,8 @@ def run(prj, args, remaining_args, interface_manager): # because we don't care about parameters here. if hasattr(prj.pipeline_config, pipeline_key): # First priority: pipeline config in project config - pl_config_file = getattr(prj.pipeline_config, pipeline_key) + pl_config_file = getattr(prj.pipeline_config, + pipeline_key) # Make sure it's a file (it could be provided as null.) if pl_config_file: if not os.path.isfile(pl_config_file): @@ -350,47 +370,50 @@ def run(prj, args, remaining_args, interface_manager): cmd += " -C " + pl_config_file cmd += " -O " + prj.metadata.results_subdir - if submit_settings.setdefault("cores", 1) > 1: + if int(submit_settings.setdefault("cores", 1)) > 1: cmd += " -P " + submit_settings["cores"] try: - if submit_settings["mem"] > 1: + if float(submit_settings["mem"]) > 1: cmd += " -M " + submit_settings["mem"] except KeyError: _LOGGER.warn("Submission settings " "lack memory specification") # Add the command string and job name to the submit_settings object - submit_settings["JOBNAME"] = sample.sample_name + "_" + pipeline_key + submit_settings["JOBNAME"] = \ + sample.sample_name + "_" + pipeline_key submit_settings["CODE"] = cmd # Submit job! - job_count += 1 + _LOGGER.debug("Attempting job submission: '%s' ('%s')", + sample.sample_name, pl_name) submitted = cluster_submit( sample, prj.compute.submission_template, prj.compute.submission_command, submit_settings, - prj.metadata.submission_subdir, pipeline_outfolder, + prj.metadata.submission_subdir, sample_output_folder, pl_name, args.time_delay, submit=True, dry_run=args.dry_run, ignore_flags=args.ignore_flags, remaining_args=remaining_args) if submitted: + _LOGGER.debug("SUBMITTED") submit_count += 1 - - msg = "\nLooper finished. {} of {} job(s) submitted.".\ - format(submit_count, job_count) + else: + _LOGGER.debug("NOT SUBMITTED") + + # Report what went down. 
+ _LOGGER.info("Looper finished") + _LOGGER.info("Samples generating jobs: %d of %d", + len(processed_samples), num_samples) + _LOGGER.info("Jobs submitted: %d of %d", submit_count, job_count) if args.dry_run: - msg += " Dry run. No jobs were actually submitted." - - _LOGGER.info(msg) - + _LOGGER.info("Dry run. No jobs were actually submitted.") if failures: _LOGGER.info("%d sample(s) with submission failure.", len(failures)) - sample_count_pairs_by_reason = aggregate_exec_skip_reasons(failures) + sample_by_reason = aggregate_exec_skip_reasons(failures) _LOGGER.info("{} unique reasons for submission failure: {}".format( - len(sample_count_pairs_by_reason), - sample_count_pairs_by_reason.keys())) - _LOGGER.info("Per-sample submission failure count for each reason:") - for reason, sample_nfail_pairs in sample_count_pairs_by_reason.items(): - _LOGGER.info("> {}: {}".format(reason, sample_nfail_pairs)) + len(sample_by_reason), + list(sample_by_reason.keys()))) + _LOGGER.info("Samples by failure: {}".format(dict(sample_by_reason))) @@ -404,7 +427,7 @@ def aggregate_exec_skip_reasons(skip_reasons_sample_pairs): :return Mapping[str, Iterable[str]]: mapping from explanation to collection of names of samples to which it pertains """ - from collections import Counter, defaultdict + from collections import defaultdict samples_by_skip_reason = defaultdict(list) for skip_reasons, sample in skip_reasons_sample_pairs: for reason in set(skip_reasons): @@ -423,11 +446,11 @@ def summarize(prj): columns = [] stats = [] - _start_counter(len(prj.samples)) + _start_counter(prj.num_samples) for sample in prj.samples: _LOGGER.info(_COUNTER.show(sample.sample_name, sample.library)) - pipeline_outfolder = os.path.join( + sample_output_folder = os.path.join( prj.metadata.results_subdir, sample.sample_name) # Grab the basic info from the annotation sheet for this sample. @@ -435,7 +458,7 @@ def summarize(prj): sample_stats = sample.get_sheet_dict() columns.extend(sample_stats.keys()) # Version 0.3 standardized all stats into a single file - stats_file = os.path.join(pipeline_outfolder, "stats.tsv") + stats_file = os.path.join(sample_output_folder, "stats.tsv") if os.path.isfile(stats_file): _LOGGER.info("Found stats file: '%s'", stats_file) else: @@ -485,17 +508,17 @@ def destroy(prj, args, preview_flag=True): _LOGGER.info("Results to destroy:") - _start_counter(len(prj.samples)) + _start_counter(prj.num_samples) for sample in prj.samples: _LOGGER.info(_COUNTER.show(sample.sample_name, sample.library)) - pipeline_outfolder = os.path.join( + sample_output_folder = os.path.join( prj.metadata.results_subdir, sample.sample_name) if preview_flag: # Preview: Don't actually delete, just show files. 
- _LOGGER.info(str(pipeline_outfolder)) + _LOGGER.info(str(sample_output_folder)) else: - destroy_sample_results(pipeline_outfolder, args) + destroy_sample_results(sample_output_folder, args) if not preview_flag: _LOGGER.info("Destroy complete.") @@ -522,13 +545,13 @@ def clean(prj, args, preview_flag=True): _LOGGER.info("Files to clean:") - _start_counter(len(prj.samples)) + _start_counter(prj.num_samples) for sample in prj.samples: _LOGGER.info(_COUNTER.show(sample.sample_name, sample.library)) - pipeline_outfolder = os.path.join(prj.metadata.results_subdir, - sample.sample_name) - cleanup_files = glob.glob(os.path.join(pipeline_outfolder, + sample_output_folder = os.path.join( + prj.metadata.results_subdir, sample.sample_name) + cleanup_files = glob.glob(os.path.join(sample_output_folder, "*_cleanup.sh")) if preview_flag: # Preview: Don't actually clean, just show what will be cleaned. @@ -597,13 +620,13 @@ def _submission_status_text(curr, total, sample_name, sample_library): def cluster_submit( - sample, submit_template, submission_command, variables_dict, - submission_folder, pipeline_outfolder, pipeline_name, time_delay, - submit=False, dry_run=False, ignore_flags=False, remaining_args=None): + sample, submit_template, submission_command, variables_dict, + submission_folder, sample_output_folder, pipeline_name, time_delay, + submit=False, dry_run=False, ignore_flags=False, remaining_args=None): """ - Submit job to cluster manager. - - :param models.Sample sample: the sample object for submission + Write cluster submission script to disk and submit job for given Sample. + + :param models.Sample sample: the Sample object for submission :param str submit_template: path to submission script template :param str submission_command: actual command with which to execute the submission of the cluster job for the given sample @@ -611,7 +634,7 @@ def cluster_submit( the submission template :param str submission_folder: path to the folder in which to place submission files - :param str pipeline_outfolder: path to folder into which the pipeline + :param str sample_output_folder: path to folder into which the pipeline will write file(s), and where to search for flag file to check if a sample's already been submitted :param str pipeline_name: name of the pipeline that the job will run @@ -642,12 +665,10 @@ def cluster_submit( if not os.path.exists(submit_script_dirpath): os.makedirs(submit_script_dirpath) + # Add additional arguments, populate template fields, and write to disk. with open(submit_template, 'r') as handle: filedata = handle.read() - - # Update variable dict with any additional arguments. variables_dict["CODE"] += " " + str(" ".join(remaining_args or [])) - # Fill in submit_template with variables. for key, value in variables_dict.items(): # Here we add brackets around the key names and use uppercase because # this is how they are encoded as variables in the submit templates. @@ -655,16 +676,32 @@ def cluster_submit( with open(submit_script, 'w') as handle: handle.write(filedata) - # Prepare and write sample yaml object - sample.to_yaml() + # Ensure existence of on-disk representation of this sample. + if type(sample) is Sample: + # run() writes base Sample to disk for each non-skipped sample. 
+ expected_filepath = os.path.join( + submission_folder, "{}.yaml".format(sample.name)) + _LOGGER.debug("Base Sample, to reuse file: '%s'", + expected_filepath) + if not os.path.exists(expected_filepath): + _LOGGER.warn("Missing expected Sample file; creating") + sample.to_yaml(subs_folder_path=submission_folder) + else: + _LOGGER.debug("Base Sample file exists") + else: + # Serialize Sample, generate data for disk, and write. + name_sample_subtype = sample.__class__.__name__ + _LOGGER.debug("Writing %s representation to disk: '%s'", + name_sample_subtype, sample.name) + sample.to_yaml(subs_folder_path=submission_folder) # Check if job is already submitted (unless ignore_flags is set to True) if not ignore_flags: flag_files = glob.glob(os.path.join( - pipeline_outfolder, pipeline_name + "*.flag")) + sample_output_folder, pipeline_name + "*.flag")) if len(flag_files) > 0: - flags = [os.path.basename(f) for f in flag_files] - _LOGGER.info("> Not submitting, flag(s) found: {}".format(flags)) + _LOGGER.info("> Not submitting, flag(s) found: {}". + format(flag_files)) submit = False else: pass @@ -672,7 +709,8 @@ def cluster_submit( if not submit: return False if dry_run: - _LOGGER.info("> DRY RUN: I would have submitted this") + _LOGGER.info("> DRY RUN: I would have submitted this: '%s'", + submit_script) else: subprocess.call(submission_command + " " + submit_script, shell=True) time.sleep(time_delay) # Delay next job's submission. @@ -783,7 +821,6 @@ def main(): # Parse command-line arguments and establish logger. args, remaining_args = parse_arguments() - _LOGGER.info("Command: {} (Looper version: {})". format(args.command, __version__)) # Initialize project @@ -801,20 +838,19 @@ def main(): # TODO split here, spawning separate run process for each # pipelines directory in project metadata pipelines directory. - try: - pipedirs = prj.metadata.pipelines_dir - _LOGGER.info("Pipelines path(s): {}".format(pipedirs)) - except AttributeError: - _LOGGER.error("Looper requires a metadata.pipelines_dir") - raise - if len(pipedirs) == 0: - _LOGGER.error("Looper requires a metadata.pipelines_dir") - raise AttributeError + if not hasattr(prj.metadata, "pipelines_dir") or \ + len(prj.metadata.pipelines_dir) == 0: + raise AttributeError( + "Looper requires at least one pipeline(s) location.") - interface_manager = InterfaceManager(prj.metadata.pipelines_dir) + if not prj.interfaces_by_protocol: + _LOGGER.error( + "The Project knows no protocols. Does it point " + "to at least one pipelines location that exists?") + return try: - run(prj, args, remaining_args, interface_manager=interface_manager) + run(prj, args, remaining_args) except IOError: _LOGGER.error("{} pipelines_dir: '{}'".format( prj.__class__.__name__, prj.metadata.pipelines_dir)) diff --git a/looper/models.py b/looper/models.py index 78f76078..6f7ed895 100644 --- a/looper/models.py +++ b/looper/models.py @@ -49,9 +49,11 @@ # TODO: the examples changes would involve library and output_dir. 
from collections import \ - defaultdict, Iterable, Mapping, MutableMapping, OrderedDict as _OrderedDict + defaultdict, Iterable, Mapping, MutableMapping, namedtuple, \ + OrderedDict as _OrderedDict from functools import partial import glob +import inspect import itertools import logging import os as _os @@ -65,7 +67,16 @@ import yaml from .utils import \ - parse_ftype, check_bam, check_fastq, get_file_size, partition + alpha_cased, check_bam, check_fastq, expandpath, \ + get_file_size, import_from_source, parse_ftype, partition, \ + standard_stream_redirector + + +# TODO: decide if we want to denote functions for export. +__functions__ = [] +__classes__ = ["AttributeDict", "PipelineInterface", "Project", + "ProtocolInterface", "ProtocolMapper", "Sample"] +__all__ = __functions__ + __classes__ COMPUTE_SETTINGS_VARNAME = "PEPENV" @@ -74,6 +85,8 @@ SAMPLE_NAME_COLNAME = "sample_name" SAMPLE_ANNOTATIONS_KEY = "sample_annotation" IMPLICATIONS_DECLARATION = "implied_columns" +DATA_SOURCES_SECTION = "data_sources" +SAMPLE_EXECUTION_TOGGLE = "toggle" COL_KEY_SUFFIX = "_key" ATTRDICT_METADATA = {"_force_nulls": False, "_attribute_identity": False} @@ -84,6 +97,27 @@ +def check_sheet(sample_file, dtype=str): + """ + Check if csv file exists and has all required columns. + + :param str sample_file: path to sample annotations file. + :param type dtype: data type for CSV read. + :raises IOError: if given annotations file can't be read. + :raises ValueError: if required column(s) is/are missing. + """ + df = _pd.read_table(sample_file, sep=None, dtype=dtype, + index_col=False, engine="python") + req = [SAMPLE_NAME_COLNAME] + missing = set(req) - set(df.columns) + if len(missing) != 0: + raise ValueError( + "Annotation sheet ('{}') is missing column(s): {}; has: {}". + format(sample_file, missing, df.columns)) + return df + + + def copy(obj): def copy(self): """ @@ -97,17 +131,187 @@ def copy(self): +def include_in_repr(attr, klazz): + """ + Determine whether to include attribute in an object's text representation. + + :param str attr: attribute to include/exclude from object's representation + :param str | type klazz: name of type or type itself of which the object + to be represented is an instance + :return bool: whether to include attribute in an object's + text representation + """ + classname = klazz.__name__ if isinstance(klazz, type) else klazz + return attr not in \ + {"Project": ["sheet", "interfaces_by_protocol"]}[classname] + + + def is_url(maybe_url): + """ + Determine whether a path is a URL. + + :param str maybe_url: path to investigate as URL + :return bool: whether path appears to be a URL + """ return urlparse(maybe_url).scheme != "" +def merge_sample(sample, merge_table, data_sources, derived_columns): + """ + Use merge table data to augment/modify Sample. + + :param Sample sample: sample to modify via merge table data + :param merge_table: data with which to alter Sample + :param Mapping data_sources: collection of named paths to data locations + :param Iterable[str] derived_columns: names of column for which + corresponding Sample attribute's value is data-derived + :return Set[str]: names of columns that were merged + """ + + merged_cols = {} + + if merge_table is None: + _LOGGER.log(5, "No data for sample merge, skipping") + return merged_cols + + if SAMPLE_NAME_COLNAME not in merge_table.columns: + raise KeyError( + "Merge table requires a column named '{}'.". + format(SAMPLE_NAME_COLNAME)) + + _LOGGER.debug("Merging Sample with data sources: {}". 
+ format(data_sources)) + _LOGGER.debug("Merging Sample with derived columns: {}". + format(derived_columns)) + + sample_indexer = merge_table[SAMPLE_NAME_COLNAME] == \ + getattr(sample, SAMPLE_NAME_COLNAME) + merge_rows = merge_table[sample_indexer] + + if len(merge_rows) == 0: + _LOGGER.debug("No merge rows for sample '%s', skipping", sample.name) + return merged_cols + + # Hash derived columns for faster lookup in case of many samples/columns. + derived_columns = set(derived_columns) + _LOGGER.log(5, "%d rows to merge", len(merge_rows)) + + + # For each row in the merge table of this sample: + # 1) populate any derived columns + # 2) derived columns --> space-delimited strings + # 3) update the sample values with the merge table + # Keep track of merged cols, + # so we don't re-derive them later. + merged_cols = {key: "" for key in merge_rows.columns} + for _, row in merge_rows.iterrows(): + row_dict = row.to_dict() + for col in merge_rows.columns: + if col == SAMPLE_NAME_COLNAME or \ + col not in derived_columns: + _LOGGER.log(5, "Skipping column: '%s'", col) + continue + # Initialize key in parent dict. + col_key = col + COL_KEY_SUFFIX + merged_cols[col_key] = "" + row_dict[col_key] = row_dict[col] + row_dict[col] = sample.locate_data_source( + data_sources, col, row_dict[col], row_dict) # 1) + + _LOGGER.log(5, "Adding derived columns") + # Also add in any derived cols present. + for col in derived_columns: + # Skip over attributes that the sample + # either lacks, and those covered by the + # data from the current (row's) data. + if not hasattr(sample, col) or \ + col in row_dict: + _LOGGER.log(5, "Skipping column: '%s'", col) + continue + # Map column name key to sample's value + # for the attribute given by column name. + col_key = col + COL_KEY_SUFFIX + row_dict[col_key] = getattr(sample, col) + # Map the column name itself to the + # populated data source template string. + row_dict[col] = sample.locate_data_source( + data_sources, col, getattr(sample, col), row_dict) + _LOGGER.debug("PROBLEM adding derived column: " + "{}, {}, {}".format(col, row_dict[col], + getattr(sample, col))) + + # Since we are now jamming multiple (merged) + # entries into a single attribute, we have to + # join them into a space-delimited string + # and then set to sample attribute. + for key, val in row_dict.items(): + if key == SAMPLE_NAME_COLNAME or not val: + _LOGGER.log(5, "Skipping KV: {}={}".format(key, val)) + continue + _LOGGER.log(5, "merge: sample '%s'; %s=%s", + str(sample.name), str(key), str(val)) + if not key in merged_cols: + new_val = str(val).rstrip() + else: + new_val = "{} {}".format( + merged_cols[key], str(val)).strip() + merged_cols[key] = new_val # 2) + + # Don't update sample_name. + merged_cols.pop(SAMPLE_NAME_COLNAME, None) + + sample.update(merged_cols) # 3) + sample.merged_cols = merged_cols + sample.merged = True + + return sample + + + +def process_pipeline_interfaces(pipeline_interface_locations): + """ + Create a ProtocolInterface for each pipeline location given. + + :param Iterable[str] pipeline_interface_locations: locations, each of + which should be either a directory path or a filepath, that specifies + pipeline interface and protocol mappings information. Each such file + should be have a pipelines section and a protocol mappings section + whereas each folder should have a file for each of those sections. 
+ :return Mapping[str, Iterable[ProtocolInterface]]: mapping from protocol + name to interface(s) for which that protocol is mapped + """ + interface_by_protocol = defaultdict(list) + for pipe_iface_location in pipeline_interface_locations: + if not _os.path.exists(pipe_iface_location): + _LOGGER.warn("Ignoring nonexistent pipeline interface " + "location '%s'", pipe_iface_location) + continue + proto_iface = ProtocolInterface(pipe_iface_location) + for proto_name in proto_iface.protomap: + _LOGGER.log(5, "Adding protocol name: '%s'", proto_name) + interface_by_protocol[alpha_cased(proto_name)].append(proto_iface) + return interface_by_protocol + + + +# Collect PipelineInterface, Sample type, pipeline path, and script with flags. +SubmissionBundle = namedtuple( + "SubmissionBundle", + field_names=["interface", "subtype", "pipeline", "pipeline_with_flags"]) + + + @copy class Paths(object): """ A class to hold paths as attributes. """ - def __str__(self): - return "Paths object." + def __getitem__(self, key): + """ + Provides dict-style access to attributes + """ + return getattr(self, key) def __iter__(self): """ @@ -120,11 +324,8 @@ def __iter__(self): """ return iter(self.__dict__.values()) - def __getitem__(self, key): - """ - Provides dict-style access to attributes - """ - return getattr(self, key) + def __repr__(self): + return "Paths object." @@ -185,8 +386,7 @@ def __setattr__(self, key, value): def __getattr__(self, item, default=None): """ - Fetch the value associated with the provided identifier. Unlike an - ordinary object, `AttributeDict` supports fetching + Fetch the value associated with the provided identifier. :param int | str item: identifier for value to fetch :return object: whatever value corresponds to the requested key/item @@ -198,6 +398,11 @@ def __getattr__(self, item, default=None): anyway. More specifically, respect attribute naming that appears to be indicative of the intent of protection. """ + try: + return super(AttributeDict, self).__getattribute__(item) + except (AttributeError, TypeError): + # Handle potential property and non-string failures. + pass try: # Fundamentally, this is still a mapping; # route object notation access pattern accordingly. @@ -241,21 +446,21 @@ def __setitem__(self, key, value): if isinstance(value, Mapping): try: # Combine AttributeDict instances. - _LOGGER.debug("Updating key: '{}'".format(key)) + _LOGGER.log(5, "Updating key: '{}'".format(key)) self.__dict__[key].add_entries(value) except (AttributeError, KeyError): # Create new AttributeDict, replacing previous value. self.__dict__[key] = AttributeDict(value) - _LOGGER.debug("'{}' now has keys {}". + _LOGGER.log(5, "'{}' now has keys {}". format(key, self.__dict__[key].keys())) elif value is not None or \ key not in self.__dict__ or self.__dict__["_force_nulls"]: _LOGGER.log(5, "Setting '{}' to {}".format(key, value)) self.__dict__[key] = value else: - _LOGGER.debug("Not setting {k} to {v}; _force_nulls: {nulls}". - format(k=key, v=value, - nulls=self.__dict__["_force_nulls"])) + _LOGGER.log(5, "Not setting {k} to {v}; _force_nulls: {nulls}". 
+ format(k=key, v=value, + nulls=self.__dict__["_force_nulls"])) def __getitem__(self, item): @@ -305,6 +510,9 @@ def __len__(self): def __repr__(self): return repr(self.__dict__) + def __str__(self): + return "{}: {}".format(self.__class__.__name__, repr(self)) + @copy @@ -339,6 +547,10 @@ class Project(AttributeDict): settings can't be established, optional; if null (the default), a warning message will be logged, and no exception will be raised. :type no_compute_exception: type + :param defer_sample_construction: whether to wait to build this Project's + Sample objects until they're needed, optional; by default, the basic + Sample is created during Project construction + :type defer_sample_construction: bool :Example: @@ -356,21 +568,21 @@ class Project(AttributeDict): def __init__(self, config_file, subproject=None, default_compute=None, dry=False, permissive=True, file_checks=False, compute_env_file=None, - no_environment_exception=None, no_compute_exception=None): + no_environment_exception=None, no_compute_exception=None, + defer_sample_construction=False): - _LOGGER.info("Creating %s from file: '%s'", + _LOGGER.debug("Creating %s from file: '%s'", self.__class__.__name__, config_file) super(Project, self).__init__() - default_compute = default_compute or self.default_cmpenv_file - # Initialize local, serial compute as default (no cluster submission) # Start with default environment settings. _LOGGER.debug("Establishing default environment settings") self.environment, self.environment_file = None, None try: - self.update_environment(default_compute) + self.update_environment( + default_compute or self.default_compute_envfile) except Exception as e: _LOGGER.error("Can't load environment config file '%s'", str(default_compute)) @@ -413,11 +625,14 @@ def __init__(self, config_file, subproject=None, self.config_file = _os.path.abspath(config_file) # Parse config file - _LOGGER.info("Parsing %s config file", self.__class__.__name__) + _LOGGER.debug("Parsing %s config file", self.__class__.__name__) if subproject: _LOGGER.info("Using subproject: '{}'".format(subproject)) self.parse_config_file(subproject) + # Ensure data_sources is at least set if it wasn't parsed. + self.setdefault("data_sources", None) + self.name = self.infer_name(self.config_file) self.subproject = subproject @@ -435,37 +650,114 @@ def __init__(self, config_file, subproject=None, except AttributeError: self.derived_columns = self.DERIVED_COLUMNS_DEFAULT - # Sheet will be set to non-null value by call to add_sample_sheet(). - # That call also sets the samples (list) attribute for the instance - # and adds default derived columns. - self.sheet = None - self.samples = list() - self.add_sample_sheet() - self.finalize_pipelines_directory() + # SampleSheet creation populates project's samples, adds the + # sheet itself, and adds any derived columns. + _LOGGER.debug("Processing {} pipeline location(s): {}". 
+ format(len(self.metadata.pipelines_dir), + self.metadata.pipelines_dir)) + self.interfaces_by_protocol = \ + process_pipeline_interfaces(self.metadata.pipelines_dir) + + path_anns_file = self.metadata.sample_annotation + _LOGGER.debug("Reading sample annotations sheet: '%s'", path_anns_file) + try: + self.sheet = check_sheet(path_anns_file) + except IOError: + _LOGGER.error("Alleged annotations file doesn't exist: '%s'", + path_anns_file) + anns_folder_path = _os.path.dirname(path_anns_file) + try: + annotations_file_folder_contents = \ + _os.listdir(anns_folder_path) + except OSError: + _LOGGER.error("Annotations file folder doesn't exist either: " + "'%s'", anns_folder_path) + else: + _LOGGER.error("Annotations file folder's contents: {}". + format(annotations_file_folder_contents)) + raise + + self.merge_table = None + self._samples = None if defer_sample_construction \ + else self._make_basic_samples() + + + def __repr__(self): + include = partial(include_in_repr, klazz=self.__class__) + return repr({k: v for k, v in self.__dict__.items() if include(k)}) + + + @property + def compute_env_var(self): + """ + Environment variable through which to access compute settings. + + :return str: name of the environment variable to pointing to + compute settings + """ + return COMPUTE_SETTINGS_VARNAME + @property - def default_cmpenv_file(self): + def default_compute_envfile(self): """ Path to default compute environment settings file. """ return _os.path.join( self.templates_folder, "default_compute_settings.yaml") @property - def templates_folder(self): - return _os.path.join(_os.path.dirname(__file__), "submit_templates") + def num_samples(self): + """ Number of samples available in this Project. """ + return sum(1 for _ in self.sample_names) @property - def compute_env_var(self): + def output_dir(self): """ - Environment variable through which to access compute settings. - - :return str: name of the environment variable to pointing to - compute settings + Directory in which to place results and submissions folders. + + By default, assume that the project's configuration file specifies + an output directory, and that this is therefore available within + the project metadata. If that assumption does not hold, though, + consider the folder in which the project configuration file lives + to be the project's output directory. + + :return str: path to the project's output directory, either as + specified in the configuration file or the folder that contains + the project's configuration file. """ - return COMPUTE_SETTINGS_VARNAME + try: + return self.metadata.output_dir + except AttributeError: + return _os.path.dirname(self.config_file) + + + @property + def project_folders(self): + """ + Names of folders to nest within a project output directory. + + :return Iterable[str]: names of output-nested folders + """ + return ["results_subdir", "submission_subdir"] + + + @property + def protocols(self): + """ + Determine this Project's unique protocol names. + + :return Set[str]: collection of this Project's unique protocol names + """ + protos = set() + for s in self.samples: + try: + protos.add(s.library) + except AttributeError: + _LOGGER.debug("Sample '%s' lacks protocol", s.sample_name) + return protos @property @@ -484,34 +776,36 @@ def required_metadata(self): @property - def project_folders(self): + def sample_names(self): + """ Names of samples of which this Project is aware. 
""" + return iter(self.sheet[SAMPLE_NAME_COLNAME]) + + + @property + def samples(self): """ - Names of folders to nest within a project output directory. - - :return Iterable[str]: names of output-nested folders + Generic/base Sample instance for each of this Project's samples. + + :return Iterable[Sample]: Sample instance for each + of this Project's samples """ - return ["results_subdir", "submission_subdir"] + if self._samples is None: + _LOGGER.debug("Building basic Sample(s) for %s", + self.__class__.__name__) + self._samples = self._make_basic_samples() + _LOGGER.debug("%s has %d basic Sample(s)", + self.__class__.__name__, len(self._samples)) + return self._samples @property - def output_dir(self): + def templates_folder(self): """ - Directory in which to place results and submissions folders. - - By default, assume that the project's configuration file specifies - an output directory, and that this is therefore available within - the project metadata. If that assumption does not hold, though, - consider the folder in which the project configuration file lives - to be the project's output directory. - - :return str: path to the project's output directory, either as - specified in the configuration file or the folder that contains - the project's configuration file. + Path to folder with default submission templates. + + :return str: path to folder with default submission templates """ - try: - return self.metadata.output_dir - except AttributeError: - return _os.path.dirname(self.config_file) + return _os.path.join(_os.path.dirname(__file__), "submit_templates") @staticmethod @@ -530,27 +824,135 @@ def infer_name(path_config_file): return config_folder - def _handle_missing_env_attrs(self, env_settings_file, when_missing): - """ Default environment settings aren't required; warn, though. """ - missing_env_attrs = \ - [attr for attr in ["environment", "environment_file"] - if not hasattr(self, attr) or getattr(self, attr) is None] - if not missing_env_attrs: - return - message = "'{}' lacks environment attributes: {}".\ - format(env_settings_file, missing_env_attrs) - if when_missing is None: - _LOGGER.warn(message) + def build_submission_bundles(self, protocol, priority=True): + """ + Create pipelines to submit for each sample of a particular protocol. + + With the argument (flag) to the priority parameter, there's control + over whether to submit pipeline(s) from only one of the project's + known pipeline locations with a match for the protocol, or whether to + submit pipelines created from all locations with a match for the + protocol. + + :param str protocol: name of the protocol/library for which to + create pipeline(s) + :param bool priority: to only submit pipeline(s) from the first of the + pipelines location(s) (indicated in the project config file) that + has a match for the given protocol; optional, default True + :return Iterable[(PipelineInterface, str, str)]: + :raises AssertionError: if there's a failure in the attempt to + partition an interface's pipeline scripts into disjoint subsets of + those already mapped and those not yet mapped + """ + + # Pull out the collection of interfaces (potentially one from each of + # the locations indicated in the project configuration file) as a + # sort of pool of information about possible ways in which to submit + # pipeline(s) for sample(s) of the indicated protocol. 
+ try: + protocol_interfaces = \ + self.interfaces_by_protocol[protocol] + except KeyError: + _LOGGER.warn("Unknown protocol: '{}'".format(protocol)) + return [] + + job_submission_bundles = [] + pipeline_keys_used = set() + _LOGGER.debug("Building pipelines for {} PIs...". + format(len(protocol_interfaces))) + for proto_iface in protocol_interfaces: + # Short-circuit if we care only about the highest-priority match + # for pipeline submission. That is, if the intent is to submit + # pipeline(s) from a single location for each sample of the given + # protocol, we can stop searching the pool of pipeline interface + # information once we've found a match for the protocol. + if priority and len(job_submission_bundles) > 0: + return job_submission_bundles[0] + + this_protocol_pipelines = proto_iface.fetch_pipelines(protocol) + if not this_protocol_pipelines: + _LOGGER.warn("No mapping for protocol '%s' in %s", + protocol, proto_iface) + continue + + # TODO: update once dependency-encoding logic is in place. + # The proposed dependency-encoding format uses a semicolon + # between pipelines for which the dependency relationship is + # serial. For now, simply treat those as multiple independent + # pipelines by replacing the semicolon with a comma, which is the + # way in which multiple independent pipelines for a single protocol + # are represented in the mapping declaration. + pipeline_keys = \ + this_protocol_pipelines.replace(";", ",")\ + .strip(" ()\n")\ + .split(",") + # These cleaned pipeline keys are what's used to resolve the path + # to the pipeline to run. + pipeline_keys = [pk.strip() for pk in pipeline_keys] + + # Skip over pipelines already mapped by another location. + already_mapped, new_scripts = \ + partition(pipeline_keys, + partial(_is_member, items=pipeline_keys_used)) + pipeline_keys_used |= set(pipeline_keys) + + # Attempt to validate that partition yielded disjoint subsets. + try: + disjoint_partition_violation = \ + set(already_mapped) & set(new_scripts) + except TypeError: + _LOGGER.debug("Unable to hash partitions for validation") + else: + assert not disjoint_partition_violation, \ + "Partitioning {} with membership in {} as " \ + "predicate produced intersection: {}".format( + pipeline_keys, pipeline_keys_used, + disjoint_partition_violation) + + if len(already_mapped) > 0: + _LOGGER.debug("Skipping {} already-mapped script name(s): {}". + format(len(already_mapped), already_mapped)) + _LOGGER.debug("{} new scripts for protocol {} from " + "pipeline(s) location '{}': {}". + format(len(new_scripts), protocol, + proto_iface.source, new_scripts)) + + # For each pipeline script to which this protocol will pertain, + # create the new jobs/submission bundles. + new_jobs = [] + for pipeline_key in new_scripts: + # Determine how to reference the pipeline and where it is. + strict_pipe_key, full_pipe_path, full_pipe_path_with_flags = \ + proto_iface.finalize_pipeline_key_and_paths( + pipeline_key) + # Determine which interface and Sample subtype to use. + sample_subtype = \ + proto_iface.fetch_sample_subtype( + protocol, strict_pipe_key, full_pipe_path) + # Package the pipeline's interface, subtype, command, and key. + submission_bundle = SubmissionBundle( + proto_iface.pipe_iface, sample_subtype, + strict_pipe_key, full_pipe_path_with_flags) + # Add this bundle to the collection of ones relevant for the + # current ProtocolInterface. 
+ new_jobs.append(submission_bundle) + + job_submission_bundles.append(new_jobs) + + # Repeat logic check of short-circuit conditional to account for + # edge case in which it's satisfied during the final iteration. + if priority and len(job_submission_bundles) > 1: + return job_submission_bundles[0] else: - when_missing(message) + return list(itertools.chain(*job_submission_bundles)) def finalize_pipelines_directory(self, pipe_path=""): """ Finalize the establishment of a path to this project's pipelines. - - With the passed argument, override anything already set. - Otherwise, prefer path provided in this project's config, then + + With the passed argument, override anything already set. + Otherwise, prefer path provided in this project's config, then local pipelines folder, then a location set in project environment. :param str pipe_path: (absolute) path to pipelines @@ -586,6 +988,128 @@ def finalize_pipelines_directory(self, pipe_path=""): self.metadata.pipelines_dir = pipe_path + def get_arg_string(self, pipeline_name): + """ + For this project, given a pipeline, return an argument string + specified in the project config file. + """ + + def make_optarg_text(opt, arg): + """ Transform flag/option into CLI-ready text version. """ + return "{} {}".format(opt, _os.path.expandvars(arg)) \ + if arg else opt + + def create_argtext(name): + """ Create command-line argstring text from config section. """ + try: + optargs = getattr(self.pipeline_args, name) + except AttributeError: + return "" + # NS using __dict__ will add in the metadata from AttrDict (doh!) + _LOGGER.debug("optargs.items(): {}".format(optargs.items())) + optargs_texts = [make_optarg_text(opt, arg) + for opt, arg in optargs.items()] + _LOGGER.debug("optargs_texts: {}".format(optargs_texts)) + # TODO: may need to fix some spacing issues here. + return " ".join(optargs_texts) + + default_argtext = create_argtext(DEFAULT_COMPUTE_RESOURCES_NAME) + pipeline_argtext = create_argtext(pipeline_name) + + if not pipeline_argtext: + # The project config may not have an entry for this pipeline; + # no problem! There are no pipeline-specific args. Return text + # from default arguments, whether empty or not. + return default_argtext + elif default_argtext: + # Non-empty pipeline-specific and default argtext + return " ".join([default_argtext, pipeline_argtext]) + else: + # No default argtext, but non-empty pipeline-specific argtext + return pipeline_argtext + + + def build_sheet(self, *protocols): + """ + Create all Sample object for this project for the given protocol(s). + + :return pandas.core.frame.DataFrame: DataFrame with from base version + of each of this Project's samples, for indicated protocol(s) if + given, else all of this Project's samples + """ + # Use all protocols if none are explicitly specified. + samples = self.samples + protocols = {alpha_cased(p) for p in (protocols or self.protocols)} + return _pd.DataFrame( + [s.as_series() for s in samples if + hasattr(s, "library") and alpha_cased(s.library) in protocols]) + + + def make_project_dirs(self): + """ + Creates project directory structure if it doesn't exist. 
+ """ + for folder_name in self.project_folders: + folder_path = self.metadata[folder_name] + _LOGGER.debug("Ensuring project dir exists: '%s'", folder_path) + if not _os.path.exists(folder_path): + _LOGGER.debug("Attempting to create project folder: '%s'", + folder_path) + try: + _os.makedirs(folder_path) + except OSError as e: + _LOGGER.warn("Could not create project folder: '%s'", + str(e)) + + + def _make_basic_samples(self): + """ Build the base Sample objects from the annotations sheet data. """ + + # This should be executed just once, establishing the Project's + # base Sample objects if they don't already exist. + if hasattr(self.metadata, "merge_table"): + if self.merge_table is None: + if self.metadata.merge_table and \ + _os.path.isfile(self.metadata.merge_table): + self.merge_table = _pd.read_table( + self.metadata.merge_table, + sep=None, engine="python") + _LOGGER.debug("Merge table shape: {}". + format(self.merge_table.shape)) + else: + _LOGGER.debug( + "Alleged path to merge table data is not a " + "file: '%s'", self.metadata.merge_table) + else: + _LOGGER.debug("Already parsed merge table") + else: + _LOGGER.debug("No merge table") + + # Create the Sample(s). + samples = [] + for _, row in self.sheet.iterrows(): + sample = Sample(row.dropna()) + sample.set_genome(self.get("genomes")) + sample.set_transcriptome(self.get("transcriptomes")) + + merge_sample(sample, self.merge_table, + self.data_sources, self.derived_columns) + sample.set_file_paths(self) + # Hack for backwards-compatibility + # Pipelines should now use `data_source`) + _LOGGER.debug("Setting sample's data path") + try: + sample.data_path = sample.data_source + except AttributeError: + _LOGGER.log(5, "Sample '%s' lacks data source; skipping " + "data path assignment", sample.sample_name) + else: + _LOGGER.log(5, "Path to sample data: '%s'", sample.data_source) + samples.append(sample) + + return samples + + def parse_config_file(self, subproject=None): """ Parse provided yaml config file and check required fields exist. @@ -598,6 +1122,9 @@ def parse_config_file(self, subproject=None): with open(self.config_file, 'r') as conf_file: config = yaml.safe_load(conf_file) + _LOGGER.debug("{} config data: {}".format( + self.__class__.__name__, config)) + # Parse yaml into the project's attributes. _LOGGER.debug("Adding attributes for {}: {}".format( self.__class__.__name__, config.keys())) @@ -627,6 +1154,29 @@ def parse_config_file(self, subproject=None): _LOGGER.debug("Metadata: %s", str(self.metadata)) delattr(self, "paths") + # In looper 0.6, we added pipeline_interfaces to metadata + # For backwards compatibility, merge it with pipelines_dir + + if "metadata" in config: + if "pipelines_dir" in self.metadata: + _LOGGER.warning("Looper v0.6 suggests " + "switching from pipelines_dir to " + "pipeline_interfaces. See docs for details: " + "http://looper.readthedocs.io/en/latest/") + if "pipeline_interfaces" in self.metadata: + if "pipelines_dir" in self.metadata: + raise AttributeError( + "You defined both 'pipeline_interfaces' and " + "'pipelines_dir'. Please remove your " + "'pipelines_dir' definition.") + else: + self.metadata.pipelines_dir = \ + self.metadata.pipeline_interfaces + _LOGGER.debug("Adding pipeline_interfaces to " + "pipelines_dir. New value: {}". + format(self.metadata.pipelines_dir)) + + # Ensure required absolute paths are present and absolute. 
for var in self.required_metadata: if var not in self.metadata: @@ -661,12 +1211,12 @@ def parse_config_file(self, subproject=None): _LOGGER.debug("Parsing relative sections") for sect in relative_sections: if not hasattr(self, sect): - _LOGGER.debug("%s lacks relative section '%s', skipping", - self.__class__.__name__, sect) + _LOGGER.log(5, "%s lacks relative section '%s', skipping", + self.__class__.__name__, sect) continue relative_vars = getattr(self, sect) if not relative_vars: - _LOGGER.debug("No relative variables, continuing") + _LOGGER.log(5, "No relative variables, continuing") continue for var in relative_vars.keys(): if not hasattr(relative_vars, var) or \ @@ -677,18 +1227,17 @@ def parse_config_file(self, subproject=None): _LOGGER.debug("Ensuring absolute path(s) for '%s'", var) # Parsed from YAML, so small space of possible datatypes. if isinstance(relpath, list): - setattr(relative_vars, var, - [self._ensure_absolute(maybe_relpath) - for maybe_relpath in relpath]) + absolute = [self._ensure_absolute(maybe_relpath) + for maybe_relpath in relpath] else: - abs_path = self._ensure_absolute(relpath) - _LOGGER.debug("Setting '%s' to '%s'", var, abs_path) - setattr(relative_vars, var, abs_path) + absolute = self._ensure_absolute(relpath) + _LOGGER.debug("Setting '%s' to '%s'", var, absolute) + setattr(relative_vars, var, absolute) # Project config may have made compute.submission_template relative. # Make sure it's absolute. if self.compute is None: - _LOGGER.debug("No compute, no submission template") + _LOGGER.log(5, "No compute, no submission template") elif not _os.path.isabs(self.compute.submission_template): # Relative to environment config file. self.compute.submission_template = _os.path.join( @@ -703,24 +1252,57 @@ def parse_config_file(self, subproject=None): path_config_file=self.config_file) - def _ensure_absolute(self, maybe_relpath): - _LOGGER.debug("Ensuring absolute path for '%s'", maybe_relpath) - if _os.path.isabs(maybe_relpath) or is_url(maybe_relpath): - _LOGGER.debug("Already absolute") - return maybe_relpath - # Maybe we have env vars that make the path absolute? - expanded = _os.path.expandvars(maybe_relpath) - _LOGGER.debug("Expanded: '%s'", expanded) - if _os.path.isabs(expanded): - _LOGGER.debug("Expanded is absolute") - return expanded - _LOGGER.debug("Making non-absolute path '%s' be absolute", - maybe_relpath) - # Set path to an absolute path, relative to project config. - config_dirpath = _os.path.dirname(self.config_file) - _LOGGER.debug("config_dirpath: %s", config_dirpath) - abs_path = _os.path.join(config_dirpath, maybe_relpath) - return abs_path + def set_compute(self, setting): + """ + Set the compute attributes according to the + specified settings in the environment file. + + :param str setting: name for non-resource compute bundle, the name of + a subsection in an environment configuration file + :return bool: success flag for attempt to establish compute settings + """ + + # Hope that environment & environment compute are present. + if setting and self.environment and "compute" in self.environment: + # Augment compute, creating it if needed. + if self.compute is None: + _LOGGER.debug("Creating Project compute") + self.compute = AttributeDict() + _LOGGER.debug("Adding entries for setting '%s'", setting) + self.compute.add_entries(self.environment.compute[setting]) + + # Ensure submission template is absolute. 
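
# NOTE: a self-contained sketch (not part of the diff) of the resolution
# order _ensure_absolute applies to the relative sections handled above:
# keep absolute paths as-is (the real method also passes URLs through via
# is_url), accept $VAR expansions that become absolute, and otherwise anchor
# the path at the project config file's directory. The example path is
# hypothetical:
import os

def ensure_absolute(maybe_relpath, config_file):
    if os.path.isabs(maybe_relpath):
        return maybe_relpath
    expanded = os.path.expandvars(maybe_relpath)
    if os.path.isabs(expanded):
        return expanded
    return os.path.join(os.path.dirname(config_file), maybe_relpath)

# ensure_absolute("metadata/sheet.csv", "/home/me/proj/config.yaml")
# -> "/home/me/proj/metadata/sheet.csv"
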
+            if not _os.path.isabs(self.compute.submission_template):
+                try:
+                    self.compute.submission_template = _os.path.join(
+                        _os.path.dirname(self.environment_file),
+                        self.compute.submission_template)
+                except AttributeError as e:
+                    # Environment and environment compute should at least have been
+                    # set as null-valued attributes, so execution here is an error.
+                    _LOGGER.error(str(e))
+                # Compute settings have been established.
+                else:
+                    return True
+        else:
+            # Scenario in which environment and environment compute are
+            # both present but don't evaluate to True is fairly
+            # innocuous, even common if outside of the looper context.
+            _LOGGER.debug("Environment = {}".format(self.environment))
+
+        return False
+
+
+    def set_project_permissions(self):
+        """
+        Make the project's public_html folder executable.
+        """
+        try:
+            _os.chmod(self.trackhubs.trackhub_dir, 0o0755)
+        except OSError:
+            # Quietly ignore a failure to change the folder's mode.
+            pass
 
 
     def update_environment(self, env_settings_file):
@@ -730,13 +1312,11 @@ def update_environment(self, env_settings_file):
         :param str env_settings_file: path to file with new environment
             configuration data
         """
-        if not env_settings_file:
-            return
-        with open(env_settings_file, 'r') as handle:
+        with open(env_settings_file, 'r') as f:
             _LOGGER.info("Loading %s: %s",
                          self.compute_env_var, env_settings_file)
-            env_settings = yaml.load(handle)
+            env_settings = yaml.load(f)
             _LOGGER.debug("Parsed environment settings: %s",
                           str(env_settings))
 
@@ -761,432 +1341,40 @@ def update_environment(self, env_settings_file):
             self.environment_file = env_settings_file
 
 
-    def make_project_dirs(self):
-        """
-        Creates project directory structure if it doesn't exist.
-        """
-        for folder_name in self.project_folders:
-            folder_path = self.metadata[folder_name]
-            _LOGGER.debug("Ensuring project dir exists: '%s'", folder_path)
-            if not _os.path.exists(folder_path):
-                _LOGGER.debug("Attempting to create project folder: '%s'",
-                              folder_path)
-                try:
-                    _os.makedirs(folder_path)
-                except OSError as e:
-                    _LOGGER.warn("Could not create project folder: '%s'",
-                                 str(e))
-
-
-    def set_project_permissions(self):
-        """
-        Makes the project's public_html folder executable.
-        """
-        for d in [self.trackhubs.trackhub_dir]:
-            try:
-                _os.chmod(d, 0o0755)
-            except OSError:
-                # This currently does not fail now
-                # ("cannot change folder's mode: %s" % d)
-                continue
-
-
-    def set_compute(self, setting):
-        """
-        Set the compute attributes according to the
-        specified settings in the environment file.
-
-        :param str setting: name for non-resource compute bundle, the name of
-            a subsection in an environment configuration file
-        :return bool: success flag for attempt to establish compute settings
-        """
-
-        # Hope that environment & environment compute are present.
-        if setting and self.environment and "compute" in self.environment:
-
-            # Augment compute, creating it if needed
-            if self.compute is None:
-                _LOGGER.debug("Creating Project compute")
-                self.compute = AttributeDict()
-            _LOGGER.debug("Adding entries for setting '%s'", setting)
-            self.compute.add_entries(self.environment.compute[setting])
-
-            # Ensure submission template is absolute.
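
# NOTE: a sketch (not part of the diff) of the shape of environment data that
# set_compute expects: a "compute" mapping keyed by setting name. The setting
# names and paths here are hypothetical:
environment = {
    "compute": {
        "default": {"submission_template": "templates/localhost_template.sub",
                    "submission_command": "sh"},
        "slurm": {"submission_template": "templates/slurm_template.sub",
                  "submission_command": "sbatch"},
    },
}
# prj.set_compute("slurm") would merge the "slurm" entries into prj.compute,
# absolutizing submission_template relative to the environment file's folder.
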
- if not _os.path.isabs(self.compute.submission_template): - try: - self.compute.submission_template = _os.path.join( - _os.path.dirname(self.environment_file), - self.compute.submission_template) - except AttributeError as e: - # Environment and environment compute should at least have been - # set as null-valued attributes, so execution here is an error. - _LOGGER.error(str(e)) - # Compute settings have been established. - else: - return True - else: - # Scenario in which environment and environment compute are - # both present but don't evaluate to True is fairly - # innocuous, even common if outside of the looper context. - _LOGGER.debug("Environment = {}".format(self.environment)) - - return False - - - def get_arg_string(self, pipeline_name): - """ - For this project, given a pipeline, return an argument string - specified in the project config file. - """ - - def make_optarg_text(opt, arg): - """ Transform flag/option into CLI-ready text version. """ - return "{} {}".format(opt, _os.path.expandvars(arg)) \ - if arg else opt - - def create_argtext(name): - """ Create command-line argstring text from config section. """ - try: - optargs = getattr(self.pipeline_args, name) - except AttributeError: - return "" - # NS using __dict__ will add in the metadata from AttrDict (doh!) - _LOGGER.debug("optargs.items(): {}".format(optargs.items())) - optargs_texts = [make_optarg_text(opt, arg) - for opt, arg in optargs.items()] - _LOGGER.debug("optargs_texts: {}".format(optargs_texts)) - # TODO: may need to fix some spacing issues here. - return " ".join(optargs_texts) - - default_argtext, pipeline_argtext = \ - create_argtext(DEFAULT_COMPUTE_RESOURCES_NAME), create_argtext(pipeline_name) - - if not pipeline_argtext: - # The project config may not have an entry for this pipeline; - # no problem! There are no pipeline-specific args. Return text - # from default arguments, whether empty or not. - return default_argtext - elif default_argtext: - # Non-empty pipeline-specific and default argtext - return " ".join([default_argtext, pipeline_argtext]) - else: - # No default argtext, but non-empty pipeline-specific argtext - return pipeline_argtext - - - def add_sample_sheet(self, csv=None): - """ - Build a `SampleSheet` object from a csv file and - add it and its samples to the project. - - :param csv: Path to csv file. - :type csv: str - """ - - _LOGGER.debug("Adding sample sheet") - - # Make SampleSheet object - # By default read sample_annotation, but allow explict CSV arg. - self.sheet = SampleSheet(csv or self.metadata.sample_annotation) - - # Pair project and sheet. - self.sheet.prj = self - - # Generate sample objects from annotation sheet. - _LOGGER.debug("Creating samples from annotation sheet") - self.sheet.make_samples() - - # Add samples to Project - for sample in self.sheet.samples: - # Overwritten later if merged - sample.merged = False - self.add_sample(sample) # Appends sample to self.samples. - - # Merge sample files (!) using merge table if provided: - if hasattr(self.metadata, "merge_table"): - if self.metadata.merge_table is not None: - if _os.path.isfile(self.metadata.merge_table): - # read in merge table - - merge_table = _pd.read_table( - self.metadata.merge_table, - sep=None, index_col=False, engine="python") - - if SAMPLE_NAME_COLNAME not in merge_table.columns: - raise KeyError( - "Merge table requires a column named '{}'.". 
- format(SAMPLE_NAME_COLNAME)) - - for sample in self.sheet.samples: - sample_indexer = \ - merge_table[SAMPLE_NAME_COLNAME] == sample.name - merge_rows = merge_table[sample_indexer] - - # Check if there are rows in the - # merge table for this sample: - if len(merge_rows) > 0: - # For each row in the merge table of this sample: - # 1) populate any derived columns - # 2) derived columns --> space-delimited strings - # 3) update the sample values with the merge table - - # Keep track of merged cols, - # so we don't re-derive them later. - merged_cols = { - key: "" for key in merge_rows.columns} - for _, row in merge_rows.iterrows(): - row_dict = row.to_dict() - for col in merge_rows.columns: - if col == SAMPLE_NAME_COLNAME or \ - col not in self.derived_columns: - continue - # Initialize key in parent dict. - col_key = col + COL_KEY_SUFFIX - merged_cols[col_key] = "" - row_dict[col_key] = row_dict[col] - row_dict[col] = sample.locate_data_source( - col, row_dict[col], row_dict) # 1) - - # Also add in any derived cols present. - for col in self.derived_columns: - # Skip over attributes that the sample - # either lacks, and those covered by the - # data from the current (row's) data. - if not hasattr(sample, col) or \ - col in row_dict: - continue - # Map column name key to sample's value - # for the attribute given by column name. - col_key = col + COL_KEY_SUFFIX - row_dict[col_key] = getattr(sample, col) - # Map the column name itself to the - # populated data source template string. - row_dict[col] = sample.locate_data_source( - col, getattr(sample, col), row_dict) - _LOGGER.debug( - "PROBLEM adding derived column: " - "{}, {}, {}".format(col, - row_dict[col], getattr(sample, col))) - - # Since we are now jamming multiple (merged) - # entries into a single attribute, we have to - # join them into a space-delimited string - # and then set to sample attribute. - for key, val in row_dict.items(): - if key == SAMPLE_NAME_COLNAME or not val: - continue - _LOGGER.debug("merge: sample '%s'; %s=%s", - str(sample.name), - str(key), str(val)) - if not key in merged_cols: - new_val = str(val).rstrip() - else: - new_val = "{} {}".format( - merged_cols[key], str(val)).strip() - merged_cols[key] = new_val # 2) - - # Don't update sample_name. - merged_cols.pop(SAMPLE_NAME_COLNAME, None) - - sample.update(merged_cols) # 3) - sample.merged = True # mark sample as merged - sample.merged_cols = merged_cols - - # With all samples, prepare file paths. - for sample in self.sheet.samples: - if hasattr(sample, "organism"): - sample.get_genome_transcriptome() - sample.set_file_paths() - # Hack for backwards-compatibility - # Pipelines should now use `data_source`) - try: - sample.data_path = sample.data_source - except AttributeError: - _LOGGER.debug("Sample '%s' lacks data source --> skipping " - "data path assignment", sample.sample_name) - - - def add_sample(self, sample): - """ - Adds a sample to the project's `samples`. - """ - # Check sample is Sample object - if not isinstance(sample, Sample): - raise TypeError("Provided object is not a Sample object.") - - # Tie sample and project bilaterally - sample.prj = self - # Append - self.samples.append(sample) - - - -@copy -class SampleSheet(object): - """ - Class to model a sample annotation sheet. - - :param path: Path to sample file. - :type path: str - :param dtype: Data type to read sample file as. Default is str. - :type dtype: type - - :Example: - - .. 
code-block:: python - - from models import Project, SampleSheet - prj = Project("config.yaml") - sheet = SampleSheet("sheet.csv") - """ - - def __init__(self, path, dtype=str): - super(SampleSheet, self).__init__() - self.df = self.check_sheet(path, dtype) - self.path = path - self.samples = list() - - def __repr__(self): - if hasattr(self, "prj"): - return "SampleSheet for project '%s' with %i samples." % \ - (self.prj, len(self.df)) - else: - return "SampleSheet with %i samples." % len(self.df) - - - @staticmethod - def check_sheet(sample_file, dtype): - """ - Check if csv file exists and has all required columns. - - :param str sample_file: path to sample annotations file. - :param type dtype: data type for CSV read. - :raises IOError: if given annotations file can't be read. - :raises ValueError: if required column(s) is/are missing. - """ - - df = _pd.read_table(sample_file, sep=None, dtype=dtype, - index_col=False, engine="python") - req = [SAMPLE_NAME_COLNAME] - missing = set(req) - set(df.columns) - if len(missing) != 0: - raise ValueError( - "Annotation sheet ('{}') is missing column(s): {}; has: {}". - format(sample_file, missing, df.columns)) - return df - - - @staticmethod - def alpha_cased(text, lower=False): - """ - Filter text to just letters and homogenize case. - - :param str text: what to filter and homogenize. - :param bool lower: whether to convert to lowercase; default uppercase. - :return str: input filtered to just letters, with homogenized case. - """ - text = "".join(filter(lambda c: c.isalpha(), text)) - return text.lower() if lower else text.upper() - - - def make_samples(self): - """ - Create samples from annotation sheet (considering library), - and them to the project. - """ + def _ensure_absolute(self, maybe_relpath): + """ Ensure that a possibly relative path is absolute. """ + _LOGGER.log(5, "Ensuring absolute: '%s'", maybe_relpath) + if _os.path.isabs(maybe_relpath) or is_url(maybe_relpath): + _LOGGER.log(5, "Already absolute") + return maybe_relpath + # Maybe we have env vars that make the path absolute? + expanded = _os.path.expandvars(maybe_relpath) + _LOGGER.log(5, "Expanded: '%s'", expanded) + if _os.path.isabs(expanded): + _LOGGER.log(5, "Expanded is absolute") + return expanded + _LOGGER.log(5, "Making non-absolute path '%s' be absolute", + maybe_relpath) + # Set path to an absolute path, relative to project config. + config_dirpath = _os.path.dirname(self.config_file) + _LOGGER.log(5, "config_dirpath: %s", config_dirpath) + abs_path = _os.path.join(config_dirpath, maybe_relpath) + return abs_path - found_pipelines = False - try: - import pipelines # Use a pipelines package if installed. - except ImportError: - # pipelines_dir is optional. - pipeline_dirpaths = getattr( - self.prj.metadata, "pipelines_dir", None) - if pipeline_dirpaths: - if isinstance(pipeline_dirpaths, str): - pipeline_dirpaths = [pipeline_dirpaths] - sys.path.extend(pipeline_dirpaths) - _LOGGER.debug( - "Added {} pipelines path(s) to sys.path: {}". - format(len(pipeline_dirpaths), pipeline_dirpaths)) - try: - import pipelines - except ImportError: - pass - else: - found_pipelines = True - else: - found_pipelines = True - if not found_pipelines: - # Just return a basic Sample for each of the sheet's rows. - def make_sample(data): - return Sample(data) + def _handle_missing_env_attrs(self, env_settings_file, when_missing): + """ Default environment settings aren't required; warn, though. 
""" + missing_env_attrs = \ + [attr for attr in ["environment", "environment_file"] + if not hasattr(self, attr) or getattr(self, attr) is None] + if not missing_env_attrs: + return + message = "'{}' lacks environment attributes: {}".\ + format(env_settings_file, missing_env_attrs) + if when_missing is None: + _LOGGER.warn(message) else: - # Attempt creation of Sample subtype specific to protocol. - - # Get all pipelines package Sample subclasses. - import inspect - from utils import fetch_package_classes - sample_types = fetch_package_classes(pipelines, - lambda maybe_class: inspect.isclass(maybe_class) - and issubclass(maybe_class, Sample)) - - # TODO: perhaps modify or alter handling of need for __library__. - pairing = {self.alpha_cased(sample_class.__library__): sample_class - for sample_type, sample_class in sample_types} - - def make_sample(data): - try: - return pairing[self.alpha_cased(data.library)](data) - except (AttributeError, KeyError): - return Sample(data) - - for _, row in self.df.iterrows(): - self.samples.append(make_sample(row.dropna())) - - - def as_data_frame(self): - """ - Returns a `pandas.DataFrame` representation of self. - """ - return _pd.DataFrame([s.as_series() for s in self.samples]) - - - def write(self, path, sep=None): - """ - Saves an annotation sheet from the samples. - - :param path: Path to file to be written. - :type path: str - :param sep: Delimiter to use in the file written. - :type sep: str - - :Example: - - .. code-block:: python - - from models import SampleSheet - sheet = SampleSheet("/projects/example/sheet.csv") - sheet.write("~/projects/example/sheet2.csv") - """ - - valid_types = [".txt", ".tsv", ".csv"] - - # Infer delimiter if needed. - if sep is None: - file_type = _os.path.splitext(path)[1].lower() - if file_type not in valid_types: - help_msg = "Provide an argument for parameter 'sep' or pass a " \ - "filepath with an extension in: {}".\ - format(valid_types) - raise ValueError(help_msg) - sep = "," if file_type == ".csv" else "\t" - - # Convert to frame and write to disk. - with open(path, 'w') as sheetfile: - # TODO: decide which--if any--attributes to drop here. - self.as_data_frame().to_csv(sheetfile, sep=sep, index=False) + when_missing(message) @@ -1196,7 +1384,7 @@ class Sample(object): Class to model Samples based on a pandas Series. :param series: Sample's data. - :type series: pandas.core.series.Series + :type series: Mapping | pandas.core.series.Series :Example: @@ -1226,6 +1414,8 @@ def __init__(self, series): if isinstance(series, _pd.Series): series = series.to_dict() + elif isinstance(series, Sample): + series = series.as_series().to_dict() # Set series attributes on self. for key, value in series.items(): @@ -1247,17 +1437,21 @@ def __init__(self, series): self.required_paths = None self.yaml_file = None + # Not yet merged, potentially toggled when merge step is considered. + self.merged = False + # Sample dirs - self.paths = Paths() # Only when sample is added to project, can paths be added - # This is because sample-specific files will be created in a - # data root directory dependent on the project. - # The SampleSheet object, after being added to a project, will - # call Sample.set_file_paths(). 
+ self.paths = Paths() - def __repr__(self): - return "Sample '{}'".format(self.name) + def __eq__(self, other): + return self.__dict__ == other.__dict__ + + + def __ne__(self, other): + return not self == other def __getitem__(self, item): @@ -1270,12 +1464,18 @@ def __getitem__(self, item): raise KeyError(item) - def update(self, newdata): + def __repr__(self): + return "Sample '{}'".format(self.name) + + + def as_series(self): """ - Update Sample object with attributes from a dict. + Returns a `pandas.Series` object with all the sample's attributes. + + :return pandas.core.series.Series: pandas Series representation + of this Sample, with its attributes. """ - for key, value in newdata.items(): - setattr(self, key, value) + return _pd.Series(self.__dict__) def check_valid(self, required=None): @@ -1295,6 +1495,87 @@ def check_valid(self, required=None): return lacking + def determine_missing_requirements(self): + """ + Determine which of this Sample's required attributes/files are missing. + + :return (type, str): hypothetical exception type along with message + about what's missing; null and empty if nothing exceptional + is detected + """ + + # set_pipeline_attributes must be run first. + if not hasattr(self, "required_inputs"): + _LOGGER.warn("You must run set_pipeline_attributes " + "before determine_missing_requirements") + return None, "" + + if not self.required_inputs: + _LOGGER.debug("No required inputs") + return None, "" + + # First, attributes + missing, empty = [], [] + for file_attribute in self.required_inputs_attr: + _LOGGER.log(5, "Checking '{}'".format(file_attribute)) + try: + attval = getattr(self, file_attribute) + except AttributeError: + _LOGGER.log(5, "Missing required input attribute '%s'", + file_attribute) + missing.append(file_attribute) + continue + if attval == "": + _LOGGER.log(5, "Empty required input attribute '%s'", + file_attribute) + empty.append(file_attribute) + else: + _LOGGER.log(5, "'{}' is valid: '{}'". + format(file_attribute, attval)) + + if missing or empty: + return AttributeError, \ + "Missing attributes: {}. Empty attributes: {}".\ + format(missing, empty) + + # Second, files + missing_files = [] + for paths in self.required_inputs: + _LOGGER.log(5, "Text to split and check paths: '%s'", paths) + # There can be multiple, space-separated values here. + for path in paths.split(" "): + _LOGGER.log(5, "Checking path: '{}'".format(path)) + if not _os.path.exists(path): + _LOGGER.log(5, "Missing required input file: '{}'". + format(path)) + missing_files.append(path) + + if not missing_files: + return None, "" + else: + missing_message = \ + "Missing file(s): {}".format(", ".join(missing_files)) + return IOError, missing_message + + + def generate_filename(self, delimiter="_"): + """ + Create a name for file in which to represent this Sample. + + This uses knowledge of the instance's subtype, sandwiching a delimiter + between the name of this Sample and the name of the subtype before the + extension. If the instance is a base Sample type, then the filename + is simply the sample name with an extension. 
+
+        :param str delimiter: what to place between sample name and name of
+            subtype; this is only relevant if the instance is of a subclass
+        :return str: name for file with which to represent this Sample on disk
+        """
+        base = self.name if type(self) is Sample else \
+            "{}{}{}".format(self.name, delimiter, self.__class__.__name__)
+        return "{}.yaml".format(base)
+
+
     def generate_name(self):
         """
         Generate name for the sample by joining some of its attribute strings.
@@ -1302,88 +1583,134 @@ def generate_name(self):
         raise NotImplementedError("Not implemented in new code base.")
 
 
-    def as_series(self):
+    def get_attr_values(self, attrlist):
         """
-        Returns a `pandas.Series` object with all the sample's attributes.
+        Get value corresponding to each given attribute.
 
-        :return pandas.core.series.Series: pandas Series representation
-            of this Sample, with its attributes.
+        :param str attrlist: name of an attribute storing a list of attr names
+        :return list | NoneType: value (or empty string) corresponding to
+            each named attribute; null if this Sample's value for the
+            attribute given by the argument to the "attrlist" parameter is
+            empty/null, or if this Sample lacks the indicated attribute
         """
-        return _pd.Series(self.__dict__)
+        # If attribute is None, then value is also None.
+        attribute_list = getattr(self, attrlist, None)
+        if not attribute_list:
+            return None
+
+        if not isinstance(attribute_list, list):
+            attribute_list = [attribute_list]
+
+        # Strings contained here are appended later so shouldn't be null.
+        return [getattr(self, attr, "") for attr in attribute_list]
 
 
-    def to_yaml(self, path=None):
+    def get_sheet_dict(self):
         """
-        Serializes itself in YAML format.
+        Create K-V pairs for items originally passed in via the sample sheet.
 
-        :param str path: A file path to write yaml to.
+        This is useful for summarizing; it provides a representation of the
+        sample that excludes things like config files and derived entries.
+
+        :return OrderedDict: mapping from name to value for data elements
+            originally provided via the sample sheet (i.e., a map-like
+            representation of the instance, excluding derived items)
         """
-        def obj2dict(obj, to_skip=("samples", "sheet", "sheet_attributes")):
-            """
-            Build representation of object as a dict, recursively
-            for all objects that might be attributes of self.
+        return _OrderedDict([[k, getattr(self, k)]
+                             for k in self.sheet_attributes])
 
-            :param object obj: what to serialize to write to YAML.
-            :param Iterable[str] to_skip: names of attributes to ignore.
-\            """
-            if isinstance(obj, list):
-                return [obj2dict(i) for i in obj]
-            if isinstance(obj, AttributeDict):
-                return {k: obj2dict(v) for k, v in obj.__dict__.items()
-                        if k not in to_skip and
-                        (k not in ATTRDICT_METADATA or
-                         v != ATTRDICT_METADATA[k])}
-            elif isinstance(obj, Mapping):
-                return {k: obj2dict(v)
-                        for k, v in obj.items() if k not in to_skip}
-            elif isinstance(obj, (Paths, Sample)):
-                return {k: obj2dict(v)
-                        for k, v in obj.__dict__.items() if k not in to_skip}
-            elif hasattr(obj, 'dtype'):  # numpy data types
-                # TODO: this fails with ValueError for multi-element array.
-                return obj.item()
-            elif _pd.isnull(obj):
-                # Missing values as evaluated by pd.isnull().
-                # This gets correctly written into yaml.
- return "NaN" - else: - return obj - # If path is not specified, use default: - # prj.metadata.submission_dir + sample_name + yaml - self.yaml_file = path or \ - _os.path.join(self.prj.metadata.submission_subdir, - self.sample_name + ".yaml") - serial = obj2dict(self) - with open(self.yaml_file, 'w') as outfile: - outfile.write(yaml.safe_dump(serial, default_flow_style=False)) + def infer_columns(self, implications): + """ + Infer value for additional field(s) from other field(s). + + Add columns/fields to the sample based on values in those already-set + that the sample's project defines as indicative of implications for + additional data elements for the sample. + + :param Mapping implications: Project's implied columns data + :return None: this function mutates state and is strictly for effect + """ + + _LOGGER.log(5, "Sample attribute implications: {}". + format(implications)) + if not implications: + return + + for implier_name, implied in implications.items(): + _LOGGER.debug( + "Setting Sample variable(s) implied by '%s'", implier_name) + try: + implier_value = self[implier_name] + except KeyError: + _LOGGER.debug("No '%s' for this sample", implier_name) + continue + try: + implied_value_by_column = implied[implier_value] + _LOGGER.debug("Implications for '%s' = %s: %s", + implier_name, implier_value, + str(implied_value_by_column)) + for colname, implied_value in \ + implied_value_by_column.items(): + _LOGGER.log(5, "Setting '%s'=%s", + colname, implied_value) + setattr(self, colname, implied_value) + except KeyError: + _LOGGER.log( + 5, "Unknown implied value for implier '%s' = '%s'", + implier_name, implier_value) + + + def is_dormant(self): + """ + Determine whether this Sample is inactive. + By default, a Sample is regarded as active. That is, if it lacks an + indication about activation status, it's assumed to be active. If, + however, and there's an indication of such status, it must be '1' + in order to be considered switched 'on.' - def locate_data_source(self, column_name=DATA_SOURCE_COLNAME, + :return bool: whether this Sample's been designated as dormant + """ + try: + flag = self[SAMPLE_EXECUTION_TOGGLE] + except KeyError: + # Regard default Sample state as active. + return False + # If specified, the activation flag must be set to '1'. + return flag != "1" + + + def locate_data_source(self, data_sources, column_name=DATA_SOURCE_COLNAME, source_key=None, extra_vars=None): """ - Uses the template path provided in the project config section - "data_sources" to piece together an actual path by substituting + Uses the template path provided in the project config section + "data_sources" to piece together an actual path by substituting variables (encoded by "{variable}"") with sample attributes. - :param str column_name: Name of sample attribute + :param Mapping data_sources: mapping from key name (as a value in + a cell of a tabular data structure) to, e.g., filepath + :param str column_name: Name of sample attribute (equivalently, sample sheet column) specifying a derived column. - :param str source_key: The key of the data_source, - used to index into the project config data_sources section. - By default, the source key will be taken as the value of - the specified column (as a sample attribute). - For cases where the sample doesn't have this attribute yet + :param str source_key: The key of the data_source, + used to index into the project config data_sources section. 
+ By default, the source key will be taken as the value of + the specified column (as a sample attribute). + For cases where the sample doesn't have this attribute yet (e.g. in a merge table), you must specify the source key. - :param dict extra_vars: By default, this will look to - populate the template location using attributes found in the - current sample; however, you may also provide a dict of extra - variables that can also be used for variable replacement. + :param dict extra_vars: By default, this will look to + populate the template location using attributes found in the + current sample; however, you may also provide a dict of extra + variables that can also be used for variable replacement. These extra variables are given a higher priority. :return str: regex expansion of data source specified in configuration, with variable substitutions made + :raises ValueError: if argument to data_sources parameter is null/empty """ - sources_section = "data_sources" + if not data_sources: + # TODO: should this be a null/empty-string return, or actual error? + raise ValueError("No data sources") if not source_key: try: @@ -1392,15 +1719,16 @@ def locate_data_source(self, column_name=DATA_SOURCE_COLNAME, reason = "'{attr}': to locate sample's data source, provide " \ "the name of a key from '{sources}' or ensure " \ "sample has attribute '{attr}'".format( - attr=column_name, sources=sources_section) + attr=column_name, sources=DATA_SOURCES_SECTION) raise AttributeError(reason) try: - regex = self.prj[sources_section][source_key] + regex = data_sources[source_key] except KeyError: _LOGGER.warn( "Config lacks entry for data_source key: '{}' " - "(in column: '{}')".format(source_key, column_name)) + "in column '{}'; known: {}".format( + source_key, column_name, data_sources.keys())) return "" # Populate any environment variables like $VAR with os.environ["VAR"] @@ -1429,125 +1757,94 @@ def locate_data_source(self, column_name=DATA_SOURCE_COLNAME, return val - def get_genome_transcriptome(self): + def make_sample_dirs(self): """ - Get genome and transcriptome, based on project config file. - If not available (matching config), genome and transcriptome will be set to sample.organism. + Creates sample directory structure if it doesn't exist. """ - try: - self.genome = getattr(self.prj.genomes, self.organism) - except AttributeError: - _LOGGER.debug("Project config lacks genome mapping for " - "organism '%s'", str(self.organism)) - try: - self.transcriptome = getattr(self.prj.transcriptomes, self.organism) - except AttributeError: - _LOGGER.debug("Project config lacks transcriptome mapping for " - "organism '%s'", str(self.organism)) + for path in self.paths: + if not _os.path.exists(path): + _os.makedirs(path) - def set_file_paths(self): + def set_file_paths(self, project): """ Sets the paths of all files for this sample. + + :param Project project: object with pointers to data paths and such """ # Any columns specified as "derived" will be constructed # based on regex in the "data_sources" section of project config. - for col in self.prj.derived_columns: + for col in project.derived_columns: # Only proceed if the specified column exists - # and was not already merged or derived. - if hasattr(self, col) and col not in self.merged_cols \ - and col not in self.derived_cols_done: - # Set a variable called {col}_key, so the - # original source can also be retrieved. 
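
# NOTE: a sketch (not part of the diff) of how a derived column is resolved
# by locate_data_source, given a data_sources mapping with a templated path;
# the key and template here are hypothetical:
data_sources = {"bsf_sample": "/scratch/lab/{flowcell}/{sample_name}.bam"}
# For a sample with flowcell="BSF_0123" and sample_name="frog_1", calling
#   sample.locate_data_source(data_sources, column_name="data_source",
#                             source_key="bsf_sample")
# yields "/scratch/lab/BSF_0123/frog_1.bam".
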
- setattr(self, col + COL_KEY_SUFFIX, getattr(self, col)) - setattr(self, col, self.locate_data_source(col)) - self.derived_cols_done.append(col) - - self.infer_columns() - - # Parent - self.results_subdir = self.prj.metadata.results_subdir - self.paths.sample_root = _os.path.join( - self.prj.metadata.results_subdir, self.sample_name) - - # Track url - bigwig_filename = self.name + ".bigWig" - try: - # Project's public_html folder - self.bigwig = _os.path.join( - self.prj.trackhubs.trackhub_dir, bigwig_filename) - self.track_url = \ - "{}/{}".format(self.prj.trackhubs.url, bigwig_filename) - except: - _LOGGER.debug("No trackhub/URL") - pass - - - def infer_columns(self): - """ - Infer value for additional field(s) from other field(s). - - Add columns/fields to the sample based on values in those already-set - that the sample's project defines as indicative of implications for - additional data elements for the sample. - - :return None: this function mutates state and is strictly for effect - """ - if not hasattr(self.prj, IMPLICATIONS_DECLARATION): - return - - impliers = self.prj[IMPLICATIONS_DECLARATION] - - _LOGGER.debug( - "Sample variable(s) that can imply others: %s", str(impliers)) - for implier_name, implied in impliers.items(): - _LOGGER.debug( - "Setting Sample variable(s) implied by '%s'", implier_name) - try: - implier_value = self[implier_name] - except KeyError: - _LOGGER.debug("No '%s' for this sample", implier_name) - continue - try: - implied_value_by_column = implied[implier_value] - _LOGGER.debug("Implications for '%s' = %s: %s", - implier_name, implier_value, - str(implied_value_by_column)) - for colname, implied_value in \ - implied_value_by_column.items(): - _LOGGER.log(5, "Setting '%s'=%s", - colname, implied_value) - setattr(self, colname, implied_value) - except KeyError: - _LOGGER.log( - 5, "Unknown implied value for implier '%s' = '%s'", - implier_name, implier_value) + # and was not already merged or derived. + if hasattr(self, col) and col not in self.merged_cols \ + and col not in self.derived_cols_done: + # Set a variable called {col}_key, so the + # original source can also be retrieved. + setattr(self, col + COL_KEY_SUFFIX, getattr(self, col)) + setattr(self, col, self.locate_data_source( + data_sources=project.get(DATA_SOURCES_SECTION), + column_name=col)) + self.derived_cols_done.append(col) + self.infer_columns(implications=project.get(IMPLICATIONS_DECLARATION)) - def make_sample_dirs(self): - """ - Creates sample directory structure if it doesn't exist. - """ - for path in self.paths: - if not _os.path.exists(path): - _os.makedirs(path) + # Parent + self.results_subdir = project.metadata.results_subdir + self.paths.sample_root = _os.path.join( + project.metadata.results_subdir, self.sample_name) + # Track url + bigwig_filename = self.name + ".bigWig" + try: + # Project's public_html folder + self.bigwig = _os.path.join( + project.trackhubs.trackhub_dir, bigwig_filename) + self.track_url = \ + "{}/{}".format(project.trackhubs.url, bigwig_filename) + except: + _LOGGER.debug("No trackhub/URL") + pass - def get_sheet_dict(self): + + def set_genome(self, genomes): """ - Create a K-V pairs for items originally passed in via the sample sheet. + Set the genome for this Sample. + + :param Mapping[str, str] genomes: genome assembly by organism name + """ + self._set_assembly("genome", genomes) - This is useful for summarizing; it provides a representation of the - sample that excludes things like config files and derived entries. 
- :return OrderedDict: mapping from name to value for data elements - originally provided via the sample sheet (i.e., the a map-like - representation of the instance, excluding derived items) + def set_transcriptome(self, transcriptomes): """ - return _OrderedDict([[k, getattr(self, k)] - for k in self.sheet_attributes]) + Set the transcriptome for this Sample. + :param Mapping[str, str] transcriptomes: transcriptome assembly by + organism name + """ + self._set_assembly("transcriptome", transcriptomes) + + + def _set_assembly(self, ome, assemblies): + if not assemblies: + _LOGGER.debug("Empty/null assemblies mapping: {} ({})". + format(assemblies, type(assemblies))) + return + try: + assembly = assemblies[self.organism] + except AttributeError: + _LOGGER.debug("Sample '%s' lacks organism attribute", self.name) + assembly = None + except KeyError: + _LOGGER.log(5, "Unknown {} value: '{}'". + format(ome, self.organism)) + assembly = None + _LOGGER.log(5, "Setting {} as {} on sample: '{}'". + format(assembly, ome, self.name)) + setattr(self, ome, assembly) + def set_pipeline_attributes( self, pipeline_interface, pipeline_name, permissive=True): @@ -1571,108 +1868,37 @@ def set_pipeline_attributes( # Settings ending in _attr are lists of attribute keys. # These attributes are then queried to populate values # for the primary entries. - self.ngs_inputs_attr = pipeline_interface.get_attribute( - pipeline_name, "ngs_input_files") - self.required_inputs_attr = pipeline_interface.get_attribute( - pipeline_name, "required_input_files") - self.all_inputs_attr = pipeline_interface.get_attribute( - pipeline_name, "all_input_files") - + req_attr_names = [("ngs_input_files", "ngs_inputs_attr"), + ("required_input_files", "required_inputs_attr"), + ("all_input_files", "all_inputs_attr")] + for name_src_attr, name_dst_attr in req_attr_names: + _LOGGER.log(5, "Value of '%s' will be assigned to '%s'", + name_src_attr, name_dst_attr) + value = pipeline_interface.get_attribute( + pipeline_name, name_src_attr) + _LOGGER.log(5, "Assigning '{}': {}".format(name_dst_attr, value)) + setattr(self, name_dst_attr, value) + + # Post-processing of input attribute assignments. + # Ensure that there's a valid all_inputs_attr. + if not self.all_inputs_attr: + self.all_inputs_attr = self.required_inputs_attr + # Convert attribute keys into values. if self.ngs_inputs_attr: + _LOGGER.log(5, "Handling NGS input attributes: '%s'", self.name) # NGS data inputs exit, so we can add attributes like # read_type, read_length, paired. self.ngs_inputs = self.get_attr_values("ngs_inputs_attr") self.set_read_type(permissive=permissive) + else: + _LOGGER.log(5, "No NGS inputs: '%s'", self.name) - # input_size - if not self.all_inputs_attr: - self.all_inputs_attr = self.required_inputs_attr - - # Convert attribute keys into values + # Assign values for actual inputs attributes. self.required_inputs = self.get_attr_values("required_inputs_attr") self.all_inputs = self.get_attr_values("all_inputs_attr") self.input_file_size = get_file_size(self.all_inputs) - def confirm_required_inputs(self, permissive=False): - - # set_pipeline_attributes must be run first. 
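
# NOTE: a hypothetical usage sketch (not part of the diff). The new
# determine_missing_requirements (added earlier) replaces the removed
# confirm_required_inputs below: rather than raising or returning a flag
# itself, it hands back an exception type and a message for the caller to
# act on:
def check_requirements(sample, permissive=True):
    exc_type, message = sample.determine_missing_requirements()
    if exc_type is not None:
        if not permissive:
            raise exc_type(message)
        print(message)  # or log a warning instead
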
- if not hasattr(self, "required_inputs"): - _LOGGER.warn("You must run set_pipeline_attributes " - "before confirm_required_inputs") - return True - - if not self.required_inputs: - _LOGGER.debug("No required inputs") - return True - - # First, attributes - for file_attribute in self.required_inputs_attr: - _LOGGER.debug("Checking '{}'".format(file_attribute)) - if not hasattr(self, file_attribute): - message = "Missing required input attribute '{}'".\ - format(file_attribute) - _LOGGER.warn(message) - if not permissive: - raise IOError(message) - else: - return False - if getattr(self, file_attribute) is "": - message = "Empty required input attribute '{}'".\ - format(file_attribute) - _LOGGER.warn(message) - if not permissive: - raise IOError(message) - else: - return False - - # Second, files - missing_files = [] - for paths in self.required_inputs: - # There can be multiple, space-separated values here. - for path in paths.split(" "): - _LOGGER.debug("Checking path: '{}'".format(path)) - if not _os.path.exists(path): - _LOGGER.warn("Missing required input file: '{}'".format(path)) - missing_files.append(path) - - if len(missing_files) > 0: - message = "Missing/unreadable file(s): {}".\ - format(", ".join(["'{}'".format(path) - for path in missing_files])) - if not permissive: - raise IOError(message) - else: - _LOGGER.error(message) - return False - - return True - - - def get_attr_values(self, attrlist): - """ - Get value corresponding to each given attribute. - - :param str attrlist: name of an attribute storing a list of attr names - :return list: value (or empty string) corresponding to each named attr - """ - if not hasattr(self, attrlist): - return None - - attribute_list = getattr(self, attrlist) - - # If attribute is None, then value is also None. - if not attribute_list: - return None - - if not isinstance(attribute_list, list): - attribute_list = [attribute_list] - - # Strings contained here are appended later so shouldn't be null. - return [getattr(self, attr) if hasattr(self, attr) else "" - for attr in attribute_list] - - def set_read_type(self, n=10, permissive=True): """ For a sample with attr `ngs_inputs` set, this sets the @@ -1769,6 +1995,95 @@ def set_read_type(self, n=10, permissive=True): feature, self.name) + def to_yaml(self, path=None, subs_folder_path=None, delimiter="_"): + """ + Serializes itself in YAML format. + + :param str path: A file path to write yaml to; provide this or + the subs_folder_path + :param str subs_folder_path: path to folder in which to place file + that's being written; provide this or a full filepath + :param str delimiter: text to place between the sample name and the + suffix within the filename; irrelevant if there's no suffix + :return str: filepath used (same as input if given, otherwise the + path value that was inferred) + :raises ValueError: if neither full filepath nor path to extant + parent directory is provided. + """ + + # Determine filepath, prioritizing anything given, then falling + # back to a default using this Sample's Project's submission_subdir. + # Use the sample name and YAML extension as the file name, + # interjecting a pipeline name as a subfolder within the Project's + # submission_subdir if such a pipeline name is provided. + if not path: + if not subs_folder_path: + raise ValueError( + "To represent {} on disk, provide a full path or a path " + "to a parent (submissions) folder". 
+                    format(self.__class__.__name__))
+            _LOGGER.debug("Creating filename for %s: '%s'",
+                          self.__class__.__name__, self.name)
+            filename = self.generate_filename(delimiter=delimiter)
+            _LOGGER.debug("Filename: '%s'", filename)
+            path = _os.path.join(subs_folder_path, filename)
+
+        _LOGGER.debug("Setting %s filepath: '%s'",
+                      self.__class__.__name__, path)
+        self.yaml_file = path
+
+
+        def obj2dict(obj,
+                     to_skip=("samples", "sheet", "sheet_attributes")):
+            """
+            Build representation of object as a dict, recursively
+            for all objects that might be attributes of self.
+
+            :param object obj: what to serialize to write to YAML.
+            :param Iterable[str] to_skip: names of attributes to ignore.
+            """
+            if isinstance(obj, list):
+                return [obj2dict(i) for i in obj]
+            if isinstance(obj, AttributeDict):
+                return {k: obj2dict(v) for k, v in obj.__dict__.items()
+                        if k not in to_skip and
+                        (k not in ATTRDICT_METADATA or
+                         v != ATTRDICT_METADATA[k])}
+            elif isinstance(obj, Mapping):
+                return {k: obj2dict(v)
+                        for k, v in obj.items() if k not in to_skip}
+            elif isinstance(obj, (Paths, Sample)):
+                return {k: obj2dict(v)
+                        for k, v in obj.__dict__.items() if
+                        k not in to_skip}
+            elif hasattr(obj, 'dtype'):  # numpy data types
+                # TODO: this fails with ValueError for multi-element array.
+                return obj.item()
+            elif _pd.isnull(obj):
+                # Missing values as evaluated by pd.isnull().
+                # This gets correctly written into yaml.
+                return "NaN"
+            else:
+                return obj
+
+        _LOGGER.debug("Serializing %s: '%s'",
+                      self.__class__.__name__, self.name)
+        serial = obj2dict(self)
+        with open(self.yaml_file, 'w') as outfile:
+            _LOGGER.debug("Generating YAML data for %s: '%s'",
+                          self.__class__.__name__, self.name)
+            yaml_data = yaml.safe_dump(serial, default_flow_style=False)
+            outfile.write(yaml_data)
+
+        # Hand back the filepath used, as promised in the docstring.
+        return path
+
+
+    def update(self, newdata):
+        """
+        Update Sample object with attributes from a dict.
+        """
+        for key, value in newdata.items():
+            setattr(self, key, value)
+
 
 @copy
 class PipelineInterface(object):
@@ -1791,28 +2106,64 @@ def __init__(self, config):
             self.pipe_iface_config = config
 
         else:
-            _LOGGER.debug("Parsing '%s' for %s config data",
-                          config, self.__class__.__name__)
+            _LOGGER.debug("Parsing '%s' for PipelineInterface config data",
+                          config)
            self.pipe_iface_file = config
            with open(config, 'r') as f:
                self.pipe_iface_config = yaml.load(f)
 
+        # Ensure that each pipeline path, if provided, is expanded.
+        self._expand_paths()
+
+
+    def __getitem__(self, item):
+        try:
+            return self._select_pipeline(item)
+        except _MissingPipelineConfigurationException:
+            raise KeyError("{} is not a known pipeline; known: {}".
+                           format(item, self.pipe_iface_config.keys()))
+
 
     def __iter__(self):
        return iter(self.pipe_iface_config.items())
 
 
     def __repr__(self):
-        return repr(self.pipe_iface_config)
+        source = self.pipe_iface_file or "Mapping"
+        num_pipelines = len(self.pipe_iface_config)
+        pipelines = ", ".join(self.pipe_iface_config.keys())
+        return "{} from {}, with {} pipeline(s): {}".format(
+            self.__class__.__name__, source, num_pipelines, pipelines)
+
+
+    def _expand_paths(self):
+        for pipe_data in self.pipe_iface_config.values():
+            if "path" in pipe_data:
+                pipe_path = pipe_data["path"]
+                _LOGGER.log(5, "Expanding path: '%s'", pipe_path)
+                pipe_path = expandpath(pipe_path)
+                _LOGGER.log(5, "Expanded: '%s'", pipe_path)
+                pipe_data["path"] = pipe_path
 
 
     @property
     def pipeline_names(self):
+        """
+        Names of pipelines about which this interface is aware.
+ + :return Iterable[str]: names of pipelines about which this + interface is aware + """ return self.pipe_iface_config.keys() @property def pipelines(self): + """ + Keyed collection of pipeline interface data. + + :return Mapping: pipeline interface configuration data + """ return self.pipe_iface_config.values() @@ -1895,15 +2246,28 @@ def file_size_ante(name, data): return rp_data - def get_arg_string(self, pipeline_name, sample): + def get_arg_string(self, pipeline_name, sample, + submission_folder_path="", **null_replacements): """ For a given pipeline and sample, return the argument string :param str pipeline_name: Name of pipeline. :param Sample sample: current sample for which job is being built + :param str submission_folder_path: path to folder in which files + related to submission of this sample will be placed. + :param dict null_replacements: mapping from name of Sample attribute + name to value to use in arg string if Sample attribute's value + is null :return str: command-line argument string for pipeline """ + # It's undesirable to put a null value in the argument string. + default_filepath = _os.path.join( + submission_folder_path, sample.generate_filename()) + _LOGGER.debug("Default sample filepath: '%s'", default_filepath) + proxies = {"yaml_file": default_filepath} + proxies.update(null_replacements) + _LOGGER.debug("Building arguments string") config = self._select_pipeline(pipeline_name) argstring = "" @@ -1917,8 +2281,7 @@ def get_arg_string(self, pipeline_name, sample): for key, value in args.iteritems(): if value is None: - _LOGGER.debug("Null value for opt arg key '%s'", - str(key)) + _LOGGER.debug("Null value for opt arg key '%s'", str(key)) continue try: arg = getattr(sample, value) @@ -1930,7 +2293,19 @@ def get_arg_string(self, pipeline_name, sample): pipeline_name, value, key) raise - _LOGGER.debug("Adding '{}' from attribute '{}' for argument '{}'".format(arg, value, key)) + # It's undesirable to put a null value in the argument string. + if arg is None: + _LOGGER.debug("Null value for Sample attribute: '%s'", value) + try: + arg = proxies[value] + except KeyError: + raise ValueError("No default for null " + "Sample attribute: '{}'".format(value)) + _LOGGER.debug("Found default for '{}': '{}'". + format(value, arg)) + + _LOGGER.debug("Adding '{}' from attribute '{}' for argument '{}'". + format(arg, value, key)) argstring += " " + str(key) + " " + str(arg) # Add optional arguments @@ -1959,14 +2334,19 @@ def get_arg_string(self, pipeline_name, sample): return argstring - def get_attribute(self, pipeline_name, attribute_key): - """ Return value of given attribute for named pipeline. """ + def get_attribute(self, pipeline_name, attribute_key, path_as_list=True): + """ + Return the value of the named attribute for the pipeline indicated. + + :param str pipeline_name: name of the pipeline of interest + :param str attribute_key: name of the pipeline attribute of interest + :param bool path_as_list: whether to ensure that a string attribute + is returned as a list; this is useful for safe iteration over + the returned value. 
+ """ config = self._select_pipeline(pipeline_name) - try: - value = config[attribute_key] - except KeyError: - value = None - return [value] if isinstance(value, str) else value + value = config.get(attribute_key) + return [value] if isinstance(value, str) and path_as_list else value def get_pipeline_name(self, pipeline): @@ -2024,190 +2404,236 @@ def _select_pipeline(self, pipeline_name): -@copy -class InterfaceManager(object): - """ Manage pipeline use for multiple locations and protocols. +class ProtocolInterface(object): + """ PipelineInterface and ProtocolMapper for a single pipelines location. - This is done by aggregating protocol interface instances, - allowing one Project to use pipelines from multiple locations. + This class facilitates use of pipelines from multiple locations by a + single project. Also stored are path attributes with information about + the location(s) from which the PipelineInterface and ProtocolMapper came. - :param pipeline_dirs: locations containing pipelines and configuration - information; specifically, a directory with a 'pipelines' folder and - a 'config' folder, within which there is a pipeline interface file - and a protocol mappings file. - :type pipeline_dirs: Iterable[str] + :param interface_data_source: location (e.g., code repository) of pipelines + :type interface_data_source: str """ - def __init__(self, pipeline_dirs): - # Collect interface/mappings pairs by protocol name. - interfaces_and_protocols = \ - [ProtocolInterfaces(pipedir) for pipedir in pipeline_dirs] - self.ifproto_by_proto_name = defaultdict(list) - for ifproto in interfaces_and_protocols: - for proto_name in ifproto.protomap: - _LOGGER.debug("Protocol name: {}".format(proto_name)) - self.ifproto_by_proto_name[proto_name].append(ifproto) + SUBTYPE_MAPPING_SECTION = "sample_subtypes" - def build_pipelines(self, protocol_name, priority=True): - """ - Build up a sequence of scripts to execute for this protocol. - :param str protocol_name: name for the protocol for which to build - pipelines - :param bool priority: should only the top priority mapping be used? - :return Sequence[(PipelineInterface, str, str)]: sequence of jobs - (script paths) to execute for the given protocol; if priority - flag is set (as is the default), this is a single-element list, - the sequence of jobs built is interpreted as descending priority - """ + def __init__(self, interface_data_source): + super(ProtocolInterface, self).__init__() - try: - ifprotos = self.ifproto_by_proto_name[protocol_name] - except KeyError: - _LOGGER.warn("Unknown protocol: '{}'".format(protocol_name)) - return [] + if isinstance(interface_data_source, Mapping): + # TODO: for implementation, we need to determine pipelines_path. + raise NotImplementedError( + "Raw Mapping as source of {} data is not yet supported". + format(self.__class__.__name__)) + _LOGGER.debug("Creating %s from raw Mapping", + self.__class__.__name__) + self.source = None + self.pipe_iface_path = None + for name, value in self._parse_iface_data(interface_data_source): + setattr(self, name, value) - jobs = [] - pipeline_keys_used = set() - _LOGGER.debug("Building pipelines for {} PIs...".format(len(ifprotos))) - for ifproto in ifprotos: - try: - this_protocol_pipelines = \ - ifproto.protomap.mappings[protocol_name] - except KeyError: - _LOGGER.debug("Protocol {} missing mapping in '{}'". - format(protocol_name, ifproto.protomaps_path)) - else: - # TODO: update once dependency-encoding logic is in place. 
- _LOGGER.debug("Protocol: {}".format(protocol_name)) - pipeline_keys = this_protocol_pipelines.replace(";", ",")\ - .strip(" ()\n")\ - .split(",") - pipeline_keys = [pk.strip() for pk in pipeline_keys] - already_mapped, new_scripts = \ - partition(pipeline_keys, - partial(_is_member, items=pipeline_keys_used)) - pipeline_keys_used |= set(pipeline_keys) - - if len(pipeline_keys) != (len(already_mapped) + len(new_scripts)): - _LOGGER.error("{} --> {} + {}".format( - pipeline_keys, already_mapped, new_scripts)) - - raise RuntimeError( - "Partitioned {} script names into allegedly " - "disjoint sets of {} and {} elements.". - format(len(pipeline_keys), - len(already_mapped), - len(new_scripts))) - - _LOGGER.debug("Skipping {} already-mapped script names: {}". - format(len(already_mapped), - ", ".join(already_mapped))) - _LOGGER.debug("{} new scripts for protocol {} from " - "pipelines warehouse '{}': {}". - format(len(new_scripts), protocol_name, - ifproto.pipedir, ", ".join(new_scripts))) - - jobs.append([(ifproto.interface, ) + - ifproto.pipeline_key_to_path(pipeline_key) - for pipeline_key in pipeline_keys]) - - return jobs[0] if priority and len(jobs) > 1 else list(itertools.chain(*jobs)) + elif _os.path.isfile(interface_data_source): + # Secondary version that passes combined yaml file directly, + # instead of relying on separate hard-coded config names. + _LOGGER.debug("Creating %s from file: '%s'", + self.__class__.__name__, interface_data_source) + self.source = interface_data_source + self.pipe_iface_path = self.source + self.pipelines_path = _os.path.dirname(self.source) + + with open(interface_data_source, 'r') as interface_file: + iface = yaml.load(interface_file) + for name, value in self._parse_iface_data(iface): + setattr(self, name, value) + elif _os.path.isdir(interface_data_source): + _LOGGER.debug("Creating %s from files in directory: '%s'", + self.__class__.__name__, interface_data_source) + self.source = interface_data_source + self.pipe_iface_path = _os.path.join( + self.source, "config", "pipeline_interface.yaml") + self.pipelines_path = _os.path.join(self.source, "pipelines") + self.pipe_iface = PipelineInterface(self.pipe_iface_path) + self.protomap = ProtocolMapper(_os.path.join( + self.source, "config", "protocol_mappings.yaml")) -def _is_member(item, items): - return item in items + else: + raise ValueError("Alleged pipelines location '{}' exists neither " + "as a file nor as a folder.". + format(interface_data_source)) + def __repr__(self): + return "ProtocolInterface from '{}'".format(self.source or "Mapping") -# TODO: rename. -class ProtocolInterfaces: - """ PipelineInterface and ProtocolMapper for a single pipelines location. - Instances of this class are used by InterfaceManager to facilitate - multi-location pipelines use by a single project. Here also are stored - path attributes to retain information about the location from which the - interface and mapper came. + def fetch_pipelines(self, protocol): + """ + Fetch the mapping for a particular protocol, null if unmapped. 
- :param pipedir: location (e.g., code repository) of pipelines - :type pipedir: str + :param str protocol: name/key for the protocol for which to fetch the + pipeline(s) + :return str | Iterable[str] | NoneType: pipeline(s) to which the given + protocol is mapped, otherwise null + """ + return self.protomap.mappings.get(alpha_cased(protocol)) - """ - def __init__(self, pipedir): - if _os.path.isdir(pipedir): - self.pipedir = pipedir - self.config_path = _os.path.join(pipedir, "config") - self.interface_path = _os.path.join(self.config_path, - "pipeline_interface.yaml") - self.protomaps_path = _os.path.join(self.config_path, - "protocol_mappings.yaml") - self.interface = PipelineInterface(self.interface_path) - self.protomap = ProtocolMapper(self.protomaps_path) - self.pipelines_path = _os.path.join(pipedir, "pipelines") - elif _os.path.isfile(pipedir): - # Secondary version that passes combined yaml file directly, - # instead of relying on separate hard-coded config names as above - self.pipedir = None - self.interface_file = pipedir - self.pipelines_path = _os.path.dirname(pipedir) + def fetch_sample_subtype( + self, protocol, strict_pipe_key, full_pipe_path): + """ + Determine the interface and Sample subtype for a protocol and pipeline. - with open(self.interface_file, 'r') as interface_file: - iface = yaml.load(interface_file) - try: - if "protocol_mapping" in iface: - self.protomap = ProtocolMapper(iface["protocol_mapping"]) - else: - raise Exception("pipeline_interface file is missing " - "a 'protocol_mapping' section.") - if "pipelines" in iface: - self.interface = PipelineInterface(iface["pipelines"]) - else: - raise Exception("pipeline_interface file is missing " - "a 'pipelines' section.") - except Exception as e: - _LOGGER.error(str(iface)) - raise e + :param str protocol: name of the relevant protocol + :param str strict_pipe_key: key for specific pipeline in a pipeline + interface mapping declaration; this must exactly match a key in + the PipelineInterface (or the Mapping that represent it) + :param str full_pipe_path: (absolute, expanded) path to the + pipeline script + :return type: Sample subtype to use for jobs for the given protocol, + that use the pipeline indicated + :raises KeyError: if given a pipeline key that's not mapped in this + ProtocolInterface instance's PipelineInterface + """ + subtype = None - def pipeline_key_to_path(self, pipeline_key): - """ - Given a pipeline_key, return the path to the script for that pipeline - specified in this pipeline interface config file. + this_pipeline_data = self.pipe_iface[strict_pipe_key] + + try: + subtypes = this_pipeline_data[self.SUBTYPE_MAPPING_SECTION] + except KeyError: + _LOGGER.debug("%s from '%s' doesn't define section '%s' " + "for pipeline '%s'", + self.pipe_iface.__class__.__name__, self.source, + self.SUBTYPE_MAPPING_SECTION, strict_pipe_key) + # Without a subtypes section, if pipeline module defines a single + # Sample subtype, we'll assume that type is to be used when in + # this case, when the interface section for this pipeline lacks + # an explicit subtypes section specification. + subtype_name = None + else: + if subtypes is None: + # Designate lack of need for import attempt and provide + # class with name to format message below. 
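A small sketch of how fetch_pipelines tolerates case and punctuation differences in protocol names, using the same kind of alpha_cased normalization defined in looper/utils.py later in this diff. The protocol-to-pipeline mappings below are invented for illustration.

def alpha_cased(text, lower=False):
    # Keep letters only and homogenize case, as in looper.utils.alpha_cased.
    text = "".join(filter(lambda c: c.isalpha(), text))
    return text.lower() if lower else text.upper()

# Hypothetical protocol_mapping section, keyed after normalization.
mappings = {alpha_cased(k): v for k, v in {
    "RRBS": "rrbs.py",
    "ATAC-seq": "atacseq.py",
}.items()}

def fetch_pipelines(protocol):
    # A null result means the protocol has no mapping, as in ProtocolInterface.
    return mappings.get(alpha_cased(protocol))

print(fetch_pipelines("atacseq"))    # 'atacseq.py'  ("ATAC-seq" matches "atacseq")
print(fetch_pipelines("RRBS"))       # 'rrbs.py'
print(fetch_pipelines("ChIP-seq"))   # None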
+ subtype = Sample + _LOGGER.debug("Null %s subtype(s) section specified for " + "pipeline: '%s'; using base %s type", + subtype.__name__, strict_pipe_key, + subtype.__name__) + elif isinstance(subtypes, str): + subtype_name = subtypes + _LOGGER.debug("Single subtype name for pipeline '%s' " + "in interface from '%s': '%s'", subtype_name, + strict_pipe_key, self.source) + else: + temp_subtypes = { + alpha_cased(p): st for p, st in subtypes.items()} + try: + subtype_name = temp_subtypes[alpha_cased(protocol)] + except KeyError: + # Designate lack of need for import attempt and provide + # class with name to format message below. + subtype = Sample + _LOGGER.debug("No %s subtype specified in interface from " + "'%s': '%s', '%s'; known: %s", + subtype.__name__, self.source, + strict_pipe_key, protocol, + ", ".join(temp_subtypes.keys())) + + # subtype_name is defined if and only if subtype remained null. + subtype = subtype or \ + _import_sample_subtype(full_pipe_path, subtype_name) or \ + Sample + _LOGGER.debug("Using Sample subtype: %s", subtype.__name__) + return subtype + + + def finalize_pipeline_key_and_paths(self, pipeline_key): + """ + Determine pipeline's full path, arguments, and strict key. + + This handles multiple ways in which to refer to a pipeline (by key) + within the mapping that contains the data that defines a + PipelineInterface. It also ensures proper handling of the path to the + pipeline (i.e., ensuring that it's absolute), and that the text for + the arguments are appropriately dealt parsed and passed. :param str pipeline_key: the key in the pipeline interface file used for the protocol_mappings section. Previously was the script name. - :return (str, str): more restrictive version of input key, along with - absolute path for pipeline script. + :return (str, str, str): more precise version of input key, along with + absolute path for pipeline script, and full script path + options """ - # key may contain extra command-line flags; split key from flags. + # The key may contain extra command-line flags; split key from flags. 
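The sample_subtypes lookup sketched above admits three shapes. The YAML fragment below (embedded in Python only to keep the examples in one language) is a hypothetical pipelines section showing all three; the pipeline and class names are illustrative, not from looper's shipped interfaces.

import yaml

pipelines_section = yaml.safe_load("""
wgbs.py:
  sample_subtypes: WGBSSample      # one subtype name used for every protocol
rrbs.py:
  sample_subtypes:                 # per-protocol subtype names
    RRBS: RRBSSample
    EG: EGSample
atacseq.py:
  sample_subtypes: null            # explicit null: fall back to base Sample
""")

# For a (pipeline, protocol) pair, this mirrors the resolution order in
# fetch_sample_subtype: missing section or null -> base Sample; a string ->
# that class name; a mapping -> look up by normalized protocol name.
print(pipelines_section["rrbs.py"]["sample_subtypes"]["RRBS"])   # 'RRBSSample'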
+ # The strict key is the script name itself, something like "ATACseq.py" strict_pipeline_key, _, pipeline_key_args = pipeline_key.partition(' ') - if self.interface.get_attribute(strict_pipeline_key, "path"): - script_path_only = self.interface.get_attribute( - strict_pipeline_key, "path")[0] - script_path_with_flags = " ".join([script_path_only, pipeline_key_args]) + if self.pipe_iface.get_attribute(strict_pipeline_key, "path"): + script_path_only = self.pipe_iface.get_attribute( + strict_pipeline_key, "path")[0].strip() + script_path_with_flags = \ + " ".join([script_path_only, pipeline_key_args]) else: # backwards compatibility w/ v0.5 script_path_only = strict_pipeline_key script_path_with_flags = pipeline_key - if _os.path.isabs(script_path_only): - if not _os.path.exists(script_path_only.strip()): - _LOGGER.warn("Missing script command: '{}'".format(script_path_only)) - return strict_pipeline_key, script_path_with_flags - else: - abs_script_path_only = _os.path.join(self.pipelines_path, script_path_only) - abs_script_path_with_flags = _os.path.join(self.pipelines_path, script_path_with_flags) + if not _os.path.isabs(script_path_only): + _LOGGER.log(5, "Expanding non-absolute script path: '%s'", + script_path_only) + script_path_only = _os.path.join( + self.pipelines_path, script_path_only) + _LOGGER.log(5, "Absolute script path: '%s'", script_path_only) + script_path_with_flags = _os.path.join( + self.pipelines_path, script_path_with_flags) + _LOGGER.log(5, "Absolute script path with flags: '%s'", + script_path_with_flags) + if not _os.path.exists(script_path_only): + _LOGGER.warn( + "Missing pipeline script: '%s'", script_path_only) + + return strict_pipeline_key, script_path_only, script_path_with_flags + + + @classmethod + def _parse_iface_data(cls, pipe_iface_data): + """ + Parse data from mappings to set instance attributes. + + The data that define a ProtocolInterface are a "protocol_mapping" + Mapping and a "pipelines" Mapping, which are used to create a + ProtocolMapper and a PipelineInterface, representing the configuration + data for pipeline(s) from a single location. There are a couple of + different ways (file, folder, and eventually, raw Mapping) to provide + this data, and this function provides some standardization to how + those data are processed, independent of input type/format. + + :param Mapping[str, Mapping] pipe_iface_data: mapping from section + name to section data mapping; more specifically, the protocol + mappings Mapping and the PipelineInterface mapping + :return list[(str, ProtocolMapper | PipelineInterface)]: pairs of + attribute name for the ProtocolInterface being created, and the + value for that attribute, + """ + assignments = [("protocol_mapping", ProtocolMapper, "protomap"), + ("pipelines", PipelineInterface, "pipe_iface")] + attribute_values = [] + for section_name, data_type, attr_name in assignments: + try: + data = pipe_iface_data[section_name] + except KeyError: + _LOGGER.error("Error creating %s from data: %s", + cls.__name__, str(pipe_iface_data)) + raise Exception("PipelineInterface file lacks section: '{}'". + format(section_name)) + attribute_values.append((attr_name, data_type(data))) + return attribute_values - if not _os.path.isfile(abs_script_path_only.strip()): - _LOGGER.warn("Missing script command: '{}'". 
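As a standalone sketch of the key/flag splitting and path expansion that finalize_pipeline_key_and_paths performs (without the logging, existence warnings, or the PipelineInterface "path" lookup), under invented paths:

import os

def finalize_key_and_paths(pipeline_key, pipelines_path):
    # Split the strict key (the script name) from any trailing flags.
    strict_key, _, flags = pipeline_key.partition(" ")
    script = strict_key
    # Relative script paths are resolved against the pipelines folder.
    if not os.path.isabs(script):
        script = os.path.join(pipelines_path, script)
    script_with_flags = " ".join([script, flags]).strip()
    return strict_key, script, script_with_flags

print(finalize_key_and_paths("rrbs.py --use-epilog", "/opt/pipelines"))
# ('rrbs.py', '/opt/pipelines/rrbs.py', '/opt/pipelines/rrbs.py --use-epilog')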
- format(abs_script_path_only)) - return strict_pipeline_key, abs_script_path_with_flags @copy @@ -2222,15 +2648,32 @@ class ProtocolMapper(Mapping): """ def __init__(self, mappings_input): if isinstance(mappings_input, Mapping): - # Pre-parsed mappings data - self.mappings_file = None mappings = mappings_input + self.filepath = None else: # Parse file mapping protocols to pipeline(s). - self.mappings_file = mappings_input - with open(self.mappings_file, 'r') as mapfile: + with open(mappings_input, 'r') as mapfile: mappings = yaml.load(mapfile) - self.mappings = {k.upper(): v for k, v in mappings.items()} + self.filepath = mappings_input + self.mappings = {alpha_cased(k): v for k, v in mappings.items()} + + + def __getitem__(self, protocol_name): + return self.mappings[protocol_name] + + def __iter__(self): + return iter(self.mappings) + + def __len__(self): + return len(self.mappings) + + + def __repr__(self): + source = self.filepath or "mapping" + num_protocols = len(self.mappings) + protocols = ", ".join(self.mappings.keys()) + return "{} from {}, with {} protocol(s): {}".format( + self.__class__.__name__, source, num_protocols, protocols) def build_pipeline(self, protocol): @@ -2261,7 +2704,7 @@ def build_pipeline(self, protocol): self.parse_parallel_jobs(split_jobs[i], split_jobs[i - 1]) """ - # TODO: incorporate into the InterfaceManager? + def parse_parallel_jobs(self, job, dep): job = job.replace("(", "").replace(")", "") split_jobs = [x.strip() for x in job.split(',')] @@ -2271,24 +2714,11 @@ def parse_parallel_jobs(self, job, dep): else: self.register_job(job, dep) - # TODO: incorporate into InterfaceManager? + def register_job(self, job, dep): _LOGGER.info("Register Job Name: %s\tDep: %s", str(job), str(dep)) - def __getitem__(self, item): - return self.mappings[item] - - def __iter__(self): - return iter(self.mappings) - - def __len__(self): - return len(self.mappings) - - def __repr__(self): - return repr(self.__dict__) - - class _InvalidResourceSpecificationException(Exception): """ Pipeline interface resources--if present--needs default. """ @@ -2335,3 +2765,122 @@ class _MissingPipelineConfigurationException(Exception): """ A selected pipeline needs configuration data. """ def __init__(self, pipeline): super(_MissingPipelineConfigurationException, self).__init__(pipeline) + + + +def _import_sample_subtype(pipeline_filepath, subtype_name=None): + """ + Import a particular Sample subclass from a Python module. + + :param str pipeline_filepath: path to file to regard as Python module + :param str subtype_name: name of the target class (which must derive from + the base Sample class in order for it to be used), optional; if + unspecified, if the module defines a single subtype, then that will + be used; otherwise, the base Sample type will be used. + :return type: the imported class, defaulting to base Sample in case of + failure with the import or other logic + """ + base_type = Sample + + try: + _LOGGER.debug("Attempting to import module defined by {}". + format(pipeline_filepath)) + + # TODO: consider more fine-grained control here. What if verbose + # TODO: logging is only to file, not to stdout/err? + + # Redirect standard streams during the import to prevent noisy + # error messaging in the shell that may distract or confuse a user. 
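ProtocolMapper now behaves as a read-only Mapping over normalized protocol names. This stripped-down stand-in (not the real class) shows the dict-backed behavior it exposes through __getitem__, __iter__, and __len__, with invented mappings.

try:
    from collections.abc import Mapping   # Python 3
except ImportError:
    from collections import Mapping       # Python 2

class MiniProtocolMapper(Mapping):
    # Dict-backed, keyed by letters-only, uppercased protocol names.
    def __init__(self, mappings):
        self.mappings = {
            "".join(c for c in k if c.isalpha()).upper(): v
            for k, v in mappings.items()}

    def __getitem__(self, protocol_name):
        return self.mappings[protocol_name]

    def __iter__(self):
        return iter(self.mappings)

    def __len__(self):
        return len(self.mappings)

pm = MiniProtocolMapper({"ATAC-seq": "atacseq.py", "RRBS": "rrbs.py"})
print(len(pm), "ATACSEQ" in pm, dict(pm))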
+ if _LOGGER.getEffectiveLevel() > logging.DEBUG: + with open(_os.devnull, 'w') as temp_standard_streams: + with standard_stream_redirector(temp_standard_streams): + pipeline_module = import_from_source(pipeline_filepath) + else: + pipeline_module = import_from_source(pipeline_filepath) + + except SystemExit: + # SystemExit would be caught as BaseException, but SystemExit is + # particularly suggestive of an a script without a conditional + # check on __main__, and as such warrant a tailored message. + _LOGGER.warn("'%s' appears to attempt to run on import; " + "does it lack a conditional on '__main__'? " + "Using base type: %s", + pipeline_filepath, base_type.__name__) + return base_type + + except (BaseException, Exception) as e: + _LOGGER.warn("Using base %s because of failure in attempt to " + "import pipeline module '%s': %r", + base_type.__name__, pipeline_filepath, e) + return base_type + + else: + _LOGGER.debug("Successfully imported pipeline module '%s', " + "naming it '%s'", pipeline_filepath, + pipeline_module.__name__) + + def class_names(cs): + return ", ".join([c.__name__ for c in cs]) + + # Find classes from pipeline module and determine which derive from Sample. + classes = _fetch_classes(pipeline_module) + _LOGGER.debug("Found %d classes: %s", len(classes), class_names(classes)) + + # Base Sample could be imported; we want the true subtypes. + proper_subtypes = _proper_subtypes(classes, base_type) + _LOGGER.debug("%d proper %s subtype(s): %s", len(proper_subtypes), + base_type.__name__, class_names(proper_subtypes)) + + # Determine course of action based on subtype request and number found. + if not subtype_name: + _LOGGER.debug("No specific subtype is requested from '%s'", + pipeline_filepath) + if len(proper_subtypes) == 1: + # No specific request and single subtype --> use single subtype. + subtype = proper_subtypes[0] + _LOGGER.debug("Single %s subtype found in '%s': '%s'", + base_type.__name__, pipeline_filepath, + subtype.__name__) + return subtype + else: + # We can't arbitrarily select from among 0 or multiple subtypes. + _LOGGER.debug("%s subtype cannot be selected from %d found in " + "'%s'; using base type", base_type.__name__, + len(proper_subtypes), pipeline_filepath) + return base_type + else: + # Specific subtype request --> look for match. + for st in proper_subtypes: + if st.__name__ == subtype_name: + _LOGGER.debug("Successfully imported %s from '%s'", + subtype_name, pipeline_filepath) + return st + raise ValueError( + "'{}' matches none of the {} {} subtype(s) defined " + "in '{}': {}".format(subtype_name, len(proper_subtypes), + base_type.__name__, pipeline_filepath, + class_names(proper_subtypes))) + + + +def _fetch_classes(mod): + """ Return the classes defined in a module. """ + try: + _, classes = zip(*inspect.getmembers( + mod, lambda o: inspect.isclass(o))) + except ValueError: + return [] + return list(classes) + + + +def _proper_subtypes(types, supertype): + """ Determine the proper subtypes of a supertype. """ + return list(filter( + lambda t: issubclass(t, supertype) and t != supertype, types)) + + + +def _is_member(item, items): + """ Determine whether an iterm is a member of a collection. 
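The subtype discovery above boils down to two inspect-based helpers. Here is the same pattern in a self-contained form, with a throwaway in-memory module standing in for an imported pipeline script; the Sample base class below is a local stand-in, not looper's Sample.

import inspect
import types

# A throwaway module standing in for a dynamically imported pipeline module.
mod = types.ModuleType("fake_pipeline")
exec("class Sample(object): pass\n"
     "class RRBSSample(Sample): pass\n"
     "CONSTANT = 42\n", mod.__dict__)

def fetch_classes(module):
    # Collect every class defined in (or imported into) the module.
    return [obj for _, obj in inspect.getmembers(module, inspect.isclass)]

def proper_subtypes(types_found, supertype):
    # Keep strict subclasses only; the supertype itself is excluded.
    return [t for t in types_found if issubclass(t, supertype) and t is not supertype]

classes = fetch_classes(mod)
subtypes = proper_subtypes(classes, mod.Sample)
print([c.__name__ for c in classes])   # ['RRBSSample', 'Sample']
print([c.__name__ for c in subtypes])  # ['RRBSSample']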
""" + return item in items diff --git a/looper/utils.py b/looper/utils.py index d092dc88..6f74ef5f 100644 --- a/looper/utils.py +++ b/looper/utils.py @@ -2,8 +2,11 @@ from argparse import ArgumentParser from collections import Counter, defaultdict, Iterable +import contextlib import logging import os +import random +import string import subprocess as sp import yaml from ._version import __version__ @@ -21,22 +24,170 @@ def format_help(self): +def alpha_cased(text, lower=False): + """ + Filter text to just letters and homogenize case. + + :param str text: what to filter and homogenize. + :param bool lower: whether to convert to lowercase; default uppercase. + :return str: input filtered to just letters, with homogenized case. + """ + text = "".join(filter(lambda c: c.isalpha(), text)) + return text.lower() if lower else text.upper() + + + +def check_bam(bam, o): + """ + Check reads in BAM file for read type and lengths. + + :param str bam: BAM file path. + :param int o: Number of reads to look at for estimation. + """ + try: + p = sp.Popen(['samtools', 'view', bam], stdout=sp.PIPE) + # Count paired alignments + paired = 0 + read_length = Counter() + while o > 0: # Count down number of lines + line = p.stdout.readline().decode().split("\t") + flag = int(line[1]) + read_length[len(line[9])] += 1 + if 1 & flag: # check decimal flag contains 1 (paired) + paired += 1 + o -= 1 + p.kill() + except OSError: + reason = "Note (samtools not in path): For NGS inputs, " \ + "looper needs samtools to auto-populate " \ + "'read_length' and 'read_type' attributes; " \ + "these attributes were not populated." + raise OSError(reason) + + _LOGGER.debug("Read lengths: {}".format(read_length)) + _LOGGER.debug("paired: {}".format(paired)) + return read_length, paired + + + +def check_fastq(fastq, o): + raise NotImplementedError("Detection of read type/length for " + "fastq input is not yet implemented.") + + + +def expandpath(path): + """ + Expand a filesystem path that may or may not contain user/env vars. + + :param str path: path to expand + :return str: expanded version of input path + """ + return os.path.expandvars(os.path.expanduser(path)).replace("//", "/") + + + def fetch_package_classes(pkg, predicate=None): """ Enable single-depth fetch of package's classes if not exported. - + :param module pkg: the package of interest. - :param function(type) -> bool predicate: condition each class must + :param function(type) -> bool predicate: condition each class must satisfy in order to be returned. - :return Iterable(type): classes one layer deep within the package, that + :return Iterable(type): classes one layer deep within the package, that satisfy the condition if given. """ import inspect import itertools + + modules = [pkg] if inspect.ismodule(pkg) else \ + [obj for obj in inspect.getmembers( + pkg, lambda member: inspect.ismodule(member))] return list(itertools.chain( - *[inspect.getmembers(mod, predicate) - for mod in inspect.getmembers( - pkg, lambda obj: inspect.ismodule(obj))])) + *[inspect.getmembers(mod, predicate) for mod in modules])) + + + +def get_file_size(filename): + """ + Get size of all files in gigabytes (Gb). + + :param str | collections.Iterable[str] filename: A space-separated + string or list of space-separated strings of absolute file paths. + :return float: size of file(s), in gigabytes. 
+ """ + if filename is None: + return float(0) + if type(filename) is list: + return float(sum([get_file_size(x) for x in filename])) + try: + total_bytes = sum([float(os.stat(f).st_size) + for f in filename.split(" ") if f is not '']) + except OSError: + # File not found + return 0.0 + else: + return float(total_bytes) / (1024 ** 3) + + + +def import_from_source(module_filepath): + """ + Import a module from a particular filesystem location. + + :param str module_filepath: path to the file that constitutes the module + to import + :return module: module imported from the given location, named as indicated + :raises ValueError: if path provided does not point to an extant file + """ + import sys + + if not os.path.exists(module_filepath): + raise ValueError("Path to alleged module file doesn't point to an " + "extant file: '{}'".format(module_filepath)) + + # Randomly generate module name. + fname_chars = string.ascii_letters + string.digits + name = "".join(random.choice(fname_chars) for _ in range(20)) + + # Import logic is version-dependent. + if sys.version_info >= (3, 5): + from importlib import util as _il_util + modspec = _il_util.spec_from_file_location( + name, module_filepath) + mod = _il_util.module_from_spec(modspec) + modspec.loader.exec_module(mod) + elif sys.version_info < (3, 3): + import imp + mod = imp.load_source(name, module_filepath) + else: + # 3.3 or 3.4 + from importlib import machinery as _il_mach + loader = _il_mach.SourceFileLoader(name, module_filepath) + mod = loader.load_module() + + return mod + + + +def parse_ftype(input_file): + """ + Checks determine filetype from extension. + + :param str input_file: String to check. + :return str: filetype (extension without dot prefix) + :raises TypeError: if file does not appear of a supported type + """ + if input_file.endswith(".bam"): + return "bam" + elif input_file.endswith(".fastq") or \ + input_file.endswith(".fq") or \ + input_file.endswith(".fq.gz") or \ + input_file.endswith(".fastq.gz"): + return "fastq" + else: + raise TypeError("Type of input file ends in neither '.bam' " + "nor '.fastq' [file: '" + input_file + "']") @@ -82,33 +233,42 @@ def partition(items, test): assume that the argument is not terribly large and that the function is cheap to compute and use a simpler single-pass approach. - :param collections.Iterable[object] items: items to partition + :param Sized[object] items: items to partition :param function(object) -> bool test: test to apply to each item to perform the partitioning procedure :return: list[object], list[object]: partitioned items sequences """ passes, fails = [], [] - _LOGGER.debug("Testing {} items: {}".format(len(items), items)) + _LOGGER.log(5, "Testing {} items: {}".format(len(items), items)) for item in items: - _LOGGER.debug("Testing item {}".format(item)) + _LOGGER.log(5, "Testing item {}".format(item)) group = passes if test(item) else fails group.append(item) return passes, fails -# TODO: -# It appears that this isn't currently used. -# It could be included as a validation stage in Project instantiation. -# If Project instance being validated lacked specific relevant -# configuration section the call here would either need to be skipped, -# or this would need to pass in such a scenario. That would not be -# a challenge, but it just needs to be noted. +@contextlib.contextmanager +def standard_stream_redirector(stream): + """ + Temporarily redirect stdout and stderr to another stream. 
+ + This can be useful for capturing messages for easier inspection, or + for rerouting and essentially ignoring them, with the destination as + something like an opened os.devnull. + + :param FileIO[str] stream: temporary proxy for standard streams + """ + import sys + genuine_stdout, genuine_stderr = sys.stdout, sys.stderr + sys.stdout, sys.stderr = stream, stream + try: + yield + finally: + sys.stdout, sys.stderr = genuine_stdout, genuine_stderr + + -# TODO: -# Test this with additional pipeline config file, -# pointed to in relevant section of project config file: -# http://looper.readthedocs.io/en/latest/define-your-project.html#project-config-section-pipeline-config class CommandChecker(object): """ Validate PATH availability of executables referenced by a config file. @@ -124,8 +284,10 @@ class CommandChecker(object): :param sections_to_skip: analogous to the check names parameter, but for specific sections to skip. :type sections_to_skip: Iterable[str] - + """ + + def __init__(self, path_conf_file, sections_to_check=None, sections_to_skip=None): @@ -142,9 +304,9 @@ def __init__(self, path_conf_file, # Determine which sections to validate. sections = {sections_to_check} if isinstance(sections_to_check, str) \ - else set(sections_to_check or conf_data.keys()) + else set(sections_to_check or conf_data.keys()) excl = {sections_to_skip} if isinstance(sections_to_skip, str) \ - else set(sections_to_skip or []) + else set(sections_to_skip or []) sections -= excl self._logger.info("Validating %d sections: %s", @@ -154,8 +316,8 @@ def __init__(self, path_conf_file, # Store per-command mapping of status, nested under section. self.section_to_status_by_command = defaultdict(dict) # Store only information about the failures. - self.failures_by_section = defaultdict(list) # Access by section. - self.failures = set() # Access by command. + self.failures_by_section = defaultdict(list) # Access by section. + self.failures = set() # Access by command. for s in sections: # Fetch section data or skip. @@ -244,86 +406,3 @@ def is_command_callable(command, name=""): _LOGGER.debug("Command{0}is not callable: {1}". format(alias_value, command)) return not bool(code) - - - -def parse_ftype(input_file): - """ - Checks determine filetype from extension. - - :param str input_file: String to check. - :return str: filetype (extension without dot prefix) - :raises TypeError: if file does not appear of a supported type - """ - if input_file.endswith(".bam"): - return "bam" - elif input_file.endswith(".fastq") or \ - input_file.endswith(".fq") or \ - input_file.endswith(".fq.gz") or \ - input_file.endswith(".fastq.gz"): - return "fastq" - else: - raise TypeError("Type of input file ends in neither '.bam' " - "nor '.fastq' [file: '" + input_file + "']") - - - -def check_bam(bam, o): - """ - Check reads in BAM file for read type and lengths. - - :param str bam: BAM file path. - :param int o: Number of reads to look at for estimation. - """ - try: - p = sp.Popen(['samtools', 'view', bam], stdout=sp.PIPE) - # Count paired alignments - paired = 0 - read_length = Counter() - while o > 0: # Count down number of lines - line = p.stdout.readline().decode().split("\t") - flag = int(line[1]) - read_length[len(line[9])] += 1 - if 1 & flag: # check decimal flag contains 1 (paired) - paired += 1 - o -= 1 - p.kill() - except OSError: - reason = "Note (samtools not in path): For NGS inputs, " \ - "looper needs samtools to auto-populate " \ - "'read_length' and 'read_type' attributes; " \ - "these attributes were not populated." 
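Usage of the stream redirector is straightforward. This sketch captures prints into a StringIO instead of os.devnull, re-implementing the context manager locally so the snippet stands alone; looper's own helper behaves the same way.

import contextlib
import io
import sys

@contextlib.contextmanager
def standard_stream_redirector(stream):
    # Same idea as the looper.utils helper: swap both std streams, restore on exit.
    genuine_stdout, genuine_stderr = sys.stdout, sys.stderr
    sys.stdout, sys.stderr = stream, stream
    try:
        yield
    finally:
        sys.stdout, sys.stderr = genuine_stdout, genuine_stderr

buffer = io.StringIO()
with standard_stream_redirector(buffer):
    print("noisy import-time output")
    sys.stderr.write("a warning\n")
print("captured: %r" % buffer.getvalue())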
- raise OSError(reason) - - _LOGGER.debug("Read lengths: {}".format(read_length)) - _LOGGER.debug("paired: {}".format(paired)) - return read_length, paired - - - -def check_fastq(fastq, o): - raise NotImplementedError("Detection of read type/length for " - "fastq input is not yet implemented.") - - - -def get_file_size(filename): - """ - Get size of all files in gigabytes (Gb). - - :param str | collections.Iterable[str] filename: A space-separated - string or list of space-separated strings of absolute file paths. - :return float: size of file(s), in gigabytes. - """ - if filename is None: - return float(0) - if type(filename) is list: - return float(sum([get_file_size(x) for x in filename])) - try: - total_bytes = sum([float(os.stat(f).st_size) - for f in filename.split(" ") if f is not '']) - except OSError: - # File not found - return 0.0 - else: - return float(total_bytes) / (1024 ** 3) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index aa459e2c..cc4117e1 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -1,3 +1,3 @@ colorama==0.3.9 -pandas==0.20.1 +pandas==0.20.2 pyyaml==3.12 diff --git a/scripts/cleanFailed.sh b/scripts/cleanFailed.sh deleted file mode 100755 index 49842138..00000000 --- a/scripts/cleanFailed.sh +++ /dev/null @@ -1,10 +0,0 @@ -# Deletes all directories with a failed flag -ls -d */*failed* - -read -p "Are you sure? " -n 1 -r -echo # (optional) move to a new line -if [[ $REPLY =~ ^[Yy]$ ]] -then - ls -d */*failed* | cut -d'/' -f1 | xargs rm -rfv -fi - diff --git a/scripts/convertBismarkReport.R b/scripts/convertBismarkReport.R deleted file mode 100755 index e2bc8da6..00000000 --- a/scripts/convertBismarkReport.R +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env Rscript -options(echo=FALSE) -library("data.table") -suppressPackageStartupMessages(library("optparse")) -#d <- fread("/data/groups/lab_bock/fhalbrit/projects/hema_precursors//results_pipeline//results_pipeline/MPP_10_D1_1_R1//bismark_hg38/extractor/MPP_10_D1_1_R1.aln.dedup.filt.CpG_report_filt.txt") - - -optionList <- list( - make_option( c("-i", "--input"), type="character", help="Input file. A Bismark CpG report (CHR START STRAND HITCOUNT MISSCOUNT DINUCLEOTIDE CONTEXT)"), - make_option( c("-f", "--formats"), type="character", default="cov,min", help="A comma-separated list of output formats. Supported formats are: cov (Bismark coverage file: CHR START END METHPERCENT HITCOUNT MISSCOUNT), min (minimal coverage file: CHR START HITS TOTAL). Default: cov,min"), - make_option( c("-c", "--noCovFilter"), default=FALSE,type="logical", action="store_true", help="Disable coverage filter. If not set, CpG's without any coverage will be removed"), - make_option( c("-s", "--noChromFilter"), default=FALSE, type="logical", action="store_true", help="Disable chromosome filter. If not set, non-standard chromosomes (everything with an underscore in the name) will be removed"), - make_option( c("-a", "--noAdjustMinusStrand"), default=FALSE, type="logical", action="store_true", help="Disable reverse strand adjustment. 
If not set, the coordiantes of all sites on the reverse strand (-) will be adjusted by subtracting 1") -) -opts <- parse_args(OptionParser(option_list=optionList)) - - -if (is.null(opts$input)) { - print_help(OptionParser(option_list=optionList)) - stop("No input file provided") -} else { - cpgReport <- opts$input - filterUncovered <- !opts$noCovFilter - removeNonStandardChroms <- !opts$noChromFilter - adjustMinusStrand <- !opts$noAdjustMinusStrand - outputFormats <- strsplit(tolower(opts$formats),",")[[1]] - - message("+ Starting to convert Bismark CpG report file: ", cpgReport) - - # read in data: - message("\tReading and modifying data...") - d <- fread(cpgReport) - setnames(d, paste0("V", 1:7), c("chr", "start", "strand", "hitCount", "missCount", "dinucleotide", "context")) - - # calculate total read count: - d[, readCount:=hitCount+missCount] - - # remove unnecessary columns: - d[, c("dinucleotide", "context", "missCount"):=NULL] - - # remove uncovered regions: - if(filterUncovered) { - message("\tRemove uncovered CpG's...") - d <- d[ readCount>0,] - } - - # adjust the coordinate of C's on the (-)-strand: - if(adjustMinusStrand) { - message("\tAdjusting reverse strand coordinates...") - d[strand=="-",start := as.integer(start-1)] - } - d[, strand:=NULL] - - # aggregate all regions with identical coordinates: - message("\tAggregating regions by coordinate...") - d <- d[,list(hitCount= sum(hitCount), readCount=sum(readCount)), by=list(chr, start)] - setcolorder(d,c("chr", "start", "hitCount", "readCount")); - - # remove non-standard chromosomes (_random, unintegrated contiqs, etc.) - if(removeNonStandardChroms) { - message("\tFiltering chromosomes...") - d <- d[ !grep("_",chr),]; - } - - # write output file(s): - for(outputFormat in outputFormats) { - outName <- paste0(gsub(".txt$", "", cpgReport, perl=TRUE, ignore.case=TRUE), ".", outputFormat) - if(outputFormat == "cov") { - message("\tWriting Bismark coverage format (CHR START END METHPERCENT HITCOUNT MISSCOUNT): ", outName) - d[, methPerc:= hitCount/readCount*100] - d[, missCount:= readCount-hitCount] - write.table(d[,list(chr,start,start,methPerc,hitCount,missCount)], file=outName, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE) - } - else if(outputFormat == "min") { - message("\tWriting minimal coverage output format (CHR START HITS TOTAL): ", outName) - write.table(d[,list(chr,start,hitCount,readCount)], file=outName, sep="\t", row.names=FALSE, col.names=FALSE, quote=FALSE) - } - else { - warning("\tUnrecognized output format: ", outputFormat) - } - } - - message("+ Finished conversion: ", cpgReport) -} diff --git a/scripts/fastqcSummary.py b/scripts/fastqcSummary.py deleted file mode 100755 index b5649c2d..00000000 --- a/scripts/fastqcSummary.py +++ /dev/null @@ -1,156 +0,0 @@ -#!/usr/bin/env python - -import os, inspect, ConfigParser, subprocess, sys, errno, glob, zipfile, csv -from argparse import ArgumentParser -#import csv -#import -#import glob -#import -#import re - -# constants: -nThreadsPerCpu = 4 -nMemPerThread = 1024 -nCpusSlurm = 8 -defaultRawDataPath = "/fhgfs/groups/lab_bsf/samples/" - -# parse user-supplied arguments: -parser = ArgumentParser(description='FASTQC') -parser.add_argument('-c', '--config-file', dest='confFile', help="Supply config file with [-c]. The path of the sample annotation sheet will be parsed from this. 
Example: /fhgfs/groups/lab_bock/shared/COREseq/config.txt") -parser.add_argument('-a', '--annot-file', dest='annotFile', help="Specify a sample annotation sheet directly") -parser.add_argument('-o', '--output-dir', dest='outputDir', help="Directory to write results to") -parser.add_argument('-f', '--fastqc', dest='fastqcPath', help="Full path of FASTQC exectuable", default="/cm/shared/apps/FastQC/0.11.2/fastqc") -parser.add_argument('-s', '--slurm', dest='useSlurm', action='store_true', help="Execute script on SLURM cluster.", default=False) -parser.add_argument('-q', '--quick-summary', dest='quickSummary', action='store_true', help="Skip FastQC, just write the summary report", default=False) -parser.add_argument('-p', '--parallel', dest='nCpus', help="Number of CPUs to use (going to start 4 threads per CPU)", default=1) -parser.add_argument('-d', '--raw-path', dest='rawPath', help="Raw data path") -args, remaining_args = parser.parse_known_args() - -# get input directory either directly as command line argument (highest priority) or from a config file: -annotFile = None -outputDir = None -rawDataPath = defaultRawDataPath -if args.annotFile: - annotFile = args.annotFile -elif args.confFile: - #get configurations - config = ConfigParser.ConfigParser({"results": None, "raw_data_path": defaultRawDataPath}) - config.readfp(open(os.path.abspath(args.confFile))) - annotFile = config.get("paths","psa") - if annotFile is None: - print "The config file provided does not define an annotation sheet (parameter name: 'psa')" - raise SystemExit - outputDir = config.get("paths","project_root") - if outputDir is not None: - outputDir = outputDir + "/fastqc" - rawDataPath = config.get("paths","raw_data_path") -else: - print "Supply either a config file (--config-file=X) or the full path of the annotation sheet (--annot-file=X)" - raise SystemExit - -# define relevant paths: -scriptPath = os.path.abspath(inspect.getfile(inspect.currentframe())) -fastqcPath = os.path.abspath(args.fastqcPath) -annotFile = os.path.abspath(annotFile) -if args.outputDir: - outputDir = args.outputDir -if outputDir is None: - print "No output directory specified (--output-dir=X)" - raise SystemExit -outputDir = os.path.abspath(args.outputDir) -if args.rawPath: - rawDataPath = args.rawPath -nCpus = args.nCpus - -# print some basic information: -print "FASTQC Summary" -print "----" -print "Full script path:\t" + scriptPath -print "Full FASTQC path:\t" + fastqcPath -print "Raw data root directory:\t" + rawDataPath -print "Sample sheet:\t" + annotFile -print "Output root directory:\t:" + outputDir -print "#CPUs:\t:" + str(nCpus) -print "#treads/CPU:\t:" + str(nThreadsPerCpu) -print "#mem/thread:\t:" + str(nMemPerThread) -print "----" - -# create results directory if it doesn't exist yet: -try: - os.makedirs(outputDir) -except OSError as exception: - if exception.errno != errno.EEXIST: - raise - -### MAIN JOB EXECUTION ### - -# if desired, submit the job for execution on the cluster: -if args.useSlurm: - slurmScript = outputDir + "/fastqc_slurm.sub" - slurmLog = outputDir + "/fastqc_slurm.log" - - with open(slurmScript, "w") as fout: - fout.write("#!/bin/bash\n") - fout.write("#SBATCH --job-name=fastqc\n") - fout.write("#SBATCH --mem-per-cpu=" + str(nThreadsPerCpu * nMemPerThread) + "\n") - fout.write("#SBATCH --cpus-per-task=" + str(nCpus) + "\n") - fout.write("#SBATCH -m block\n") - fout.write("#SBATCH --partition=mediumq\n") - fout.write("#SBATCH --time=24:00:00\n") - fout.write("#SBATCH --output " + slurmLog + "\n") - 
fout.write("echo 'Compute node:' `hostname`\n") - fout.write("echo 'Start time:' `date +'%Y-%m-%d %T'`\n") - fout.write("python " + scriptPath + " --raw-path=" + rawDataPath + " --annot-file=" + annotFile + " --parallel=" + str(nCpusSlurm) + " --output-dir=" + outputDir + "\n") - fout.write("echo 'End time:' `date +'%Y-%m-%d %T'`\n") - - subprocess.check_call(["sbatch", slurmScript]) - -# otherwise, just execute the command directly on the current machine: -# (this is what the SLURM-based execution mode will do once the job has been allocated to a specific node) -else: - # execute FastQC on all BAM files: - if not args.quickSummary: - subprocess.check_call([fastqcPath, "--version"]) - - bamFolders = {} - - with open(annotFile, "rb") as annotF: - annotDict = csv.DictReader(annotF) - for row in annotDict: - bamDir = rawDataPath + row["flowcell"] + "/" + row["flowcell"] + "_" + row["lane"] + "_samples/" - bamFile = bamDir + row["flowcell"] + "_" + row["lane"] + "#" + row["BSF_name"] + ".bam" - - if os.path.isfile(bamFile): - bamFolders[bamDir] = True - - for bamFolder in bamFolders: - subprocess.check_call(fastqcPath + " " + bamFolder+"/*.bam --threads="+str(int(nCpus) * nThreadsPerCpu) + " --noextract --outdir="+outputDir, shell=True) # N.B. can't use the proper syntax with an array for args, because FastQC cannot handle the quoted string ('...') as an input path name - - allKeys = {} - resultsMap = {} - zipSuffix = ".zip" - sep = "\t" - summaryFile = outputDir + "/summary.tsv" - - # collate summaries in one overview file: - print "Collecting summary statistics into: " + summaryFile - with open(summaryFile, "w") as fout: - for fastqcZip in glob.glob(outputDir + "/*"+zipSuffix): - curName = fastqcZip[len(outputDir+"/"):-len(zipSuffix)] - curMap = {} - #print curName - with zipfile.ZipFile(fastqcZip) as z: - with z.open(curName+"/summary.txt") as f: - for line in f: - tokens = line.split(sep) - curMap[tokens[1]] = tokens[0] - allKeys[tokens[1]] = True - resultsMap[curName] = curMap - - fout.write("Dataset" + sep + sep.join(allKeys.keys())+"\n") - for sample, curMap in resultsMap.items(): - fout.write(sample) - for k in allKeys: - fout.write(sep + curMap[k]) - fout.write("\n") - diff --git a/scripts/flagCheck.sh b/scripts/flagCheck.sh deleted file mode 100755 index 2468689b..00000000 --- a/scripts/flagCheck.sh +++ /dev/null @@ -1,23 +0,0 @@ -completed=`ls */*completed.flag 2> /dev/null | wc -l` -running=`ls */*running.flag 2> /dev/null | wc -l` -failed=`ls */*failed.flag 2> /dev/null | wc -l` -echo "completed: $completed" -echo "running: $running" -echo "failed: $failed" -ls */*.flag | xargs -n1 basename | sort | uniq -c - -if [ $failed -lt 30 ]; then -echo "List of failed flags:" -ls */*failed.flag 2> /dev/null -fi - -if [ $completed -lt 30 ]; then -echo "List of completed flags:" -ls */*completed.flag 2> /dev/null -fi - -if [ $running -lt 30 ]; then -echo "List of running flags:" -ls */*running.flag 2> /dev/null -fi - diff --git a/scripts/make_SummaryTable.py b/scripts/make_SummaryTable.py deleted file mode 100755 index 6ca15a6e..00000000 --- a/scripts/make_SummaryTable.py +++ /dev/null @@ -1,319 +0,0 @@ -#! 
/usr/bin/env python - -# This script loops through all the samples, -# creates a summary stats table -import csv -import os -from argparse import ArgumentParser -from pypiper import AttributeDict -import yaml - - -# Argument Parsing -# ####################################################################################### -parser = ArgumentParser(description='make_SummaryTable') -parser.add_argument('-c', '--config-file', dest='config_file', help="path to YAML config file", required=True, type=str) -parser.add_argument('--excel', dest='excel', action='store_true', help="generate extra XLS and XLSX sheet", default=False, required=False) -# Charles : On time the legacy/rigid mode will be removed -parser.add_argument('--rigid', dest='rigid', action='store_true', help="the legacy rigid mode that only takes in the hard-coded values", default=False, required=False) -args = parser.parse_args() - -with open(args.config_file, 'r') as config_file: - config_yaml = yaml.load(config_file) - config = AttributeDict(config_yaml, default=True) -paths = config.paths - - - -if not os.path.exists(paths.output_dir): - raise Exception(paths.output_dir + " : project directory does not exist!") - - -# FOR RIGID -# ####################################################################################### -fields_in = [] -fields_out = [] -if args.rigid: - # the hard-coded fields for the legacy/rigid mode - fields_in = ['sample_name','instrument_model','flowcell','lane','read_length','Read_type','organism','Genome'\ - ,'cell_type','Raw_reads','Trimmed_reads','Trimmed_rate','Aligned_reads','Aligned_rate'\ - ,'Multimap_reads','Multimap_rate','Unique_CpGs','Total_CpGs','meanCoverage',\ - 'bisulfiteConversionRate','globalMethylationMean',\ - 'K1_unmethylated_count','K1_unmethylated_meth','K3_methylated_count','K3_methylated_meth'] - fields_out = ['Sample','Instrument','Flowcell','Lane','Read Length','Read Type','Organism','Genome'\ - ,'Cell Type','Raw Reads','Trimmed Reads','Trimmed Rate','Aligned Reads','Aligned Rate'\ - ,'Multimap Reads','Multimap Rate','Unique CpGs','Total CpGs','Mean Coverage',\ - 'Bisulfite Conversion Rate',' Global Methylation Mean',\ - 'K1 Unmethylated Count','K1 Unmethylated Meth','K3 Methylated Count','K3 Methylated Meth'] - - -# Open samples CSV file -# ####################################################################################### -csv_file_path = os.path.join(os.path.dirname(args.config_file),config.metadata.sample_annotation) -print("\nOpening CSV file: " + csv_file_path) -if os.path.isfile(csv_file_path): - csv_file = open(os.path.join(os.path.dirname(args.config_file),config.metadata.sample_annotation), 'rb') - print("Found " + csv_file_path) -else: - raise Exception(csv_file_path + " : that file does not exist!") -csv_reader = csv.DictReader(csv_file) - - -# Looping over all samples -# ####################################################################################### -global_list = dict() -global_keys = dict() - -pipelines = [] -sample_count = 0 -column_count = 0 -print("\nStart iterating over samples") - -for row in csv_reader: - - sample_count += 1 - sample_name = row['sample_name'] - print("\n##### Processing sample #"+ str(sample_count) + " : " + sample_name + " #####") - - # wrap this all in a try block, so it can skip a few bad samples - # without breaking the whole thing - try: - - # Open sample TSV stat file - stats_file_dir = os.path.join(paths.output_dir,paths.results_subdir,sample_name) - stats_file_path = 
os.path.join(paths.output_dir,paths.results_subdir,sample_name,row['library']+'_stats.tsv') - if not os.path.isfile(stats_file_path): - for thefile in os.listdir(stats_file_dir): - if thefile.endswith("stats.tsv"): stats_file_path = os.path.join(stats_file_dir,thefile) - if os.path.isfile(stats_file_path): - stats_file = open(stats_file_path, 'rb') - print("Found: " + stats_file_path) - else: - raise Exception(stat_file_path + " : file does not exist!") - - - stats_dict = dict() - stats_dict_keys = dict() - - # Check if file has third column -> define pipelines based on that - # plus read info from file - - for line in stat_file: - - line_content = line.split('\t') - key = line_content[0] - value = line_content[1] - pip = "x" - if len(line_content) == 3: - pip = line_content[2].strip() - pipelines.append(pip) - if not pip in stats_dict: stats_dict[pip] = dict() - if not pip in stats_dict_keys: stats_dict_keys[pip] = [] - stats_dict[pip][key] = value.strip() - stats_dict_keys[pip].append(key) - - pipelines = list(set(pipelines)) - print "Pipelines: " + str(pipelines) - - - # stats_dict and stats_dict_keys are pipeline specific - for pip in pipelines: - if not pip in global_list: global_list[pip] = [] - if not pip in global_keys: global_keys[pip] = [] - - - - # if there are two pipelines make sure that certain values are present in both - missing_cols = ["Raw_reads", "Fastq_reads", "Trimmed_reads", "Trim_loss_rate"] - if len(pipelines) == 2: - for col in missing_cols: - if not col in stats_dict[pipelines[1]] and col in stats_dict[pipelines[0]]: stats_dict[pipelines[1]][col] = stats_dict[pipelines[0]][col] - if not col in stats_dict[pipelines[0]] and col in stats_dict[pipelines[1]]: stats_dict[pipelines[0]][col] = stats_dict[pipelines[1]][col] - for pip in pipelines: - stats_dict_keys[pip] = list(set(stats_dict_keys[pip] + missing_cols)) - - # Write to global list and keys - new_row = dict() - column_count = 0 - for pip in pipelines: - new_row = row.copy() - new_row.update(stats_dict[pip]) - global_list[pip].append(new_row) - global_keys[pip] = csv_reader.fieldnames + stats_dict_keys[pip] - - - except Exception as e: - - print("Sample " + sample_name + " failed. 
Error: " + str(e)) - -csv_file.close() -# print global_keys -# print global_list - -# Writing to Output Files -# ####################################################################################### -if not args.rigid: - - # Writing TSV file - # ####################################################################################### - - for pip in pipelines: - - pip_nam = "_" + pip - if pip_nam == "_x": pip_nam = "" - tsv_outfile_path = os.path.join(paths.output_dir,os.path.basename(paths.output_dir)+ pip_nam + '_stats_summary.tsv') - tsv_outfile = open(tsv_outfile_path, 'w') - - if global_list[pip] and global_keys[pip]: - - tsv_writer = csv.DictWriter(tsv_outfile, fieldnames=global_keys[pip], delimiter='\t') - tsv_writer.writeheader() - - for i,sample in enumerate(global_list[pip]): - tsv_writer.writerow(sample) - if args.excel: - for j,field in enumerate(global_keys[pip]): - if i == 0: xls_sheet.write(0, j, field) - xls_sheet.write(i+1, j, sample[field]) - - tsv_outfile.close() - - print("\nInput used: " + csv_file_path) - print("Results TSV file: " + tsv_outfile_path) - - - - - # Output XLS file - # ####################################################################################### - if args.excel: - - raise Exception("--excel not implemented") - - import xlwt - - for pip in pipelines: - - pip_nam = "_" + pip - if pip_nam == "_x": pip_nam = "" - - xls_book = xlwt.Workbook(encoding="utf-8") - xls_sheet_name = "Stats" + pip_nam - xls_sheet = xls_book.add_sheet(xls_sheet_name) - - # Where should this be written? Here or below? - # if args.rigid: - # for i,field in enumerate(fields_out): - # xls_sheet.write(0, i, field) - - import xlrd - import openpyxl - - # saving the XLS sheet - xls_filename = os.path.join(paths.output_dir,os.path.basename(paths.output_dir)+'_stats_summary.xls') - xls_book.save(xls_filename) - print("Results XLS file: " + xls_filename) - - # convert XLS sheet to XLSX format - xlsx_book_in = xlrd.open_workbook(xls_filename) - index = 0 - nrows = sample_count + 2 - ncols = 0 - if global_keys[pip]: ncols = len(global_keys[pip]) - else: ncols = column_count - ncols += 1 - xlsx_sheet_in = xlsx_book_in.sheet_by_index(0) - xlsx_book_out = openpyxl.Workbook() - xlsx_sheet = xlsx_book_out.active - xlsx_sheet.title = xls_sheet_name - for row in range(1, nrows): - for col in range(1, ncols): - xlsx_sheet.cell(row=row, column=col).value = xlsx_sheet_in.cell_value(row-1, col-1) - xlsx_filename = os.path.join(paths.output_dir,os.path.basename(paths.output_dir)+'_stats_summary.xlsx') - xlsx_book_out.save(xlsx_filename) - print("Results XLSX file: " + xlsx_filename) - - print("\n") - - - -# RIGID -else: - if args.excel: - raise Exception("--excel not implemented for option --rigid") - - for pip in pipelines: - - pip_nam = "_" + pip - if pip_nam == "_x": pip_nam = "" - # Open file to write to - tsv_outfile_path = os.path.join(paths.output_dir,os.path.basename(paths.output_dir)+ pip_nam + '_stats_summary.tsv') - tsv_outfile = open(tsv_outfile_path, 'w') - tsv_writer = csv.DictWriter(tsv_outfile, fieldnames=fields_out, delimiter='\t') - tsv_writer.writeheader() - - - - - # for each sample data (one element of the global list) - for sample_dict in global_list[pip]: - - new_row = dict() - # Write each field - for i in range(0,len(fields_in)): - - field = fields_in[i] - field_out = fields_out[i] - content = str('') - content_float = float(-1e10) - content_int = int(-1) - - # extract all the listed fields - # some data types might not have all the fields in stats_dict, then catch the 
KeyError - try: - if field == 'Trimmed_rate': - content_float = 100.0*float(sample_dict['Trimmed_reads'])/float(sample_dict['Raw_reads']) - elif field == 'Aligned_rate': - content_float = 100.0*float(sample_dict['Aligned_reads'])/float(sample_dict['Trimmed_reads']) - elif field == 'Multimap_rate': - content_float = 100.0*float(sample_dict['Multimap_reads'])/float(sample_dict['Trimmed_reads']) - elif field in sample_dict: - content = str(sample_dict[field].strip()) - else: - content = 'NA' - print("No field called: " + field) - except KeyError: - content = 'NA' - print("Data missing to calculate: " + field) - - # convert str to float or int if needed - got_comma = content.find('.') - try: - content_float = float(content) - except ValueError: - pass - if not got_comma: - content_int = int(content_float) - - # write the field for each row - if content_int > -1: - column_count += 1 - new_row[field_out] = content_int - if args.excel: xls_sheet.write(sample_count, i, content_int) - elif content_float > -1e10: - column_count += 1 - new_row[field_out] = content_float - if args.excel: xls_sheet.write(sample_count, i, content_float) - else: - column_count += 1 - new_row[field_out] = content - if args.excel: xls_sheet.write(sample_count, i, content) - - tsv_writer.writerow(new_row) - - tsv_outfile.close() - - - - - diff --git a/scripts/make_trackhubs.py b/scripts/make_trackhubs.py deleted file mode 100644 index 71f7cb0a..00000000 --- a/scripts/make_trackhubs.py +++ /dev/null @@ -1,543 +0,0 @@ -#! /usr/bin/env python -""" Create a trackhub for each sample. """ - -from argparse import ArgumentParser -import csv -import datetime -import getpass -import os -import subprocess -import yaml -from looper.looper import SAMPLE_EXECUTION_TOGGLE -from pypiper import AttributeDict - - -# Argument Parsing -# ####################################################################################### -parser = ArgumentParser(description='make_trackhubs') -parser.add_argument('-c', '--config-file', dest='config_file', help="path to YAML config file", required=True, type=str) -parser.add_argument('-f', dest='filter', action='store_false', required=False, default=True) -parser.add_argument('-v', '--visibility', dest='visibility', help='visibility mode (default: full)', required=False, default='full', type=str) -parser.add_argument('--copy', dest='copy', help='copy sepcified file types instead of creating symbolic links, example: --copy BAM-BB-BW-BED-TH', required=False, type=str) - -args = parser.parse_args() - -with open(args.config_file, 'r') as config_file: - config_yaml = yaml.load(config_file) - config = AttributeDict(config_yaml, default=True) - -trackhubs = config.trackhubs -paths = config.paths - -print(config) - -if not os.path.exists(paths.output_dir): - raise Exception(paths.output_dir + " : that project directory does not exist!") - -present_genomes = {} -subGroups_perGenome = {} -subGroups = { - "exp_category": {}, - "FACS_marker": {}, - "cell_type": {}, - "treatment": {}, - "treatment_length": {}, - "cell_count": {}, - "library": {}, - "data_type": {} -} -# add x- and y-dimension to subGroups even if they are not in the standard column selection: -subGroups[trackhubs.matrix_x] = {} -subGroups[trackhubs.matrix_y] = {} - - -csv_file_path = os.path.join(os.path.dirname(args.config_file), config.metadata.sample_annotation) -print "\nOpening CSV file: " + csv_file_path -if os.path.isfile(csv_file_path): - csv_file = open(os.path.join(os.path.dirname(args.config_file), config.metadata.sample_annotation), 'rb') # 
opens the csv file -else: - raise Exception(csv_file_path + " : that file does not exist!") - -try: - - csv_file_0 = open(os.path.join(os.path.dirname(args.config_file), config.metadata.sample_annotation), 'rb') - input_file_0 = csv.DictReader(csv_file_0) # creates the reader object - - pipeline = "" - genome = "" - for row in input_file_0: - if ("library" in row.keys()): - pipeline = str(row["library"]).upper() - if ("organism" in row.keys()): - genome = str(getattr(config.genomes, str(row["organism"]))) - print 'Pipeline: ' + pipeline - print 'Genome: ' + genome - print("Trackhub dir: " + trackhubs.trackhub_dir) - if pipeline != "": - pipeline += '_' - - paths.write_dir = "" - - if args.copy: - paths.write_dir = trackhubs.trackhub_dir - if not os.path.exists(paths.write_dir): - os.makedirs(paths.write_dir) - else: - paths.write_dir = paths.output_dir - if not os.path.islink(trackhubs.trackhub_dir): - os.symlink(os.path.relpath(paths.write_dir, os.path.dirname(trackhubs.trackhub_dir)), trackhubs.trackhub_dir) - print 'Linking to: ' + str(trackhubs.trackhub_dir) - else: - print 'Link already exists: ' + str(trackhubs.trackhub_dir) - print 'Writing files to: ' + paths.write_dir - - genomes_file = open(os.path.join(paths.write_dir, pipeline + 'genomes.txt'), 'w') - - track_out = os.path.join(paths.write_dir, genome) - if not os.path.exists(track_out): - os.makedirs(track_out) - print 'Writing tracks to: ' + track_out - else: - print 'Trackhubs already exists! Overwriting everything in ' + track_out - userID = os.getuid() - for root, dirs, files in os.walk(track_out, topdown=False): - for name in files: - ownerID = 0 - try: - ownerID = os.stat(os.path.join(root, name)).st_uid - except: - os.remove(os.path.join(root, name)) - if ownerID == userID: - try: - os.remove(os.path.join(root, name)) - except: - pass - for name in dirs: - ownerID = os.stat(os.path.join(root, name)).st_uid - if ownerID == userID: - try: - os.rmdir(os.path.join(root, name)) - except: - pass - - # write hub.txt - hub_file_name = pipeline + "hub.txt" - hub_file = open(os.path.join(paths.write_dir, hub_file_name), 'w') - hub_file.writelines("hub " + trackhubs.hub_name + "\n") - hub_file.writelines("shortLabel " + trackhubs.hub_name + "\n") - hub_file.writelines("longLabel " + trackhubs.hub_name + "\n") - hub_file.writelines("genomesFile " + pipeline + "genomes.txt\n") - hub_file.writelines("email " + trackhubs.email + "\n") - - # Write a HTML document. 
- html_out = str() - html_out_tab1 = str() - html_out_tab2 = str() - clean_title = os.path.basename(paths.output_dir).replace('_',' ') - # Write HTML header and title - html_out += '\n' - html_out += '\n' - html_out += '\n' - html_out += '\n' - html_out += '\n' - html_out += '\n' - html_out += '\n'.format(getpass.getuser()) - html_out += '\n'.format(datetime.datetime.now().isoformat()) - html_out += '\n'.format(clean_title) - html_out += '\n' - html_out += '{}\n'.format(clean_title) - html_out += '\n' - html_out += '\n' - - tableDict = dict() - - input_file = csv.DictReader(csv_file) - sample_count = 0 - - print '\nStart iterating over samples' - for row in input_file: # iterates the rows of the file in orders - - sample_count += 1 - - sample_name = row["sample_name"] - print '\nProcessing sample #' + str(sample_count) + " : " + sample_name - - tableDict[sample_name] = dict() - - if SAMPLE_EXECUTION_TOGGLE in row: - exec_flag = row[SAMPLE_EXECUTION_TOGGLE] - if exec_flag == "0" or exec_flag.lower() == "false": - print(sample_name + ": not selected") - continue - else: - print(sample_name + ": SELECTED") - - sample_path = os.path.join(paths.output_dir, paths.results_subdir, sample_name) - - present_subGroups = "\tsubGroups " - - # bsmap aligned bam files - bsmap_mapped_bam = os.path.join(sample_path, "bsmap_" + genome, sample_name + ".bam") - bsmap_mapped_bam_name = os.path.basename(bsmap_mapped_bam) - bsmap_mapped_bam_index = os.path.join(sample_path, "bsmap_" + genome, sample_name + ".bam.bai") - bsmap_mapped_bam_index_name = os.path.basename(bsmap_mapped_bam_index) - - # With the new meth bigbeds, RRBS pipeline should yield this file: - meth_bb_file = os.path.join(sample_path, "bigbed_" + genome, "RRBS_" + sample_name + ".bb") - meth_bb_name = os.path.basename(meth_bb_file) - - # bismark bigwig files - bismark_bw_file = os.path.join(sample_path, "bismark_" + genome, "extractor", sample_name + ".aln.dedup.filt.bw") - bismark_bw_name = os.path.basename(bismark_bw_file) - - # bigwigs are better actually - if not os.path.isfile(bismark_bw_file): - bismark_bw_file = os.path.join(sample_path, "bigwig_" + genome, "RRBS_" + sample_name + ".bw") - bismark_bw_name = os.path.basename(bismark_bw_file) - - # biseqMethcalling bed file - biseq_bed = os.path.join(sample_path, "biseq_" + genome, "RRBS_cpgMethylation_" + sample_name + ".bed") - biseq_bed_name = os.path.basename(biseq_bed) - - # tophat files - if args.filter: - tophat_bw_file = os.path.join(sample_path, "tophat_" + genome, sample_name + ".aln.filt_sorted.bw") - else: - tophat_bw_file = os.path.join(sample_path, "tophat_" + genome, sample_name + ".aln_sorted.bw") - tophat_bw_name = os.path.basename(tophat_bw_file) - - if os.path.isfile(tophat_bw_file) or os.path.isfile(bismark_bw_file) or os.path.isfile(meth_bb_file): - - track_out_file = os.path.join(track_out, pipeline + "trackDB.txt") - if track_out_file not in present_genomes.keys(): - # initialize a new genome - open(track_out_file, 'w').close() - genomes_file.writelines("genome " + genome.split('_')[0] + "\n") - genomes_file.writelines("trackDb " + os.path.join(genome, os.path.basename(track_out_file)) + "\n") - present_genomes[track_out_file] = [] - subGroups_perGenome[track_out_file] = subGroups - - # construct subGroups for each sample and initialize subgroups if not present - for key in subGroups_perGenome[track_out_file].keys(): - if key not in input_file.fieldnames: - continue - if not row[key] in ["NA", "", " "]: - present_subGroups += key + "=" + row[key] + " " - if not 
row[key] in subGroups_perGenome[track_out_file][key]: - subGroups_perGenome[track_out_file][key][row[key]] = row[key] - - # TODO NS: we should only have build these once; like so: - # Build short label - if trackhubs.short_label_column is not None: - shortLabel = row[trackhubs.short_label_column] - else: - shortLabel = "sl_" - if ("Library" in row.keys()): - shortLabel += row["library"][0] - if ("cell_type" in row.keys()): - shortLabel += "_" + row["cell_type"] - if ("cell_count" in row.keys()): - shortLabel += "_" + row["cell_count"] - - ########################################## - ### Aligned BAM files and index files - ########################################## - - if os.path.isfile(bsmap_mapped_bam): - - print " FOUND bsmap mapped file: " + bsmap_mapped_bam - - # copy or link the file to the hub directory - if args.copy and args.copy.find('BAM') > -1: - cmd = "cp " + bsmap_mapped_bam + " " + track_out - print(cmd) - subprocess.call(cmd, shell=True) - cmd = "cp " + bsmap_mapped_bam_index + " " + track_out - print(cmd) - subprocess.call(cmd, shell=True) - else: - os.symlink(os.path.relpath(bsmap_mapped_bam, track_out), os.path.join(track_out, pipeline + bsmap_mapped_bam_name)) - os.symlink(os.path.relpath(bsmap_mapped_bam_index, track_out), os.path.join(track_out, pipeline + bsmap_mapped_bam_index_name)) - - # construct track for data file - track_text = "\n\ttrack " + bsmap_mapped_bam_name + "_Meth_Align" + "\n" - track_text += "\tparent DNA_Meth_Align on\n" - track_text += "\ttype bam\n" - track_text += present_subGroups + "data_type=Meth_Align" + "\n" - track_text += "\tshortLabel " + shortLabel + "\n" - track_text += "\tlongLabel " + sample_name + "_Meth_Align" + "\n" - track_text += "\tbigDataUrl " + pipeline + bsmap_mapped_bam_name + "\n" - - tableDict[sample_name]['BAM'] = dict([('label', 'BAM'), ('link', os.path.relpath(os.path.join(track_out, pipeline + bsmap_mapped_bam_name), track_out))]) - tableDict[sample_name]['BAI'] = dict([('label', 'BAI'), ('link', os.path.relpath(os.path.join(track_out, pipeline + bsmap_mapped_bam_index_name), track_out))]) - - present_genomes[track_out_file].append(track_text) - else: - print (" No bsmap mapped bam found: " + bsmap_mapped_bam_name) - - ########################################## - ### For BigBed files - ########################################## - - if os.path.isfile(meth_bb_file): - - print " FOUND BigBed file: " + meth_bb_file - - # copy or link the file to the hub directory - if args.copy and args.copy.find('BB') > -1: - cmd = "cp " + meth_bb_file + " " + track_out - print(cmd) - subprocess.call(cmd, shell=True) - else: - os.symlink(os.path.relpath(meth_bb_file, track_out), os.path.join(track_out, meth_bb_name)) - - # construct track for data file - track_text = "\n\ttrack " + meth_bb_name + "_Meth_BB" + "\n" - track_text += "\tparent DNA_Meth_BB on\n" - track_text += "\ttype bigBed\n" - track_text += present_subGroups + "data_type=Meth_BB" + "\n" - track_text += "\tshortLabel " + shortLabel + "\n" - track_text += "\tlongLabel " + sample_name + "_Meth_BB" + "\n" - track_text += "\tbigDataUrl " + pipeline + meth_bb_name + "\n" - - tableDict[sample_name]['BB'] = dict([('label', 'BB'), ('link', os.path.relpath(os.path.relpath(os.path.join(track_out, meth_bb_name), track_out)))]) - - present_genomes[track_out_file].append(track_text) - else: - print (" No Bigbed file found: " + meth_bb_file) - - ########################################## - ### For Methylation (bismark) BIGWIG files - ########################################## - - if 
os.path.isfile(bismark_bw_file): - print " FOUND bismark bw: " + bismark_bw_file - # copy or link the file to the hub directory - if args.copy and args.copy.find('BW') > -1: - cmd = "cp " + bismark_bw_file + " " + track_out - print(cmd) - subprocess.call(cmd, shell=True) - else: - os.symlink(os.path.relpath(bismark_bw_file, track_out), os.path.join(track_out, bismark_bw_name)) - # add data_type subgroup (not included in sampleAnnotation) - if "Meth" not in subGroups_perGenome[track_out_file]["data_type"]: - subGroups_perGenome[track_out_file]["data_type"]["Meth"] = "Meth" - # construct track for data file - track_text = "\n\ttrack " + bismark_bw_name + "_Meth" + "\n" - track_text += "\tparent " + trackhubs.parent_track_name + " on\n" - track_text += "\ttype bigWig\n" - track_text += present_subGroups + "data_type=Meth" + "\n" - track_text += "\tshortLabel " + shortLabel + "\n" - track_text += "\tlongLabel " + sample_name + "_Meth" + "\n" - track_text += "\tbigDataUrl " + bismark_bw_name + "\n" - track_text += "\tviewLimits 0:100" + "\n" - track_text += "\tviewLimitsMax 0:100" + "\n" - track_text += "\tmaxHeightPixels 100:30:10" + "\n" - - tableDict[sample_name]['BW'] = dict([('label', 'BW'), ('link', os.path.relpath(os.path.relpath(os.path.join(track_out, bismark_bw_name), track_out)))]) - - present_genomes[track_out_file].append(track_text) - else: - print (" No bismark bw found: " + bismark_bw_file) - - ########################################## - ### For biseq BED files - ########################################## - - if os.path.isfile(biseq_bed): - - print " FOUND biseq bed file: " + biseq_bed - - # copy or link the file to the hub directory - if args.copy and args.copy.find('BED') > -1: - cmd = "cp " + biseq_bed + " " + track_out - print(cmd) - subprocess.call(cmd, shell=True) - else: - os.symlink(os.path.relpath(biseq_bed, track_out), os.path.join(track_out, biseq_bed_name)) - - tableDict[sample_name]['BED'] = dict([('label', 'BED'), ('link', os.path.relpath(os.path.join(track_out, biseq_bed_name), track_out))]) - - else: - print (" No biseq bed file found: " + biseq_bed) - - ########################################## - ### For RNA (tophat) files - ########################################## - - if os.path.isfile(tophat_bw_file): - print " FOUND tophat bw: " + tophat_bw_file - # copy or link the file to the hub directory - if args.copy and args.copy.find('TH') > -1: - cmd = "cp " + tophat_bw_file + " " + track_out + "\n" - cmd += "chmod o+r " + os.path.join(track_out, tophat_bw_name) - print(cmd) - subprocess.call(cmd, shell=True) - else: - os.symlink(os.path.relpath(tophat_bw_file, track_out), os.path.join(track_out, tophat_bw_name)) - # add data_type subgroup (not included in sampleAnnotation) - if "RNA" not in subGroups_perGenome[track_out_file]["data_type"]: - subGroups_perGenome[track_out_file]["data_type"]["RNA"] = "RNA" - # construct track for data file - track_text = "\n\ttrack " + tophat_bw_name + "_RNA" + "\n" - track_text += "\tparent " + trackhubs.parent_track_name + " on\n" - track_text += "\ttype bigWig\n" - track_text += present_subGroups + "data_type=RNA" + "\n" - track_text += "\tshortLabel " + shortLabel + "\n" - track_text += "\tlongLabel " + sample_name + "_RNA" + "\n" - track_text += "\tbigDataUrl " + tophat_bw_name + "\n" - track_text += "\tautoScale on" + "\n" - - tableDict[sample_name]['TH'] = dict([('label', 'BW'), ('link', os.path.relpath(os.path.join(track_out, tophat_bw_name), track_out))]) - - present_genomes[track_out_file].append(track_text) - else: - 
print (" No tophat bw found: " + tophat_bw_file) - - # write composit-header followed by the individual tracks to a genome specific trackDB.txt - composit_text = "" - for key in present_genomes.keys(): - # construct composite header - composit_text += "\ntrack " + str(trackhubs.parent_track_name) + "\n" - composit_text += "compositeTrack on" - count = 0 - dim_text = "dimensions dimX=" + str(trackhubs.matrix_x) + " dimY=" + str(trackhubs.matrix_y) - for subGroup in subGroups_perGenome[key].keys(): - if len(subGroups_perGenome[key][subGroup]) < 1: - continue - if not subGroup == str(trackhubs.matrix_x) and not subGroup == str(trackhubs.matrix_y): - dim_text += " dimA=" + subGroup - count += 1 - composit_text += "\nsubGroup" + str(count) + " " + subGroup + " " + subGroup + " " - for type in subGroups_perGenome[key][subGroup].keys(): - composit_text += type + "=" + subGroups_perGenome[key][subGroup][type] + " " - composit_text += "\nshortLabel " + str(trackhubs.parent_track_name) + "\n" - composit_text += "longLabel " + str(trackhubs.parent_track_name) + "\n" - composit_text += "type bigWig" + "\n" - composit_text += "color 0,60,120" + "\n" - composit_text += "spectrum on" + "\n" - composit_text += "visibility " + args.visibility + "\n" - composit_text += dim_text + "\n" - composit_text += "sortOrder " + str(trackhubs.sortOrder) + "\n" - - # write composite header - trackDB = open(key, 'a') - trackDB.writelines(composit_text) - # write individual tracks - for i in range(len(present_genomes[key])): - trackDB.writelines(present_genomes[key][i]) - super_text = "\n" - super_text += "track DNA_Meth_Align\n" - super_text += "shortLabel DNA_Meth_Align\n" - super_text += "longLabel DNA_Meth_Align\n" - super_text += "superTrack on\n" - super_text += "\n" - super_text += "track DNA_Meth_BB\n" - super_text += "shortLabel DNA_Meth_BB\n" - super_text += "longLabel DNA_Meth_BB\n" - super_text += "superTrack on\n" - - trackDB.writelines(super_text) - trackDB.close() - - report_name = pipeline + 'report.html' - - html_out += '\n' - html_out += '

{} Project\n'.format(clean_title) - html_out += '\n' - - today = datetime.datetime.now() - #html_out += 'Last updated on ' + str(today.day) +'/'+ str(today.month) +'/'+ str(today.year) + ' at ' + str(today.hour) +':'+ str(today.minute) +'\n' - html_out += '\n' - - html_out += 'Useful Links\n' - tsv_stats_name = os.path.basename(paths.output_dir)+'_stats_summary.tsv' - tsv_stats_path = os.path.relpath(os.path.join(paths.output_dir,tsv_stats_name),track_out) - xls_stats_name = os.path.basename(paths.output_dir)+'_stats_summary.xls' - xls_stats_path = os.path.relpath(os.path.join(paths.output_dir,xls_stats_name),track_out) - xlsx_stats_name = os.path.basename(paths.output_dir)+'_stats_summary.xlsx' - xlsx_stats_path = os.path.relpath(os.path.join(paths.output_dir,xlsx_stats_name),track_out) - - if os.path.isfile(os.path.join(paths.write_dir,tsv_stats_name)): - if os.path.isfile(os.path.join(paths.write_dir,xls_stats_name)): - if os.path.isfile(os.path.join(paths.write_dir,xlsx_stats_name)): - html_out += 'Stats summary table: {} {} {}\n'.format(tsv_stats_path,'TSV',xls_stats_path,'XLS', xlsx_stats_path,'XLSX') - else: - html_out += 'Stats summary table: {} {}\n'.format(tsv_stats_path,'TSV',xls_stats_path,'XLS') - else: - html_out += 'Stats summary table: {}\n'.format(tsv_stats_path,'TSV') - url = str(trackhubs.url).replace(':','%3A').replace('/','%2F') - paths.ucsc_browser_link = 'https://genome-euro.ucsc.edu/cgi-bin/hgTracks?db='+genome.split('_')[0]+'&hubUrl='+url+'%2F'+hub_file_name - html_out += 'UCSC Genome Browser: {}\n'.format(paths.ucsc_browser_link,'Link') - html_out += '\n' - - html_file_name = os.path.join(track_out, report_name) - file_handle = open(name=html_file_name, mode='w') - file_handle.write(html_out) - - - html_out_tab = 'Data Files\n' - html_out_tab += '\n' - html_out_tab += '\n' - html_out_tab += '\n' - html_out_tab += '\n' - html_out_tab += '\n' - html_out_tab += '\n' - html_out_tab += '\n' - html_out_tab += '\n' - html_out_tab += '\n' - key_list = tableDict.keys() - key_list.sort() - counter = 0 - for key in key_list: - counter += 1 - value = tableDict[key] - html_out_tab += '\n' - html_out_tab += '\n'.format(str(counter)+'.') - html_out_tab += '\n'.format(key) - html_out_tab += '\n'.format(value['BAM']['link'],value['BAM']['label']) - html_out_tab += '\n'.format(value['BAI']['link'],value['BAI']['label']) - html_out_tab += '\n'.format(value['BB']['link'],value['BB']['label']) - html_out_tab += '\n'.format(value['BW']['link'],value['BW']['label']) - html_out_tab += '\n'.format(value['BED']['link'],value['BED']['label']) - html_out_tab += '\n' - html_out_tab += 'Sample NameAligned BAMBAM IndexBigBedBigWigBiseq Bed{}{}{}{}{}{}{}\n' - file_handle.write(html_out_tab) - - html_out = '\n' - html_out += 'This report was generated with software of the Biomedical Sequencing Facility: www.biomedical-sequencing.at\n' - html_out += 'Contact: bsf@cemm.oeaw.ac.at\n' - html_out += '\n' - html_out += 'Valid XHTML 1.0 Transitional\n' - html_out += 'Valid CSS!
' - html_out += '\n' - html_out += '\n' - html_out += '\n' - - file_handle.write(html_out) - file_handle.close() - - html_link_name = os.path.join(track_out, "index.html") - os.symlink(os.path.relpath(html_file_name,track_out),html_link_name) - - cmd = "cp /scratch/lab_bsf/projects/BSA_0000_RRBS_Global_Report/styles.css " + track_out - subprocess.call(cmd, shell=True) - cmd = "chmod -R go+rX " + paths.write_dir - subprocess.call(cmd, shell=True) - - hub_file_link = str(trackhubs.url) + "/" + hub_file_name - report_link = str(trackhubs.url) + "/" + genome + "/" - link_string = 'Report ' + report_link + '\n' - link_string += 'UCSCbrowser ' + paths.ucsc_browser_link + '\n' - print '\nDONE!' - print link_string - - link_file = open(name=os.path.join(paths.write_dir, pipeline + 'links.txt'), mode='w') - link_file.write(link_string) - link_file.close() - -finally: - csv_file.close() diff --git a/scripts/normalize_wig.R b/scripts/normalize_wig.R deleted file mode 100755 index 38b0f443..00000000 --- a/scripts/normalize_wig.R +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env Rscript - -library(data.table) -suppressPackageStartupMessages(library("optparse")) - -##scale=10000000 -##genome="mm10" -##results_dir="/scratch/lab_bock/shared/projects/geissmann/results_pipeline/results_pipeline/" -##stats_path="/scratch/lab_bock/shared/projects/geissmann/results_pipeline/results_pipeline/ALL_stats_summary.tsv" - -# specify our desired options in a list -option_list = list( - make_option(c("-r", "--results_dir"), type="character", help="Input Results folder (REQUIRED)"), - make_option(c("-g", "--genome"), type="character", help="Genome used for alignment (REQUIRED)"), - make_option(c("-s", "--stats"), type="character", help="Alignment stats table for all samples (REQUIRED)"), - make_option(c("-n", "--scale"), type="character", help="Normalization scale (REQUIRED)") - ) - -opt = parse_args(OptionParser(option_list=option_list)) -if (length(opt)<4) { - print_help(OptionParser(option_list=option_list)) -}else { - results_dir=opt$results_dir - genome=opt$genome - stats_path=opt$stats - scale=opt$scale -} - -print(results_dir) -print(genome) -print(stats_path) -print(scale) - -chroSizes_path=paste0("/data/groups/lab_bock/shared/resources/genomes/",genome,"/",genome,".chromSizes") - - - - -stats=fread(stats_path) -stats=stats[pipeline=="rnaTopHat"] -stats[,wigPath:=paste0(results_dir,"/",sampleName,"/tophat_",genome,"/",sampleName,".aln_sorted.wig"),] - -for (i in c(2:nrow(stats))){ - sampleName=stats[i]$sampleName - message(sampleName) - wigFileName=stats[i]$wigPath - mappedReads=stats[i]$Aligned_reads - if (file.exists(wigFileName)){ - system(paste0("sed 's/ \\+/\\t/g' ",wigFileName," > ", wigFileName,"_temp",sep="")) - wig=fread(paste0(wigFileName,"_temp"),header=FALSE) - wig[V1=="variableStep",V3:=paste0(V1," ",V2)] - wig[grep("variableStep",V3),V1:=NA] - wig[grep("variableStep",V3),V2:=NA] - wig[,V2:=round(as.numeric(V2)/mappedReads*scale,2),] - wig[,c("V1","V2"):=list(as.character(V1),as.character(V2)),] - wig[grep("variableStep",V3),c("V1","V2"):=list(V3,"")] - wig[,V3:=NULL,] - write.table(wig,sub(".wig","_norm.wig_temp",wigFileName),sep="\t",col.names=FALSE,row.names=FALSE,quote=FALSE) - system(paste0("sed 's/\t$//g' ",sub(".wig","_norm.wig_temp",wigFileName)," > ", sub(".wig","_norm.wig",wigFileName))) - system(paste("wigToBigWig",sub(".wig","_norm.wig",wigFileName),chroSizes_path,sub(".wig","_norm.bw",wigFileName),sep=" ")) - system(paste("rm ",sub(".wig",".wig_temp",wigFileName))) - system(paste("rm 
",sub(".wig","_norm.wig_temp",wigFileName))) - }else{ - message(paste0("File not found. Skipping: ",wigFileName)) - next} -} - diff --git a/scripts/normalize_wig_submit.sh b/scripts/normalize_wig_submit.sh deleted file mode 100755 index 374eb7b5..00000000 --- a/scripts/normalize_wig_submit.sh +++ /dev/null @@ -1,13 +0,0 @@ -#! /bin/bash - -scale=10000000 -genome="mm10" -results_dir="/scratch/lab_bock/shared/projects/geissmann/results_pipeline/results_pipeline/" -stats_path="/scratch/lab_bock/shared/projects/geissmann/results_pipeline/results_pipeline/ALL_stats_summary.tsv" - -logdir="$results_dir/log/" -mkdir -p $logdir - - - -sbatch --export=NONE --get-user-env=L --job-name=normalize_wig --ntasks=1 --cpus-per-task=1 --mem-per-cpu=8000 --partition=longq --time=2-00:00:00 -o ${logdir}/normalize_wig_%j.log normalize_wig.R -g $genome -n $scale -r $results_dir -s $stats_path \ No newline at end of file diff --git a/scripts/summarizePipelineStats.R b/scripts/summarizePipelineStats.R deleted file mode 100755 index 4f7f150f..00000000 --- a/scripts/summarizePipelineStats.R +++ /dev/null @@ -1,136 +0,0 @@ -#!/usr/bin/env Rscript -options(echo=FALSE); -library(data.table) -library(reshape2) #no longer necessary after data.table 1.9.5?? -suppressPackageStartupMessages(library("optparse")) - -# specify our desired options in a list -option_list = list( -make_option(c("-i", "--inputFolder"), type="character", help="Input Results folder (REQUIRED)")) - -opt = parse_args(OptionParser(option_list=option_list)) -if (is.null(opt$inputFolder)) { - print_help(OptionParser(option_list=option_list)); - inputFolder = "/fhgfs/groups/lab_bock/shared/COREseq/results_pipeline3" -# q(); -} else { - inputFolder=opt$inputFolder -} -message("input folder: ", inputFolder); -pipeDirs = list.dirs(inputFolder, recursive=FALSE) - -message("Read all *_stats.tsv files in the pipeline results folder") -results=list() -dir = pipeDirs[[1]]; -for (dir in pipeDirs) { - message(dir); - statFiles = list.files(dir, pattern="_stats", recursive=FALSE) - statFiles2 = list.files(dir, pattern="stats_", recursive=FALSE) - statFiles = c(statFiles, statFiles2) - for (statFile in statFiles) { - message(statFile); - pipeline = gsub("_stats.tsv", "", statFile) - pipeline = gsub("stats_", "", pipeline) - statPath = paste0(dir, "/", statFile); - # Not the best, but I had to put this in just in case - # there are empty lines in the stat file; this removes them - message(":") - system(paste0("sed -i '/^\\s*$/d' ", statPath)) - message(":") - a = fread(statPath) - setnames(a, c("key", "value")) - a[,key:=gsub(" ", "_", key)] # Change spaces to underscores - #Order keys as factors, to maintain order through later cast. - a[,key:=factor(key, levels=unique(key))] - #setkey(a, "key") - a[,sampleName:=basename(dir)] - a[,pipeline:=pipeline] - sampleName = basename(dir) - if (is.null(results[[pipeline]])) { results[[pipeline]] = list(); } - results[[pipeline]][[sampleName]] = a; - } -} -if (length(results) ==0) { - stop("No stats files found."); -} -results -#Combined, divided by pipeline -resultsDT = lapply(results, function(x) { do.call(rbind, x); }) - -# Select latest for identical statistics -resultsDT = lapply(resultsDT, function(x) { x[,list(value=value[length(value)]), by=c("key", "sampleName", "pipeline"), roll=+Inf] }) - -# Cast to wide format -resultsMat = lapply(resultsDT, dcast, formula= "... ~ key") -resultsMat = lapply(resultsMat, as.data.table) -# Convert number-only cols to numerics, so I can do some stats below. 
-numToNumeric = function(DT) { - return(DT[,lapply(.SD, function(x) { if(!any(grepl("[a-zA-Z:_\\-]", x))) { return(as.numeric(x)); } else { return(x)} })]) -} -#lapply(resultsMat, sapply, mode) -resultsMat = lapply(resultsMat, numToNumeric) - -################################################################################ -# Do any pipeline-specific calculations here -################################################################################ -nofail = function(x) { - tryCatch( {x}, error = function(e) { message("Pipeline-specific summary error: ", e); } ) -} - #WGBS - if ("WGBS" %in% names(resultsMat) ) { - nofail( { resultsMat$WGBS[, total_efficiency := (Deduplicated_reads)/Raw_reads] }) - nofail( { resultsMat$WGBS[, trim_loss_rate := (Raw_reads - Trimmed_reads)/Raw_reads] }) - nofail( { resultsMat$WGBS[, alignment_rate := (Aligned_reads)/Trimmed_reads] }) - nofail( { resultsMat$WGBS[, dupe_loss_rate := (Aligned_reads - Deduplicated_reads)/Aligned_reads] }) - nofail( { resultsMat$WGBS[, filt_loss_rate := (Deduplicated_reads - Filtered_reads)/Deduplicated_reads] }) - } - - if ("RRBS" %in% names(resultsMat) ) { - nofail( { resultsMat$RRBS[, total_efficiency := (Aligned_reads)/Raw_reads] }) - nofail( { resultsMat$RRBS[, trim_loss_rate := (Raw_reads - Trimmed_reads)/Raw_reads] }) - nofail( { resultsMat$RRBS[, alignment_rate := (Aligned_reads)/Trimmed_reads] }) - #nofail( { resultsMat$RRBS[, dupe_loss_rate := (Aligned_reads - Deduplicated_reads)/Aligned_reads] }) - #nofail( { resultsMat$RRBS[, filt_loss_rate := (Deduplicated_reads - Filtered_reads)/Deduplicated_reads] }) - } - - # Tophat -if ("rnaTopHat" %in% names(resultsMat) ) { - nofail( { resultsMat$rnaTopHat[, total_efficiency := Filtered_reads/Raw_reads] }) - nofail( { resultsMat$rnaTopHat[, trim_loss_rate := (Raw_reads - Trimmed_reads)/Raw_reads] }) - nofail( { resultsMat$rnaTopHat[, alignment_rate := (Aligned_reads)/Trimmed_reads] }) - nofail( { resultsMat$rnaTopHat[, dupe_loss_rate := (Filtered_reads - Deduplicated_reads)/Filtered_reads] }) - nofail( { resultsMat$rnaTopHat[, filt_loss_rate := (Aligned_reads - Filtered_reads)/Aligned_reads] }) - } - - # Bitseq -if ("rnaBitSeq" %in% names(resultsMat) ) { - nofail( { resultsMat$rnaBitSeq[, total_efficiency := Filtered_reads/Raw_reads] }) - nofail( { resultsMat$rnaBitSeq[, trim_loss_rate := (Raw_reads - Trimmed_reads)/Raw_reads] }) - nofail( { resultsMat$rnaBitSeq[, alignment_rate := (Aligned_reads)/Trimmed_reads] }) - nofail( { resultsMat$rnaBitSeq[, dupe_loss_rate := (Filtered_reads - Deduplicated_reads)/Filtered_reads] }) - nofail( { resultsMat$rnaBitSeq[, filt_loss_rate := (Aligned_reads - Filtered_reads)/Aligned_reads] }) - nofail( { resultsMat$rnaBitSeq[, ERCC_alignment_rate := (ERCC_aligned_reads)/Trimmed_reads] }) - } - -################################################################################ -# Write results -################################################################################ -commonCols = Reduce(intersect, lapply(resultsMat, colnames)); -commonList = lapply(resultsMat, function(x) { x[,commonCols, with=FALSE] }) -commonTable = do.call(rbind, commonList) - - -# Write individual result tables for each pipeline -pipelines = names(resultsMat) -for (p in pipelines) { - pipeStatFile = paste0(inputFolder, "/", p, "_stats_summary.tsv") - message("Writing pipeline stats table: ", pipeStatFile) - write.table(resultsMat[[p]], pipeStatFile, sep="\t",row.names=FALSE,quote=FALSE) -} -if (length(names(resultsMat)) > 1 ) { # only if there are multiple pipelines -# 
Produce an additional table with only common features -commonTableFile = paste0(inputFolder, "/ALL_stats_summary.tsv"); -message("Writing common table: ", commonTableFile); -write.table(commonTable, commonTableFile,sep="\t",row.names=FALSE,quote=FALSE) -} - diff --git a/scripts/summarizePipelineStats_complex.R b/scripts/summarizePipelineStats_complex.R deleted file mode 100755 index d0888b30..00000000 --- a/scripts/summarizePipelineStats_complex.R +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env Rscript -options(echo=FALSE); -library(data.table) -library(reshape2) #no longer necessary after data.table 1.9.5?? -suppressPackageStartupMessages(library("optparse")) - -# specify our desired options in a list -option_list = list( -make_option(c("-i", "--inputFolder"), type="character", help="Input Results folder (REQUIRED)")) - -opt = parse_args(OptionParser(option_list=option_list)) -if (is.null(opt$inputFolder)) { - print_help(OptionParser(option_list=option_list)); - inputFolder = "/fhgfs/groups/lab_bock/shared/COREseq/results_pipeline3" -# q(); -} else { - inputFolder=opt$inputFolder -} - -message("input folder: ", inputFolder); -pipeDirs = list.dirs(inputFolder, recursive=FALSE) - -message("Read all *_stats.txt files in the pipeline results folder") -results=list() -dir = pipeDirs[[1]]; -for (dir in pipeDirs) { - message(dir); - statFiles = list.files(dir, pattern="_stats.tsv") - statFiles2 = list.files(dir, pattern="stats_") - statFiles = c(statFiles, statFiles2) - for (statFile in statFiles) { - message(statFile); - pipeline = gsub("_stats.tsv", "", statFile) - pipeline = gsub("stats_", "", pipeline) - statPath = paste0(dir, "/", statFile); - a = fread(statPath) - setnames(a, c("key", "value")) - a[,key:=gsub(" ", "_", key)] # Change spaces to underscores - #Order keys as factors, to maintain order through later cast. - a[,key:=factor(key, levels=unique(key))] - #setkey(a, "key") - a[,sampleName:=basename(dir)] - a[,pipeline:=pipeline] - sampleName = basename(dir) - if (is.null(results[[pipeline]])) { results[[pipeline]] = list(); } - results[[pipeline]][[sampleName]] = a; - } -} -if (length(results) ==0) { - stop("No stats files found."); -} -results -#Combined, divided by pipeline -resultsDT = lapply(results, function(x) { do.call(rbind, x); }) - -# Select latest for identical statistics -resultsDT = lapply(resultsDT, function(x) { x[,list(value=value[length(value)]), by=c("key", "sampleName", "pipeline"), roll=+Inf] }) - -# Cast to wide format -resultsMat = lapply(resultsDT, dcast, formula= "... ~ key") -resultsMat = lapply(resultsMat, as.data.table) -# Convert number-only cols to numerics, so I can do some stats below. 
-numToNumeric = function(DT) { - return(DT[,lapply(.SD, function(x) { if(!any(grepl("[a-zA-Z:_\\-]", x))) { return(as.numeric(x)); } else { return(x)} })]) -} -resultsMat = lapply(resultsMat, numToNumeric) -#lapply(resultsMat, sapply, mode) - -################################################################################ -# Do any pipeline-specific calculations here -################################################################################ - -#WGBS - -if ("WGBS" %in% names(resultsMat)){ - resultsMat$WGBS[, total_efficiency := (Deduplicated_reads)/Raw_reads] - resultsMat$WGBS[, trim_loss_rate := (Raw_reads - Trimmed_reads)/Raw_reads] - resultsMat$WGBS[, alignment_rate := (Aligned_reads)/Trimmed_reads] - resultsMat$WGBS[, dupe_loss_rate := (Aligned_reads - Deduplicated_reads)/Aligned_reads] - resultsMat$WGBS[, filt_loss_rate := (Deduplicated_reads - Filtered_reads)/Deduplicated_reads] -} - - -# Tophat -if ("rnaTopHat" %in% names(resultsMat)){ - if ("Filtered_reads" %in% names(resultsMat$rnaTopHat)){ - resultsMat$rnaTopHat[, total_efficiency := Filtered_reads/Raw_reads]} - resultsMat$rnaTopHat[, trim_loss_rate := (Raw_reads - Trimmed_reads)/Raw_reads] - resultsMat$rnaTopHat[, alignment_rate := (Aligned_reads)/Trimmed_reads] - if ("Filtered_reads" %in% names(resultsMat$rnaTopHat)){ - if ("Deduplicated_reads" %in% names(resultsMat$rnaTopHat)){ - resultsMat$rnaTopHat[, dupe_loss_rate := (Filtered_reads - Deduplicated_reads)/Filtered_reads]} - resultsMat$rnaTopHat[, filt_loss_rate := (Aligned_reads - Filtered_reads)/Aligned_reads]} - else if ("Deduplicated_reads" %in% names(resultsMat$rnaTopHat)){ - resultsMat$rnaTopHat[, dupe_loss_rate := (Aligned_reads - Deduplicated_reads)/Aligned_reads]} -} -# Bitseq -if ("rnaBitSeq" %in% names(resultsMat)){ - if ("Filtered_reads" %in% names(resultsMat$rnaTopHat)){ - resultsMat$rnaBitSeq[, total_efficiency := Filtered_reads/Raw_reads]} - resultsMat$rnaBitSeq[, trim_loss_rate := (Raw_reads - Trimmed_reads)/Raw_reads] - resultsMat$rnaBitSeq[, alignment_rate := Aligned_reads/Trimmed_reads] - if ("Filtered_reads" %in% names(resultsMat$rnaTopHat)){ - resultsMat$rnaBitSeq[, dupe_loss_rate := (Filtered_reads - Deduplicated_reads)/Filtered_reads] - resultsMat$rnaBitSeq[, filt_loss_rate := (Aligned_reads - Filtered_reads)/Aligned_reads]} - else {resultsMat$rnaBitSeq[, dupe_loss_rate := (Aligned_reads - Deduplicated_reads)/Aligned_reads]} - resultsMat$rnaBitSeq[, ERCC_alignment_rate := (ERCC_aligned_reads)/Trimmed_reads] -} - -################################################################################ -# Write results -################################################################################ -commonCols = Reduce(intersect, lapply(resultsMat, colnames)); -commonList = lapply(resultsMat, function(x) { x[,commonCols, with=FALSE] }) -commonTable = do.call(rbind, commonList) - - -# Write individual result tables for each pipeline -pipelines = names(resultsMat) -for (p in pipelines) { - pipeStatFile = paste0(inputFolder, "/", p, "_stats_summary.tsv") - message("Writing pipeline stats table: ", pipeStatFile) - write.table(resultsMat[[p]], pipeStatFile, sep="\t",row.names=FALSE,quote=FALSE) -} - -# Produce an additional table with only common features -commonTableFile = paste0(inputFolder, "/ALL_stats_summary.tsv"); -message("Writing common table: ", commonTableFile); -write.table(commonTable, commonTableFile,sep="\t",row.names=FALSE,quote=FALSE) - - diff --git a/setup.cfg b/setup.cfg index e13f2591..5d8fdac6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 
+1,7 @@ +[aliases] +test = pytest + [pytest] # Only request extra info from failures and errors. addopts = -rfE -[aliases] -test = pytest - diff --git a/setup.py b/setup.py index c54d33a9..1e102fe0 100644 --- a/setup.py +++ b/setup.py @@ -1,27 +1,36 @@ #! /usr/bin/env python import os +from setuptools import setup import sys # Additional keyword arguments for setup(). extra = {} +# Ordinary dependencies DEPENDENCIES = [] with open("requirements/requirements-all.txt", "r") as reqs_file: for line in reqs_file: if not line.strip(): continue - DEPENDENCIES.append(line.split("=")[0].rstrip("<>")) + #DEPENDENCIES.append(line.split("=")[0].rstrip("<>")) + DEPENDENCIES.append(line) +# numexpr for pandas try: - from setuptools import setup - if sys.version_info >= (3,): - extra["use_2to3"] = True - extra["install_requires"] = DEPENDENCIES + import numexpr except ImportError: - from distutils.core import setup - extra["requires"] = DEPENDENCIES + # No numexpr is OK for pandas. + pass +else: + # pandas 0.20.2 needs updated numexpr; the claim is 2.4.6, but that failed. + DEPENDENCIES.append("numexpr==2.6.2") + +# 2to3 +if sys.version_info >= (3, ): + extra["use_2to3"] = True +extra["install_requires"] = DEPENDENCIES # Additional files to include with package @@ -34,7 +43,9 @@ def get_static(name, condition=None): return [i for i in filter(lambda x: eval(condition), static)] # scripts to be added to the $PATH -scripts = get_static("scripts", condition="'.' in x") +# scripts = get_static("scripts", condition="'.' in x") +# scripts removed (TO remove this) +scripts = None with open("looper/_version.py", 'r') as versionfile: version = versionfile.readline().split()[-1].strip("\"'\n") @@ -64,9 +75,7 @@ def get_static(name, condition=None): package_data={'looper': ['submit_templates/*']}, include_package_data=True, test_suite="tests", - tests_require=["mock", "pytest"], - setup_requires=(["pytest-runner"] - if {"ptr", "test", "pytest"} & set(sys.argv) - else []), + tests_require=(["mock", "pytest"]), + setup_requires=(["pytest-runner"] if {"test", "pytest", "ptr"} & set(sys.argv) else []), **extra ) diff --git a/tests/conftest.py b/tests/conftest.py index 96cdf5f9..4765186b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ """ +import copy import logging import os import shutil @@ -16,21 +17,22 @@ from pandas.io.parsers import EmptyDataError import pytest +import yaml from looper import setup_looper_logger -from looper.models import PipelineInterface -from looper.loodels import Project +from looper.models import PipelineInterface, Project, SAMPLE_NAME_COLNAME -# TODO: needed for interactive mode, but may crush cmdl option for setup. _LOGGER = logging.getLogger("looper") +P_CONFIG_FILENAME = "project_config.yaml" + # {basedir} lines are formatted during file write; other braced entries remain. 
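The comment that closes the hunk above describes how the test helper treats the `PROJECT_CONFIG_LINES` template that follows: only lines containing `{basedir}` are interpolated when the temporary files are written, while the other braced tokens are left in place for later substitution. A minimal, hypothetical sketch of that selective formatting (the project's own logic lives in `_write_temp`, shown further down in this hunk):

```python
# Illustration only; "fill_basedir" is a made-up helper, not part of looper.
def fill_basedir(template_lines, basedir):
    """Interpolate {basedir} eagerly; leave every other {token} untouched."""
    filled = []
    for line in template_lines:
        if "{basedir}" in line:
            # Assumes such lines carry no other placeholders, as described
            # by the comment above.
            line = line.format(basedir=basedir)
        filled.append(line)
    return filled

print(fill_basedir(["  output_dir: {basedir}/out\n",
                    "  data_R1: \"{sample_name}_R1.fastq\"\n"], "/tmp/demo"))
# ['  output_dir: /tmp/demo/out\n', '  data_R1: "{sample_name}_R1.fastq"\n']
```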
PROJECT_CONFIG_LINES = """metadata: sample_annotation: samples.csv output_dir: test - pipelines_dir: pipelines + pipeline_interfaces: pipelines merge_table: merge.csv derived_columns: [{derived_column_names}] @@ -171,6 +173,18 @@ } COMPARISON_FUNCTIONS = ["__eq__", "__ne__", "__len__", "keys", "values", "items"] +COLUMNS = [SAMPLE_NAME_COLNAME, "val1", "val2", "library"] +PROJECT_CONFIG_DATA = {"metadata": {"sample_annotation": "annotations.csv"}} + + + +def update_project_conf_data(extension): + """ Updated Project configuration data mapping based on file extension """ + updated = copy.deepcopy(PROJECT_CONFIG_DATA) + filename = updated["metadata"]["sample_annotation"] + base, _ = os.path.splitext(filename) + updated["metadata"]["sample_annotation"] = "{}.{}".format(base, extension) + return updated @@ -207,11 +221,47 @@ def conf_logs(request): + +@pytest.fixture(scope="function") +def sample_annotation_lines(): + return SAMPLE_ANNOTATION_LINES + + + +@pytest.fixture(scope="function") +def path_empty_project(request, tmpdir): + """ Provide path to Project config file with empty annotations. """ + + # Determine how to write the data and how to name a file. + if "delimiter" in request.fixturenames: + delimiter = request.getfixturevalue("delimiter") + extension = "txt" + else: + delimiter = "," + extension = "csv" + + # Update the Project configuration data. + conf_data = update_project_conf_data(extension) + + # Write the needed files. + anns_path = os.path.join( + tmpdir.strpath, conf_data["metadata"]["sample_annotation"]) + + with open(anns_path, 'w') as anns_file: + anns_file.write(delimiter.join(COLUMNS)) + conf_path = os.path.join(tmpdir.strpath, "proj-conf.yaml") + with open(conf_path, 'w') as conf_file: + yaml.dump(conf_data, conf_file) + + return conf_path + + + def interactive(prj_lines=PROJECT_CONFIG_LINES, iface_lines=PIPELINE_INTERFACE_CONFIG_LINES, merge_table_lines = MERGE_TABLE_LINES, sample_annotation_lines=SAMPLE_ANNOTATION_LINES, - project_kwargs=None): + loglevel=logging.DEBUG, project_kwargs=None): """ Create Project and PipelineInterface instances from default or given data. @@ -227,14 +277,24 @@ def interactive(prj_lines=PROJECT_CONFIG_LINES, table file :param collections.Iterable[str] sample_annotation_lines: lines for a sample annotations file + :param str | int loglevel: level at which to attend to log messages :param dict project_kwargs: keyword arguments for Project constructor :return Project, PipelineInterface: one Project and one PipelineInterface, """ + + # Establish logging for interactive session + import logging, sys + h = logging.StreamHandler(sys.stdout) + h.setLevel(loglevel) + logging.root.setLevel(loglevel) + logging.root.addHandler(h) + + # TODO: don't work with tempfiles once ctors tolerate Iterable. 
dirpath = tempfile.mkdtemp() path_conf_file = _write_temp( prj_lines, - dirpath=dirpath, fname="project_config.yaml") + dirpath=dirpath, fname=P_CONFIG_FILENAME) path_iface_file = _write_temp( iface_lines, dirpath=dirpath, fname="pipeline_interface.yaml") @@ -287,8 +347,8 @@ def _write_temp(lines, dirpath, fname): **{"derived_column_names": ", ".join(DERIVED_COLNAMES)} ) filepath = os.path.join(dirpath, fname) - _LOGGER.debug("Writing %d lines to file '%s'", len(lines), filepath) data_source_formatter = string.Formatter() + num_lines = 0 with open(filepath, 'w') as tmpf: for l in lines: if "{basedir}" in l: @@ -298,7 +358,67 @@ def _write_temp(lines, dirpath, fname): l = data_source_formatter.vformat( l, (), derived_columns_replacement) tmpf.write(l) - return tmpf.name + num_lines += 1 + _LOGGER.debug("Wrote %d line(s) to disk: '%s'", num_lines, filepath) + return filepath + + + +@pytest.fixture(scope="function") +def project_config_lines(): + """ Provide safer iteration over the lines for Project config file. """ + return PROJECT_CONFIG_LINES + + + +@pytest.fixture(scope="function") +def path_project_conf(tmpdir, project_config_lines): + """ + Write the Project configuration data. + + :param py.path.local.LocalPath tmpdir: temporary Path fixture + :param Iterable[str] project_config_lines: collection of lines for + Project configuration file + :return str: path to file with Project configuration data + """ + return _write_temp( + project_config_lines, tmpdir.strpath, P_CONFIG_FILENAME) + + + +@pytest.fixture(scope="function") +def proj_conf_data(path_project_conf): + """ + Read and parse raw Project configuration data. + + :param str path_project_conf: path to file with Project configuration data + :return Mapping: the data parsed from the configuration file written, + a Mapping form of the raw Project config text lines + """ + with open(path_project_conf, 'r') as conf_file: + return yaml.safe_load(conf_file) + + + +@pytest.fixture(scope="function") +def path_sample_anns(tmpdir, sample_annotation_lines): + """ + Write the sample annotations file and return the path to it. + + :param py.path.local.LocalPath tmpdir: temporary Path fixture + :param Iterable[str] sample_annotation_lines: collection of lines for + the sample annotations files + :return str: path to the sample annotations file that was written + """ + filepath = _write_temp( + sample_annotation_lines, tmpdir.strpath, ANNOTATIONS_FILENAME) + return filepath + + + +@pytest.fixture(scope="function") +def p_conf_fname(): + return P_CONFIG_FILENAME @@ -313,7 +433,7 @@ def write_project_files(request): """ dirpath = tempfile.mkdtemp() path_conf_file = _write_temp(PROJECT_CONFIG_LINES, - dirpath=dirpath, fname="project_config.yaml") + dirpath=dirpath, fname=P_CONFIG_FILENAME) path_merge_table_file = _write_temp( MERGE_TABLE_LINES, dirpath=dirpath, fname=MERGE_TABLE_FILENAME @@ -390,7 +510,7 @@ def request_class_attribute(req, attr): -def _create(request, data_type): +def _create(request, data_type, **kwargs): """ Create instance of desired type, using file in request class. 
@@ -403,7 +523,7 @@ def _create(request, data_type): _LOGGER.debug("Using %s as source of data to build %s", data_source, data_type.__class__.__name__) try: - return data_type(data_source) + return data_type(data_source, **kwargs) except EmptyDataError: with open(data_source, 'r') as datafile: _LOGGER.error("File contents:\n{}".format(datafile.readlines())) @@ -421,7 +541,9 @@ def proj(request): :return looper.models.Project: object created by parsing data in file pointed to by `request` class """ - return _create(request, Project) + p = _create(request, Project) + p.finalize_pipelines_directory() + return p diff --git a/tests/models/conftest.py b/tests/models/conftest.py index 410c6303..3882d052 100644 --- a/tests/models/conftest.py +++ b/tests/models/conftest.py @@ -1,8 +1,19 @@ -""" Models' tests' configuration. """ +""" Configuration for modules with independent tests of models. """ from collections import OrderedDict -import pytest +import copy +import os +import sys +if sys.version_info < (3, 3): + from collections import Iterable, Mapping +else: + from collections.abc import Iterable, Mapping + import pandas as pd +import pytest +import yaml + +from looper.models import DEFAULT_COMPUTE_RESOURCES_NAME, SAMPLE_NAME_COLNAME __author__ = "Vince Reuter" @@ -31,6 +42,146 @@ submission_command: sh """ +BASIC_PROTOMAP = {"ATAC": "ATACSeq.py"} + +# Compute resource bundles for pipeline interface configuration data +DEFAULT_RESOURCES = {"file_size": 0, "cores": 1, "mem": 8000, + "time": "0-01:00:00", "partition": "local"} +MIDSIZE_RESOURCES = {"file_size": 10, "cores": 8, "mem": 16000, + "time": "0-07:00:00", "partition": "serial"} +HUGE_RESOURCES = {"file_size": 30, "cores": 24, "mem": 64000, + "time": "30-00:00:00", "partition": "longq"} + + + +def pytest_generate_tests(metafunc): + """ Conditional customization of test cases in this directory. """ + try: + classname = metafunc.cls.__name__ + except AttributeError: + # Some functions don't belong to a class. + pass + else: + if classname == "ConstructorPathParsingTests": + # Provide test case with two PipelineInterface config bundles. + metafunc.parametrize( + argnames="config_bundles", + argvalues=[(atacseq_iface_without_resources(), + {"name": "sans-path"})]) + + + +@pytest.fixture(scope="function") +def atacseq_iface_without_resources(): + """ + Provide the ATAC-Seq pipeline interface as a fixture, without resources. + + Note that this represents the configuration data for the interface for a + single pipeline. In order to use this in the form that a PipelineInterface + expects, this needs to be the value to which a key is mapped within a + larger Mapping. + + :return Mapping: all of the pipeline interface configuration data for + ATAC-Seq, minus the resources section + """ + return { + "name": "ATACseq", + "looper_args": True, + "required_input_files": ["read1", "read2"], + "all_input_files": ["read1", "read2"], + "ngs_input_files": ["read1", "read2"], + "arguments": { + "--sample-name": "sample_name", + "--genome": "genome", + "--input": "read1", + "--input2": "read2", + "--single-or-paired": "read_type" + }, + "optional_arguments": { + "--frip-ref-peaks": "FRIP_ref", + "--prealignments": "prealignments", + "--genome-size": "macs_genome_size" + } + } + + + +@pytest.fixture(scope="function") +def atac_pipe_name(): + """ Oft-used as filename for pipeline module and PipelineInterface key. 
""" + return "ATACSeq.py" + + + +@pytest.fixture(scope="function") +def atacseq_iface_with_resources( + atacseq_iface_without_resources, resources): + """ + + :param dict atacseq_iface_without_resources: PipelineInterface config + data, minus a resources section + :param Mapping resources: resources section of PipelineInterface + configuration data + :return Mapping: pipeline interface data for ATAC-Seq pipeline, with all + of the base sections plus resources section + """ + iface_data = copy.deepcopy(atacseq_iface_without_resources) + iface_data["resources"] = copy.deepcopy(resources) + return iface_data + + + +@pytest.fixture(scope="function") +def atacseq_piface_data(atacseq_iface_with_resources, atac_pipe_name): + """ + Provide a test case with data for an ATACSeq PipelineInterface. + + :param str atac_pipe_name: name/key for the pipeline to which the + interface data pertains + :return dict: configuration data needed to create PipelineInterface + """ + return {atac_pipe_name: copy.deepcopy(atacseq_iface_with_resources)} + + + +@pytest.fixture(scope="function") +def basic_data_raw(): + return copy.deepcopy({ + "AttributeDict": {}, "ProtocolMapper": BASIC_PROTOMAP, + "Sample": {SAMPLE_NAME_COLNAME: "arbitrary-sample"}}) + + + +@pytest.fixture(scope="function") +def basic_instance_data(request, instance_raw_data): + """ + Transform the raw data for a basic model instance to comply with its ctor. + + :param pytest._pytest.fixtures.SubRequest request: test case requesting + the basic instance data + :param Mapping instance_raw_data: the raw data needed to create a + model instance + :return object: basic instance data in a form accepted by its constructor + """ + # Cleanup is free with _write_config, using request's temp folder. + transformation_by_class = { + "AttributeDict": lambda data: data, + "PipelineInterface": lambda data: + _write_config(data, request, "pipeline_interface.yaml"), + "ProtocolInterface": lambda data: + _write_config(data, request, "pipeline_interface.yaml"), + "ProtocolMapper": lambda data: data, + "Sample": lambda data: pd.Series(data)} + which_class = request.getfixturevalue("class_name") + return transformation_by_class[which_class](instance_raw_data) + + + +@pytest.fixture(scope="function") +def default_resources(): + """ Provide test case with default PipelineInterface resources section. """ + return copy.deepcopy(DEFAULT_RESOURCES) + @pytest.fixture(scope="function") @@ -42,6 +193,35 @@ def env_config_filepath(tmpdir): +@pytest.fixture(scope="function") +def huge_resources(): + """ Provide non-default resources spec. section for PipelineInterface. """ + return copy.deepcopy(HUGE_RESOURCES) + + + +@pytest.fixture(scope="function") +def instance_raw_data(request, basic_data_raw, atacseq_piface_data): + """ Supply the raw data for a basic model instance as a fixture. """ + which_class = request.getfixturevalue("class_name") + if which_class == "PipelineInterface": + return copy.deepcopy(atacseq_piface_data) + elif which_class == "ProtocolInterface": + return {"protocol_mapping": + copy.deepcopy(basic_data_raw["ProtocolMapper"]), + "pipelines": copy.deepcopy(atacseq_piface_data)} + else: + return copy.deepcopy(basic_data_raw[which_class]) + + + +@pytest.fixture(scope="function") +def midsize_resources(): + """ Provide non-default resources spec. section for PipelineInterface. """ + return copy.deepcopy(MIDSIZE_RESOURCES) + + + @pytest.fixture(scope="function") def minimal_project_conf_path(tmpdir): """ Write minimal sample annotations and project configuration. 
""" @@ -55,3 +235,88 @@ def minimal_project_conf_path(tmpdir): "metadata:\n sample_annotation: {}".format(anns_file) conf_file.write(config_lines) return conf_file.strpath + + + +@pytest.fixture(scope="function") +def path_proj_conf_file(tmpdir, proj_conf): + """ Write basic project configuration data and provide filepath. """ + conf_path = os.path.join(tmpdir.strpath, "project_config.yaml") + with open(conf_path, 'w') as conf: + yaml.safe_dump(proj_conf, conf) + return conf_path + + + +@pytest.fixture(scope="function") +def path_anns_file(request, tmpdir, sample_sheet): + """ Write basic annotations, optionally using a different delimiter. """ + filepath = os.path.join(tmpdir.strpath, "annotations.csv") + if "delimiter" in request.fixturenames: + delimiter = request.getfixturevalue("delimiter") + else: + delimiter = "," + with open(filepath, 'w') as anns_file: + sample_sheet.to_csv(anns_file, sep=delimiter, index=False) + return filepath + + + +@pytest.fixture(scope="function") +def piface_config_bundles(request, resources): + """ + Provide the ATAC-Seq pipeline interface as a fixture, including resources. + + Note that this represents the configuration data for the interface for a + single pipeline. In order to use this in the form that a PipelineInterface + expects, this needs to be the value to which a key is mapped within a + larger Mapping. + + :param pytest._pytest.fixtures.SubRequest request: hook into test case + requesting this fixture, which is queried for a resources value with + which to override the default if it's present. + :param Mapping resources: pipeline interface resource specification + :return Iterable[Mapping]: collection of bundles of pipeline interface + configuration bundles + """ + iface_config_datas = request.getfixturevalue("config_bundles") + if isinstance(iface_config_datas, Mapping): + data_bundles = iface_config_datas.values() + elif isinstance(iface_config_datas, Iterable): + data_bundles = iface_config_datas + else: + raise TypeError("Expected mapping or list collection of " + "PipelineInterface data: {} ({})".format( + iface_config_datas, type(iface_config_datas))) + resource_specification = request.getfixturevalue("resources") \ + if "resources" in request.fixturenames else resources + for config_bundle in data_bundles: + config_bundle.update(resource_specification) + return iface_config_datas + + + +@pytest.fixture(scope="function") +def resources(): + """ Basic PipelineInterface compute resources data. """ + return {DEFAULT_COMPUTE_RESOURCES_NAME: copy.deepcopy(DEFAULT_RESOURCES), + "huge": copy.copy(HUGE_RESOURCES)} + + + +def _write_config(data, request, filename): + """ + Write configuration data to file. + + :param str Sequence | Mapping data: data to write to file, YAML compliant + :param pytest._pytest.fixtures.SubRequest request: test case that + requested a fixture from which this function was called + :param str filename: name for the file to write + :return str: full path to the file written + """ + # We get cleanup for free by writing to file in requests temp folder. 
+ dirpath = request.getfixturevalue("tmpdir").strpath + filepath = os.path.join(dirpath, filename) + with open(filepath, 'w') as conf_file: + yaml.safe_dump(data, conf_file) + return filepath diff --git a/tests/models/independent/test_AttributeDict.py b/tests/models/independent/test_AttributeDict.py index 6d63430e..959447d5 100644 --- a/tests/models/independent/test_AttributeDict.py +++ b/tests/models/independent/test_AttributeDict.py @@ -162,8 +162,6 @@ class AttributeDictUpdateTests: """ - # TODO: ensure that we cover tests cases for both merged and non-merged. - _TOTALLY_ARBITRARY_VALUES = [ "abc", 123, (4, "text", ("nes", "ted")), list("-101") @@ -562,30 +560,6 @@ def test_all_defaults_no_metadata(self, tmpdir, proj, metadata_attribute): lines, _ = self._yaml_data(sample, filepath) assert all([metadata_attribute not in line for line in lines]) - - @pytest.mark.parametrize( - argnames="metadata_attribute", argvalues=ATTRDICT_METADATA.keys(), - ids=lambda attr_name: " metadata item = {} ".format(attr_name)) - def test_non_defaults_have_metadata( - self, tmpdir, proj, metadata_attribute): - """ Only non-default metadata elements are written to file. """ - for i, sample in enumerate(proj.samples): - filepath = os.path.join(tmpdir.strpath, "sample{}.yaml".format(i)) - - # Flip the value of an attribute in the project section. - newval = not ATTRDICT_METADATA[metadata_attribute] - lines, data = self._yaml_data( - sample, filepath, section_to_change="prj", - attr_to_change=metadata_attribute, newval=newval) - - # Is the test sensitive? - assert newval == data["prj"][metadata_attribute] - # How about specific? - num_meta_lines = sum(1 if any( - [meta_item in line for meta_item - in ATTRDICT_METADATA.keys()]) else 0 for line in lines) - assert 1 == num_meta_lines - @staticmethod def _yaml_data(sample, filepath, section_to_change=None, diff --git a/tests/models/independent/test_PipelineInterface.py b/tests/models/independent/test_PipelineInterface.py index 8da94069..c23350fa 100644 --- a/tests/models/independent/test_PipelineInterface.py +++ b/tests/models/independent/test_PipelineInterface.py @@ -1,14 +1,18 @@ """ Tests for PipelineInterface ADT. """ import copy +import inspect import itertools +import logging +import os import random +import mock import pytest import yaml from looper.models import \ - PipelineInterface, _InvalidResourceSpecificationException, \ + PipelineInterface, Sample, _InvalidResourceSpecificationException, \ _MissingPipelineConfigurationException, DEFAULT_COMPUTE_RESOURCES_NAME @@ -16,19 +20,13 @@ __email__ = "vreuter@virginia.edu" +_LOGGER = logging.getLogger(__name__) + + # Values with which to build pipeline interface keys and names PIPELINE_NAMES = ["ATACseq", "WGBS"] EXTENSIONS = [".py", ".sh", ".R"] -# Compute resource bundles for pipeline interface configuration data -DEFAULT_RESOURCES = {"file_size": 0, "cores": 1, "mem": 8000, - "time": "0-01:00:00", "partition": "local"} -MIDSIZE_RESOURCES = {"file_size": 10, "cores": 8, "mem": 16000, - "time": "0-07:00:00", "partition": "serial"} -HUGE_RESOURCES = {"file_size": 30, "cores": 24, "mem": 64000, - "time": "30-00:00:00", "partition": "longq"} -HUGE_RESOURCES_NAME = "huge" - def pytest_generate_tests(metafunc): @@ -36,6 +34,8 @@ def pytest_generate_tests(metafunc): try: parameters = metafunc.cls.PARAMETERS except AttributeError: + _LOGGER.debug("No indirect parameterization for test class: '{}'". 
+ format(metafunc.cls)) pass else: for name, values in parameters.items(): @@ -53,29 +53,6 @@ def basic_pipe_iface_data(request): -@pytest.fixture(scope="function") -def resources(): - """ Basic PipelineInterface compute resources data. """ - return {DEFAULT_COMPUTE_RESOURCES_NAME: copy.deepcopy(DEFAULT_RESOURCES), - "huge": copy.copy(HUGE_RESOURCES)} - - - -@pytest.mark.parametrize(argnames="from_file", argvalues=[False, True]) -def test_constructor_input_types(tmpdir, from_file, basic_pipe_iface_data): - """ PipelineInterface constructor handles Mapping or filepath. """ - if from_file: - pipe_iface_config = tmpdir.join("pipe-iface-conf.yaml").strpath - with open(tmpdir.join("pipe-iface-conf.yaml").strpath, 'w') as f: - yaml.safe_dump(basic_pipe_iface_data, f) - else: - pipe_iface_config = basic_pipe_iface_data - pi = PipelineInterface(pipe_iface_config) - assert basic_pipe_iface_data == pi.pipe_iface_config - assert pi.pipe_iface_file == (pipe_iface_config if from_file else None) - - - @pytest.fixture(scope="function") def pi_with_resources(request, basic_pipe_iface_data, resources): """ Add resource bundle data to each config section. """ @@ -95,10 +72,27 @@ def pi_with_resources(request, basic_pipe_iface_data, resources): +@pytest.mark.parametrize(argnames="from_file", argvalues=[False, True]) +def test_constructor_input_types(tmpdir, from_file, basic_pipe_iface_data): + """ PipelineInterface constructor handles Mapping or filepath. """ + if from_file: + pipe_iface_config = tmpdir.join("pipe-iface-conf.yaml").strpath + with open(tmpdir.join("pipe-iface-conf.yaml").strpath, 'w') as f: + yaml.safe_dump(basic_pipe_iface_data, f) + else: + pipe_iface_config = basic_pipe_iface_data + pi = PipelineInterface(pipe_iface_config) + assert basic_pipe_iface_data == pi.pipe_iface_config + assert pi.pipe_iface_file == (pipe_iface_config if from_file else None) + + + @pytest.mark.parametrize( argnames="funcname_and_kwargs", argvalues=[("choose_resource_package", {"file_size": 4}), - ("get_arg_string", {"sample": "arbitrary-sample-name"}), + ("get_arg_string", + {"sample": Sample( + {"sample_name": "arbitrary-sample-name"})}), ("get_attribute", {"attribute_key": "irrelevant-attr-name"}), ("get_pipeline_name", {}), @@ -115,9 +109,17 @@ def test_unconfigured_pipeline_exception( except KeyError: # Already no default resource package. pass + + # Each of the functions being tested should take pipeline_name arg, + # and we want to test behavior for the call on an unknown pipeline. 
funcname, kwargs = funcname_and_kwargs + func = getattr(pi, funcname) + required_parameters = inspect.getargspec(func).args + for parameter in ["pipeline_name", "pipeline"]: + if parameter in required_parameters and parameter not in kwargs: + kwargs[parameter] = "missing-pipeline" with pytest.raises(_MissingPipelineConfigurationException): - getattr(pi, funcname).__call__("missing-pipeline", **kwargs) + func.__call__(**kwargs) @@ -147,7 +149,8 @@ def test_get_pipeline_name_inferred(self): pipelines = [name + ext for name, ext in zip(pipeline_names, extensions)] pi_config_data = {pipeline: None for pipeline in pipelines} - pi = PipelineInterface(pi_config_data) + with mock.patch("looper.models.PipelineInterface._expand_paths"): + pi = PipelineInterface(pi_config_data) for expected_name, pipeline in zip(pipeline_names, pipelines): assert expected_name == pi.get_pipeline_name(pipeline) @@ -160,7 +163,7 @@ class PipelineInterfaceResourcePackageTests: def test_requires_default( - self, use_new_file_size, pi_with_resources): + self, use_new_file_size, pi_with_resources, huge_resources): """ If provided, resources specification needs 'default.' """ pi = pi_with_resources for name, pipeline in pi: @@ -172,7 +175,7 @@ def test_requires_default( assert "default" not in pipeline["resources"] with pytest.raises(_InvalidResourceSpecificationException): pi.choose_resource_package( - name, file_size=HUGE_RESOURCES["file_size"] + 1) + name, file_size=huge_resources["file_size"] + 1) def test_negative_file_size_request( @@ -204,11 +207,11 @@ def test_resources_not_required( (16, "midsize"), (64, "huge")]) def test_selects_proper_resource_package( self, use_new_file_size, pi_with_resources, - file_size, expected_package_name): + file_size, expected_package_name, midsize_resources): """ Minimal resource package sufficient for pipeline and file size. """ for pipe_data in pi_with_resources.pipelines: pipe_data["resources"].update( - {"midsize": copy.deepcopy(MIDSIZE_RESOURCES)}) + {"midsize": copy.deepcopy(midsize_resources)}) for pipe_name, pipe_data in pi_with_resources: observed_package = pi_with_resources.choose_resource_package( pipe_name, file_size) @@ -232,7 +235,8 @@ def test_negative_file_size_prohibited( def test_file_size_spec_not_required_for_default( - self, use_new_file_size, basic_pipe_iface_data): + self, use_new_file_size, basic_pipe_iface_data, + default_resources, huge_resources, midsize_resources): """ Default package implies minimum file size of zero. """ def clear_file_size(resource_package): @@ -244,7 +248,7 @@ def clear_file_size(resource_package): resources_data = dict(zip( ["default", "midsize", "huge"], [copy.deepcopy(data) for data in - [DEFAULT_RESOURCES, MIDSIZE_RESOURCES, HUGE_RESOURCES]])) + [default_resources, midsize_resources, huge_resources]])) for pack_name, pack_data in resources_data.items(): # Use file size spec name as appropriate; clean default package. if pack_name == "default": @@ -301,13 +305,14 @@ def test_default_package_new_name_zero_size( def test_file_size_spec_required_for_non_default_packages( - self, use_new_file_size, basic_pipe_iface_data): + self, use_new_file_size, basic_pipe_iface_data, + default_resources, huge_resources): """ Resource packages besides default require file size. """ # Establish the resource specification. 
resource_package_data = { - "default": copy.deepcopy(DEFAULT_RESOURCES), - "huge": copy.deepcopy(HUGE_RESOURCES)} + "default": copy.deepcopy(default_resources), + "huge": copy.deepcopy(huge_resources)} # Remove file size for non-default; set it for default. del resource_package_data["huge"]["file_size"] @@ -330,6 +335,119 @@ def test_file_size_spec_required_for_non_default_packages( +class ConstructorPathParsingTests: + """ The constructor is responsible for expanding pipeline path(s). """ + + ADD_PATH = [True, False] + PIPELINE_KEYS = ["ATACSeq.py", "no_path.py"] + RELATIVE_PATH_DATA = [ + ("./arbitrary-test-pipelines", + {}, + "./arbitrary-test-pipelines"), + ("path/to/$TEMP_PIPE_LOCS", + {"TEMP_PIPE_LOCS": "validation-value"}, + "path/to/validation-value")] + ABSOLUTE_PATHS = [ + os.path.join("~", "code_home", "bioinformatics"), + os.path.join("$TEMP_TEST_HOME", "subfolder"), + os.path.join("~", "$TEMPORARY_SUBFOLDER", "leaf")] + ABSPATH_ENVVARS = {"TEMP_TEST_HOME": "tmptest-home-folder", + "TEMPORARY_SUBFOLDER": "temp-subfolder"} + EXPECTED_PATHS_ABSOLUTE = [ + os.path.join(os.path.expanduser("~"), "code_home", + "bioinformatics"), + os.path.join("tmptest-home-folder", "subfolder"), + os.path.join(os.path.expanduser("~"), "temp-subfolder", "leaf")] + + + @pytest.fixture(scope="function") + def pipe_iface_data(self, piface_config_bundles): + return dict(zip(self.PIPELINE_KEYS, piface_config_bundles)) + + + @pytest.fixture(scope="function", autouse=True) + def apply_envvars(self, request): + """ Use environment variables temporarily. """ + + if "envvars" not in request.fixturenames: + # We're autousing, so check for the relevant fixture. + return + + original_envvars = {} + new_envvars = request.getfixturevalue("envvars") + + # Remember values that are replaced as variables are updated. + for name, value in new_envvars.items(): + try: + original_envvars[name] = os.environ[name] + except KeyError: + pass + os.environ[name] = value + + def restore(): + # Restore swapped variables and delete added ones. + for k, v in new_envvars.items(): + try: + os.environ[k] = original_envvars[k] + except KeyError: + del os.environ[k] + request.addfinalizer(restore) + + + def test_no_path(self, config_bundles, piface_config_bundles, + pipe_iface_data): + """ PipelineInterface config sections need not specify path. """ + pi = PipelineInterface(pipe_iface_data) + for pipe_key in self.PIPELINE_KEYS: + piface_config = pi[pipe_key] + # Specific negative test of interest. + assert "path" not in piface_config + # Positive control validation. + assert pipe_iface_data[pipe_key] == piface_config + + + @pytest.mark.parametrize( + argnames=["pipe_path", "envvars", "expected"], + argvalues=RELATIVE_PATH_DATA) + def test_relative_path( + self, config_bundles, piface_config_bundles, pipe_iface_data, + pipe_path, envvars, expected, apply_envvars): + """ + PipelineInterface construction expands pipeline path. + + Environment variable(s) expand(s), but the path remains relative + if specified as such, deferring the joining with pipelines location, + which makes the path absolute, until the path is actually used. 
+ + """ + for add_path, pipe_key in zip(self.ADD_PATH, self.PIPELINE_KEYS): + if add_path: + pipe_iface_data[pipe_key]["path"] = pipe_path + pi = PipelineInterface(pipe_iface_data) + for add_path, pipe_key in zip(self.ADD_PATH, self.PIPELINE_KEYS): + if add_path: + assert expected == pi[pipe_key]["path"] + else: + assert "path" not in pi[pipe_key] + + + @pytest.mark.parametrize( + argnames=["pipe_path", "envvars", "expected"], + argvalues=zip(ABSOLUTE_PATHS, + len(ABSOLUTE_PATHS) * [ABSPATH_ENVVARS], + EXPECTED_PATHS_ABSOLUTE)) + def test_path_expansion( + self, pipe_path, envvars, expected, + config_bundles, piface_config_bundles, pipe_iface_data): + """ User/environment variables are expanded. """ + for piface_data in pipe_iface_data.values(): + piface_data["path"] = pipe_path + pi = PipelineInterface(pipe_iface_data) + for _, piface_data in pi: + assert expected == piface_data["path"] + + + @pytest.mark.skip("Not implemented") class PipelineInterfaceArgstringTests: """ """ diff --git a/tests/models/independent/test_Project.py b/tests/models/independent/test_Project.py index e28c992c..3683a988 100644 --- a/tests/models/independent/test_Project.py +++ b/tests/models/independent/test_Project.py @@ -8,7 +8,7 @@ import yaml import looper from looper.models import \ - AttributeDict, Project, \ + AttributeDict, Project, Sample, \ _MissingMetadataException, SAMPLE_ANNOTATIONS_KEY @@ -23,7 +23,7 @@ def project_config_data(): "metadata": { SAMPLE_ANNOTATIONS_KEY: "sample-anns-filler.csv", "output_dir": "$HOME/sequencing/output", - "pipelines_dir": "${CODE}/pipelines"}, + "pipeline_interfaces": "${CODE}/pipelines"}, "data_sources": {"arbitrary": "placeholder/data/{filename}"}, "genomes": {"human": "hg19", "mouse": "mm10"}, "transcriptomes": {"human": "hg19_cdna", "mouse": "mm10_cdna"}} @@ -42,6 +42,105 @@ def pytest_generate_tests(metafunc): +class ProjectConstructorTests: + """ Tests of Project constructor, particularly behavioral details. """ + + + def test_no_samples(self, path_empty_project): + """ Lack of Samples is unproblematic. """ + p = Project(path_empty_project) + assert 0 == p.num_samples + assert [] == list(p.samples) + + + + @pytest.mark.parametrize( + argnames="spec_type", argvalues=["as_null", "missing"], + ids=lambda spec: "spec_type={}".format(spec)) + @pytest.mark.parametrize( + argnames="lazy", argvalues=[False, True], + ids=lambda lazy: "lazy={}".format(lazy)) + def test_no_merge_table_in_config( + self, tmpdir, spec_type, lazy, proj_conf_data, path_sample_anns): + """ Merge table attribute remains null if config lacks merge_table. """ + metadata = proj_conf_data["metadata"] + try: + assert "merge_table" in metadata + except AssertionError: + print("Project metadata section lacks 'merge_table'") + print("All config data: {}".format(proj_conf_data)) + print("Config metadata section: {}".format(metadata)) + raise + if spec_type == "as_null": + metadata["merge_table"] = None + elif spec_type == "missing": + del metadata["merge_table"] + else: + raise ValueError("Unknown way to specify no merge table: {}". + format(spec_type)) + path_config_file = os.path.join(tmpdir.strpath, "project_config.yaml") + with open(path_config_file, 'w') as conf_file: + yaml.safe_dump(proj_conf_data, conf_file) + p = Project(path_config_file, defer_sample_construction=lazy) + assert p.merge_table is None + + + @pytest.mark.skip("Not implemented") + def test_merge_table_construction( + self, tmpdir, project_config_data): + """ Merge table is constructed iff samples are constructed. 
""" + # TODO: implement + pass + + + def test_counting_samples_doesnt_create_samples( + self, sample_annotation_lines, + path_project_conf, path_sample_anns): + """ User can ask about sample count without creating samples. """ + # We're not parameterized in terms of Sample creation laziness here + # because a piece of the test's essence is Sample collection absence. + p = Project(path_project_conf, defer_sample_construction=True) + assert p._samples is None + expected_sample_count = sum(1 for _ in sample_annotation_lines) - 1 + assert expected_sample_count == p.num_samples + assert p._samples is None + + + @pytest.mark.parametrize(argnames="lazy", argvalues=[False, True]) + def test_sample_creation_laziness( + self, path_project_conf, path_sample_anns, lazy): + """ Project offers control over whether to create base Sample(s). """ + + p = Project(path_project_conf, defer_sample_construction=lazy) + + if lazy: + # Samples should remain null during lazy Project construction. + assert p._samples is None + + else: + # Eager Project construction builds Sample objects. + assert p._samples is not None + with open(path_sample_anns, 'r') as anns_file: + anns_file_lines = anns_file.readlines() + + # Sum excludes the header line. + num_samples_expected = sum(1 for l in anns_file_lines[1:] if l) + assert num_samples_expected == len(p._samples) + assert all([Sample == type(s) for s in p._samples]) + + + @pytest.mark.parametrize(argnames="lazy", argvalues=[False, True]) + def test_sample_name_availability( + self, path_project_conf, path_sample_anns, lazy): + """ Sample names always available on Project. """ + with open(path_sample_anns, 'r') as anns_file: + expected_sample_names = \ + [l.split(",")[0] for l in anns_file.readlines()[1:] if l] + p = Project(path_project_conf, defer_sample_construction=lazy) + assert expected_sample_names == list(p.sample_names) + + + class ProjectRequirementsTests: """ Tests for a Project's set of requirements. """ @@ -220,7 +319,7 @@ def _assert_null_compute_environment(project): @staticmethod def default_compute_settings(project): - settings_filepath = project.default_cmpenv_file + settings_filepath = project.default_compute_envfile with open(settings_filepath, 'r') as settings_data_file: settings = yaml.safe_load(settings_data_file) return {"environment": copy.deepcopy(settings), @@ -276,7 +375,7 @@ def create_project( # Write the config and build the Project. conf_file_path = _write_project_config( project_config_data, dirpath=dirpath) - with mock.patch("looper.models.Project.add_sample_sheet"): + with mock.patch("looper.models.check_sheet"): project = Project(conf_file_path, default_compute=default_env_path) return expected_derived_columns, project @@ -489,7 +588,7 @@ def observed_argstring_elements( conf_file_path = _write_project_config(confdata, dirpath=confpath) # Subvert requirement for sample annotations file. - with mock.patch("looper.models.Project.add_sample_sheet"): + with mock.patch("looper.models.check_sheet"): project = Project(conf_file_path, default_compute=envpath) argstring = project.get_arg_string(pipeline) @@ -545,6 +644,7 @@ def _parse_flags_and_options(command_elements): return parsed_command_elements + def _write_project_config(config_data, dirpath, filename="proj-conf.yaml"): """ Write the configuration file for a Project. 
@@ -574,8 +674,6 @@ def _env_paths_to_names(envs): """ reduced = {} for env_name, env_data in envs.items(): - # DEBUG - print(env_name) reduced[env_name] = _compute_paths_to_names(env_data) return reduced @@ -594,15 +692,5 @@ def _compute_paths_to_names(env): """ reduced = copy.deepcopy(env) for pathvar in ["submission_template"]: - - # DEBUG - try: - _, reduced[pathvar] = os.path.split(reduced[pathvar]) - except KeyError: - print("REDUCED: {}".format(reduced)) - print("ENV: {}".format(env)) - print("KEYS: {}".format(reduced.keys())) - print("ENV KEYS: {}".format(env.keys())) - raise - + _, reduced[pathvar] = os.path.split(reduced[pathvar]) return reduced diff --git a/tests/models/independent/test_ProtocolInterface.py b/tests/models/independent/test_ProtocolInterface.py new file mode 100644 index 00000000..4c673dbb --- /dev/null +++ b/tests/models/independent/test_ProtocolInterface.py @@ -0,0 +1,690 @@ +""" Tests for ProtocolInterface, for Project/PipelineInterface interaction. """ + +import inspect +import itertools +import logging +import os +import sys +if sys.version_info < (3, ): + import __builtin__ as builtins +else: + import builtins + +import mock +import pytest +import yaml + +from looper import models, DEV_LOGGING_FMT +from looper.models import ProtocolInterface, Sample + + +__author__ = "Vince Reuter" +__email__ = "vreuter@virginia.edu" + + +SUBTYPES_KEY = ProtocolInterface.SUBTYPE_MAPPING_SECTION +ATAC_PROTOCOL_NAME = "ATAC" +SAMPLE_IMPORT = "from looper.models import Sample" + + +class CustomExceptionA(Exception): + def __init__(self, *args): + super(CustomExceptionA, self).__init__(*args) + +class CustomExceptionB(Exception): + def __init__(self, *args): + super(CustomExceptionB, self).__init__(*args) + +CUSTOM_EXCEPTIONS = [CustomExceptionA, CustomExceptionB] + + +# Test case parameterization, but here for import locality and +# to reduce clutter in the pararmeterization declaration. +_, BUILTIN_EXCEPTIONS_WITHOUT_REQUIRED_ARGUMENTS = \ + list(map(list, zip(*inspect.getmembers( + builtins, lambda o: inspect.isclass(o) and + issubclass(o, BaseException) and + not issubclass(o, UnicodeError))))) + + +def pytest_generate_tests(metafunc): + """ Customization of this module's test cases. """ + if "subtypes_section_spec_type" in metafunc.fixturenames: + # Subtypes section can be raw string or mapping. + metafunc.parametrize(argnames="subtypes_section_spec_type", + argvalues=[str, dict]) + + + +@pytest.fixture(scope="function") +def path_config_file(request, tmpdir, atac_pipe_name): + """ + Write PipelineInterface configuration data to disk. + + Grab the data from the test case's appropriate fixture. 
Also check the + test case parameterization for pipeline path specification, adding it to + the configuration data before writing to disk if the path specification is + present + + :param pytest._pytest.fixtures.SubRequest request: test case requesting + this fixture + :param py.path.local.LocalPath tmpdir: temporary directory fixture + :param str atac_pipe_name: name/key for ATAC-Seq pipeline; this should + also be used by the requesting test case if a path is to be added; + separating the name from the folder path allows parameterization of + the test case in terms of folder path, with pipeline name appended + after the fact (that is, the name fixture can't be used in the ) + :return str: path to the configuration file written + """ + conf_data = request.getfixturevalue("atacseq_piface_data") + if "pipe_path" in request.fixturenames: + pipeline_dirpath = request.getfixturevalue("pipe_path") + pipe_path = os.path.join(pipeline_dirpath, atac_pipe_name) + # Pipeline key/name is mapped to the interface data; insert path in + # that Mapping, not at the top level, in which name/key is mapped to + # interface data bundle. + for iface_bundle in conf_data.values(): + iface_bundle["path"] = pipe_path + return _write_config_data(protomap={ATAC_PROTOCOL_NAME: atac_pipe_name}, + conf_data=conf_data, dirpath=tmpdir.strpath) + + + +class PipelinePathResolutionTests: + """ Project requests pipeline information via an interface key. """ + + + def test_no_path(self, atacseq_piface_data, + path_config_file, atac_pipe_name): + """ Without explicit path, pipeline is assumed parallel to config. """ + + piface = ProtocolInterface(path_config_file) + + # The pipeline is assumed to live alongside its configuration file. + config_dirpath = os.path.dirname(path_config_file) + expected_pipe_path = os.path.join(config_dirpath, atac_pipe_name) + + _, full_pipe_path, _ = \ + piface.finalize_pipeline_key_and_paths(atac_pipe_name) + assert expected_pipe_path == full_pipe_path + + + def test_relpath_with_dot_becomes_absolute( + self, tmpdir, atac_pipe_name, atacseq_piface_data): + """ Leading dot drops from relative path, and it's made absolute. """ + path_parts = ["relpath", "to", "pipelines", atac_pipe_name] + sans_dot_path = os.path.join(*path_parts) + pipe_path = os.path.join(".", sans_dot_path) + atacseq_piface_data[atac_pipe_name]["path"] = pipe_path + + exp_path = os.path.join(tmpdir.strpath, sans_dot_path) + + path_config_file = _write_config_data( + protomap={ATAC_PROTOCOL_NAME: atac_pipe_name}, + conf_data=atacseq_piface_data, dirpath=tmpdir.strpath) + piface = ProtocolInterface(path_config_file) + _, obs_path, _ = piface.finalize_pipeline_key_and_paths(atac_pipe_name) + # Dot may remain in path, so assert equality of absolute paths. + assert os.path.abspath(exp_path) == os.path.abspath(obs_path) + + + @pytest.mark.parametrize( + argnames="pipe_path", argvalues=["relative/pipelines/path"]) + def test_non_dot_relpath_becomes_absolute( + self, atacseq_piface_data, path_config_file, + tmpdir, pipe_path, atac_pipe_name): + """ Relative pipeline path is made absolute when requested by key. 
""" + # TODO: constant-ify "path" and "ATACSeq.py", as well as possibly "pipelines" + # and "protocol_mapping" section names of PipelineInterface + exp_path = os.path.join( + tmpdir.strpath, pipe_path, atac_pipe_name) + piface = ProtocolInterface(path_config_file) + _, obs_path, _ = piface.finalize_pipeline_key_and_paths(atac_pipe_name) + assert exp_path == obs_path + + + @pytest.mark.parametrize( + argnames=["pipe_path", "expected_path_base"], + argvalues=[(os.path.join("$HOME", "code-base-home", "biopipes"), + os.path.join(os.path.expandvars("$HOME"), + "code-base-home", "biopipes")), + (os.path.join("~", "bioinformatics-pipelines"), + os.path.join(os.path.expanduser("~"), + "bioinformatics-pipelines"))]) + def test_absolute_path( + self, atacseq_piface_data, path_config_file, tmpdir, pipe_path, + expected_path_base, atac_pipe_name): + """ Absolute path regardless of variables works as pipeline path. """ + exp_path = os.path.join( + tmpdir.strpath, expected_path_base, atac_pipe_name) + piface = ProtocolInterface(path_config_file) + _, obs_path, _ = piface.finalize_pipeline_key_and_paths(atac_pipe_name) + assert exp_path == obs_path + + + @pytest.mark.xfail( + condition=models._LOGGER.getEffectiveLevel() < logging.WARN, + reason="Insufficient logging level to capture warning message: {}". + format(models._LOGGER.getEffectiveLevel())) + @pytest.mark.parametrize( + argnames="pipe_path", + argvalues=["nonexistent.py", "path/to/missing.py", + "/abs/path/to/mythical"]) + def test_warns_about_nonexistent_pipeline_script_path( + self, atacseq_piface_data, path_config_file, + tmpdir, pipe_path, atac_pipe_name): + """ Nonexistent, resolved pipeline script path generates warning. """ + name_log_file = "temp-test-log.txt" + path_log_file = os.path.join(tmpdir.strpath, name_log_file) + temp_hdlr = logging.FileHandler(path_log_file, mode='w') + fmt = logging.Formatter(DEV_LOGGING_FMT) + temp_hdlr.setFormatter(fmt) + temp_hdlr.setLevel(logging.WARN) + models._LOGGER.handlers.append(temp_hdlr) + pi = ProtocolInterface(path_config_file) + pi.finalize_pipeline_key_and_paths(atac_pipe_name) + with open(path_log_file, 'r') as logfile: + loglines = logfile.readlines() + assert 1 == len(loglines) + logmsg = loglines[0] + assert "WARN" in logmsg and pipe_path in logmsg + + + +class SampleSubtypeTests: + """ ProtocolInterface attempts import of pipeline-specific Sample. """ + + # Basic cases + # 1 -- unmapped pipeline + # 2 -- subtypes section is single string + # 3 -- subtypes section is mapping () + # 4 -- subtypes section is missing (use single Sample subclass if there is one, base Sample for 0 or > 1 Sample subtypes defined) + # 5 -- subtypes section is null --> ALWAYS USE BASE SAMPLE (backdoor user side mechanism for making this be so) + + # Import trouble cases + # No __main__ + # Argument parsing + # missing import(s) + + # Subcases + # 2 -- single string + # 2a -- named class isn't defined in the module + # 2b -- named class is in module but isn't defined + # + + PROTOCOL_NAME_VARIANTS = [ + "ATAC-Seq", "ATACSeq", "ATACseq", "ATAC-seq", "ATAC", + "ATACSEQ", "ATAC-SEQ", "atac", "atacseq", "atac-seq"] + + + @pytest.mark.parametrize( + argnames="pipe_key", + argvalues=["{}.py".format(proto) for proto + in PROTOCOL_NAME_VARIANTS]) + @pytest.mark.parametrize( + argnames="protocol", + argvalues=PROTOCOL_NAME_VARIANTS) + def test_pipeline_key_match_is_strict( + self, tmpdir, pipe_key, protocol, atac_pipe_name, + atacseq_iface_with_resources): + """ Request for Sample subtype for unmapped pipeline is KeyError. 
""" + + # Create the ProtocolInterface. + strict_pipe_key = atac_pipe_name + protocol_mapping = {protocol: strict_pipe_key} + confpath = _write_config_data( + protomap=protocol_mapping, dirpath=tmpdir.strpath, + conf_data={strict_pipe_key: atacseq_iface_with_resources}) + piface = ProtocolInterface(confpath) + + # The absolute pipeline path is the pipeline name, joined to the + # ProtocolInterface's pipelines location. This location is the + # location from which a Sample subtype import is attempted. + full_pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + + # TODO: update to pytest.raises(None) if/when 3.1 adoption. + # Match between pipeline key specified and the strict key used in + # the mapping --> no error while mismatch --> error. + if pipe_key == atac_pipe_name: + piface.fetch_sample_subtype( + protocol, pipe_key, full_pipe_path=full_pipe_path) + else: + with pytest.raises(KeyError): + piface.fetch_sample_subtype( + protocol, pipe_key, full_pipe_path=full_pipe_path) + + + @pytest.mark.parametrize( + argnames=["mapped_protocol", "requested_protocol"], + argvalues=itertools.combinations(PROTOCOL_NAME_VARIANTS, 2)) + def test_protocol_match_is_fuzzy( + self, tmpdir, mapped_protocol, atac_pipe_name, + requested_protocol, atacseq_piface_data): + """ Punctuation and case mismatches are tolerated in protocol name. """ + + # Needed to create the ProtocolInterface. + protomap = {mapped_protocol: atac_pipe_name} + # Needed to invoke the function under test. + full_pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + + # PipelineInterface data provided maps name to actual interface data + # Mapping, so modify the ATAC-Seq mapping within that. + # In this test, we're interested in the resolution of the protocol + # name, that with it we can grab the name of a class. Thus, we + # need only an arbitrary class name about which we can make the + # relevant assertion(s). + test_class_name = "TotallyArbitrary" + atacseq_piface_data[atac_pipe_name][SUBTYPES_KEY] = \ + test_class_name + + # Write out configuration data and create the ProtocolInterface. + conf_path = _write_config_data( + protomap=protomap, conf_data=atacseq_piface_data, + dirpath=tmpdir.strpath) + piface = ProtocolInterface(conf_path) + + # Make the call under test, patching the function protected + # function that's called iff the protocol name match succeeds. + with mock.patch("looper.models._import_sample_subtype", + return_value=None) as mocked_import: + # Return value is irrelevant; the effect of the protocol name + # match/resolution is entirely observable via the argument to the + # protected import function. + piface.fetch_sample_subtype( + protocol=requested_protocol, + strict_pipe_key=atac_pipe_name, + full_pipe_path=full_pipe_path) + # When the protocol name match/resolution succeeds, the name of the + # Sample subtype class to which it was mapped is passed as an + # argument to the protected import function. + mocked_import.assert_called_with(full_pipe_path, test_class_name) + + + + @pytest.mark.parametrize( + argnames="error_type", + argvalues=CUSTOM_EXCEPTIONS + + BUILTIN_EXCEPTIONS_WITHOUT_REQUIRED_ARGUMENTS) + def test_problematic_import_builtin_exception( + self, tmpdir, error_type, atac_pipe_name, atacseq_piface_data): + """ Base Sample is used if builtin exception on pipeline import. 
""" + + # Values needed for object creation and function invocation + protocol = ATAC_PROTOCOL_NAME + protocol_mapping = {protocol: atac_pipe_name} + full_pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + + # Modify the data for the ProtocolInterface and create it. + atacseq_piface_data[atac_pipe_name][SUBTYPES_KEY] = \ + {protocol: "IrrelevantClassname"} + conf_path = _write_config_data( + protomap=protocol_mapping, + conf_data=atacseq_piface_data, dirpath=tmpdir.strpath) + piface = ProtocolInterface(conf_path) + + # We want to test the effect of an encounter with an exception during + # the import attempt, so patch the relevant function with a function + # to raise the parameterized exception type. + with mock.patch( + "looper.utils.import_from_source", + side_effect=error_type()): + subtype = piface.fetch_sample_subtype( + protocol=protocol, strict_pipe_key=atac_pipe_name, + full_pipe_path=full_pipe_path) + # When the import hits an exception, the base Sample type is used. + assert subtype is Sample + + + @pytest.mark.parametrize( + argnames="num_sample_subclasses", argvalues=[0, 1, 2], + ids=lambda n_samples: + " num_sample_subclasses = {} ".format(n_samples)) + @pytest.mark.parametrize( + argnames="decoy_class", argvalues=[False, True], + ids=lambda decoy: " decoy_class = {} ".format(decoy)) + def test_no_subtypes_section( + self, tmpdir, path_config_file, atac_pipe_name, + num_sample_subclasses, decoy_class): + """ DEPENDS ON PIPELINE MODULE CONTENT """ + + # Basic values to invoke the function under test + pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + piface = ProtocolInterface(path_config_file) + + # How to define the Sample subtypes (and non-subtype) + sample_subclass_basename = "SampleSubclass" + sample_lines = [ + "class {basename}{index}(Sample):", + "\tdef __init__(*args, **kwargs):", + "\t\tsuper({basename}{index}, self).__init__(*args, **kwargs)"] + non_sample_class_lines = [ + "class NonSample(object):", "\tdef __init__(self):", + "\t\tsuper(NonSample, self).__init__()"] + + # We expect the subtype iff there's just one Sample subtype. + if num_sample_subclasses == 1: + exp_subtype_name = "{}0".format(sample_subclass_basename) + else: + exp_subtype_name = Sample.__name__ + + # Fill in the class definition template lines. + def populate_sample_lines(n_classes): + return [[sample_lines[0].format(basename=sample_subclass_basename, + index=class_index), + sample_lines[1], + sample_lines[2].format(basename=sample_subclass_basename, + index=class_index)] + for class_index in range(n_classes)] + + # Determine the groups of lines to permute. + class_lines_pool = populate_sample_lines(num_sample_subclasses) + if decoy_class: + class_lines_pool.append(non_sample_class_lines) + + # Subtype fetch is independent of class declaration order, + # so validate each permutation. + for lines_order in itertools.permutations(class_lines_pool): + # Write out class declarations and invoke the function under test. + _create_module(lines_by_class=lines_order, filepath=pipe_path) + subtype = piface.fetch_sample_subtype( + protocol=ATAC_PROTOCOL_NAME, + strict_pipe_key=atac_pipe_name, full_pipe_path=pipe_path) + + # Make the assertion on subtype name, getting additional + # information about the module that we defined if there's failure. + try: + assert exp_subtype_name == subtype.__name__ + except AssertionError: + with open(pipe_path, 'r') as f: + print("PIPELINE MODULE LINES: {}". 
+ format("".join(f.readlines()))) + raise + + + @pytest.mark.parametrize( + argnames="subtype_name", argvalues=[Sample.__name__]) + def test_Sample_as_name( + self, tmpdir, subtype_name, atac_pipe_name, + subtypes_section_spec_type, atacseq_piface_data_with_subtypes): + """ A pipeline may redeclare Sample as a subtype name. """ + + # General values for the test + subtype_name = Sample.__name__ + pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + + # Define the subtype in the pipeline module. + lines = ["from looper.models import Sample\n", + "class {}({}):\n".format(subtype_name, subtype_name), + "\tdef __init__(self, *args, **kwargs):\n", + "\t\tsuper({}, self).__init__(*args, **kwargs)\n". + format(subtype_name)] + with open(pipe_path, 'w') as pipe_module_file: + for l in lines: + pipe_module_file.write(l) + + conf_path = _write_config_data( + protomap={ATAC_PROTOCOL_NAME: atac_pipe_name}, + conf_data=atacseq_piface_data_with_subtypes, + dirpath=tmpdir.strpath) + piface = ProtocolInterface(conf_path) + subtype = piface.fetch_sample_subtype( + protocol=ATAC_PROTOCOL_NAME, + strict_pipe_key=atac_pipe_name, full_pipe_path=pipe_path) + + # Establish that subclass relationship is improper. + assert issubclass(Sample, Sample) + # Our subtype derives from base Sample... + assert issubclass(subtype, Sample) + # ...but not vice-versa. + assert not issubclass(Sample, subtype) + # And we retained the name. + assert subtype.__name__ == Sample.__name__ + + + @pytest.mark.parametrize(argnames="subtype_name", argvalues=["NonSample"]) + @pytest.mark.parametrize( + argnames="test_type", argvalues=["return_sample", "class_found"]) + def test_subtype_is_not_Sample( + self, tmpdir, atac_pipe_name, subtype_name, test_type, + atacseq_piface_data_with_subtypes, subtypes_section_spec_type): + """ Subtype in interface but not in pipeline is exceptional. """ + + pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + + # Write out pipeline module file with non-Sample class definition. + lines = _class_definition_lines(subtype_name, name_super_type="object") + with open(pipe_path, 'w') as pipe_module_file: + pipe_module_file.write("{}\n\n".format(SAMPLE_IMPORT)) + for l in lines: + pipe_module_file.write(l) + + # Create the ProtocolInterface and do the test call. + path_config_file = _write_config_data( + protomap={ATAC_PROTOCOL_NAME: atac_pipe_name}, + conf_data=atacseq_piface_data_with_subtypes, + dirpath=tmpdir.strpath) + piface = ProtocolInterface(path_config_file) + with pytest.raises(ValueError): + piface.fetch_sample_subtype( + protocol=ATAC_PROTOCOL_NAME, + strict_pipe_key=atac_pipe_name, full_pipe_path=pipe_path) + + + @pytest.mark.parametrize(argnames="subtype_name", argvalues=["irrelevant"]) + @pytest.mark.parametrize(argnames="decoy_class", argvalues=[False, True], + ids=lambda decoy: " decoy = {} ".format(decoy)) + def test_subtype_not_implemented( + self, tmpdir, atac_pipe_name, subtype_name, decoy_class, + atacseq_piface_data_with_subtypes, subtypes_section_spec_type): + """ Subtype that doesn't extend Sample isn't used. """ + # Create the pipeline module. 
+ pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + lines = _class_definition_lines("Decoy", "object") \ + if decoy_class else [] + with open(pipe_path, 'w') as modfile: + modfile.write("{}\n\n".format(SAMPLE_IMPORT)) + for l in lines: + modfile.write(l) + conf_path = _write_config_data( + protomap={ATAC_PROTOCOL_NAME: atac_pipe_name}, + conf_data=atacseq_piface_data_with_subtypes, + dirpath=tmpdir.strpath) + piface = ProtocolInterface(conf_path) + with pytest.raises(ValueError): + piface.fetch_sample_subtype( + protocol=ATAC_PROTOCOL_NAME, + strict_pipe_key=atac_pipe_name, full_pipe_path=pipe_path) + + + @pytest.mark.parametrize( + argnames="subtype_name", argvalues=["SubsampleA", "SubsampleB"]) + def test_matches_sample_subtype( + self, tmpdir, atac_pipe_name, subtype_name, atacseq_piface_data): + """ Fetch of subtype is specific even from among multiple subtypes. """ + + # Basic values + pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + decoy_class = "Decoy" + decoy_proto = "DECOY" + + # Update the ProtocolInterface data and write it out. + atacseq_piface_data[atac_pipe_name][SUBTYPES_KEY] = { + ATAC_PROTOCOL_NAME: subtype_name, decoy_proto: decoy_class} + conf_path = _write_config_data( + protomap={ATAC_PROTOCOL_NAME: atac_pipe_name, + decoy_proto: atac_pipe_name}, + conf_data=atacseq_piface_data, dirpath=tmpdir.strpath) + + # Create the collection of definition lines for each class. + legit_lines = _class_definition_lines(subtype_name, Sample.__name__) + decoy_lines = _class_definition_lines(decoy_class, Sample.__name__) + + for lines_order in itertools.permutations([legit_lines, decoy_lines]): + with open(pipe_path, 'w') as pipe_mod_file: + pipe_mod_file.write("{}\n\n".format(SAMPLE_IMPORT)) + for class_lines in lines_order: + for line in class_lines: + pipe_mod_file.write(line) + pipe_mod_file.write("\n\n") + + # We need the new pipeline module file in place before the + # ProtocolInterface is created. + piface = ProtocolInterface(conf_path) + subtype = piface.fetch_sample_subtype( + protocol=ATAC_PROTOCOL_NAME, + strict_pipe_key=atac_pipe_name, full_pipe_path=pipe_path) + assert subtype_name == subtype.__name__ + + + @pytest.mark.parametrize( + argnames="spec_type", argvalues=["single", "nested"]) + def test_subtypes_list( + self, tmpdir, atac_pipe_name, atacseq_piface_data, spec_type): + """ As singleton or within mapping, only 1 subtype allowed. """ + + pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + + # Define the classes, writing them in the pipeline module file. + subtype_names = ["ArbitraryA", "PlaceholderB"] + with open(pipe_path, 'w') as pipe_module_file: + pipe_module_file.write("{}\n\n".format(SAMPLE_IMPORT)) + for subtype_name in subtype_names: + # Have the classes be Sample subtypes. + for line in _class_definition_lines( + subtype_name, name_super_type=Sample.__name__): + pipe_module_file.write(line) + pipe_module_file.write("\n\n") + + # Update the ProtocolInterface data. + subtype_section = subtype_names if spec_type == "single" \ + else {ATAC_PROTOCOL_NAME: subtype_names} + atacseq_piface_data[atac_pipe_name][SUBTYPES_KEY] = subtype_section + + # Create the ProtocolInterface. + conf_path = _write_config_data( + protomap={ATAC_PROTOCOL_NAME: atac_pipe_name}, + conf_data=atacseq_piface_data, dirpath=tmpdir.strpath) + piface = ProtocolInterface(conf_path) + + # We don't really care about exception type, just that one arises. 
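+        # Whether given directly or nested under the protocol name, a list
+        # of multiple subtype names is ambiguous, so resolution should fail.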
+ with pytest.raises(Exception): + piface.fetch_sample_subtype( + protocol=ATAC_PROTOCOL_NAME, + strict_pipe_key=atac_pipe_name, full_pipe_path=pipe_path) + + + @pytest.mark.parametrize( + argnames="target", argvalues=["Leaf", "Middle"]) + @pytest.mark.parametrize( + argnames="spec_type", argvalues=["single", "mapping"]) + def test_sample_grandchild( + self, tmpdir, spec_type, target, + atacseq_piface_data, atac_pipe_name): + """ The subtype to be used can be a grandchild of Sample. """ + + pipe_path = os.path.join(tmpdir.strpath, atac_pipe_name) + intermediate_sample_subtype = "Middle" + leaf_sample_subtype = "Leaf" + + intermediate_subtype_lines = _class_definition_lines( + intermediate_sample_subtype, Sample.__name__) + leaf_subtype_lines = _class_definition_lines( + leaf_sample_subtype, intermediate_sample_subtype) + with open(pipe_path, 'w') as pipe_mod_file: + pipe_mod_file.write("{}\n\n".format(SAMPLE_IMPORT)) + for l in intermediate_subtype_lines: + pipe_mod_file.write(l) + pipe_mod_file.write("\n\n") + for l in leaf_subtype_lines: + pipe_mod_file.write(l) + + atacseq_piface_data[atac_pipe_name][SUBTYPES_KEY] = \ + target if spec_type == "single" else \ + {ATAC_PROTOCOL_NAME: target} + conf_path = _write_config_data( + protomap={ATAC_PROTOCOL_NAME: atac_pipe_name}, + conf_data=atacseq_piface_data, dirpath=tmpdir.strpath) + + piface = ProtocolInterface(conf_path) + subtype = piface.fetch_sample_subtype( + protocol=ATAC_PROTOCOL_NAME, strict_pipe_key=atac_pipe_name, + full_pipe_path=pipe_path) + + assert target == subtype.__name__ + + + @pytest.fixture(scope="function") + def atacseq_piface_data_with_subtypes( + self, request, atacseq_piface_data, atac_pipe_name): + """ + Provide test case with ProtocolInterface data. + + :param pytest._pytest.fixtures.SubRequest request: test case + requesting the parameterization + :param Mapping atacseq_piface_data: the ProtocolInterface data + :param str atac_pipe_name: name for the pipeline + :return Mapping: same as input, but with Sample subtype specification + section mixed in + """ + + # Get the test case's parameterized values. + spec_type = request.getfixturevalue("subtypes_section_spec_type") + subtype_name = request.getfixturevalue("subtype_name") + + # Determine how to specify the subtype(s). + if spec_type is str: + section_value = subtype_name + elif spec_type is dict: + section_value = {ATAC_PROTOCOL_NAME: subtype_name} + else: + raise ValueError("Unexpected subtype section specification type: " + "{}".format(spec_type)) + + # Update and return the interface data. + atacseq_piface_data[atac_pipe_name][SUBTYPES_KEY] = section_value + return atacseq_piface_data + + + +def _class_definition_lines(name, name_super_type): + """ Create lines that define a class. """ + return ["class {t}({st}):\n".format(t=name, st=name_super_type), + "\tdef __init__(self, *args, **kwarggs):\n", + "\t\tsuper({t}, self).__init__(*args, **kwargs)".format( + t=name, st=name_super_type)] + + + +def _create_module(lines_by_class, filepath): + """ + Write out lines that will defined a module. 
+ + :param Sequence[str] lines_by_class: lines that define a class + :param str filepath: path to module file to create + :return str: path to the module file written + """ + lines = "\n\n".join( + [SAMPLE_IMPORT] + ["\n".join(class_lines) + for class_lines in lines_by_class]) + with open(filepath, 'w') as modfile: + modfile.write("{}\n".format(lines)) + return filepath + + + +def _write_config_data(protomap, conf_data, dirpath): + """ + Write ProtocolInterface data to (temp)file. + + :param Mapping protomap: mapping from protocol name to pipeline key/name + :param Mapping conf_data: mapping from pipeline key/name to configuration + data for a PipelineInterface + :param str dirpath: path to filesystem location in which to place the + file to write + :return str: path to the (temp)file written + """ + full_conf_data = {"protocol_mapping": protomap, "pipelines": conf_data} + filepath = os.path.join(dirpath, "pipeline_interface.yaml") + with open(filepath, 'w') as conf_file: + yaml.safe_dump(full_conf_data, conf_file) + return filepath diff --git a/tests/models/independent/test_Sample.py b/tests/models/independent/test_Sample.py index 6532ee38..8c5772b4 100644 --- a/tests/models/independent/test_Sample.py +++ b/tests/models/independent/test_Sample.py @@ -1,12 +1,13 @@ """ Tests for the Sample. """ import os +import tempfile import mock import numpy as np from pandas import Series import pytest import looper -from looper.models import Sample +from looper.models import Sample, SAMPLE_NAME_COLNAME __author__ = "Vince Reuter" @@ -17,46 +18,33 @@ class ParseSampleImplicationsTests: """ Tests for appending columns/fields to a Sample based on a mapping. """ - IMPLIER_NAME = "sample_name" + IMPLIER_NAME = SAMPLE_NAME_COLNAME IMPLIER_VALUES = ["a", "b"] SAMPLE_A_IMPLICATIONS = {"genome": "hg38", "phenome": "hg72"} SAMPLE_B_IMPLICATIONS = {"genome": "hg38"} - IMPLICATIONS = [SAMPLE_A_IMPLICATIONS, SAMPLE_B_IMPLICATIONS] - IMPLICATIONS_MAP = { - IMPLIER_NAME: IMPLICATIONS - } + IMPLICATIONS = {"a": SAMPLE_A_IMPLICATIONS, "b": SAMPLE_B_IMPLICATIONS} + IMPLICATIONS_MAP = {IMPLIER_NAME: IMPLICATIONS} - def test_project_lacks_implications(self, sample): + @pytest.mark.parametrize(argnames="implications", argvalues=[None, {}, []]) + def test_project_no_implications(self, sample, implications): """ With no implications mapping, sample is unmodified. """ before_inference = sample.__dict__ - with mock.patch.object(sample, "prj", create=True): - sample.infer_columns() + sample.infer_columns(implications) after_inference = sample.__dict__ assert before_inference == after_inference - def test_empty_implications(self, sample): - """ Empty implications mapping --> unmodified sample. """ - before_inference = sample.__dict__ - implications = mock.MagicMock(implied_columns={}) - with mock.patch.object(sample, "prj", create=True, new=implications): - sample.infer_columns() - assert before_inference == sample.__dict__ - - def test_null_intersection_between_sample_and_implications(self, sample): """ Sample with none of implications' fields --> no change. 
""" before_inference = sample.__dict__ - implications = mock.MagicMock(implied_columns=self.IMPLICATIONS_MAP) - with mock.patch.object(sample, "prj", create=True, new=implications): - sample.infer_columns() + sample.infer_columns(self.IMPLICATIONS_MAP) assert before_inference == sample.__dict__ @pytest.mark.parametrize( argnames=["implier_value", "implications"], - argvalues=zip(IMPLIER_VALUES, IMPLICATIONS), + argvalues=IMPLICATIONS.items(), ids=lambda implier_and_implications: "implier='{}', implications={}".format( implier_and_implications[0], str(implier_and_implications[1]))) @@ -70,11 +58,7 @@ def test_intersection_between_sample_and_implications( # Set the parameterized value for the implications source field. setattr(sample, self.IMPLIER_NAME, implier_value) - - # Perform column inference based on mocked implications. - implications = mock.MagicMock(implied_columns=self.IMPLICATIONS_MAP) - with mock.patch.object(sample, "prj", create=True, new=implications): - sample.infer_columns() + sample.infer_columns(self.IMPLICATIONS_MAP) # Validate updates to sample based on column implications & inference. for implied_name, implied_value in implications.items(): @@ -84,29 +68,18 @@ def test_intersection_between_sample_and_implications( @pytest.mark.parametrize( argnames="unmapped_implier_value", argvalues=["totally-wacky-value", 62, None, np.nan]) - @pytest.mark.parametrize( - argnames="implications", argvalues=IMPLICATIONS, - ids=lambda implications: "implied={}".format(str(implications))) def test_sample_has_unmapped_value_for_implication( - self, sample, unmapped_implier_value, implications): + self, sample, unmapped_implier_value): """ Unknown value in implier field --> null inference. """ - # Negative control pre-/post-test. def no_implied_values(): assert all([not hasattr(sample, implied_field_name) - for implied_field_name in implications.keys()]) - + for implied_field_name in self.IMPLICATIONS.keys()]) no_implied_values() - - # Set the parameterized value for the implications source field. setattr(sample, self.IMPLIER_NAME, unmapped_implier_value) - - # Perform column inference based on mocked implications. - implications = mock.MagicMock(implied_columns=self.IMPLICATIONS_MAP) - with mock.patch.object(sample, "prj", create=True, new=implications): - sample.infer_columns() + sample.infer_columns(self.IMPLICATIONS_MAP) no_implied_values() @@ -128,7 +101,7 @@ def sample(self, request): data = request.getfixturevalue("data") else: data = {} - data.setdefault("sample_name", "test-sample") + data.setdefault(SAMPLE_NAME_COLNAME, "test-sample") # Mock the validation and return a new Sample. rubber_stamper = mock.MagicMock(return_value=[]) @@ -150,12 +123,11 @@ class SampleRequirementsTests: ids=lambda has_name: "has_name: {}".format(has_name)) def test_requires_sample_name(self, has_name, data_type): data = {} - sample_name_key = "sample_name" sample_name = "test-sample" if has_name: - data[sample_name_key] = sample_name + data[SAMPLE_NAME_COLNAME] = sample_name sample = Sample(data_type(data)) - assert sample_name == getattr(sample, sample_name_key) + assert sample_name == getattr(sample, SAMPLE_NAME_COLNAME) else: with pytest.raises(ValueError): Sample(data_type(data)) @@ -168,7 +140,7 @@ def test_requires_sample_name(self, has_name, data_type): @pytest.mark.parametrize(argnames="data_type", argvalues=[dict, Series]) def test_exception_type_matches_access_mode(data_type, accessor): """ Exception for attribute access failure reflects access mode. 
""" - data = {"sample_name": "placeholder"} + data = {SAMPLE_NAME_COLNAME: "placeholder"} sample = Sample(data_type(data)) if accessor == "attr": with pytest.raises(AttributeError): @@ -191,6 +163,7 @@ def test_exception_type_matches_access_mode(data_type, accessor): argnames="preexists", argvalues=[False, True], ids=lambda exists: "preexists={}".format(exists)) def test_make_sample_dirs(paths, preexists, tmpdir): + """ Existence guarantee Sample instance's folders is safe and valid. """ # Derive full paths and assure nonexistence before creation. fullpaths = [] @@ -202,7 +175,7 @@ def test_make_sample_dirs(paths, preexists, tmpdir): fullpaths.append(fullpath) # Make the sample and assure paths preexistence. - s = Sample({"sample_name": "placeholder"}) + s = Sample({SAMPLE_NAME_COLNAME: "placeholder"}) s.paths = fullpaths # Base the test's initial condition on the parameterization. diff --git a/tests/models/independent/test_SampleSheet.py b/tests/models/independent/test_SampleSheet.py deleted file mode 100644 index 27d56e78..00000000 --- a/tests/models/independent/test_SampleSheet.py +++ /dev/null @@ -1,16 +0,0 @@ -""" Tests for the SampleSheet model. """ - -import pandas as pd -import pytest -from looper.models import SampleSheet - - -__author__ = "Vince Reuter" -__email__ = "vreuter@virginia.edu" - - - -# TODO: implement a few of these. -@pytest.mark.skip("Not implemented") -class SampleSheetRoundtripTests: - pass diff --git a/tests/models/integration/test_Project_Sample_interaction.py b/tests/models/integration/test_Project_Sample_interaction.py index 0067ff82..12d0a7fb 100644 --- a/tests/models/integration/test_Project_Sample_interaction.py +++ b/tests/models/integration/test_Project_Sample_interaction.py @@ -1,11 +1,19 @@ """ Tests for interaction between a Project and a Sample. """ from collections import OrderedDict +import copy +import itertools import os +import random + import pandas as pd import pytest import yaml -from looper.models import Project, SAMPLE_ANNOTATIONS_KEY + +from looper.models import \ + Project, Sample, \ + SAMPLE_ANNOTATIONS_KEY, SAMPLE_NAME_COLNAME +from looper.utils import alpha_cased __author__ = "Vince Reuter" @@ -22,9 +30,171 @@ "input_dir": "dummy/sequencing/data", "tools_folder": "arbitrary-seq-tools-folder"} +NAME_ANNOTATIONS_FILE = "annotations.csv" +SAMPLE_NAMES = ["WGBS_mm10", "ATAC_mm10", "WGBS_rn6", "ATAC_rn6"] +COLUMNS = [SAMPLE_NAME_COLNAME, "val1", "val2", "library"] +VALUES1 = [random.randint(-5, 5) for _ in range(len(SAMPLE_NAMES))] +VALUES2 = [random.randint(-5, 5) for _ in range(len(SAMPLE_NAMES))] +LIBRARIES = ["WGBS", "ATAC", "WGBS", "ATAC"] +DATA = list(zip(SAMPLE_NAMES, VALUES1, VALUES2, LIBRARIES)) +DATA_FOR_SAMPLES = [ + {SAMPLE_NAME_COLNAME: SAMPLE_NAMES}, + {"val1": VALUES1}, {"val2": VALUES2}, {"library": LIBRARIES}] +PROJECT_CONFIG_DATA = {"metadata": {"sample_annotation": NAME_ANNOTATIONS_FILE}} +PROTOCOLS = ["WGBS", "ATAC"] + + + +def pytest_generate_tests(metafunc): + """ Customization of test cases within this module. """ + if metafunc.cls == BuildSheetTests: + if "protocols" in metafunc.fixturenames: + # Apply the test case to each of the possible combinations of + # protocols, from none at all up to all of them. 
+ metafunc.parametrize( + argnames="protocols", + argvalues=list(itertools.chain.from_iterable( + itertools.combinations(PROTOCOLS, x) + for x in range(1 + len(PROTOCOLS)))), + ids=lambda protos: + " protocols = {} ".format(",".join(protos))) + if "delimiter" in metafunc.fixturenames: + metafunc.parametrize(argnames="delimiter", argvalues=[",", "\t"]) + + + +@pytest.fixture(scope="function") +def proj_conf(): + """ Provide the basic configuration data. """ + return copy.deepcopy(PROJECT_CONFIG_DATA) + + + +@pytest.fixture(scope="function") +def path_proj_conf_file(tmpdir, proj_conf): + """ Write basic project configuration data and provide filepath. """ + conf_path = os.path.join(tmpdir.strpath, "project_config.yaml") + with open(conf_path, 'w') as conf: + yaml.safe_dump(proj_conf, conf) + return conf_path + + + +@pytest.fixture(scope="function") +def path_anns_file(request, tmpdir, sample_sheet): + """ Write basic annotations, optionally using a different delimiter. """ + filepath = os.path.join(tmpdir.strpath, NAME_ANNOTATIONS_FILE) + if "delimiter" in request.fixturenames: + delimiter = request.getfixturevalue("delimiter") + else: + delimiter = "," + with open(filepath, 'w') as anns_file: + sample_sheet.to_csv(anns_file, sep=delimiter, index=False) + return filepath + + + +@pytest.fixture(scope="function") +def samples_rawdata(): + return copy.deepcopy(DATA) + + + +@pytest.fixture(scope="function") +def sample_sheet(samples_rawdata): + df = pd.DataFrame(samples_rawdata) + df.columns = [SAMPLE_NAME_COLNAME, "val1", "val2", "library"] + return df + + + +def test_samples_are_generic(path_anns_file, path_proj_conf_file): + """ Regardless of protocol, Samples for sheet are generic. """ + # Annotations filepath fixture is also writes that file, so + # it's needed even though that return value isn't used locally. + p = Project(path_proj_conf_file) + assert len(SAMPLE_NAMES) == p.num_samples + samples = list(p.samples) + assert p.num_samples == len(samples) + assert all([Sample is type(s) for s in samples]) + + + +class BuildSheetTests: + """ Tests for construction of sheet of Project's Samples. """ + + # Note: seemingly unused parameters may affect parameterization + # logic of other fixtures used by a test case; tread lightly. + + + def test_no_samples(self, protocols, delimiter, path_empty_project): + """ Lack of Samples is unproblematic for the sheet build. """ + # Regardless of protocol(s), the sheet should be empty. + p = Project(path_empty_project) + sheet = p.build_sheet(*protocols) + assert sheet.empty + + + @pytest.mark.parametrize( + argnames="which_sample_index", argvalues=range(len(SAMPLE_NAMES))) + def test_single_sample( + self, tmpdir, path_proj_conf_file, which_sample_index): + """ Single Sample is perfectly valid for Project and sheet. """ + + # Pull out the values for the current sample. + values = DATA[which_sample_index] + + # Write the annotations. + anns_path = os.path.join(tmpdir.strpath, NAME_ANNOTATIONS_FILE) + with open(anns_path, 'w') as anns_file: + anns_file.write("{}\n".format(",".join(COLUMNS))) + anns_file.write("{}\n".format(",".join([str(v) for v in values]))) + + # Build the sheet. + p = Project(path_proj_conf_file) + sheet = p.build_sheet() + + # It should be a single-row DataFrame. + assert isinstance(sheet, pd.DataFrame) + assert 1 == len(sheet) + assert 1 == p.num_samples + + # There will be additional values added from the Project, + # but the core data values will have remained the same. 
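+        # Values round-trip through the annotations CSV as text, so numeric
+        # columns may come back as strings; the int() fallback below
+        # accounts for that.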
+ sample = list(p.samples)[0] + for attr, exp_val in zip(COLUMNS, values): + obs_val = getattr(sample, attr) + try: + assert exp_val == obs_val + except AssertionError as e: + try: + assert exp_val == int(obs_val) + except AssertionError: + raise e + + + def test_multiple_samples( + self, protocols, path_anns_file, path_proj_conf_file): + """ Project also processes multiple Sample fine. """ + + p = Project(path_proj_conf_file) + + # Total sample count is constant. + assert len(SAMPLE_NAMES) == sum(1 for _ in p.samples) + + # But the sheet permits filtering to specific protocol(s). + exp_num_samples = len(SAMPLE_NAMES) if not protocols else \ + sum(sum(1 for l in LIBRARIES if l == p) for p in protocols) + sheet = p.build_sheet(*protocols) + assert exp_num_samples == len(sheet) + if protocols: + fuzzy_protos = {alpha_cased(p) for p in protocols} + for _, sample_data in sheet.iterrows(): + assert alpha_cased(sample_data.library) in fuzzy_protos + -class ProjectSampleInteractionTests: +class SampleFolderCreationTests: """ Tests for interaction between Project and Sample. """ CONFIG_DATA_PATHS_HOOK = "uses_paths_section" diff --git a/tests/models/test_models_smoke.py b/tests/models/test_models_smoke.py index 97dd411e..83c07879 100644 --- a/tests/models/test_models_smoke.py +++ b/tests/models/test_models_smoke.py @@ -1,36 +1,117 @@ """ Basic smoketests for models """ +import logging import pytest -from looper.models import AttributeDict +import looper +from looper.models import AttributeDict, Project + __author__ = "Vince Reuter" __email__ = "vreuter@virgnia.edu" +_LOGGER = logging.getLogger(__name__) + + def pytest_generate_tests(metafunc): """ Dynamic test case parameterization. """ - if metafunc.cls == AttributeDictRepresentationSmokeTests: - metafunc.parametrize(argnames="representation_method", - argvalues=["__repr__", "__str__"]) + if "funcname" in metafunc.fixturenames: + metafunc.parametrize( + argnames="funcname", argvalues=["__repr__", "__str__"]) @pytest.mark.usefixtures("write_project_files") -class AttributeDictRepresentationSmokeTests: +class AttributeDictRepresentationTests: """ Non-fail validation of AttributeDict representations. """ @pytest.mark.parametrize( argnames="data", argvalues=[[('CO', 145)], {'CO': {"US-50": [550, 62, 145]}}]) - def test_AttributeDict_representations( - self, data, representation_method): + def test_AttributeDict_representations_smoke( + self, data, funcname): """ Text representation of base AttributeDict doesn't fail. """ attrdict = AttributeDict(data) - getattr(attrdict, representation_method).__call__() + getattr(attrdict, funcname).__call__() - def test_Project_representations(self, proj, representation_method): + def test_Project_representations_smoke(self, proj, funcname): """ Representation of Project (AttributeDict subclass) is failsafe. """ - getattr(proj, representation_method).__call__() + getattr(proj, funcname).__call__() + + + def test_project_repr_name_inclusion(self, proj, funcname): + """ Test Project text representation. """ + func = getattr(proj, funcname) + result = func.__call__() + assert type(result) is str + classname = proj.__class__.__name__ + if funcname == "__str__": + assert classname in result + elif funcname == "__repr__": + assert classname not in result + else: + raise ValueError("Unexpected representation function: {}". + format(funcname)) + + + +class ModelCreationSmokeTests: + """ Smoketests for creation of various types of project-related models. """ + + # TODO: migrate these to pytest.raises(None) with 3.1. 
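+    # For now, simply constructing the Project serves as the assertion
+    # that no exception is raised.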
+ + def test_empty_project(self, path_empty_project): + """ It's unproblematic to create a Project that lacks samples. """ + Project(path_empty_project) + + + +class ModelRepresentationSmokeTests: + """ Tests for the text representation of important ADTs. """ + + # NOTE: similar parameterization, but Project construction needs + # to be handled with greater care when testing the actual call. + + @pytest.mark.parametrize( + argnames="class_name", argvalues=looper.models.__classes__) + def test_implements_repr_smoke(self, class_name): + """ Each important ADT must implement a representation method. """ + + funcname = "__repr__" + + # Attempt a control assertion, that a subclass that doesn't override + # the given method of its superclass, uses the superclass version of + # the function in question. + class ObjectSubclass(object): + def __init__(self): + super(ObjectSubclass, self).__init__() + assert getattr(ObjectSubclass, funcname) is getattr(object, funcname) + + # Make the actual assertion of interest. + adt = getattr(looper.models, class_name) + assert getattr(adt, funcname) != \ + getattr(adt.__bases__[0], funcname) + + + @pytest.mark.parametrize( + argnames="class_name", + argvalues=[cn for cn in looper.models.__classes__ + if cn != "Project"]) + def test_repr_smoke( + self, tmpdir, class_name, basic_instance_data, funcname): + """ Object representation method successfully returns string. """ + # Note that tmpdir is used when config file needs to be written. + cls = getattr(looper.models, class_name) + instance = cls(basic_instance_data) + func = getattr(instance, funcname) + result = func.__call__() + if funcname == "__str__": + assert class_name in result + elif funcname == "__repr__": + assert type(result) is str + else: + raise ValueError("Unexpected representation method: {}". + format(funcname)) diff --git a/tests/test_looper.py b/tests/test_looper.py index 14b8b331..ff3e24c2 100644 --- a/tests/test_looper.py +++ b/tests/test_looper.py @@ -14,13 +14,12 @@ import numpy.random as nprand import pytest -import yaml from looper.looper import aggregate_exec_skip_reasons import looper.models -from looper.models import AttributeDict, ATTRDICT_METADATA, COL_KEY_SUFFIX +from looper.models import COL_KEY_SUFFIX from .conftest import \ - DERIVED_COLNAMES, EXPECTED_MERGED_SAMPLE_FILES, FILE_BY_SAMPLE, \ + DERIVED_COLNAMES, EXPECTED_MERGED_SAMPLE_FILES, \ LOOPER_ARGS_BY_PIPELINE, MERGED_SAMPLE_INDICES, NGS_SAMPLE_INDICES, \ NUM_SAMPLES, PIPELINE_TO_REQD_INFILES_BY_SAMPLE @@ -32,15 +31,11 @@ @pytest.mark.usefixtures("write_project_files") class ProjectConstructorTest: - # TODO: docstrings and atomicity/encapsulation. - # TODO: conversion to pytest for consistency. - @pytest.mark.parametrize(argnames="attr_name", argvalues=["required_inputs", "all_input_attr"]) def test_sample_required_inputs_not_set(self, proj, attr_name): """ Samples' inputs are not set in `Project` ctor. """ - # TODO: update this to check for null if design is changed as may be. with pytest.raises(AttributeError): getattr(proj.samples[nprand.randint(len(proj.samples))], attr_name) @@ -67,11 +62,12 @@ def test_data_sources_derivation(self, proj, sample_index): merged_columns = filter( lambda col_key: (col_key != "col_modifier") and not col_key.endswith(COL_KEY_SUFFIX), - proj.samples[sample_index].merged_cols.keys() - ) + proj.samples[sample_index].merged_cols.keys()) # Order may be lost due to mapping. # We don't care about that here, or about duplicates. 
-        assert set(DERIVED_COLNAMES) == set(merged_columns)
+        expected = set(DERIVED_COLNAMES)
+        observed = set(merged_columns)
+        assert expected == observed
 
 
@@ -94,29 +90,13 @@ def test_unmerged_samples_lack_merged_cols(self, proj, sample_index):
         assert not proj.samples[sample_index].merged_cols
 
-    @pytest.mark.parametrize(argnames="sample_index",
-                             argvalues=range(NUM_SAMPLES))
-    def test_multiple_add_sample_sheet_calls_no_rederivation(self, proj,
-                                                             sample_index):
-        """ Don't rederive `derived_columns` for multiple calls. """
-        expected_files = FILE_BY_SAMPLE[sample_index]
-        def _observed(p):
-            return [os.path.basename(f)
-                    for f in p.samples[sample_index].file.split(" ")]
-        assert expected_files == _observed(proj)
-        proj.add_sample_sheet()
-        proj.add_sample_sheet()
-        assert expected_files == _observed(proj)
-        proj.add_sample_sheet()
-        assert expected_files == _observed(proj)
-
-
     def test_duplicate_derived_columns_still_derived(self, proj):
         sample_index = 2
         observed_nonmerged_col_basename = \
             os.path.basename(proj.samples[sample_index].nonmerged_col)
         assert "c.txt" == observed_nonmerged_col_basename
-        assert "" == proj.samples[sample_index].locate_data_source('file')
+        assert "" == proj.samples[sample_index].locate_data_source(
+            proj.data_sources, 'file')
 
 
@@ -139,7 +119,8 @@ def test_required_inputs(self, proj, pipe_iface, sample_index):
         observed_required_inputs = [os.path.basename(f)
                                     for f in sample.required_inputs]
         assert expected_required_inputs == observed_required_inputs
-        assert sample.confirm_required_inputs()
+        error_type, error_message = sample.determine_missing_requirements()
+        assert error_type is None and not error_message
 
 
@@ -154,7 +135,8 @@ def test_ngs_pipe_ngs_sample(self, proj, pipe_iface, sample_index):
                                  [sample_index][0])
         observed_required_input_basename = \
             os.path.basename(sample.required_inputs[0])
-        assert sample.confirm_required_inputs()
+        error_type, error_message = sample.determine_missing_requirements()
+        assert error_type is None and not error_message
         assert 1 == len(sample.required_inputs)
         assert expected_required_input_basename == \
             observed_required_input_basename
 
 
@@ -223,102 +205,6 @@ def test_looper_args_usage(self, pipe_iface, pipeline, expected):
 
 
-@pytest.mark.usefixtures("write_project_files")
-class SampleRoundtripTests:
-    """ Test equality of objects written to and from YAML files. """
-
-
-    def test_default_behavioral_metadata_retention(self, tmpdir, proj):
-        """ With default metadata, writing to file and restoring is OK. """
-        tempfolder = str(tmpdir)
-        sample_tempfiles = []
-        for sample in proj.samples:
-            path_sample_tempfile = os.path.join(tempfolder,
-                                                "{}.yaml".format(sample.name))
-            sample.to_yaml(path_sample_tempfile)
-            sample_tempfiles.append(path_sample_tempfile)
-        for original_sample, temp_sample_path in zip(proj.samples,
-                                                     sample_tempfiles):
-            with open(temp_sample_path, 'r') as sample_file:
-                restored_sample_data = yaml.load(sample_file)
-            ad = AttributeDict(restored_sample_data)
-            self._metadata_equality(original_sample.prj, ad)
-
-
-    def test_modified_behavioral_metadata_preservation(self, tmpdir, proj):
-        """ Behavior metadata modifications are preserved to/from disk. """
-        tempfolder = str(tmpdir)
-        sample_tempfiles = []
-        samples = proj.samples
-        assert 1 < len(samples), "Too few samples: {}".format(len(samples))
-
-        # TODO: note that this may fail if metadata
-        # modification prohibition is implemented.
-        samples[0].prj.__dict__["_force_nulls"] = True
-        samples[1].prj.__dict__["_attribute_identity"] = True
-
-        for sample in proj.samples[:2]:
-            path_sample_tempfile = os.path.join(tempfolder,
-                                                "{}.yaml".format(sample.name))
-            sample.to_yaml(path_sample_tempfile)
-            sample_tempfiles.append(path_sample_tempfile)
-
-        with open(sample_tempfiles[0], 'r') as f:
-            sample_0_data = yaml.load(f)
-        assert AttributeDict(sample_0_data).prj._force_nulls is True
-
-        with open(sample_tempfiles[1], 'r') as f:
-            sample_1_data = yaml.load(f)
-        sample_1_restored_attrdict = AttributeDict(sample_1_data)
-        assert sample_1_restored_attrdict.prj.does_not_exist == "does_not_exist"
-
-
-    def _check_nested_metadata(self, original, restored):
-        """
-        Check equality for metadata items, accounting for nesting within
-        instances of AttributeDict and its child classes.
-
-        :param AttributeDict original: original AttributeDict (or child) object
-        :param AttributeDict restored: instance restored from writing
-            original object to file, then reparsing and constructing
-            AttributeDict instance
-        :return bool: whether metadata items are equivalent between objects
-            at all nesting levels
-        """
-        for key, data in original.items():
-            if key not in restored:
-                return False
-            equal_level = self._metadata_equality(original, restored)
-            if not equal_level:
-                return False
-            if isinstance(original, AttributeDict):
-                return isinstance(restored, AttributeDict) and \
-                    self._check_nested_metadata(data, restored[key])
-            else:
-                return True
-
-
-    @staticmethod
-    def _metadata_equality(original, restored):
-        """
-        Check nested levels of metadata equality.
-
-        :param AttributeDict original: a raw AttributeDict or an
-            instance of a child class that was serialized and written to disk
-        :param AttributeDict restored: an AttributeDict instance created by
-            parsing the file associated with the original object
-        :return bool: whether all metadata keys/items have equal value
-            when comparing original object to restored version
-        """
-        for metadata_item in ATTRDICT_METADATA:
-            if metadata_item not in original or \
-                    metadata_item not in restored or \
-                    original[metadata_item] != restored[metadata_item]:
-                return False
-        return True
-
-
-
 class RunErrorReportTests:
     """ Tests for aggregation of submission failures. """