use default compute in generic Project; generalize looperenv naming; …

…fix AttributeDict comparison method
pepkit · Jun 12, 2017 · 70fd1e6 · 70fd1e6
1 parent e0ced3c
commit 70fd1e6
Show file tree

Hide file tree

Showing 7 changed files with 142 additions and 55 deletions.
diff --git a/looper/__init__.py b/looper/__init__.py
@@ -13,12 +13,6 @@
 from ._version import __version__
 
 
-LOOPERENV_VARNAME = "LOOPERENV"
-SUBMISSION_TEMPLATES_FOLDER = "submit_templates"
-DEFAULT_LOOPERENV_FILENAME = "default_looperenv.yaml"
-DEFAULT_LOOPERENV_CONFIG_RELATIVE = os.path.join(SUBMISSION_TEMPLATES_FOLDER,
-                                                 DEFAULT_LOOPERENV_FILENAME)
-
 LOGGING_LEVEL = "INFO"
 LOGGING_LOCATIONS = (stdout, )
 

diff --git a/looper/loodels.py b/looper/loodels.py
@@ -1,17 +1,12 @@
 """ Looper versions of NGS project models. """
 
-import os
 from . import models
 
 
 __author__ = "Vince Reuter"
 __email__ = "vreuter@virginia.edu"
 
 
-DEFAULT_PROJECT_COMPUTE_NAME = "default_looperenv.yaml"
-SUBMISSION_TEMPLATES_FOLDER = "submit_templates"
-
-
 
 class Project(models.Project):
     """
@@ -27,30 +22,13 @@ class Project(models.Project):
     :type default_compute: str
 
     """
-    def __init__(self, config_file,
-                 subproject=None, default_compute=None, **kwargs):
-        if not default_compute:
-            looper_folder = os.path.dirname(__file__)
-            default_compute = os.path.join(looper_folder,
-                    SUBMISSION_TEMPLATES_FOLDER, DEFAULT_PROJECT_COMPUTE_NAME)
+    def __init__(self, config_file, subproject=None, **kwargs):
         super(Project, self).__init__(
                 config_file, subproject=subproject, 
-                default_compute=default_compute,
                 no_environment_exception=RuntimeError,
                 no_compute_exception=RuntimeError, **kwargs)
 
 
-    @property
-    def compute_env_var(self):
-        """
-        Environment variable through which to access compute settings.
-
-        :return str: name of the environment variable to pointing to
-            compute settings
-        """
-        return "LOOPERENV"
-
-
     @property
     def required_metadata(self):
         """ Which metadata attributes are required. """

diff --git a/looper/looper.py b/looper/looper.py
@@ -12,8 +12,9 @@
 import sys
 import time
 import pandas as _pd
-from . import setup_looper_logger, LOGGING_LEVEL, __version__, LOOPERENV_VARNAME
+from . import setup_looper_logger, LOGGING_LEVEL, __version__
 from .loodels import Project
+from .models import COMPUTE_SETTINGS_VARNAME
 from .utils import VersionInHelpParser
 
 try:
@@ -32,6 +33,8 @@
 
 SAMPLE_EXECUTION_TOGGLE = "toggle"
 
+# Ordered so that a greater verbosity setting selects a lower logging level
+# (NOTE: ERROR is listed before CRITICAL, so order isn't strictly by severity).
 _LEVEL_BY_VERBOSITY = [logging.ERROR, logging.CRITICAL, logging.WARN,
                        logging.INFO, logging.DEBUG]
 
@@ -96,7 +99,7 @@ def parse_arguments():
             help="YAML file with looper environment compute settings.")
     run_subparser.add_argument(
             "--env",
-            default=os.getenv("{}".format(LOOPERENV_VARNAME), ""),
+            default=os.getenv("{}".format(COMPUTE_SETTINGS_VARNAME), ""),
             help="Employ looper environment compute settings.")
     run_subparser.add_argument(
             "--limit",

diff --git a/looper/models.py b/looper/models.py
@@ -68,6 +68,7 @@
     parse_ftype, check_bam, check_fastq, get_file_size, partition
 
 
+COMPUTE_SETTINGS_VARNAME = "PEPENV"
 DEFAULT_COMPUTE_RESOURCES_NAME = "default"
 DATA_SOURCE_COLNAME = "data_source"
 SAMPLE_NAME_COLNAME = "sample_name"
@@ -275,10 +276,20 @@ def __delitem__(self, item):
             _LOGGER.debug("No item {} to delete".format(item))
 
     def __eq__(self, other):
-        for k in iter(self):
-            if k in other and self.__dict__[k] == other[k]:
-                continue
+        try:
+            # Ensure target itself and any values are AttributeDict.
+            other = AttributeDict(other)
+        except Exception:
+            return False
+        if len(self) != len(other):
+            # Ensure we don't have to worry about other containing self.
             return False
+        for k, v in self.items():
+            try:
+                if v != other[k]:
+                    return False
+            except KeyError:
+                return  False
         return True
 
     def __ne__(self, other):
@@ -347,22 +358,23 @@ def __init__(self, config_file, subproject=None,
                  permissive=True, file_checks=False, compute_env_file=None,
                  no_environment_exception=None, no_compute_exception=None):
 
-        super(Project, self).__init__()
-
         _LOGGER.info("Creating %s from file: '%s'",
                           self.__class__.__name__, config_file)
+        super(Project, self).__init__()
+
+        default_compute = default_compute or self.default_cmpenv_file
 
         # Initialize local, serial compute as default (no cluster submission)
         # Start with default environment settings.
         _LOGGER.debug("Establishing default environment settings")
         self.environment, self.environment_file = None, None
-        if default_compute:
-            try:
-                self.update_environment(default_compute)
-            except Exception as e:
-                _LOGGER.error("Can't load environment config file '%s'",
-                              str(default_compute))
-                _LOGGER.error(str(type(e).__name__) + str(e))
+
+        try:
+            self.update_environment(default_compute)
+        except Exception as e:
+            _LOGGER.error("Can't load environment config file '%s'",
+                          str(default_compute))
+            _LOGGER.error(str(type(e).__name__) + str(e))
 
         self._handle_missing_env_attrs(
                 default_compute, when_missing=no_environment_exception)
@@ -433,6 +445,18 @@ def __init__(self, config_file, subproject=None,
         self.finalize_pipelines_directory()
 
 
+    @property
+    def default_cmpenv_file(self):
+        """ Path to default compute environment settings file. """
+        return _os.path.join(
+                self.templates_folder, "default_compute_settings.yaml")
+
+
+    @property
+    def templates_folder(self):
+        return _os.path.join(_os.path.dirname(__file__), "submit_templates")
+
+
     @property
     def compute_env_var(self):
         """
@@ -441,7 +465,7 @@ def compute_env_var(self):
         :return str: name of the environment variable to pointing to 
             compute settings
         """
-        return "COMPUTE_SETTINGS"
+        return COMPUTE_SETTINGS_VARNAME
 
 
     @property

diff --git a/...r/submit_templates/default_looperenv.yaml → ...t_templates/default_compute_settings.yaml b/...r/submit_templates/default_looperenv.yaml → ...t_templates/default_compute_settings.yaml
@@ -1,11 +1,11 @@
-# submission_template: the submission form which will be replaced with compute resource parameters
-# Use this to change your cluster manager (SLURM, SGE, LFS, etc)
-# Relative paths are relative to this looperenv yaml file
+# Use this to change your cluster manager (SLURM, SGE, LSF, etc).
+# Relative paths are relative to this compute environment configuration file.
+# Compute resource parameters fill submission_template file's fields.
 compute:
   default:
     submission_template: localhost_template.sub
     submission_command: sh
-    partition: longq  # For backwards-compatibility with old projects at CeMM; to be removed.
+    partition: longq  # Backwards compatibility with old projects at CeMM
   local:
     submission_template: localhost_template.sub
     submission_command: sh

diff --git a/tests/models/independent/test_AttributeDict.py b/tests/models/independent/test_AttributeDict.py
@@ -294,30 +294,35 @@ def test_merge_mappings(
                 self, name_setter_func, name_getter_func):
         """ During construction/insertion, KV pair mappings merge. """
 
+        # Create bare AttributeDict and select the parameterized get/set.
         attrdict = AttributeDict()
         raw_data = {}
         setter = getattr(attrdict, name_setter_func)
         getter = getattr(attrdict, name_getter_func)
 
+        # Add the JPA data.
         setter("JPA", self.WEST_COMPLEX_DATA)
         raw_data.update({"JPA": self.WEST_COMPLEX_DATA})
+        # Mappings are converted to AttributeDict when added.
         observed = getter("JPA")
         assert isinstance(observed, AttributeDict)
         assert self.WEST_COMPLEX_DATA == observed
 
+        # Perform the same sort of addition and assertions for the Lane data.
         setter("Lane", self.INITIAL_MR_DATA)
         raw_data.update({"Lane": self.INITIAL_MR_DATA})
         assert isinstance(getter("Lane"), AttributeDict)
         assert raw_data == attrdict
 
+        # Add Pinn data, also attributed to JPA. This should trigger a merge.
         setter("JPA", self.PINN_DATA)
         observed = getter("JPA")
-        assert isinstance(observed, AttributeDict)
         tempdict = deepcopy(self.WEST_COMPLEX_DATA)
-        tempdict.update(self.INITIAL_MR_DATA)
         tempdict.update(self.PINN_DATA)
-        assert tempdict == observed
+        assert isinstance(observed, AttributeDict)
+        assert AttributeDict(tempdict) == observed
 
+        tempdict.update(self.INITIAL_MR_DATA)
         setter("Lane", self.NEW_MR_DATA)
         higher_level_tempdict = {"JPA": self.WEST_COMPLEX_DATA}
         higher_level_tempdict["JPA"].update(self.PINN_DATA)
@@ -485,7 +490,7 @@ def test_pickle_restoration(self, tmpdir, data, data_type):
 
         # Validate equivalence between original and restored versions.
         with open(filepath, 'rb') as pkl:
-            restored_attrdict = pickle.load(pkl)
+            restored_attrdict = AttributeDict(pickle.load(pkl))
         assert restored_attrdict == original_attrdict
 
 

diff --git a/tests/models/independent/test_Project.py b/tests/models/independent/test_Project.py
@@ -8,7 +8,8 @@
 import yaml
 import looper
 from looper.models import \
-        Project, _MissingMetadataException, SAMPLE_ANNOTATIONS_KEY
+        AttributeDict, Project, \
+        _MissingMetadataException, SAMPLE_ANNOTATIONS_KEY
 
 
 __author__ = "Vince Reuter"
@@ -102,12 +103,36 @@ class ProjectDefaultEnvironmentSettingsTests:
     @pytest.mark.parametrize(
             argnames="explicit_null", argvalues=[False, True],
             ids=lambda explicit_null: "explicit_null={}".format(explicit_null))
+    @pytest.mark.parametrize(
+            argnames="compute_env_attname",
+            argvalues=["environment", "environment_file", "compute"],
+            ids=lambda attr: "attr={}".format(attr))
     def test_no_default_env_settings_provided(
-            self, minimal_project_conf_path, explicit_null):
+            self, minimal_project_conf_path,
+            explicit_null, compute_env_attname):
         """ Project doesn't require default environment settings. """
+
         kwargs = {"default_compute": None} if explicit_null else {}
         project = Project(minimal_project_conf_path, **kwargs)
-        self._assert_null_compute_environment(project)
+
+        observed_attribute = getattr(project, compute_env_attname)
+        expected_attribute = \
+                self.default_compute_settings(project)[compute_env_attname]
+
+        if compute_env_attname == "compute":
+            # 'compute' refers to a section in the default environment
+            # settings file and also to a Project attribute. A Project
+            # instance selects just one of the options in the 'compute'
+            # section of the file as the value for its 'compute' attribute.
+            expected_attribute = expected_attribute["default"]
+            observed_attribute = _compute_paths_to_names(observed_attribute)
+        elif compute_env_attname == "environment":
+            envs_with_reduced_filepaths = \
+                    _env_paths_to_names(observed_attribute["compute"])
+            observed_attribute = AttributeDict(
+                    {"compute": envs_with_reduced_filepaths})
+
+        assert expected_attribute == observed_attribute
 
 
     @pytest.mark.parametrize(
@@ -193,6 +218,16 @@ def _assert_null_compute_environment(project):
         assert project.compute is None
 
 
+    @staticmethod
+    def default_compute_settings(project):
+        settings_filepath = project.default_cmpenv_file
+        with open(settings_filepath, 'r') as settings_data_file:
+            settings = yaml.safe_load(settings_data_file)
+        return {"environment": copy.deepcopy(settings),
+                "environment_file": settings_filepath,
+                "compute": copy.deepcopy(settings)["compute"]}
+
+
 
 class DerivedColumnsTests:
     """ Tests for the behavior of Project's derived_columns attribute. """
@@ -523,3 +558,51 @@ def _write_project_config(config_data, dirpath, filename="proj-conf.yaml"):
     with open(conf_file_path, 'w') as conf_file:
         yaml.safe_dump(config_data, conf_file)
     return conf_file_path
+
+
+
+def _env_paths_to_names(envs):
+    """
+    Convert filepath(s) in each environment to filename for assertion.
+
+    Project instance will ensure that filepaths are absolute, but we want
+    assertion logic here to be independent of that (unless that's under test).
+
+    :param Mapping[str, Mapping] envs: environment data by name
+    :return Mapping[str, Mapping]: same as the input,
+        but with conversion(s) performed
+    """
+    reduced = {}
+    for env_name, env_data in envs.items():
+        # DEBUG
+        print(env_name)
+        reduced[env_name] = _compute_paths_to_names(env_data)
+    return reduced
+
+
+
+def _compute_paths_to_names(env):
+    """
+    Single-environment version of conversion of filepath(s) to name(s).
+
+    This is similarly motivated by allowing tests' assertions about
+    equality between Mappings to be independent of Project instance's
+    effort to ensure that filepaths are absolute.
+
+    :param Mapping env: environment datum by name
+    :return Mapping: same as the input, but with conversion(s) performed
+    """
+    reduced = copy.deepcopy(env)
+    for pathvar in ["submission_template"]:
+
+        # DEBUG
+        try:
+            _, reduced[pathvar] = os.path.split(reduced[pathvar])
+        except KeyError:
+            print("REDUCED: {}".format(reduced))
+            print("ENV: {}".format(env))
+            print("KEYS: {}".format(reduced.keys()))
+            print("ENV KEYS: {}".format(env.keys()))
+            raise
+
+    return reduced