Skip to content

Commit

Permalink
use default compute in generic Project; generalize looperenv naming; …
Browse files Browse the repository at this point in the history
…fix AttributeDict comparison method
  • Loading branch information
vreuter committed Jun 12, 2017
1 parent e0ced3c commit 70fd1e6
Show file tree
Hide file tree
Showing 7 changed files with 142 additions and 55 deletions.
6 changes: 0 additions & 6 deletions looper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,6 @@
from ._version import __version__


LOOPERENV_VARNAME = "LOOPERENV"
SUBMISSION_TEMPLATES_FOLDER = "submit_templates"
DEFAULT_LOOPERENV_FILENAME = "default_looperenv.yaml"
DEFAULT_LOOPERENV_CONFIG_RELATIVE = os.path.join(SUBMISSION_TEMPLATES_FOLDER,
DEFAULT_LOOPERENV_FILENAME)

LOGGING_LEVEL = "INFO"
LOGGING_LOCATIONS = (stdout, )

Expand Down
24 changes: 1 addition & 23 deletions looper/loodels.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
""" Looper versions of NGS project models. """

import os
from . import models


__author__ = "Vince Reuter"
__email__ = "vreuter@virginia.edu"


DEFAULT_PROJECT_COMPUTE_NAME = "default_looperenv.yaml"
SUBMISSION_TEMPLATES_FOLDER = "submit_templates"



class Project(models.Project):
"""
Expand All @@ -27,30 +22,13 @@ class Project(models.Project):
:type default_compute: str
"""
def __init__(self, config_file,
subproject=None, default_compute=None, **kwargs):
if not default_compute:
looper_folder = os.path.dirname(__file__)
default_compute = os.path.join(looper_folder,
SUBMISSION_TEMPLATES_FOLDER, DEFAULT_PROJECT_COMPUTE_NAME)
def __init__(self, config_file, subproject=None, **kwargs):
super(Project, self).__init__(
config_file, subproject=subproject,
default_compute=default_compute,
no_environment_exception=RuntimeError,
no_compute_exception=RuntimeError, **kwargs)


@property
def compute_env_var(self):
"""
Environment variable through which to access compute settings.
:return str: name of the environment variable pointing to
compute settings
"""
return "LOOPERENV"


@property
def required_metadata(self):
""" Which metadata attributes are required. """
Expand Down
7 changes: 5 additions & 2 deletions looper/looper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
import sys
import time
import pandas as _pd
from . import setup_looper_logger, LOGGING_LEVEL, __version__, LOOPERENV_VARNAME
from . import setup_looper_logger, LOGGING_LEVEL, __version__
from .loodels import Project
from .models import COMPUTE_SETTINGS_VARNAME
from .utils import VersionInHelpParser

try:
Expand All @@ -32,6 +33,8 @@

SAMPLE_EXECUTION_TOGGLE = "toggle"

# Descending by severity for correspondence with logic inversion.
# That is, greater verbosity setting corresponds to lower logging level.
# The list is indexed by verbosity, so it must be strictly descending:
# each additional step of verbosity may only admit more messages.
_LEVEL_BY_VERBOSITY = [logging.CRITICAL, logging.ERROR, logging.WARNING,
                       logging.INFO, logging.DEBUG]

Expand Down Expand Up @@ -96,7 +99,7 @@ def parse_arguments():
help="YAML file with looper environment compute settings.")
run_subparser.add_argument(
"--env",
default=os.getenv("{}".format(LOOPERENV_VARNAME), ""),
default=os.getenv("{}".format(COMPUTE_SETTINGS_VARNAME), ""),
help="Employ looper environment compute settings.")
run_subparser.add_argument(
"--limit",
Expand Down
50 changes: 37 additions & 13 deletions looper/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
parse_ftype, check_bam, check_fastq, get_file_size, partition


COMPUTE_SETTINGS_VARNAME = "PEPENV"
DEFAULT_COMPUTE_RESOURCES_NAME = "default"
DATA_SOURCE_COLNAME = "data_source"
SAMPLE_NAME_COLNAME = "sample_name"
Expand Down Expand Up @@ -275,10 +276,20 @@ def __delitem__(self, item):
_LOGGER.debug("No item {} to delete".format(item))

def __eq__(self, other):
    """
    Compare this instance to another object by keys and values.

    :param object other: object to compare; it is first passed to the
        AttributeDict constructor, and failure to convert means unequal
    :return bool: whether the converted object has the same number of
        items as this instance and an equal value for each of its keys
    """
    try:
        # Ensure target itself and any values are AttributeDict.
        other = AttributeDict(other)
    except Exception:
        # Whatever can't be interpreted as an AttributeDict is not equal.
        return False
    if len(self) != len(other):
        # Ensure we don't have to worry about other containing self.
        return False
    for k, v in self.items():
        try:
            if v != other[k]:
                return False
        except KeyError:
            # Same size but a differing key set.
            return False
    return True

def __ne__(self, other):
Expand Down Expand Up @@ -347,22 +358,23 @@ def __init__(self, config_file, subproject=None,
permissive=True, file_checks=False, compute_env_file=None,
no_environment_exception=None, no_compute_exception=None):

super(Project, self).__init__()

_LOGGER.info("Creating %s from file: '%s'",
self.__class__.__name__, config_file)
super(Project, self).__init__()

default_compute = default_compute or self.default_cmpenv_file

# Initialize local, serial compute as default (no cluster submission)
# Start with default environment settings.
_LOGGER.debug("Establishing default environment settings")
self.environment, self.environment_file = None, None
if default_compute:
try:
self.update_environment(default_compute)
except Exception as e:
_LOGGER.error("Can't load environment config file '%s'",
str(default_compute))
_LOGGER.error(str(type(e).__name__) + str(e))

try:
self.update_environment(default_compute)
except Exception as e:
_LOGGER.error("Can't load environment config file '%s'",
str(default_compute))
_LOGGER.error(str(type(e).__name__) + str(e))

self._handle_missing_env_attrs(
default_compute, when_missing=no_environment_exception)
Expand Down Expand Up @@ -433,6 +445,18 @@ def __init__(self, config_file, subproject=None,
self.finalize_pipelines_directory()


@property
def default_cmpenv_file(self):
    """ Path to the default compute environment settings file. """
    # The file is located directly within the templates folder.
    settings_filename = "default_compute_settings.yaml"
    return _os.path.join(self.templates_folder, settings_filename)


@property
def templates_folder(self):
    """ Path to the 'submit_templates' folder beside this module's file. """
    module_folder = _os.path.dirname(__file__)
    return _os.path.join(module_folder, "submit_templates")


@property
def compute_env_var(self):
"""
Expand All @@ -441,7 +465,7 @@ def compute_env_var(self):
:return str: name of the environment variable pointing to
compute settings
"""
return "COMPUTE_SETTINGS"
return COMPUTE_SETTINGS_VARNAME


@property
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# submission_template: the submission form whose fields will be filled in with compute resource parameters
# Use this to change your cluster manager (SLURM, SGE, LFS, etc)
# Relative paths are relative to this looperenv yaml file
# Use this to change your cluster manager (SLURM, SGE, LSF, etc).
# Relative paths are relative to this compute environment configuration file.
# Compute resource parameters fill submission_template file's fields.
compute:
default:
submission_template: localhost_template.sub
submission_command: sh
partition: longq # For backwards-compatibility with old projects at CeMM; to be removed.
partition: longq # Backwards compatibility with old projects at CeMM
local:
submission_template: localhost_template.sub
submission_command: sh
Expand Down
13 changes: 9 additions & 4 deletions tests/models/independent/test_AttributeDict.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,30 +294,35 @@ def test_merge_mappings(
self, name_setter_func, name_getter_func):
""" During construction/insertion, KV pair mappings merge. """

# Create bare AttributeDict and select the parameterized get/set.
attrdict = AttributeDict()
raw_data = {}
setter = getattr(attrdict, name_setter_func)
getter = getattr(attrdict, name_getter_func)

# Add the JPA data.
setter("JPA", self.WEST_COMPLEX_DATA)
raw_data.update({"JPA": self.WEST_COMPLEX_DATA})
# Mappings are converted to AttributeDict when added.
observed = getter("JPA")
assert isinstance(observed, AttributeDict)
assert self.WEST_COMPLEX_DATA == observed

# Perform the same sort of addition and assertions for the Lane data.
setter("Lane", self.INITIAL_MR_DATA)
raw_data.update({"Lane": self.INITIAL_MR_DATA})
assert isinstance(getter("Lane"), AttributeDict)
assert raw_data == attrdict

# Add Pinn data, also attributed to JPA. This should trigger a merge.
setter("JPA", self.PINN_DATA)
observed = getter("JPA")
assert isinstance(observed, AttributeDict)
tempdict = deepcopy(self.WEST_COMPLEX_DATA)
tempdict.update(self.INITIAL_MR_DATA)
tempdict.update(self.PINN_DATA)
assert tempdict == observed
assert isinstance(observed, AttributeDict)
assert AttributeDict(tempdict) == observed

tempdict.update(self.INITIAL_MR_DATA)
setter("Lane", self.NEW_MR_DATA)
higher_level_tempdict = {"JPA": self.WEST_COMPLEX_DATA}
higher_level_tempdict["JPA"].update(self.PINN_DATA)
Expand Down Expand Up @@ -485,7 +490,7 @@ def test_pickle_restoration(self, tmpdir, data, data_type):

# Validate equivalence between original and restored versions.
with open(filepath, 'rb') as pkl:
restored_attrdict = pickle.load(pkl)
restored_attrdict = AttributeDict(pickle.load(pkl))
assert restored_attrdict == original_attrdict


Expand Down
89 changes: 86 additions & 3 deletions tests/models/independent/test_Project.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import yaml
import looper
from looper.models import \
Project, _MissingMetadataException, SAMPLE_ANNOTATIONS_KEY
AttributeDict, Project, \
_MissingMetadataException, SAMPLE_ANNOTATIONS_KEY


__author__ = "Vince Reuter"
Expand Down Expand Up @@ -102,12 +103,36 @@ class ProjectDefaultEnvironmentSettingsTests:
@pytest.mark.parametrize(
argnames="explicit_null", argvalues=[False, True],
ids=lambda explicit_null: "explicit_null={}".format(explicit_null))
@pytest.mark.parametrize(
argnames="compute_env_attname",
argvalues=["environment", "environment_file", "compute"],
ids=lambda attr: "attr={}".format(attr))
def test_no_default_env_settings_provided(
self, minimal_project_conf_path, explicit_null):
self, minimal_project_conf_path,
explicit_null, compute_env_attname):
""" Project doesn't require default environment settings. """

kwargs = {"default_compute": None} if explicit_null else {}
project = Project(minimal_project_conf_path, **kwargs)
self._assert_null_compute_environment(project)

observed_attribute = getattr(project, compute_env_attname)
expected_attribute = \
self.default_compute_settings(project)[compute_env_attname]

if compute_env_attname == "compute":
# 'compute' refers to a section in the default environment
# settings file and also to a Project attribute. A Project
# instance selects just one of the options in the 'compute'
# section of the file as the value for its 'compute' attribute.
expected_attribute = expected_attribute["default"]
observed_attribute = _compute_paths_to_names(observed_attribute)
elif compute_env_attname == "environment":
envs_with_reduced_filepaths = \
_env_paths_to_names(observed_attribute["compute"])
observed_attribute = AttributeDict(
{"compute": envs_with_reduced_filepaths})

assert expected_attribute == observed_attribute


@pytest.mark.parametrize(
Expand Down Expand Up @@ -193,6 +218,16 @@ def _assert_null_compute_environment(project):
assert project.compute is None


@staticmethod
def default_compute_settings(project):
    """
    Read a project's default compute settings file into expected values.

    :param project: object whose default_cmpenv_file attribute is the
        path to a YAML settings file
    :return dict: parsed settings under "environment", the file's path
        under "environment_file", and the settings' "compute" section
        under "compute"; the parsed data are deep-copied
    """
    env_file = project.default_cmpenv_file
    with open(env_file, 'r') as env_data_file:
        parsed = yaml.safe_load(env_data_file)
    expected = {}
    expected["environment"] = copy.deepcopy(parsed)
    expected["environment_file"] = env_file
    expected["compute"] = copy.deepcopy(parsed)["compute"]
    return expected



class DerivedColumnsTests:
""" Tests for the behavior of Project's derived_columns attribute. """
Expand Down Expand Up @@ -523,3 +558,51 @@ def _write_project_config(config_data, dirpath, filename="proj-conf.yaml"):
with open(conf_file_path, 'w') as conf_file:
yaml.safe_dump(config_data, conf_file)
return conf_file_path



def _env_paths_to_names(envs):
    """
    Convert filepath(s) in each environment to filename for assertion.

    Project instance will ensure that filepaths are absolute, but we want
    assertion logic here to be independent of that (unless that's under test).

    :param Mapping[str, Mapping] envs: environment data by name
    :return Mapping[str, Mapping]: same as the input,
        but with conversion(s) performed
    """
    # Each environment is converted independently by the
    # single-environment helper.
    return {env_name: _compute_paths_to_names(env_data)
            for env_name, env_data in envs.items()}



def _compute_paths_to_names(env):
    """
    Single-environment version of conversion of filepath(s) to name(s).

    This is similarly motivated by allowing tests' assertions about
    equality between Mappings to be independent of Project instance's
    effort to ensure that filepaths are absolute.

    :param Mapping env: environment datum by name
    :return Mapping: deep copy of the input, but with each path-valued
        variable reduced to the final component of its path
    :raise KeyError: if a path-valued variable is missing from a
        dict-like environment
    """
    reduced = copy.deepcopy(env)
    for pathvar in ["submission_template"]:
        # Keep only the filename; a missing variable propagates as an error.
        reduced[pathvar] = os.path.basename(reduced[pathvar])
    return reduced

0 comments on commit 70fd1e6

Please sign in to comment.