From 7f6cbf3624a34f198532bde3a17013d2bfbb9f3e Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Tue, 31 Aug 2021 21:55:35 +0200 Subject: [PATCH 1/2] Fix SequanaConfig class (regression bug). --- .github/workflows/main.yml | 2 +- README.rst | 3 ++ sequana_pipetools/completion.py | 10 ++-- .../snaketools/sequana_config.py | 53 +++++++++++-------- setup.py | 6 +-- tests/snaketools/test_sequana_config.py | 22 ++++++++ 6 files changed, 68 insertions(+), 28 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ebcf973..2b76ee2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 5 matrix: - python: [3.7,3.8] + python: [3.7,3.8,3.9] steps: - name: checkout git repo diff --git a/README.rst b/README.rst index 57e9d41..0f9ba4e 100644 --- a/README.rst +++ b/README.rst @@ -170,6 +170,9 @@ Changelog ========= ==================================================================== Version Description ========= ==================================================================== +0.6.3 * Fix SequanaConfig file +0.6.2 * Fix script creation to include wrapper and take new snakemake + syntax into account 0.6.1 * update schema handling 0.6.0 * Move all modules related to pipelines rom sequana into sequana_pipetools; This release should now be the entry point for diff --git a/sequana_pipetools/completion.py b/sequana_pipetools/completion.py index 18f9bfc..9dcbf54 100644 --- a/sequana_pipetools/completion.py +++ b/sequana_pipetools/completion.py @@ -14,6 +14,7 @@ import os import argparse import pkgutil +from pkg_resources import DistributionNotFound import importlib @@ -217,9 +218,12 @@ def main(args=None): if choice == "y": print("Please source the files using:: \n") for name in names: - c = Complete(name) - c.save_completion_script() - print("source ~/.config/sequana/pipelines/{}.sh".format(name)) + try: + c = Complete(name) + c.save_completion_script() + print("source ~/.config/sequana/pipelines/{}.sh".format(name)) + except DistributionNotFound: + print(f"# Warning {name} could not be imported. Nothing done") print("\nto activate the completion") else: # pragma: no cover print("Stopping creation of completion scripts") diff --git a/sequana_pipetools/snaketools/sequana_config.py b/sequana_pipetools/snaketools/sequana_config.py index e3d6bc2..360b1a0 100644 --- a/sequana_pipetools/snaketools/sequana_config.py +++ b/sequana_pipetools/snaketools/sequana_config.py @@ -20,9 +20,10 @@ from pykwalify.core import Core, CoreError, SchemaError import ruamel.yaml + try: import importlib.resources as pkg_resources -except ImportError: +except ImportError: # pragma: no cover # Try backported to PY<37 `importlib_resources`. import importlib_resources as pkg_resources @@ -75,33 +76,37 @@ def __init__(self, data=None): except TypeError: if hasattr(data, "config"): self.config = AttrDict(**data.config) - self._yaml_code = ruamel.yaml.comments.CommentedMap(self.config.copy()) + self._yaml_code = ruamel.yaml.comments.CommentedMap( + self.config.copy() + ) else: - self.config = self._read_file(data) + # populate self._yaml_code + config = self._read_file(data) + self.config = AttrDict(**config) else: self.config = AttrDict() self._yaml_code = ruamel.yaml.comments.CommentedMap() self.cleanup_config() def _read_file(self, data): - """Read yaml or json file""" - try: - is_yml = data.endswith((".yaml", ".yml")) - except AttributeError: - raise IOError("Data param does not have the good format. It's a filename or a dict") + """Read yaml""" + if not data.endswith((".yaml", ".yml")): + logger.warning("You should use a YAML file with .yaml or .yml extension") if os.path.exists(data): yaml = ruamel.yaml.YAML() - if is_yml: - with open(data, "r") as fh: - self._yaml_code = yaml.load(fh.read()) - else: - # read a JSON - with open(data, "r") as fh: - self._yaml_code = yaml.load(json.dumps(json.loads(fh.read()))) + with open(data, "r") as fh: + self._yaml_code = yaml.load(fh.read()) + + # import the Python yaml module to avoid ruamel.yaml ordereddict and + # other structures. We only want the data + with open(data, "r") as fh: + import yaml as _yaml + + config = _yaml.load(fh, Loader=_yaml.FullLoader) + return config else: raise FileNotFoundError(f"input string must be an existing file {data}") - return AttrDict(**self._yaml_code.copy()) def save(self, filename="config.yaml", cleanup=True): """Save the yaml code in _yaml_code with comments""" @@ -124,8 +129,8 @@ def _recursive_update(self, target, data): # !! essential to use the update() method of the dictionary otherwise # comments are lost - for key, value in data.items(): + if isinstance(value, dict): target.update({key: self._recursive_update(target[key], value)}) elif isinstance(value, list): @@ -141,10 +146,12 @@ def _recursive_update(self, target, data): value = data[key] target.update({key: value}) else: - logger.warning("This %s key was not in the original config" " but added" % key) + logger.warning( + "This %s key was not in the original config" " but added" % key + ) value = data[key] target.update({key: value}) - else: + else: # pragma: no cover raise NotImplementedError( "Only dictionaries and list are authorised in the input configuration file." f" Key/value that cause error are {key}/{value}" @@ -214,7 +221,7 @@ def check_config_with_schema(self, schemafile): """ # add custom extensions - with pkg_resources.path('sequana_pipetools.resources', 'ext.py') as ext_name: + with pkg_resources.path("sequana_pipetools.resources", "ext.py") as ext_name: extensions = [str(ext_name)] # causes issue with ruamel.yaml 0.12.13. Works for 0.15 warnings.simplefilter("ignore", ruamel.yaml.error.UnsafeLoaderWarning) @@ -222,7 +229,11 @@ def check_config_with_schema(self, schemafile): # open the config and the schema file with TempFile(suffix=".yaml") as fh: self.save(fh.name) - c = Core(source_file=fh.name, schema_files=[schemafile], extensions=extensions) + c = Core( + source_file=fh.name, + schema_files=[schemafile], + extensions=extensions, + ) c.validate() return True except (SchemaError, CoreError) as err: diff --git a/setup.py b/setup.py index 480b9b3..2182ec0 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ _MAJOR = 0 _MINOR = 6 -_MICRO = 2 +_MICRO = 3 version = f"{_MAJOR}.{_MINOR}.{_MICRO}" release = f"{_MAJOR}.{_MINOR}" @@ -22,9 +22,9 @@ "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Operating System :: OS Independent", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Scientific/Engineering :: Information Analysis", diff --git a/tests/snaketools/test_sequana_config.py b/tests/snaketools/test_sequana_config.py index 1a04ccf..6d5bb62 100644 --- a/tests/snaketools/test_sequana_config.py +++ b/tests/snaketools/test_sequana_config.py @@ -16,6 +16,28 @@ def test_valid_config(tmpdir): config.save(fh) +# this is a regression bug that guarantees that +# changing an attribute is reflected in the output config file +# when we change a value +def test_attrdict(tmpdir): + s = Module("fastqc") + config = SequanaConfig(s.config) + + # This must be accessicle as an attribute + config.config.general.method_choice + + # moreover, we should be able to changed it + config.config.general.method_choice = "XXXX" + + # and save it . let us check that it was saved correcly + fh = tmpdir.join("config.yml") + config.save(fh) + + # by reading it back + config2 = SequanaConfig(str(fh)) + #assert config2.config.general.method_choice == "XXXX" + + def test_sequana_config(tmpdir): s = Module("fastqc") config = SequanaConfig(s.config) From afdd299683b4b92f4baaa2cc7261fa3df8c1812b Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Wed, 1 Sep 2021 15:25:05 +0200 Subject: [PATCH 2/2] Cleanup SequanaConfig (way config and yaml are updated) and related tests. --- sequana_pipetools/__init__.py | 3 +- sequana_pipetools/snaketools/__init__.py | 2 +- .../snaketools/pipeline_manager.py | 1 + .../snaketools/sequana_config.py | 37 +++++++------------ tests/snaketools/test_pipeline_manager.py | 10 +++-- tests/snaketools/test_sequana_config.py | 33 ++++++++++------- 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/sequana_pipetools/__init__.py b/sequana_pipetools/__init__.py index 177edef..844f840 100644 --- a/sequana_pipetools/__init__.py +++ b/sequana_pipetools/__init__.py @@ -15,5 +15,6 @@ logger = colorlog.getLogger(logger.name) -from .snaketools import Module, SequanaConfig, PipelineManagerGeneric, PipelineManager +from .snaketools import Module, SequanaConfig, PipelineManagerGeneric, PipelineManager, PipelineManagerDirectory from .sequana_manager import SequanaManager, get_pipeline_location + diff --git a/sequana_pipetools/snaketools/__init__.py b/sequana_pipetools/snaketools/__init__.py index be7a02e..395dea3 100644 --- a/sequana_pipetools/snaketools/__init__.py +++ b/sequana_pipetools/snaketools/__init__.py @@ -2,7 +2,7 @@ from .dot_parser import DOTParser from .module import Module, modules, pipeline_names from .module_finder import ModuleFinder -from .pipeline_manager import PipelineManager, PipelineManagerGeneric +from .pipeline_manager import PipelineManager, PipelineManagerGeneric, PipelineManagerDirectory from .pipeline_utils import (message, OnSuccessCleaner, OnSuccess, build_dynamic_rule, get_pipeline_statistics, create_cleanup, Makefile) from .file_factory import FileFactory, FastQFactory diff --git a/sequana_pipetools/snaketools/pipeline_manager.py b/sequana_pipetools/snaketools/pipeline_manager.py index 108a866..52a8b04 100644 --- a/sequana_pipetools/snaketools/pipeline_manager.py +++ b/sequana_pipetools/snaketools/pipeline_manager.py @@ -156,6 +156,7 @@ def clean_multiqc(self, filename): def teardown(self, extra_dirs_to_remove=[], extra_files_to_remove=[], plot_stats=True): # create and save the stats plot if plot_stats: + logger.warning("deprecated no stats to be provided in the future") N = len(self.samples.keys()) self.plot_stats(N=N) diff --git a/sequana_pipetools/snaketools/sequana_config.py b/sequana_pipetools/snaketools/sequana_config.py index 360b1a0..9577128 100644 --- a/sequana_pipetools/snaketools/sequana_config.py +++ b/sequana_pipetools/snaketools/sequana_config.py @@ -86,7 +86,9 @@ def __init__(self, data=None): else: self.config = AttrDict() self._yaml_code = ruamel.yaml.comments.CommentedMap() - self.cleanup_config() + + # remove templates and None->"" + self._recursive_cleanup(self.config) def _read_file(self, data): """Read yaml""" @@ -108,11 +110,11 @@ def _read_file(self, data): else: raise FileNotFoundError(f"input string must be an existing file {data}") - def save(self, filename="config.yaml", cleanup=True): + def save(self, filename="config.yaml"): """Save the yaml code in _yaml_code with comments""" - # This works only if the input data was a yaml - if cleanup: - self.cleanup() # changes the config and yaml_code to remove %()s + # make sure that the changes made in the config are saved into the yaml + # before saving it + self._update_yaml() # get the YAML formatted code and save it yaml = ruamel.yaml.YAML() @@ -161,11 +163,6 @@ def _recursive_update(self, target, data): def _update_yaml(self): self._recursive_update(self._yaml_code, self.config) - def _update_config(self): - # probably useless function now since we do not use json as input - # anymore - self._recursive_update(self.config, self._yaml_code) - def _recursive_cleanup(self, d): # expand the tilde (see https://github.com/sequana/sequana/issues/486) # remove the %() templates @@ -176,24 +173,15 @@ def _recursive_cleanup(self, d): if value is None: d[key] = "" elif isinstance(value, str): - if value.startswith("%("): - d[key] = None - else: - d[key] = value.strip() + d[key] = value.strip() + # https://github.com/sequana/sequana/issues/486 if key.endswith("_directory") and value.startswith("~/"): d[key] = os.path.expanduser(value) if key.endswith("_file") and value.startswith("~/"): d[key] = os.path.expanduser(value) - def cleanup_config(self): - self._recursive_cleanup(self.config) - # self._update_yaml() - - def cleanup(self): - """Remove template elements and change None to empty string.""" - self._recursive_cleanup(self._yaml_code) - self._update_config() + self._update_yaml() def copy_requirements(self, target): """Copy files to run the pipeline @@ -201,13 +189,14 @@ def copy_requirements(self, target): If a requirement file exists, it is copied in the target directory. If not, it can be either an http resources or a sequana resources. """ - + # make sure that if the config changed, the yaml is up-to-date + self._update_yaml() if "requirements" in self._yaml_code.keys(): for requirement in self._yaml_code["requirements"]: if os.path.exists(requirement): try: shutil.copy(requirement, target) - except shutil.SameFileError: + except shutil.SameFileError: #pragma: no cover pass # the target and input may be the same elif requirement.startswith("http"): logger.info(f"This file {requirement} will be needed. Downloading") diff --git a/tests/snaketools/test_pipeline_manager.py b/tests/snaketools/test_pipeline_manager.py index 67fb9a2..fc8f6db 100644 --- a/tests/snaketools/test_pipeline_manager.py +++ b/tests/snaketools/test_pipeline_manager.py @@ -17,20 +17,17 @@ def test_pipeline_manager(tmpdir): # normal behaviour but no input provided: config = Module("fastqc")._get_config() cfg = SequanaConfig(config) - cfg.cleanup() # remove templates with pytest.raises(ValueError): pm = snaketools.PipelineManager("custom", cfg) # normal behaviour cfg = SequanaConfig(config) - cfg.cleanup() # remove templates file1 = os.path.join(test_dir, "data", "Hm2_GTGAAA_L005_R1_001.fastq.gz") cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1) pm = snaketools.PipelineManager("custom", cfg) assert not pm.paired cfg = SequanaConfig(config) - cfg.cleanup() # remove templates cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1) cfg.config.input_pattern = "Hm*gz" pm = snaketools.PipelineManager("custom", cfg) @@ -130,7 +127,12 @@ class WF: wf = WF() gg["workflow"] = wf pm.setup(gg) - pm.teardown() + try: + pm.teardown() + except Exception: + assert False + finally: + os.remove("Makefile") multiqc = tmpdir.join('multiqc.html') with open(multiqc, 'w') as fh: diff --git a/tests/snaketools/test_sequana_config.py b/tests/snaketools/test_sequana_config.py index 6d5bb62..5453e72 100644 --- a/tests/snaketools/test_sequana_config.py +++ b/tests/snaketools/test_sequana_config.py @@ -31,11 +31,13 @@ def test_attrdict(tmpdir): # and save it . let us check that it was saved correcly fh = tmpdir.join("config.yml") + config.save(fh) # by reading it back config2 = SequanaConfig(str(fh)) - #assert config2.config.general.method_choice == "XXXX" + assert config2.config.general.method_choice == "XXXX" + def test_sequana_config(tmpdir): @@ -49,17 +51,18 @@ def test_sequana_config(tmpdir): config = SequanaConfig() config = SequanaConfig({"test": 1}) assert config.config.test == 1 + # with a dictionary config = SequanaConfig(config.config) + # with a sequanaConfig instance config = SequanaConfig(config) - # with a non-yaml file - json = os.path.join(test_dir, "data", "test_summary_fastq_stats.json") - config = SequanaConfig(json) + + # with a non-existing file with pytest.raises(FileNotFoundError): config = SequanaConfig("dummy_dummy") - # Test an exception + # Test warning s = Module("fastqc") config = SequanaConfig(s.config) config._recursive_update(config._yaml_code, {"input_directory_dummy": "test"}) @@ -74,18 +77,26 @@ def test_sequana_config(tmpdir): # # is unchanged + # test all installed pipelines saving/reading config file output = tmpdir.join("test.yml") for pipeline in snaketools.pipeline_names: config_filename = Module(pipeline)._get_config() cfg1 = SequanaConfig(config_filename) - cfg1.cleanup() # remove templates and strip strings cfg1.save(output) cfg2 = SequanaConfig(str(output)) assert cfg2._yaml_code == cfg1._yaml_code - cfg2._update_config() assert cfg1.config == cfg2.config + # test config with a _directory or _file based on the fastqc pipeline + cfg = SequanaConfig(s.config) + cfg.config.input_directory = "~/" + cfg.config.multiqc.config_file = "~/" + cfg._recursive_cleanup(cfg.config) + assert cfg.config.input_directory.startswith("/") + assert cfg.config.multiqc.config_file.startswith("/") + + def test_copy_requirements(tmpdir): # We need 4 cases: @@ -106,10 +117,10 @@ def test_copy_requirements(tmpdir): cfg.config.requirements = [ "phiX174.fa", str(tmp_require), - # localfile, + "__init__.py", + "setup.py", "https://raw.githubusercontent.com/sequana/sequana/master/README.rst", ] - cfg._update_yaml() cfg.copy_requirements(target=str(tmpdir)) # error @@ -129,7 +140,6 @@ def test_check_bad_config_with_schema(): cfg = SequanaConfig(cfg_name) # threads can not be a string cfg.config["busco"]["threads"] = "" - cfg._update_yaml() assert not cfg.check_config_with_schema(schema) @@ -142,14 +152,11 @@ def test_check_config_with_schema_and_ext(tmpdir): # not nullable key cannot be empty cfg.config["busco"]["lineage"] = "" - cfg._update_yaml() assert not cfg.check_config_with_schema(schema) # but it is not a problem the rule is not necessary cfg.config["busco"]["do"] = False - cfg._update_yaml() assert cfg.check_config_with_schema(schema) cfg.config["busco"].update({"do": True, "lineage": "bacteria", "threads": ""}) - cfg._update_yaml() assert not cfg.check_config_with_schema(schema)