From d8264b062d9a5fc52603acf99277e99cc786dc7e Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Tue, 15 Mar 2022 17:15:42 +0100 Subject: [PATCH 1/2] update metadata --- .github/workflows/main.yml | 2 +- README.rst | 1 + setup.py | 12 +++++------- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 24f2a12..40ed1c6 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 5 matrix: - python: [3.7,3.8] + python: [3.7,3.8,3.9] fail-fast: false diff --git a/README.rst b/README.rst index 255742a..f8f8cc0 100644 --- a/README.rst +++ b/README.rst @@ -91,6 +91,7 @@ ChangeLog ========= ==================================================================== Version Description ========= ==================================================================== +0.10.0 * add missing MANIFEST 0.9.0 * remove design_file for cutadapt to reflect changes in sequana 0.12.0 * update kraken rules to use a kraken2 version diff --git a/setup.py b/setup.py index 5578d16..de8fc08 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,9 @@ -# -*- coding: utf-8 -*- # License: 3-clause BSD -__revision__ = "$Id: $" # for the SVN Id from setuptools import setup, find_namespace_packages _MAJOR = 0 -_MINOR = 8 -_MICRO = 5 +_MINOR = 10 +_MICRO = 0 version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO) release = '%d.%d' % (_MAJOR, _MINOR) @@ -18,7 +16,6 @@ 'platforms' : ['Linux', 'Unix', 'MacOsX', 'Windows'], 'keywords' : ['snakemake, NGS, sequana, denovo, assembly, short read'], 'classifiers' : [ - #'Development Status :: 4 - Beta', 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Education', 'Intended Audience :: End Users/Desktop', @@ -26,8 +23,9 @@ 'Intended Audience :: Science/Research', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Scientific/Engineering :: Bio-Informatics', 'Topic :: Scientific/Engineering :: Information Analysis', From 8746112d14ae1d5facd6a1c622a92f564c8a4938 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Wed, 16 Mar 2022 15:37:09 +0100 Subject: [PATCH 2/2] add missing MANIFEST --- MANIFEST.in | 6 ++ sequana_pipelines/quality_control/main.py | 38 ++++----- .../quality_control/quality_control.rules | 77 ++++++++----------- 3 files changed, 56 insertions(+), 65 deletions(-) create mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..277de1f --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,6 @@ +recursive-exclude * __pycache__ +recursive-exclude * *pyc +recursive-exclude * doc/wiki +recursive-include * *rules +include requirements*txt +include README.rst diff --git a/sequana_pipelines/quality_control/main.py b/sequana_pipelines/quality_control/main.py index 61a6718..46e2626 100755 --- a/sequana_pipelines/quality_control/main.py +++ b/sequana_pipelines/quality_control/main.py @@ -1,4 +1,3 @@ - # # This file is part of Sequana software # @@ -30,9 +29,12 @@ class Options(argparse.ArgumentParser): def __init__(self, prog=NAME, epilog=None): usage = col.purple(sequana_prolog.format(**{"name": NAME})) - super(Options, self).__init__(usage=usage, prog=prog, description="", + super(Options, self).__init__( + usage=usage, + prog=prog, + description="", epilog=epilog, - formatter_class=argparse.ArgumentDefaultsHelpFormatter + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) # add a new group of options to the parser @@ -50,15 +52,13 @@ def __init__(self, prog=NAME, epilog=None): so.add_options(self) pipeline_group = self.add_argument_group("pipeline") - pipeline_group.add_argument("--skip-phix-removal", action="store_true", - help="Do no remove the Phix") - pipeline_group.add_argument("--skip-fastqc-raw", action="store_true", - help="Do not perform fastqc on raw data") - pipeline_group.add_argument("--skip-fastqc-cleaned", action="store_true", - help="Do not perform fastqc on cleaned data") - + pipeline_group.add_argument("--skip-phix-removal", action="store_true", help="Do no remove the Phix") + pipeline_group.add_argument("--skip-fastqc-raw", action="store_true", help="Do not perform fastqc on raw data") + pipeline_group.add_argument( + "--skip-fastqc-cleaned", action="store_true", help="Do not perform fastqc on cleaned data" + ) - so = TrimmingOptions(software=['cutadapt', 'atropos']) + so = TrimmingOptions(software=["cutadapt", "atropos"]) so.software_default = "cutadapt" so.add_options(self) @@ -66,8 +66,7 @@ def __init__(self, prog=NAME, epilog=None): so.add_options(self) # others - self.add_argument("--run", default=False, action="store_true", - help="execute the pipeline directly") + self.add_argument("--run", default=False, action="store_true", help="execute the pipeline directly") def main(args=None): @@ -99,9 +98,9 @@ def main(args=None): # --------------------------------------------------------- trimming cfg.trimming.software_choice = options.trimming_software_choice cfg.trimming.do = not options.disable_trimming - qual = options.trimming_quality + qual = options.trimming_quality - if options.trimming_software_choice in ['cutadapt', 'atropos']: + if options.trimming_software_choice in ["cutadapt", "atropos"]: cfg.cutadapt.tool_choice = options.trimming_software_choice cfg.cutadapt.fwd = options.trimming_adapter_read1 cfg.cutadapt.rev = options.trimming_adapter_read2 @@ -110,7 +109,6 @@ def main(args=None): cfg.cutadapt.options = options.trimming_cutadapt_options # trim Ns -O 6 cfg.cutadapt.quality = 30 if qual == -1 else qual - # -------------------------------------------------- bwa section cfg.bwa_mem_phix.do = not options.skip_phix_removal @@ -121,8 +119,7 @@ def main(args=None): cfg.kraken.do = True if options.kraken_databases: - cfg.kraken.databases = [os.path.abspath(x) - for x in options.kraken_databases] + cfg.kraken.databases = [os.path.abspath(x) for x in options.kraken_databases] for this in options.kraken_databases: manager.exists(this) @@ -132,14 +129,13 @@ def main(args=None): if options.skip_fastqc_raw: cfg.fastqc.do_raw = False - # finalise the command and save it; copy the snakemake. update the config # file and save it. manager.teardown() - if options.run: - subprocess.Popen(["sh", '{}.sh'.format(NAME)], cwd=options.workdir) + subprocess.Popen(["sh", "{}.sh".format(NAME)], cwd=options.workdir) + if __name__ == "__main__": main() diff --git a/sequana_pipelines/quality_control/quality_control.rules b/sequana_pipelines/quality_control/quality_control.rules index ec24e12..0e29a65 100644 --- a/sequana_pipelines/quality_control/quality_control.rules +++ b/sequana_pipelines/quality_control/quality_control.rules @@ -2,6 +2,7 @@ # # Copyright (c) 2016-2021 - Sequana Dev Team (https://sequana.readthedocs.io) # +# Distributed under the terms of the 3-clause BSD license. # The full license is in the LICENSE file, distributed with this software. # # Website: https://github.com/sequana/sequana @@ -27,7 +28,7 @@ configfile: "config.yaml" # The sequana pipeline manager manager = PipelineManager("quality_control", config) -manager.setup(globals(), mode="warning") + expected_output = [] @@ -36,7 +37,7 @@ expected_output += expand("{sample}/fastq_stats_samples/{sample}.json", sample=m if manager.config.fastqc.do_raw: expected_output += expand("{sample}/fastqc_raw/fastqc.done", sample=manager.samples) -# if we remove the phix, +# if we remove the phix if manager.config.bwa_mem_phix.do: expected_output += expand("{sample}/fastq_stats_phix/{sample}.json", sample=manager.samples) if manager.config.fastqc.do_after_phix_removal: @@ -51,7 +52,9 @@ if manager.config.kraken.do: expected_output += expand("{sample}/kraken/kraken/kraken.json", sample=manager.samples) rule pipeline: - input: ".sequana/rulegraph.svg", expand(manager.getname("summary_pipeline", ".json"), sample=manager.samples), expected_output + input: ".sequana/rulegraph.svg", + expand("{sample}/summary_pipeline/{sample}.json", sample=manager.samples), + expected_output # Sanity checks ============================================================ @@ -70,14 +73,11 @@ if manager.config.kraken.do is True: "applications.html#sequana-taxonomy") raise Exception -# Starting of the pipeline -__rawdata__input = manager.getrawdata() - # FASTQC on input data set if manager.config.fastqc.do_raw: rule fastqc_samples: - input: __rawdata__input + input: manager.getrawdata() output: done="{sample}/fastqc_raw/fastqc.done" params: @@ -94,7 +94,7 @@ if manager.config.fastqc.do_raw: # FASTQ stats on input data set rule fastq_stat_samples: - input: __rawdata__input + input: manager.getrawdata() output: json="{sample}/fastq_stats_samples/{sample}.json", gc="{sample}/fastq_stats_samples/{sample}_gc.png", @@ -192,16 +192,16 @@ if manager.config.bwa_mem_phix.do: - rule fastq_stat_phix: - input: __bwa_bam_to_fastq__fastq_output - output: - json="{sample}/fastq_stats_phix/{sample}.json", - gc="{sample}/fastq_stats_phix/{sample}_gc.png", - boxplot="{sample}/fastq_stats_phix/{sample}_boxplot.png" - params: - max_reads=config['fastq_stats']['max_reads'] - wrapper: - "main/wrappers/fastq_stats" + rule fastq_stat_phix: + input: __bwa_bam_to_fastq__fastq_output + output: + json="{sample}/fastq_stats_phix/{sample}.json", + gc="{sample}/fastq_stats_phix/{sample}_gc.png", + boxplot="{sample}/fastq_stats_phix/{sample}_boxplot.png" + params: + max_reads=config['fastq_stats']['max_reads'] + wrapper: + "main/wrappers/fastq_stats" @@ -286,33 +286,23 @@ if manager.config.kraken.do: include: sm.modules["kraken/2.0"] -# Include the rule graph -__rulegraph__output = ".sequana/rulegraph.svg" -__rulegraph__mapper = { - "fastqc_raw": "../fastqc_raw.html", - "fastqc_phix": "../fastqc_phix.html", - "fastqc_trimmed": "../fastqc_trimmed.html", - "cutadapt": "../cutadapt.html", - "kraken": "../kraken/kraken/kraken.html", - "kraken_translate": "../kraken/raken/kraken.html", - "kraken_to_krona": "../kraken/kraken/kraken.html", -} - -rule rulegraph: - input: str(manager.snakefile) - output: - svg = __rulegraph__output - params: - mapper = __rulegraph__mapper, - configname = "config.yaml" - wrapper: - "main/wrappers/rulegraph" +# ====================================================================== rulegraph +sequana_rulegraph_mapper = { + "fastqc_raw": "../fastqc_raw.html", + "fastqc_phix": "../fastqc_phix.html", + "fastqc_trimmed": "../fastqc_trimmed.html", + "cutadapt": "../cutadapt.html", + "kraken": "../kraken/kraken/kraken.html", + "kraken_translate": "../kraken/raken/kraken.html", + "kraken_to_krona": "../kraken/kraken/kraken.html", + } +include: sm.modules['rulegraph'] # create a json file that summarise information of your pipeline -__summary_pipeline__inputs = __rawdata__input +__summary_pipeline__inputs = manager.getrawdata() if manager.config['cutadapt'].do: # todo: handle all adapter removal cases __summary_pipeline__outputs = [ __cutadapt__output ] @@ -327,12 +317,12 @@ if not os.path.exists(".sequana/env.yaml"): with open(".sequana/env.yaml", "w") as f: f.write("") __summary_pipeline__html = [] -__summary_pipeline__rulegraph = __rulegraph__output +__summary_pipeline__rulegraph = ".sequana/rulegraph.svg" __summary_pipeline__requirements = ".sequana/env.yaml" __summary_pipeline__snakefile = str(manager.snakefile) __summary_pipeline__config = "config.yaml" __summary_pipeline__name = "Quality Control" -__summary_pipeline__json_output = manager.getname("summary_pipeline", ".json") +__summary_pipeline__json_output = "{sample}/summary_pipeline/{sample}.json" include: sm.modules["summary_pipeline"] @@ -394,6 +384,7 @@ onsuccess: # the phix section --------------------------------- if manager.config.bwa_mem_phix.do : + print(proj) phixmod = PhixModule(proj) sample_summary["phix_section_json"] = json.loads(phixmod._get_stats().to_json()) sample_summary["phix_section"] = phixmod._get_summary() @@ -474,8 +465,6 @@ onsuccess: sc = sm.OnSuccessCleaner("quality_control") sc.files_to_remove.append("phiX174.fa") sc.add_makefile() - # For later - #sm.clean_multiqc(__multiqc__output) onerror: from sequana_pipetools.errors import PipeError p = PipeError("quality_control")