From 8eae29ad488719b7f1d076737dcc2e9724af8db2 Mon Sep 17 00:00:00 2001 From: Thomas Cokelaer Date: Tue, 30 Aug 2022 23:40:50 +0200 Subject: [PATCH] Refactoring to use latest sequana_pipetools --- .github/workflows/main.yml | 63 ++++++++++++++++++ .github/workflows/pypi.yml | 39 +++++++++++ MANIFEST.in | 2 + README.rst | 15 ++++- requirements.txt | 4 +- .../downsampling/downsampling.rules | 23 ++----- sequana_pipelines/downsampling/main.py | 33 +++++++-- setup.py | 16 ++--- test/__init__.py | 3 + .../downsampling => test}/data/__init__.py | 0 .../data/data_R1_001.fasta | 0 .../data/data_R1_001.fastq.gz | Bin .../data/data_R2_001.fasta | 0 .../data/data_R2_001.fastq.gz | Bin test/test_main.py | 16 ++--- 15 files changed, 171 insertions(+), 43 deletions(-) create mode 100644 .github/workflows/main.yml create mode 100644 .github/workflows/pypi.yml rename {sequana_pipelines/downsampling => test}/data/__init__.py (100%) rename {sequana_pipelines/downsampling => test}/data/data_R1_001.fasta (100%) rename {sequana_pipelines/downsampling => test}/data/data_R1_001.fastq.gz (100%) rename {sequana_pipelines/downsampling => test}/data/data_R2_001.fasta (100%) rename {sequana_pipelines/downsampling => test}/data/data_R2_001.fastq.gz (100%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..c0e9222 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,63 @@ +name: Tests + +on: + push: + branches: + - main + - dev + pull_request: + branches-ignore: [] + schedule: + - cron: '0 0 * * SUN' + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + matrix: + python: [3.7,3.8,3.9] + fail-fast: false + + + steps: + + - name: install graphviz + run: | + sudo apt-get install -y graphviz + + - name: checkout git repo + uses: actions/checkout@v2 + + - name: Set up Python 3.X + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + + - name: Add conda to system path + run: | + # $CONDA is 
an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + + - name: conda + run: | + conda install -c conda-forge -c bioconda --quiet -y python=${{ matrix.python }} + conda install -c conda-forge -y ncurses + - name: Install dependencies + run: | + pip install coveralls pytest-cov pytest pytest-xdist + + - name: install package itself + run: | + pip install . + + - name: testing + run: | + pytest -v --cov-report term-missing --cov=sequana_pipelines.downsampling + + - name: coveralls + run: | + echo $COVERALLS_REPO_TOKEN + coveralls --service=github + env: + GITHUB_TOKEN: ${{ github.token }} diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 0000000..18a194d --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,39 @@ +name: Publish to PyPI +on: + workflow_dispatch: + push: + tags: + - '*' + +jobs: + build-n-publish: + name: Build and publish to PyPI and TestPyPI + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@main + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: 3.7 + + - name: Install package + run: | + pip install build + + - name: Build source tarball + run: | + rm -rf dist; + python setup.py sdist + + - name: Publish distribution to Test PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository_url: https://test.pypi.org/legacy/ + - name: Publish distribution to PyPI + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/MANIFEST.in b/MANIFEST.in index 5219b3c..3af15e8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,3 +3,5 @@ recursive-exclude * *pyc recursive-include * *rules include requirements*txt include README.rst +prune test +prune doc diff --git a/README.rst b/README.rst index 18f4c33..30d3c8e 100644 --- a/README.rst 
+++ b/README.rst @@ -1,9 +1,22 @@ + + +.. image:: https://badge.fury.io/py/sequana-downsampling.svg + :target: https://pypi.python.org/pypi/sequana_downsampling + +.. image:: http://joss.theoj.org/papers/10.21105/joss.00352/status.svg + :target: http://joss.theoj.org/papers/10.21105/joss.00352 + :alt: JOSS (journal of open source software) DOI + +.. image:: https://github.com/sequana/downsampling/actions/workflows/main.yml/badge.svg + :target: https://github.com/sequana/downsampling/actions/workflows/main.yml + + This is is the **downsampling** pipeline from the `Sequana `_ project :Overview: downsample NGS data sets :Input: a set of FastQ or FASTA files :Output: a set of downsampled files -:Status: mature +:Status: production :Citation(sequana): Cokelaer et al, (2017), ‘Sequana’: a Set of Snakemake NGS pipelines, Journal of Open Source Software, 2(16), 352, JOSS DOI doi:10.21105/joss.00352 :Citation(pipeline): .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4047837.svg diff --git a/requirements.txt b/requirements.txt index d2025de..1ab2e2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -sequana>=0.9.8 -sequana_pipetools>=0.4.0 +sequana>=0.14.2 +sequana_pipetools>=0.9.2 diff --git a/sequana_pipelines/downsampling/downsampling.rules b/sequana_pipelines/downsampling/downsampling.rules index 7fd20d2..919c163 100644 --- a/sequana_pipelines/downsampling/downsampling.rules +++ b/sequana_pipelines/downsampling/downsampling.rules @@ -9,22 +9,20 @@ snakemake -s downsampling.rules --forceall --stats stats.txt --cores 4 """ -import sequana -from sequana import snaketools as sm +from sequana_pipetools import PipelineManager + from sequana import sequana_data -from sequana import FastQ # This must be defined before the include configfile: "config.yaml" if config['downsampling']['input_format'] in ['fastq', 'fasta']: - manager = sm.PipelineManager("downsampling", config, fastq=True) + manager = PipelineManager("downsampling", config, fastq=True) 
else: # not yet fully tested - manager = sm.PipelineManager("downsampling", config, fastq=False) + manager = PipelineManager("downsampling", config, fastq=False) -__rawdata__input = manager.getrawdata() @@ -35,7 +33,7 @@ rule pipeline: if config["downsampling"]['input_format'] == "fastq": rule downsampling: - input: __rawdata__input + input: manager.getrawdata() output: "output/{sample}.done" threads: config["downsampling"]['threads'] run: @@ -89,15 +87,8 @@ else: onsuccess: - from sequana.snaketools import OnSuccessCleaner - sc = OnSuccessCleaner(pipeline_name="downsampling") - - sc.files_to_remove += ["output/*.done"] - sc.custom_commands += "mv output/* . ; rm -rf output" - sc.add_makefile() - print("Once done, please clean up the directory using\n'make clean'") - - + manager.teardown(extra_files_to_remove=["*.done"]) + shell("mv output/* . && rm -rf output") onerror: print("An error occurred. See message above.") diff --git a/sequana_pipelines/downsampling/main.py b/sequana_pipelines/downsampling/main.py index 0f81575..d57ad6a 100755 --- a/sequana_pipelines/downsampling/main.py +++ b/sequana_pipelines/downsampling/main.py @@ -1,11 +1,28 @@ +# +# This file is part of Sequana software +# +# Copyright (c) 2016-2021 - Sequana Development Team +# +# File author(s): +# Thomas Cokelaer +# +# Distributed under the terms of the 3-clause BSD license. +# The full license is in the LICENSE file, distributed with this software. 
+# +# website: https://github.com/sequana/sequana +# documentation: http://sequana.readthedocs.io +# +############################################################################## import sys import os import argparse +import shutil import subprocess from sequana_pipetools.options import * from sequana_pipetools.misc import Colors from sequana_pipetools.info import sequana_epilog, sequana_prolog +from sequana_pipetools import SequanaManager col = Colors() @@ -15,9 +32,12 @@ class Options(argparse.ArgumentParser): def __init__(self, prog=NAME, epilog=None): usage = col.purple(sequana_prolog.format(**{"name": NAME})) - super(Options, self).__init__(usage=usage, prog=prog, description="", + super(Options, self).__init__( + usage=usage, + prog=prog, + description="", epilog=epilog, - formatter_class=argparse.ArgumentDefaultsHelpFormatter + formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) # add a new group of options to the parser @@ -27,6 +47,7 @@ def __init__(self, prog=NAME, epilog=None): # add a snakemake group of options to the parser so = SnakemakeOptions(working_directory=NAME) so.add_options(self) + so = InputOptions() so.add_options(self) @@ -68,16 +89,16 @@ def main(args=None): # option parsing including common epilog options = Options(NAME, epilog=sequana_epilog).parse_args(args[1:]) - - from sequana.pipelines_common import SequanaManager - # the real stuff is here manager = SequanaManager(options, NAME) # create the beginning of the command and the working directory manager.setup() from sequana import logger - logger.level = options.level + + logger.setLevel(options.level) + logger.name = "sequana_downsampling" + logger.info("#Welcome to sequana_downsampling pipeline.") # fill the config file with input parameters if options.from_project is None: diff --git a/setup.py b/setup.py index 5f498f9..58e80b3 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,8 @@ -# -*- coding: utf-8 -*- -# License: 3-clause BSD from setuptools import setup, 
find_namespace_packages _MAJOR = 0 -_MINOR = 8 -_MICRO = 5 +_MINOR = 9 +_MICRO = 0 version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO) release = '%d.%d' % (_MAJOR, _MINOR) @@ -23,8 +21,9 @@ 'Intended Audience :: Science/Research', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Scientific/Engineering :: Bio-Informatics', 'Topic :: Scientific/Engineering :: Information Analysis', @@ -49,8 +48,7 @@ classifiers = metainfo['classifiers'], # package installation - packages = ["sequana_pipelines.downsampling", - 'sequana_pipelines.downsampling.data' ], + packages = ["sequana_pipelines.downsampling"], install_requires = open("requirements.txt").read(), @@ -58,13 +56,11 @@ exclude_package_data = {"": ["__pycache__"]}, package_data = { '': ['*.yaml', "*.rules", "*.json", "requirements.txt", "*png"], - 'sequana_pipelines.downsampling.data' : ['*.*'], }, zip_safe=False, entry_points = {'console_scripts':[ - 'sequana_pipelines_downsampling=sequana_pipelines.downsampling.main:main', 'sequana_downsampling=sequana_pipelines.downsampling.main:main'] } diff --git a/test/__init__.py b/test/__init__.py index e69de29..9d3b5c5 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -0,0 +1,3 @@ +import os + +test_dir = os.path.dirname(os.path.realpath(__file__)) diff --git a/sequana_pipelines/downsampling/data/__init__.py b/test/data/__init__.py similarity index 100% rename from sequana_pipelines/downsampling/data/__init__.py rename to test/data/__init__.py diff --git a/sequana_pipelines/downsampling/data/data_R1_001.fasta b/test/data/data_R1_001.fasta similarity index 100% rename from sequana_pipelines/downsampling/data/data_R1_001.fasta rename to 
test/data/data_R1_001.fasta diff --git a/sequana_pipelines/downsampling/data/data_R1_001.fastq.gz b/test/data/data_R1_001.fastq.gz similarity index 100% rename from sequana_pipelines/downsampling/data/data_R1_001.fastq.gz rename to test/data/data_R1_001.fastq.gz diff --git a/sequana_pipelines/downsampling/data/data_R2_001.fasta b/test/data/data_R2_001.fasta similarity index 100% rename from sequana_pipelines/downsampling/data/data_R2_001.fasta rename to test/data/data_R2_001.fasta diff --git a/sequana_pipelines/downsampling/data/data_R2_001.fastq.gz b/test/data/data_R2_001.fastq.gz similarity index 100% rename from sequana_pipelines/downsampling/data/data_R2_001.fastq.gz rename to test/data/data_R2_001.fastq.gz diff --git a/test/test_main.py b/test/test_main.py index 2065c1d..edd99da 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -1,17 +1,17 @@ -import easydev import os import tempfile import subprocess import sys -from sequana.pipelines_common import get_pipeline_location as getpath -sharedir = getpath('downsampling') + +from . 
import test_dir +sharedir = f"{test_dir}/data" def test_standalone_subprocess(): directory = tempfile.TemporaryDirectory() - cmd = """sequana_pipelines_downsampling --input-directory {} - --working-directory {} --force""".format(sharedir, directory.name) + cmd = f"""sequana_downsampling --input-directory {sharedir} + --working-directory {directory.name} --force""" subprocess.call(cmd.split()) @@ -27,7 +27,7 @@ def test_full(): with tempfile.TemporaryDirectory() as directory: wk = directory - cmd = "sequana_pipelines_downsampling --input-directory {} " + cmd = "sequana_downsampling --input-directory {} " cmd += "--working-directory {} --force" cmd = cmd.format(sharedir, wk) subprocess.call(cmd.split()) @@ -35,7 +35,7 @@ def test_full(): with tempfile.TemporaryDirectory() as directory: wk = directory - cmd = "sequana_pipelines_downsampling --input-directory {} " + cmd = "sequana_downsampling --input-directory {} " cmd += ' --input-pattern "*fasta"' cmd += " --working-directory {} --downsampling-method random_pct " cmd += " --downsampling-input-format fasta --force" @@ -45,6 +45,6 @@ def test_full(): def test_version(): - cmd = "sequana_pipelines_downsampling --version" + cmd = "sequana_downsampling --version" subprocess.call(cmd.split())