diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..ea27a584 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,27 @@ +{ + "name": "nfcore", + "image": "nfcore/gitpod:latest", + "remoteUser": "gitpod", + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. + "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", + "python.formatting.yapfPath": "/opt/conda/bin/yapf", + "python.linting.flake8Path": "/opt/conda/bin/flake8", + "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", + "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", + "python.linting.pylintPath": "/opt/conda/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"] + } + } +} diff --git a/.editorconfig b/.editorconfig index a9229959..588546de 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js,cff}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules diff --git a/.gitattributes b/.gitattributes index 050bb120..7a2dabc2 100755 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ *.config linguist-language=nextflow +*.nf.test linguist-language=nextflow modules/nf-core/** linguist-generated subworkflows/nf-core/** linguist-generated diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 7a714a01..2bc9feaa 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -101,3 +101,18 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ ### Images and figures For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). + +## GitHub Codespaces + +This repo includes a devcontainer configuration which will create a GitHub Codespaces for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal. + +To get started: + +- Open the repo in [Codespaces](https://github.com/nf-core/atacseq/codespaces) +- Tools installed + - nf-core + - Nextflow + +Devcontainer specs: + +- [DevContainer config](.devcontainer/devcontainer.json) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 2df8bea9..73301615 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -42,9 +42,9 @@ body: attributes: label: System information description: | - * Nextflow version _(eg. 21.10.3)_ + * Nextflow version _(eg. 23.04.0)_ * Hardware _(eg. HPC, Desktop, Cloud)_ * Executor _(eg. slurm, local, awsbatch)_ - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, or Apptainer)_ * OS _(eg. CentOS Linux, macOS, Linux Mint)_ * Version of nf-core/atacseq _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a8bed26d..607aa029 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -15,7 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/atac - [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/atacseq/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/atacseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/atacseq/tree/master/.github/CONTRIBUTING.md) +- [ ] If necessary, also make a PR on the nf-core/atacseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. - [ ] Make sure your code lints (`nf-core lint`). - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). - [ ] Usage Documentation in `docs/usage.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 0c999f25..d904d3fe 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -17,19 +17,24 @@ jobs: aligner: ["bwa", "bowtie2", "chromap", "star"] steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/atacseq/work-${{ github.sha }} parameters: | { + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/atacseq/results-${{ github.sha }}" "aligner": "${{ matrix.aligner }}" } - profiles: test_full,aws_tower + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 6ad381e8..7c21f3d8 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -12,18 +12,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: nf-core/tower-action@v3 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/atacseq/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/atacseq/results-test-${{ github.sha }}" } - profiles: test,aws_tower + profiles: test + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 5e6a135f..2e983e54 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,7 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/atacseq' run: | - { [[ 
${{github.event.pull_request.head.repo.full_name }} == nf-core/atacseq ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/atacseq ]] && [[ $GITHUB_HEAD_REF == "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 567683a1..707acd4c 100755 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,6 +11,10 @@ on: env: NXF_ANSI_LOG: false +concurrency: + group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" + cancel-in-progress: true + jobs: test: name: Run pipeline with test data @@ -20,11 +24,11 @@ jobs: strategy: matrix: NXF_VER: - - "21.10.3" + - "23.04.0" - "latest-everything" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 @@ -35,39 +39,35 @@ jobs: run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - parameters: - name: Test workflow parameters - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/atacseq') }} + test_controls: + name: Run pipeline with test data and controls + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/atacseq') }}" runs-on: ubuntu-latest - strategy: - matrix: - parameters: - - "--skip_trimming" - - "--skip_merge_replicates" - - "--skip_consensus_peaks" steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - - name: Run pipeline with various parameters + - name: Run pipeline with test data and controls run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test_controls,docker --outdir ./results - aligners: - name: Test available aligners + parameters: + name: Test workflow parameters if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/atacseq') }} runs-on: ubuntu-latest strategy: matrix: - aligner: - - "bowtie2" - - "chromap" - - "star" + parameters: + - "--skip_trimming" + - "--skip_merge_replicates" + - "--skip_consensus_peaks" + - "--aligner bowtie2" + - "--aligner chromap" + - "--aligner star" steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -77,6 +77,6 @@ jobs: wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ - - name: Run pipeline with the different aligners available + - name: Run pipeline with various parameters run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner ${{ matrix.aligner }} --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml new file mode 100644 index 00000000..694e90ec --- /dev/null +++ b/.github/workflows/clean-up.yml @@ -0,0 +1,24 @@ +name: "Close user-tagged issues and PRs" +on: + schedule: + - cron: "0 0 * * 0" # Once a week + +jobs: + 
clean-up: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v7 + with: + stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." + stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." + close-issue-message: "This issue was closed because it has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor and then staled for 20 days with no activity." + days-before-stale: 30 + days-before-close: 20 + days-before-pr-close: -1 + any-of-labels: "awaiting-changes,awaiting-feedback" + exempt-issue-labels: "WIP" + exempt-pr-labels: "WIP" + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml index 916df880..d64801df 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" + echo "result=pass" >> $GITHUB_OUTPUT else - echo "::set-output name=result::fail" + echo "result=fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8a5ce69b..888cb4bc 100755 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,6 +4,8 @@ name: nf-core linting # that the code meets the nf-core guidelines. 
on: push: + branches: + - dev pull_request: release: types: [published] @@ -12,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -25,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @@ -38,7 +40,7 @@ jobs: PythonBlack: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check code lints with Black uses: psf/black@stable @@ -69,14 +71,14 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 with: - python-version: "3.7" + python-version: "3.8" architecture: "x64" - name: Install dependencies @@ -97,7 +99,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f61..0bbcd30f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..0c31cdb9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v2.7.1" + hooks: + - id: prettier diff --git a/.prettierignore b/.prettierignore index eb74a574..437d763d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,5 +1,6 @@ email_template.html adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -8,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f3ebf92..badcbb04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,64 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
-## [[2.0](https://github.com/nf-core/rnaseq/releases/tag/2.0)] - 2022-11-30
+## [[2.1.0](https://github.com/nf-core/atacseq/releases/tag/2.1.0)] - 2023-07-21
+
+### Credits
+
+Special thanks to the following for their contributions to the release:
+
+- [Adam Talbot](https://github.com/adamrtalbot)
+- [Björn Langer](https://github.com/bjlang)
+- [Harshil Patel](https://github.com/drpatelh)
+- [Maxime Garcia](https://github.com/maxulysse)
+- [Rob Syme](https://github.com/robsyme)
+
+Thank you to everyone else who has contributed by reporting bugs, enhancements or in any other way, shape or form.
+
+### Enhancements & fixes
+
+- [[#262](https://github.com/nf-core/atacseq/issues/262)] - Add `--ataqv_mito_reference` parameter.
+- Optional support of control data, analogous to nf-core/chipseq.
+- [[#277](https://github.com/nf-core/atacseq/issues/277)] - Fix error when using a gunzipped fasta file.
+- [[#286](https://github.com/nf-core/atacseq/issues/286)] - Fix error when no `--mito_name` parameter is provided.
+- [[#268](https://github.com/nf-core/atacseq/issues/268)] - Fix error when a bed file is provided using the `--blacklist` option.
+- [[#278](https://github.com/nf-core/atacseq/issues/278)] - Make genome fasta file available when `IGV` process is run.
+- [[#276](https://github.com/nf-core/atacseq/issues/276)] - Bump ataqv to version 1.3.1 to fix rendering of enrichment plots.
+- [[#290](https://github.com/nf-core/atacseq/issues/290)] - Fix case-sensitivity issue while sorting bedGraph.
+- [[#295](https://github.com/nf-core/atacseq/issues/295)] - Enable downstream steps for bam files produced by `chromap` from paired-end reads after its upgrade.
+- Updated pipeline template to [nf-core/tools 2.9](https://github.com/nf-core/tools/releases/tag/2.9).
+- Make fasta index available for IGV session.
+- [[nf-core/chipseq#347](https://github.com/nf-core/chipseq/issues/347)] - Add read group tag to bam files processed by bowtie2.
+
+### Parameters
+
+| Old parameter | New parameter            |
+| ------------- | ------------------------ |
+|               | `--with_control`         |
+|               | `--ataqv_mito_reference` |
+
+> **NB:** Parameter has been **updated** if both old and new parameter information is present.
+> **NB:** Parameter has been **added** if just the new parameter information is present.
+> **NB:** Parameter has been **removed** if parameter information isn't present.
+
+### Software dependencies
+
+Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference.
+
+| Dependency              | Old version | New version |
+| ----------------------- | ----------- | ----------- |
+| `ataqv`                 | 1.3.0       | 1.3.1       |
+| `chromap`               | 0.2.1       | 0.2.4       |
+| `multiqc`               | 1.13        | 1.14        |
+| `picard`                | 2.27.4      | 3.0.0       |
+| `samtools`              | 1.15.1      | 1.17        |
+| `ucsc-bedgraphtobigwig` | 377         | 445         |
+
+> **NB:** Dependency has been **updated** if both old and new version information is present.
+> **NB:** Dependency has been **added** if just the new version information is present.
+> **NB:** Dependency has been **removed** if version information isn't present.
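As a hedged illustration of the control-aware mode introduced in this entry: `--with_control` is a plain CLI flag, so a run might look like the sketch below. The file names, genome and profile are placeholders, not taken from this diff; the samplesheet must carry the extra `control` and `control_replicate` columns described in `docs/usage.md`.

```bash
# Sketch of a control-aware run using the new --with_control flag.
# samplesheet.csv needs the 'control' and 'control_replicate' columns;
# paths, genome and profile here are illustrative only.
nextflow run nf-core/atacseq \
    --input samplesheet.csv \
    --with_control \
    --outdir ./results \
    --genome GRCh37 \
    --read_length 50 \
    -profile docker
```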
+ +## [[2.0](https://github.com/nf-core/atacseq/releases/tag/2.0)] - 2022-11-30 ### :warning: Major enhancements @@ -185,7 +242,7 @@ Note, since the pipeline is now using Nextflow DSL2, each process will be run wi ### `Fixed` -- **Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#Deprecated))** +- **Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#deprecated))** - [#41](https://github.com/nf-core/atacseq/issues/41) - Docs: Add example plot images - [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 - [#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? diff --git a/CITATIONS.md b/CITATIONS.md index e12945ff..42192b83 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -28,6 +28,8 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. @@ -118,5 +120,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/README.md b/README.md index da83e526..6ff5a9f9 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/atacseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.2634132-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.2634132) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/atacseq) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23atacseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/atacseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23atacseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/atacseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -20,6 +20,8 @@ On release, automated continuous integration tests run the pipeline on a full-si ## Pipeline summary +![nf-core/atacseq metro map](docs/images/nf-core-atacseq_metro_map_grey.png) + 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) 3. Choice of multiple aligners @@ -46,7 +48,7 @@ On release, automated continuous integration tests run the pipeline on a full-si 3. Alignment-level QC and estimation of library complexity ([`picard`](https://broadinstitute.github.io/picard/), [`Preseq`](http://smithlabresearch.org/software/preseq/)) 4. Create normalised bigWig files scaled to 1 million mapped reads ([`BEDTools`](https://github.com/arq5x/bedtools2/), [`bedGraphToBigWig`](http://hgdownload.soe.ucsc.edu/admin/exe/)) 5. Generate gene-body meta-profile from bigWig files ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html)) - 6. Calculate genome-wide enrichment ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)) + 6. 
Calculate genome-wide enrichment (optionally relative to control) ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html))
   7. Call broad/narrow peaks ([`MACS2`](https://github.com/macs3-project/MACS))
   8. Annotate peaks relative to gene features ([`HOMER`](http://homer.ucsd.edu/homer/download.html))
   9. Create consensus peakset across all samples and create tabular file to aid in the filtering of the data ([`BEDTools`](https://github.com/arq5x/bedtools2/))
@@ -65,34 +67,42 @@ On release, automated continuous integration tests run the pipeline on a full-si
7. Create IGV session file containing bigWig tracks, peaks and differential sites for data visualisation ([`IGV`](https://software.broadinstitute.org/software/igv/)).
8. Present QC for raw read, alignment, peak-calling and differential accessibility results ([`ataqv`](https://github.com/ParkerLab/ataqv), [`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/))

-## Quick Start
+## Usage

-1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`)
+> **Note**
+> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
+> to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
+> with `-profile test` before running the workflow on actual data.

-2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.
+To run the pipeline on your own data, first prepare a comma-separated samplesheet with your input data. Please follow the [documentation on samplesheets](https://nf-co.re/atacseq/usage#samplesheet-input) for more details. An example samplesheet for running the pipeline looks as follows:

-3. Download the pipeline and test it on a minimal dataset with a single command:
+```csv
+sample,fastq_1,fastq_2,replicate
+CONTROL,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,1
+CONTROL,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz,2
+CONTROL,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,3
+```

-   ```bash
-   nextflow run nf-core/atacseq -profile test,YOURPROFILE --outdir <OUTDIR>
-   ```
+Now, you can run the pipeline using:

-   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.
+```bash
+nextflow run nf-core/atacseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 --read_length <50|100|150|200> -profile <docker/singularity/.../institute>
+```

-   > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
- > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile ` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. - > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. - > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. +See [usage docs](https://nf-co.re/atacseq/usage) for all of the available options when running the pipeline. -4. Start running your own analysis! +> **Warning:** +> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those +> provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; +> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). - ```bash - nextflow run nf-core/atacseq --input samplesheet.csv --outdir --genome GRCh37 -profile - ``` +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/atacseq/usage) and the [parameter documentation](https://nf-co.re/atacseq/parameters). -## Documentation +## Pipeline output -The nf-core/atacseq pipeline comes with documentation about the pipeline [usage](https://nf-co.re/atacseq/usage), [parameters](https://nf-co.re/atacseq/parameters) and [output](https://nf-co.re/atacseq/output). +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/atacseq/results) tab on the nf-core website pipeline page. +For more details about the output files and reports, please refer to the +[output documentation](https://nf-co.re/atacseq/output). ## Credits diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 4257efce..8789a829 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,17 +3,21 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/atacseq Methods Description" section_href: "https://github.com/nf-core/atacseq" plot_type: "html" -## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline ## You inject any metadata in the Nextflow '${workflow}' object data: |

   <h4>Methods</h4>
-  <p>Data was processed using nf-core/atacseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+  <p>Data was processed using nf-core/atacseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
   <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
   <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
   <h4>References</h4>
   <ul>
-    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
-    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. doi: 10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. doi: 10.1038/s41587-020-0439-x</li>
+    <li>Grüning, B., Dale, R., Sjödin, A., Chapman, B. A., Rowe, J., Tomkins-Tinch, C. H., Valieris, R., Köster, J., & Bioconda Team. (2018). Bioconda: sustainable and comprehensive software distribution for the life sciences. Nature Methods, 15(7), 475–476. doi: 10.1038/s41592-018-0046-7</li>
+    <li>da Veiga Leprevost, F., Grüning, B. A., Alves Aflitos, S., Röst, H. L., Uszkoreit, J., Barsnes, H., Vaudel, M., Moreno, P., Gatto, L., Weber, J., Bai, M., Jimenez, R. C., Sachsenberg, T., Pfeuffer, J., Vera Alvarez, R., Griss, J., Nesvizhskii, A. I., & Perez-Riverol, Y. (2017). BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics (Oxford, England), 33(16), 2580–2582. doi: 10.1093/bioinformatics/btx192</li>
+    ${tool_bibliography}
   </ul>
   Notes:
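Since the `data` block above is Groovy-templated HTML embedded in a YAML file, a quick way to confirm that an edit has not broken the YAML container is simply to parse it. A minimal sketch, assuming PyYAML is installed (any YAML parser would do):

```bash
# Parse the template and print its top-level keys plus the size of the
# HTML payload; this fails loudly if an edit broke the YAML structure.
python -c "
import yaml
doc = yaml.safe_load(open('assets/methods_description_template.yml'))
print(sorted(doc))
print(doc['plot_type'], len(doc['data']))
"
```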
diff --git a/assets/multiqc/merged_library_frip_score_header.txt b/assets/multiqc/merged_library_frip_score_header.txt index d0edf803..f89a75f6 100644 --- a/assets/multiqc/merged_library_frip_score_header.txt +++ b/assets/multiqc/merged_library_frip_score_header.txt @@ -11,3 +11,4 @@ # ymax: 1 # ymin: 0 # tt_decimals: 2 +# cpswitch: False diff --git a/assets/multiqc/merged_library_peak_count_header.txt b/assets/multiqc/merged_library_peak_count_header.txt index 9fd8c830..60f5745c 100644 --- a/assets/multiqc/merged_library_peak_count_header.txt +++ b/assets/multiqc/merged_library_peak_count_header.txt @@ -7,3 +7,4 @@ #pconfig: # title: 'Total peak count' # ylab: 'Peak count' +# cpswitch: False diff --git a/assets/multiqc/merged_replicate_frip_score_header.txt b/assets/multiqc/merged_replicate_frip_score_header.txt index 8a2c5ad1..c0f25a5b 100644 --- a/assets/multiqc/merged_replicate_frip_score_header.txt +++ b/assets/multiqc/merged_replicate_frip_score_header.txt @@ -11,3 +11,4 @@ # ymax: 1 # ymin: 0 # tt_decimals: 2 +# cpswitch: False diff --git a/assets/multiqc/merged_replicate_peak_count_header.txt b/assets/multiqc/merged_replicate_peak_count_header.txt index 66796ffe..e4118505 100644 --- a/assets/multiqc/merged_replicate_peak_count_header.txt +++ b/assets/multiqc/merged_replicate_peak_count_header.txt @@ -7,3 +7,4 @@ #pconfig: # title: 'Total peak count' # ylab: 'Peak count' +# cpswitch: False diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 7d72fb96..03707ba5 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,7 @@ report_comment: > - This report has been generated by the nf-core/atacseq + This report has been generated by the nf-core/atacseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. data_format: "yaml" diff --git a/assets/nf-core-atacseq_logo_light.png b/assets/nf-core-atacseq_logo_light.png index 84c18bee..e1689615 100644 Binary files a/assets/nf-core-atacseq_logo_light.png and b/assets/nf-core-atacseq_logo_light.png differ diff --git a/assets/schema_input.json b/assets/schema_input.json index 1b481cf7..c8c1cf58 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -34,6 +34,16 @@ "type": "integer", "pattern": "^[1-9][0-9]*$", "errorMessage": "Integer representing replicate number. Must start from 1..." + }, + "control": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Control entry cannot contain spaces" + }, + "control_replicate": { + "type": "integer", + "pattern": "^[1-9][0-9]*$", + "errorMessage": "Integer representing control's replicate number." 
} }, "required": ["sample", "fastq_1", "replicate"] diff --git a/assets/slackreport.json b/assets/slackreport.json new file mode 100644 index 00000000..98a534c5 --- /dev/null +++ b/assets/slackreport.json @@ -0,0 +1,34 @@ +{ + "attachments": [ + { + "fallback": "Plain-text summary of the attachment.", + "color": "<% if (success) { %>good<% } else { %>danger<%} %>", + "author_name": "nf-core/atacseq v${version} - ${runName}", + "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", + "fields": [ + { + "title": "Command used to launch the workflow", + "value": "```${commandLine}```", + "short": false + } + <% + if (!success) { %> + , + { + "title": "Full error message", + "value": "```${errorReport}```", + "short": false + }, + { + "title": "Pipeline configuration", + "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/bampe_rm_orphan.py b/bin/bampe_rm_orphan.py index 4ab9935b..ddba1d6f 100755 --- a/bin/bampe_rm_orphan.py +++ b/bin/bampe_rm_orphan.py @@ -46,7 +46,6 @@ def makedir(path): - if not len(path) == 0: try: os.makedirs(path) @@ -63,7 +62,6 @@ def makedir(path): def bampe_rm_orphan(BAMIn, BAMOut, onlyFRPairs=False): - ## SETUP DIRECTORY/FILE STRUCTURE OutDir = os.path.dirname(BAMOut) makedir(OutDir) @@ -89,7 +87,6 @@ def bampe_rm_orphan(BAMIn, BAMOut, onlyFRPairs=False): ## FILTER FOR READS ON SAME CHROMOSOME IN FR ORIENTATION if onlyFRPairs: if pair1.tid == pair2.tid: - ## READ1 FORWARD AND READ2 REVERSE STRAND if not pair1.is_reverse and pair2.is_reverse: if pair1.reference_start <= pair2.reference_start: diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 91f2af4e..f422a287 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -6,12 +6,13 @@ def parse_args(args=None): - Description = "Reformat nf-core/atacseq design file and check its contents." - Epilog = "Example usage: python check_design.py " + Description = "Reformat nf-core/atacseq samplesheet file and check its contents." 
+ Epilog = "Example usage: python check_samplesheet.py " parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument("FILE_IN", help="Input samplesheet file.") parser.add_argument("FILE_OUT", help="Output file.") + parser.add_argument("--with_control", action="store_true", help="shows output") return parser.parse_args(args) @@ -34,7 +35,7 @@ def print_error(error, context="Line", context_str=""): sys.exit(1) -def check_samplesheet(file_in, file_out): +def check_samplesheet(file_in, file_out, with_control=False): """ This function checks that the samplesheet follows the following structure: sample,fastq_1,fastq_2,replicate @@ -44,15 +45,17 @@ def check_samplesheet(file_in, file_out): OSMOTIC_STRESS_T15,s3://nf-core-awsmegatests/atacseq/input_data/minimal/GSE66386/SRR1822158_1.fastq.gz,s3://nf-core-awsmegatests/atacseq/input_data/minimal/GSE66386/SRR1822158_2.fastq.gz,1 For an example see: - https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/samplesheet/v2.0/samplesheet_test.csv + https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/samplesheet/v2.1/samplesheet_test.csv """ sample_mapping_dict = {} with open(file_in, "r", encoding="utf-8-sig") as fin: - ## Check header MIN_COLS = 3 - HEADER = ["sample", "fastq_1", "fastq_2", "replicate"] + if with_control: + HEADER = ["sample", "fastq_1", "fastq_2", "replicate", "control", "control_replicate"] + else: + HEADER = ["sample", "fastq_1", "fastq_2", "replicate"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: print(f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}") @@ -79,7 +82,9 @@ def check_samplesheet(file_in, file_out): ) ## Check sample name entries - sample, fastq_1, fastq_2, replicate = lspl[: len(HEADER)] + sample, fastq_1, fastq_2, replicate = lspl[: len(HEADER) - 2 if with_control else len(HEADER)] + control = lspl[len(HEADER) - 2] if with_control else "" + control_replicate = lspl[len(HEADER) - 1] if with_control else "" if sample.find(" ") != -1: print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") sample = sample.replace(" ", "_") @@ -103,18 +108,27 @@ def check_samplesheet(file_in, file_out): print_error("Replicate id not an integer!", "Line", line) sys.exit(1) + if with_control and control: + if control.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for control: {control}") + control = control.replace(" ", "_") + if not control_replicate.isdecimal(): + print_error("Control replicate id not an integer!", "Line", line) + sys.exit(1) + control = "{}_REP{}".format(control, control_replicate) + ## Auto-detect paired-end/single-end sample_info = [] ## Paired-end short reads if sample and fastq_1 and fastq_2: - sample_info = [fastq_1, fastq_2, replicate, "0"] + sample_info = [fastq_1, fastq_2, replicate, "0", control] ## Single-end short reads elif sample and fastq_1 and not fastq_2: - sample_info = [fastq_1, fastq_2, replicate, "1"] + sample_info = [fastq_1, fastq_2, replicate, "1", control] else: print_error("Invalid combination of columns provided!", "Line", line) - ## Create sample mapping dictionary = {sample: {replicate: [[ fastq_1, fastq_2, replicate, single_end ]]}} + ## Create sample mapping dictionary = {sample: {replicate: [[ fastq_1, fastq_2, replicate, control, single_end ]]}} replicate = int(replicate) sample_info = sample_info + lspl[len(HEADER) :] if sample not in sample_mapping_dict: @@ -132,10 +146,12 @@ def 
check_samplesheet(file_in, file_out): out_dir = os.path.dirname(file_out) make_dir(out_dir) with open(file_out, "w") as fout: - fout.write(",".join(HEADER + ["single_end"] + header[len(HEADER) :]) + "\n") + if with_control: + fout.write(",".join(HEADER[:-2] + ["single_end", "control"] + header[len(HEADER) :]) + "\n") + else: + fout.write(",".join(HEADER + ["single_end", "control"] + header[len(HEADER) :]) + "\n") for sample in sorted(sample_mapping_dict.keys()): - ## Check that replicate ids are in format 1.. uniq_rep_ids = sorted(list(set(sample_mapping_dict[sample].keys()))) if len(uniq_rep_ids) != max(uniq_rep_ids) or 1 != min(uniq_rep_ids): @@ -168,6 +184,19 @@ def check_samplesheet(file_in, file_out): sample, ) + for idx, val in enumerate(sample_mapping_dict[sample][replicate]): + control = "_REP".join(val[4].split("_REP")[:-1]) + control_replicate = val[4].split("_REP")[-1] + if control and ( + control not in sample_mapping_dict.keys() + or int(control_replicate) not in sample_mapping_dict[control].keys() + ): + print_error( + f"Control identifier and replicate has to match a provided sample identifier and replicate!", + "Control", + val[4], + ) + ## Write to file for idx in range(len(sample_mapping_dict[sample][replicate])): fastq_files = sample_mapping_dict[sample][replicate][idx] @@ -182,7 +211,7 @@ def check_samplesheet(file_in, file_out): def main(args=None): args = parse_args(args) - check_samplesheet(args.FILE_IN, args.FILE_OUT) + check_samplesheet(args.FILE_IN, args.FILE_OUT, args.with_control) if __name__ == "__main__": diff --git a/bin/get_autosomes.py b/bin/get_autosomes.py index 2b7189f6..deac8a00 100755 --- a/bin/get_autosomes.py +++ b/bin/get_autosomes.py @@ -34,7 +34,6 @@ def makedir(path): - if not len(path) == 0: try: os.makedirs(path) @@ -69,7 +68,6 @@ def makedir(path): def get_autosomes(FAIFile, OutFile): - makedir(os.path.dirname(OutFile)) ## READ IN CHROMOSOME IDS diff --git a/bin/igv_files_to_session.py b/bin/igv_files_to_session.py index ce1fac70..ad9f7476 100755 --- a/bin/igv_files_to_session.py +++ b/bin/igv_files_to_session.py @@ -49,7 +49,6 @@ def makedir(path): - if not len(path) == 0: try: os.makedirs(path) @@ -66,7 +65,6 @@ def makedir(path): def igv_files_to_session(XMLOut, ListFile, Genome, PathPrefix=""): - makedir(os.path.dirname(XMLOut)) fileList = [] diff --git a/bin/macs2_merged_expand.py b/bin/macs2_merged_expand.py index aa401123..28ffb087 100755 --- a/bin/macs2_merged_expand.py +++ b/bin/macs2_merged_expand.py @@ -55,7 +55,6 @@ def makedir(path): - if not len(path) == 0: try: os.makedirs(path) @@ -78,7 +77,6 @@ def makedir(path): def macs2_merged_expand(MergedIntervalTxtFile, SampleNameList, OutFile, isNarrow=False, minReplicates=1): - makedir(os.path.dirname(OutFile)) combFreqDict = {} diff --git a/conf/base.config b/conf/base.config index 7ff03098..f2ac8d7a 100755 --- a/conf/base.config +++ b/conf/base.config @@ -14,7 +14,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 
'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' diff --git a/conf/igenomes.config b/conf/igenomes.config index 7f282cee..84f38729 100755 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -48,6 +48,14 @@ params { "200" : 2892537351 ] } + 'CHM13' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" + bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" + gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" + mito_name = "chrM" + } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" @@ -655,11 +663,11 @@ params { readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" mito_name = "chrM" macs_gsize = [ - "50" : "11624332", - "75" : "11693438", - "100" : "11777680", - "150" : "11783749", - "200" : "11825681" + "50" : 11624332, + "75" : 11693438, + "100" : 11777680, + "150" : 11783749, + "200" : 11825681 ] } 'susScr3' { diff --git a/conf/modules.config b/conf/modules.config index a856f1b0..2da820b5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -22,6 +22,12 @@ process { ] withName: 'SAMPLESHEET_CHECK' { + ext.args = { + [ + 'samplesheet.valid.csv', + params.with_control ? "--with_control" : '' + ].join(' ').trim() + } publishDir = [ path: { "${params.outdir}/pipeline_info" }, mode: params.publish_dir_mode, @@ -252,6 +258,12 @@ if (params.aligner == 'bwa') { if (params.aligner == 'bowtie2') { process { withName: 'BOWTIE2_ALIGN' { + ext.args = { + [ + meta.read_group ? "--rg-id ${meta.id} --rg SM:${meta.id - ~/_T\d+$/} --rg PL:ILLUMINA --rg LB:${meta.id} --rg PU:1" : '', + params.seq_center ? "--rg CN:${params.seq_center}" : '' + ].join(' ').trim() + } ext.prefix = { "${meta.id}.Lb" } publishDir = [ [ @@ -454,11 +466,7 @@ process { } withName: '.*:MERGED_LIBRARY_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { - ext.args = { - [ - (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' - ].join(' ').trim() - } + ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' } ext.prefix = { "${meta.id}.mLb.clN" } publishDir = [ [ @@ -568,7 +576,7 @@ if (!params.skip_plot_fingerprint) { [ '--skipZeros', "--numberOfSamples $params.fingerprint_bins", - "--labels ${meta.id}.mLb.clN" + params.with_control ? "--labels ${meta.id}.mLb.clN $meta.control" : "--labels ${meta.id}.mLb.clN" ].join(' ').trim() } ext.prefix = { "${meta.id}.mLb.clN" } @@ -779,11 +787,7 @@ if (!params.skip_merge_replicates) { } withName: '.*:MERGED_REPLICATE_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { - ext.args = { - [ - (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' - ].join(' ').trim() - } + ext.args = { (meta.single_end && params.fragment_size > 0) ? 
"-fs ${params.fragment_size}" : '' } ext.prefix = { "${meta.id}.mRp.clN" } publishDir = [ [ @@ -951,8 +955,7 @@ if (!params.skip_igv) { [ path: { "${params.outdir}/genome" }, mode: params.publish_dir_mode, - pattern: '*.{fa,fasta}', - enabled: params.save_reference + pattern: '*.{fa,fasta,fai}' ] ] } diff --git a/conf/test_controls.config b/conf/test_controls.config new file mode 100644 index 00000000..a9483e7f --- /dev/null +++ b/conf/test_controls.config @@ -0,0 +1,33 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests with control samples +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/atacseq -profile test_controls, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile with controls' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/samplesheet/v2.1/samplesheet_test_controls.csv' + read_length = 50 + + // Genome references + mito_name = 'MT' + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' + gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genes.gtf' + + // For speed to avoid CI time-out + fingerprint_bins = 100 +} diff --git a/docs/images/nf-core-atacseq_metro_map_grey.png b/docs/images/nf-core-atacseq_metro_map_grey.png new file mode 100644 index 00000000..1c48d61c Binary files /dev/null and b/docs/images/nf-core-atacseq_metro_map_grey.png differ diff --git a/docs/images/nf-core-atacseq_metro_map_grey.svg b/docs/images/nf-core-atacseq_metro_map_grey.svg new file mode 100644 index 00000000..f5fc9078 --- /dev/null +++ b/docs/images/nf-core-atacseq_metro_map_grey.svg @@ -0,0 +1,1696 @@ + + + +image/svg+xmlrfastagfffastqhtmlPre-processingGenome alignmentCutadaptFastQCFastQCMultiQC Peak calling & QCbambaibigWigSTARBWAChromapBowtie2Picard¹Filtering² Picard'sCollectMultipleMetricspreseqAlignment QCdeepToolsbedGraphToBigWigbedtoolsgenomecovMACS2Homersamtools+bedtoolsfeatureCountsR+DESeq2xmltab1.2.Merge alignments from multiple libraries or replicatesMark duplicates 12samtoolsbedtoolsbamtoolspysam (paired-ends only) License: ataqvEnrichment analysisMandatoryOptionalFile Inputmerged librariesmerged replicates (optional)IGVhtml diff --git a/docs/images/r_deseq2_ma_plot.png b/docs/images/r_deseq2_ma_plot.png deleted file mode 100755 index 67c598fc..00000000 Binary files a/docs/images/r_deseq2_ma_plot.png and /dev/null differ diff --git a/docs/images/r_deseq2_volcano_plot.png b/docs/images/r_deseq2_volcano_plot.png deleted file mode 100755 index 679746b4..00000000 Binary files a/docs/images/r_deseq2_volcano_plot.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md index c9950f7f..5787a2ac 100755 --- a/docs/output.md +++ b/docs/output.md @@ -19,7 +19,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam ### Raw read QC
- Output files +Output files - `fastqc/` - `*_fastqc.html`: FastQC report containing quality metrics for read 1 (_and read2 if paired-end_) **before** adapter trimming. @@ -33,7 +33,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam ### Adapter trimming
- Output files +Output files - `trimgalore/` - `*fastq.gz`: If `--save_trimmed` is specified, FastQ files **after** adapter trimming will be placed in this directory. @@ -55,7 +55,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam The pipeline has been written in a way where all the files generated downstream of the alignment are placed in the same directory as specified by `--aligner` e.g. if `--aligner bwa` is specified then all the downstream results will be placed in the `bwa/` directory. This helps with organising the directory structure and more importantly, allows the end-user to get the results from multiple aligners by simply re-running the pipeline with a different `--aligner` option along the `-resume` parameter. It also means that results won't be overwritten when resuming the pipeline and can be used for benchmarking between alignment algorithms if required. Thus, `` in the directory structure below corresponds to the aligner set when running the pipeline.
- Output files +Output files - `/library/` - `*.bam`: The files resulting from the alignment of individual libraries are not saved by default so this directory will not be present in your results. You can override this behaviour with the use of the `--save_align_intermeds` flag in which case it will contain the coordinate sorted alignment files in [`*.bam`](https://samtools.github.io/hts-specs/SAMv1.pdf) format. @@ -83,7 +83,7 @@ The library-level alignments associated with the same sample are merged and subs The `--save_unaligned` parameter enables to obtain FastQ files containing unmapped reads (only available for STAR and Bowtie2).
- Output files +Output files - `/library/unmapped/` - `*.fastq.gz`: If `--save_unaligned` is specified, FastQ files containing unmapped reads will be placed in this directory. @@ -92,7 +92,7 @@ The `--save_unaligned` parameter enables to obtain FastQ files containing unmapp #### STAR logs
- Output files +Output files - `star/library/log/` - `*.SJ.out.tab`: File containing filtered splice junctions detected after mapping the reads. @@ -108,7 +108,7 @@ The library-level alignments associated with the same sample are merged and subs ### Alignment merging, duplicate marking, filtering and QC
- Output files +Output files - `/merged_library/` - `*.bam`: Merged library-level, coordinate sorted `*.bam` files after the marking of duplicates, and filtering based on various criteria. The file suffix for the final filtered files will be `*.mLb.clN.*`. If you specify the `--save_align_intermeds` parameter then two additional sets of files will be present. These represent the unfiltered alignments with duplicates marked (`*.mLb.mkD.*`), and in the case of paired-end datasets the filtered alignments before the removal of orphan read pairs (`*.mLb.flT.*`). @@ -143,7 +143,7 @@ The [Preseq](http://smithlabresearch.org/software/preseq/) package is aimed at p ### Normalised bigWig files
- Output files +Output files - `/merged_library/bigwig/` - `*.bigWig`: Normalised bigWig files scaled to 1 million mapped reads. @@ -155,7 +155,7 @@ The [bigWig](https://genome.ucsc.edu/goldenpath/help/bigWig.html) format is in a ### Coverage QC
- Output files +Output files - `/merged_library/deeptools/plotfingerprint/` - `*.plotFingerprint.pdf`, `*.plotFingerprint.qcmetrics.txt`, `*.plotFingerprint.raw.txt`: plotFingerprint output files. @@ -175,7 +175,7 @@ The results from deepTools plotProfile gives you a quick visualisation for the g ### Call peaks
- Output files +Output files - `/merged_library/macs2//` - `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak`, `*summits.bed`: MACS2 output files - the files generated will depend on whether MACS2 has been run in _narrowPeak_ or _broadPeak_ mode. @@ -204,7 +204,7 @@ Various QC plots per sample including number of peaks, fold-change distribution, ### Create and quantify consensus set of peaks
- Output files +Output files - `/merged_library/macs2//consensus/` - `*.bed`: Consensus peak-set across all samples in BED format. @@ -232,16 +232,16 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co ### Read counting and differential accessibility analysis
- Output files +Output files - `/merged_library/macs2//consensus/deseq2/` - - `*.sample.dists.txt`: Spreadsheet containing sample-to-sample distance. - `*.plots.pdf`: File containing PCA and hierarchical clustering plots. - `*.dds.RData`: File containing R `DESeqDataSet` object generated by DESeq2, with either an rlog or vst `assay` storing the variance-stabilised data. - `*.rds`: Alternative version of the RData file suitable for `readRDS` to give user control of the eventual object name. - `*pca.vals.txt`: Matrix of values for the first 2 principal components. + - `*.sample.dists.txt`: Sample distance matrix. - `R_sessionInfo.log`: File containing information about R, the OS and attached or loaded packages. - `/merged_library/macs2//consensus/sizeFactors/` - `*.txt`, `*.RData`: Files containing DESeq2 sizeFactors per sample. @@ -252,28 +252,18 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co **This pipeline uses a standardised DESeq2 analysis script to get an idea of the reproducibility within the experiment, and to assess the overall differential accessibility. Please note that this will not suit every experimental design, and if there are other problems with the experiment then it may not work as well as expected.** +The script included in the pipeline uses DESeq2 to normalise read counts across all of the provided samples in order to create a PCA plot and a clustered heatmap showing pairwise Euclidean distances between the samples in the experiment. These help to show the similarity between groups of samples and can reveal batch effects and other potential issues with the experiment. + By default, the pipeline uses the `vst` transformation which is more suited to larger experiments. You can set the parameter `--deseq2_vst false` if you wish to use the DESeq2 native `rlog` option. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation. ![MultiQC - DESeq2 PCA plot](images/mqc_deseq2_pca_plot.png) -
-[image removed: MultiQC - DESeq2 sample similarity plot]
-
-By default, all possible pairwise comparisons across the groups are performed (as defined in [`design.csv`](usage.md#--design)). The DESeq2 results are generated by the pipeline in various ways. You can load up the results across all of the comparisons in a single spreadsheet, or individual folders will also be created that contain the results specific to a particular comparison. For the latter, additional files will also be generated where the intervals have been pre-filtered based on a couple of standard FDR thresholds. Please see [DESeq2 output](http://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#differential-expression-analysis) for a description of the columns generated by DESeq2.
-
-[image removed: R - DESeq2 MA plot]
-
-[image removed: R - DESeq2 Volcano plot]
+
+[image: MultiQC - DESeq2 sample similarity plot]
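The transformation used for these QC plots can be switched from the command line. As a minimal sketch, reusing the illustrative invocation from `docs/usage.md`, a run with the DESeq2 native `rlog` transformation instead of the default `vst` might look like this:

```bash
# Sketch only: the same example parameters shown in docs/usage.md,
# plus --deseq2_vst false to switch the transformation to rlog
nextflow run nf-core/atacseq \
    --input ./samplesheet.csv \
    --outdir ./results \
    --genome GRCh37 \
    --read_length 150 \
    --deseq2_vst false \
    -profile docker
```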
### ataqv
- Output files +Output files - `/merged_library/ataqv//` - `.json`: JSON files containing ATAC-seq specific metrics for each sample. @@ -299,7 +289,7 @@ You can skip this portion of the analysis by specifying the `--skip_merge_replic ### Present QC for the raw read, alignment, peak and differential accessibility results
- Output files +Output files - `multiqc//` - `multiqc_report.html`: A standalone HTML file that can be viewed in your web browser. @@ -317,7 +307,7 @@ The pipeline has special steps which also allow the software versions to be repo ### Create IGV session file
- Output files +Output files - `igv//` - `igv_session.xml`: Session file that can be directly loaded into IGV. @@ -342,7 +332,7 @@ Once installed, open IGV, go to `File > Open Session` and select the `igv_sessio ### Reference genome files
- Output files +Output files - `genome/` - A number of genome-specific files are generated by the pipeline in order to aid in the filtering of the data, and because they are required by standard tools such as BEDTools. These can be found in this directory along with the genome fasta file which is required by IGV. If using a genome from AWS iGenomes, a `README.txt` file containing information about the annotation version, if it exists, will also be saved in this directory.
diff --git a/docs/usage.md b/docs/usage.md index 311f75af..cbec01e3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -38,6 +38,10 @@ CONTROL,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,1
The pipeline will automatically append the `*_T` suffix to the sample name within the pipeline e.g. `CONTROL_REP1_T1`, `CONTROL_REP1_T2` and `CONTROL_REP1_T3` using the example above.
+### Control data
+
+If controls are to be used for peak calling, use the parameter `--with_control`. In this case, the samplesheet file needs the additional columns `control` and `control_replicate`. These should contain the sample name and replicate number of the corresponding control sample.
+
### Full samplesheet
The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below. @@ -45,24 +49,26 @@ The pipeline will auto-detect whether a sample is single- or paired-end using th
A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 7 samples, where we have biological triplicates for both the `CONTROL` and `TREATMENT` groups, and the third replicate in the `TREATMENT` group has been a technical replicate as a result of being sequenced twice.
```console
-sample,fastq_1,fastq_2,replicate
-CONTROL,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,1
-CONTROL,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,2
-CONTROL,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,3
-TREATMENT,AEG588A4_S4_L003_R1_001.fastq.gz,,1
-TREATMENT,AEG588A5_S5_L003_R1_001.fastq.gz,,2
-TREATMENT,AEG588A6_S6_L003_R1_001.fastq.gz,,3
-TREATMENT,AEG588A6_S6_L004_R1_001.fastq.gz,,3
+sample,fastq_1,fastq_2,replicate,control,control_replicate
+CONTROL,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz,1,,
+CONTROL,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz,2,,
+CONTROL,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz,3,,
+TREATMENT,AEG588A4_S4_L003_R1_001.fastq.gz,,1,CONTROL,1
+TREATMENT,AEG588A5_S5_L003_R1_001.fastq.gz,,2,CONTROL,2
+TREATMENT,AEG588A6_S6_L003_R1_001.fastq.gz,,3,CONTROL,3
+TREATMENT,AEG588A6_S6_L004_R1_001.fastq.gz,,3,CONTROL,3
```
-| Column | Description |
-| ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
-| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
-| `replicate` | Integer representing replicate number. Must start from `1..`. |
+| Column | Description |
+| ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
+| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
+| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". |
+| `replicate` | Integer representing replicate number. This will be identical for re-sequenced libraries. Must start from `1..`. |
+| `control` | Sample name for control sample. |
+| `control_replicate` | Integer representing replicate number of the control sample. |
-An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
+Example sheets [without controls](../assets/samplesheet.csv) and [with controls](../assets/samplesheet_with_control.csv) have been provided with the pipeline.
## Reference genome files @@ -106,7 +112,7 @@ wget -L https://www.encodeproject.org/files/ENCFF356LFX/@@download/ENCFF356LFX.b
The typical command for running the pipeline is as follows:
```bash
-nextflow run nf-core/atacseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker
+nextflow run nf-core/atacseq --input ./samplesheet.csv --outdir ./results --genome GRCh37 --read_length 150 -profile docker
```
This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -120,6 +126,30 @@ work # Directory containing the nextflow working files # Other nextflow hidden files, eg. history of pipeline runs and old logs. ```
+If you wish to repeatedly use the same parameters for multiple runs, rather than specifying each flag in the command, you can specify these in a params file.
+
+Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`.
+
+> ⚠️ Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
+
+The above pipeline run specified with a params file in yaml format:
+
+```bash
+nextflow run nf-core/atacseq -profile docker -params-file params.yaml
+```
+
+with `params.yaml` containing:
+
+```yaml
+input: './samplesheet.csv'
+outdir: './results/'
+genome: 'GRCh37'
+read_length: 150
+<...>
+```
+
+You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
+
### Updating the pipeline
When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since.
To make sure that you're running the latest version of the pipeline, regularly update the cached version: @@ -132,9 +162,13 @@ nextflow pull nf-core/atacseq
It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
-First, go to the [nf-core/atacseq releases page](https://github.com/nf-core/atacseq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`.
+First, go to the [nf-core/atacseq releases page](https://github.com/nf-core/atacseq/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
-This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future.
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
+
+To further assist in reproducibility, you can share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter.
+
+> 💡 If you wish to share such a profile (such as uploading it as supplementary material for academic publications), make sure to NOT include cluster-specific paths to files or institution-specific profiles.
## Core Nextflow arguments @@ -144,7 +178,7 @@ This version number will be logged in reports when you run the pipeline, so that
### `-profile`
Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.
-Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/).
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below.
> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
@@ -153,8 +187,11 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c
Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles.
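Putting the reproducibility advice above together, a pinned and parameter-file-driven launch might look like the sketch below, where `1.3.1` is the illustrative release number used above and `params.yaml` is the example params file shown earlier:

```bash
# Refresh the locally cached copy of the pipeline
nextflow pull nf-core/atacseq

# Run a pinned release, re-using the saved parameter file
nextflow run nf-core/atacseq -r 1.3.1 -profile docker -params-file params.yaml
```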
-If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment.
+- `test`
+ - A profile with a complete configuration for automated testing
+ - Includes links to test data so needs no other parameters
- `docker`
- A generic configuration profile to be used with [Docker](https://docker.com/)
- `singularity` @@ -165,11 +202,10 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
- A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/)
- `charliecloud`
- A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
+- `apptainer`
+ - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
- `conda`
- - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud.
-- `test`
- - A profile with a complete configuration for automated testing
- - Includes links to test data so needs no other parameters
+ - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.
### `-resume` @@ -187,96 +223,19 @@ Specify the path to a specific config file (this is a core Nextflow command). Se
Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped.
-For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue:
-
-```console
-[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1)
-Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)'
-
-Caused by:
- Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137)
-
-Command executed:
- STAR \
- --genomeDir star \
- --readFilesIn WT_REP1_trimmed.fq.gz \
- --runThreadN 2 \
- --outFileNamePrefix WT_REP1. \
-
-
-Command exit status:
- 137
-
-Command output:
- (empty)
-
-Command error:
- .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1.
-Work dir: - /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb - -Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` -``` - -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). -We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. -If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). -The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. -The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. -Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. -The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. - -```nextflow -process { - withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { - memory = 100.GB - } -} -``` - -> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. -> -> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. - -### Updating containers - -The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release the nf-core/viralrecon everytime a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. - -1. 
Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19)
-2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags)
-3. Create the custom config accordingly:
-
- - For Docker:
+To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) sections of the nf-core website; a minimal config sketch is also shown below.
- ```nextflow
- process {
- withName: PANGOLIN {
- container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0'
- }
- }
- ```
+### Custom Containers
- - For Singularity:
+In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the version specified in the pipeline may be out of date.
- ```nextflow
- process {
- withName: PANGOLIN {
- container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0'
- }
- }
- ```
+To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website; a sketch of the config override is shown below.
- - For Conda:
+### Custom Tool Arguments
- ```nextflow
- process {
- withName: PANGOLIN {
- conda = 'bioconda::pangolin=3.0.5'
- }
- }
- ```
+A pipeline might not always support every possible argument or option of a particular tool used in the pipeline. Fortunately, nf-core pipelines provide some freedom for users to insert additional parameters that the pipeline does not include by default; see the `ext.args` sketch below.
-> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch.
+To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website.
### nf-core/configs
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index b3d092f8..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,529 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template.
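For the resource-tuning case referenced above, the custom config boils down to a `withName` selector plus the directive to change. A minimal sketch, in which the process name and memory value are illustrative assumptions rather than pipeline defaults (the fully qualified process name may be needed to override existing configuration):

```nextflow
// custom_resources.config -- supply to the pipeline with `-c custom_resources.config`
process {
    // Illustrative only: check the pipeline's base.config for the real labels and defaults
    withName: 'MACS2_CALLPEAK' {
        memory = 32.GB
    }
}
```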
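For the custom-container case, the same `withName` mechanism overrides the `container` directive. The tag below is a deliberate placeholder, not a real version; look up a current tag on [Quay.io](https://quay.io/) before using it:

```nextflow
// custom_container.config -- supply with `-c custom_container.config`
process {
    withName: 'MACS2_CALLPEAK' {
        // <TAG> is a placeholder; substitute a real biocontainers tag from quay.io
        container = 'quay.io/biocontainers/macs2:<TAG>'
    }
}
```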
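For custom tool arguments, nf-core DSL2 modules read extra command-line options from the `ext.args` directive, which can likewise be set from a custom config. The MACS2 flags below are real options but are chosen purely for illustration:

```nextflow
// custom_args.config -- supply with `-c custom_args.config`
process {
    withName: 'MACS2_CALLPEAK' {
        // By nf-core module convention, task.ext.args is appended to the tool command
        ext.args = '--nomodel --keep-dup all'
    }
}
```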
-// - -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - 'version', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - System.exit(1) - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def defaultValue = 
group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && params_value != 
"" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 27feb009..408951ae 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -32,6 +32,25 @@ class NfcoreTemplate { } } + // + // Generate version string + // + public static String version(workflow) { + String version_string = "" + + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + // // Construct and send completion email // @@ -61,7 +80,7 @@ class NfcoreTemplate { misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = workflow.manifest.version + email_fields['version'] = NfcoreTemplate.version(workflow) email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -109,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) @@ -146,10 +165,10 @@ class NfcoreTemplate { } // - // Construct and send adaptive card - // https://adaptivecards.io + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack // - public static void adaptivecard(workflow, params, summary_params, projectDir, log) { + public static void IM_notification(workflow, params, summary_params, projectDir, log) { def hook_url = params.hook_url def summary = [:] @@ -170,7 +189,7 @@ class NfcoreTemplate { misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp def msg_fields = [:] - msg_fields['version'] = workflow.manifest.version + msg_fields['version'] = NfcoreTemplate.version(workflow) msg_fields['runName'] = workflow.runName msg_fields['success'] = workflow.success msg_fields['dateComplete'] = workflow.complete @@ -178,13 +197,16 @@ class NfcoreTemplate { msg_fields['exitStatus'] = workflow.exitStatus msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") msg_fields['projectDir'] = workflow.projectDir msg_fields['summary'] = summary << misc_fields // Render the JSON template def engine = new groovy.text.GStringTemplateEngine() - def hf = new File("$projectDir/assets/adaptivecard.json") + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? 
"slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") def json_template = engine.createTemplate(hf).make(msg_fields) def json_message = json_template.toString() @@ -209,7 +231,7 @@ class NfcoreTemplate { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" } } else { log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" @@ -297,6 +319,7 @@ class NfcoreTemplate { // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) String.format( """\n ${dashedLine(monochrome_logs)} @@ -305,7 +328,7 @@ class NfcoreTemplate { ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} ${dashedLine(monochrome_logs)} """.stripIndent() ) diff --git a/lib/WorkflowAtacseq.groovy b/lib/WorkflowAtacseq.groovy index 80618c09..235ec444 100755 --- a/lib/WorkflowAtacseq.groovy +++ b/lib/WorkflowAtacseq.groovy @@ -2,6 +2,7 @@ // This file holds several functions specific to the workflow/atacseq.nf in the nf-core/atacseq pipeline // +import nextflow.Nextflow import groovy.text.SimpleTemplateEngine class WorkflowAtacseq { @@ -9,15 +10,9 @@ class WorkflowAtacseq { // // Check and validate parameters // - public static void initialise(params, log, valid_params) { + public static void initialise(params, log) { genomeExistsError(params, log) - - if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - System.exit(1) - } - if (!params.gtf && !params.gff) { log.error "No GTF or GFF3 annotation specified! The pipeline requires at least one of these files." System.exit(1) @@ -35,12 +30,6 @@ class WorkflowAtacseq { log.error "Both '--read_length' and '--macs_gsize' not specified! Please specify either to infer MACS2 genome size for peak calling." System.exit(1) } - if (params.aligner) { - if (!valid_params['aligners'].contains(params.aligner)) { - log.error "Invalid option: '${params.aligner}'. Valid options for '--aligner': ${valid_params['aligners'].join(', ')}." - System.exit(1) - } - } } // @@ -70,32 +59,76 @@ class WorkflowAtacseq { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 
2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so can be used as with familiar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>" + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() def description_html = engine.createTemplate(methods_text).make(meta) return description_html - }// + } + + // // Exit pipeline if incorrect --genome key provided // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - System.exit(1) + Nextflow.error(error_string) } }
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 347edb92..cfc6ac43 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -2,6 +2,8 @@ // This file holds several functions specific to the main.nf workflow in the nf-core/atacseq pipeline // +import nextflow.Nextflow + class WorkflowMain { // @@ -17,67 +19,30 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" }
- // - // Print help to screen if required - // - public static String help(workflow, params, log) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Print parameter summary log to screen - // - public static String paramsSummaryLog(workflow, params, log) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } - // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params, log) - System.exit(0) - } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + // Print workflow version and exit on --version + if (params.version) { + String workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) } - // Print parameter summary log to screen - - log.info paramsSummaryLog(workflow, params, log) - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) // Check that conda channels are set-up correctly
- if (params.enable_conda) { + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { Utils.checkCondaChannels(log) } // Check AWS batch settings NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } } + // // Get attribute from genome config file e.g. fasta // diff --git a/main.nf b/main.nf index 54aad22c..51db289d 100644 --- a/main.nf +++ b/main.nf @@ -36,6 +36,22 @@ params.macs_gsize = WorkflowMain.getMacsGsize(params) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/modules.json b/modules.json index 4149c653..b95992e1 100644 --- a/modules.json +++ b/modules.json @@ -7,172 +7,172 @@ "nf-core": { "ataqv/ataqv": { "branch": "master", - "git_sha": "56421e1a812bc2f9e77dbe9f297e9d9c580cb8a5", + "git_sha": "11c7e5b3073845889060c793786bf3177275d62e", "installed_by": ["modules"] }, "ataqv/mkarv": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "11c7e5b3073845889060c793786bf3177275d62e", "installed_by": ["modules"] }, "bowtie2/align": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "fastq_align_bowtie2"] + "git_sha": "fe54581f8bed20e4c4a51c616c93fd3379d89820", + "installed_by": ["fastq_align_bowtie2"] }, "bowtie2/build": { "branch": "master", - "git_sha": "e797efb47b0d3b2124753beb55dc83ab9512bceb", + "git_sha": "6a24fbe314bb2e6fe6306c29a63076ea87e8eb3c", "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "9518fa4f65f3fb8cde24fde7d40333b39ec8fd65", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "fastq_align_bwa"] + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": ["fastq_align_bwa"] }, "chromap/chromap": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "fastq_align_chromap"] + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": ["fastq_align_chromap"] }, "chromap/index": { "branch": "master", - "git_sha": "3a8e3ca607132a468c07c69aaa3bccd55eb983b8", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "8022c68e7403eecbd8ba9c49496f69f8c49d50f0", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, 
"deeptools/computematrix": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "deeptools/plotfingerprint": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "deeptools/plotheatmap": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "deeptools/plotprofile": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "fastqc": { "branch": "master", - "git_sha": "810e8f2603ec38401d49a4aaed06f6d058745552", - "installed_by": ["modules", "fastq_fastqc_umitools_trimgalore"] + "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", + "installed_by": ["fastq_fastqc_umitools_trimgalore"] }, "gffread": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "gunzip": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", "installed_by": ["modules"] }, "homer/annotatepeaks": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "khmer/uniquekmers": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "macs2/callpeak": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "picard/collectmultiplemetrics": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "picard/markduplicates": { "branch": "master", - "git_sha": "eca65aa4a5e2e192ac44d6962c8f9260f314ffb8", - "installed_by": ["modules", "bam_markduplicates_picard"] + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["bam_markduplicates_picard"] }, "picard/mergesamfiles": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "preseq/lcextrap": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["bam_stats_samtools"] }, "samtools/idxstats": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["bam_stats_samtools"] }, "samtools/index": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_markduplicates_picard", "bam_sort_stats_samtools"] + "git_sha": 
"911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] }, "samtools/sort": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", + "installed_by": ["bam_sort_stats_samtools"] }, "samtools/stats": { "branch": "master", - "git_sha": "cf5b9c30a2adacc581793afb79fae5f5b50bed01", - "installed_by": ["modules", "bam_stats_samtools"] + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["bam_stats_samtools"] }, "subread/featurecounts": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, "trimgalore": { "branch": "master", - "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40", - "installed_by": ["modules", "fastq_fastqc_umitools_trimgalore"] + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["fastq_fastqc_umitools_trimgalore"] }, "ucsc/bedgraphtobigwig": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "66290981ab6038ea86177ade40b9449bc790b0ce", "installed_by": ["modules"] }, "umitools/extract": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["fastq_fastqc_umitools_trimgalore"] }, "untar": { "branch": "master", - "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] } } @@ -181,42 +181,37 @@ "nf-core": { "bam_markduplicates_picard": { "branch": "master", - "git_sha": "6daac2bc63f4847e0c7cc661f4f5b043ac13faaf", + "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", "installed_by": ["subworkflows"] }, "bam_sort_stats_samtools": { "branch": "master", - "git_sha": "3911652a6b24249358f79e8b8466338d63efb2a2", - "installed_by": [ - "subworkflows", - "fastq_align_bwa", - "fastq_align_bowtie2", - "fastq_align_chromap" - ] + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["fastq_align_bowtie2", "fastq_align_bwa", "fastq_align_chromap"] }, "bam_stats_samtools": { "branch": "master", - "git_sha": "92eb5091ae5368a60cda58b3a0ced8b36d715b0f", - "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools", "subworkflows"] + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] }, "fastq_align_bowtie2": { "branch": "master", - "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", + "git_sha": "fe54581f8bed20e4c4a51c616c93fd3379d89820", "installed_by": ["subworkflows"] }, "fastq_align_bwa": { "branch": "master", - "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", + "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", "installed_by": ["subworkflows"] }, "fastq_align_chromap": { "branch": "master", - "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", + "git_sha": "735e1e04e7e01751d2d6e97055bbdb6f70683cc1", "installed_by": ["subworkflows"] }, "fastq_fastqc_umitools_trimgalore": { "branch": "master", - "git_sha": "b51a69e30973c71950225c817ad07a3337d22c40", + "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", "installed_by": ["subworkflows"] } } diff --git a/modules/local/bam_remove_orphans.nf b/modules/local/bam_remove_orphans.nf index 3444a862..26230ba8 100644 --- a/modules/local/bam_remove_orphans.nf 
+++ b/modules/local/bam_remove_orphans.nf @@ -2,10 +2,10 @@ process BAM_REMOVE_ORPHANS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::pysam=0.19.0 bioconda::samtools=1.15.1" : null) + conda "bioconda::pysam=0.19.0 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' : - 'quay.io/biocontainers/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' }" + 'biocontainers/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' }" input: tuple val(meta), path(bam) diff --git a/modules/local/bamtools_filter.nf b/modules/local/bamtools_filter.nf index ee469ffd..a1e8feed 100644 --- a/modules/local/bamtools_filter.nf +++ b/modules/local/bamtools_filter.nf @@ -2,10 +2,10 @@ process BAMTOOLS_FILTER { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" : null) + conda "bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' : - 'quay.io/biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" + 'biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf index 58ac1cae..66c96030 100644 --- a/modules/local/bedtools_genomecov.nf +++ b/modules/local/bedtools_genomecov.nf @@ -2,10 +2,10 @@ process BEDTOOLS_GENOMECOV { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" input: tuple val(meta), path(bam), path(flagstat) @@ -33,7 +33,9 @@ process BEDTOOLS_GENOMECOV { -scale \$SCALE_FACTOR \\ $pe \\ $args \\ - | sort -T '.' -k1,1 -k2,2n > ${prefix}.bedGraph + > tmp.bg + + bedtools sort -i tmp.bg > ${prefix}.bedGraph cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/deseq2_qc.nf b/modules/local/deseq2_qc.nf index 617b10a3..14b57f69 100644 --- a/modules/local/deseq2_qc.nf +++ b/modules/local/deseq2_qc.nf @@ -4,10 +4,10 @@ process DESEQ2_QC { // (Bio)conda packages have intentionally not been pinned to a specific version // This was to avoid the pipeline failing due to package conflicts whilst creating the environment when using -profile conda - conda (params.enable_conda ? 
"conda-forge::r-base bioconda::bioconductor-deseq2 bioconda::bioconductor-biocparallel bioconda::bioconductor-tximport bioconda::bioconductor-complexheatmap conda-forge::r-optparse conda-forge::r-ggplot2 conda-forge::r-rcolorbrewer conda-forge::r-pheatmap" : null) + conda "conda-forge::r-base bioconda::bioconductor-deseq2 bioconda::bioconductor-biocparallel bioconda::bioconductor-tximport bioconda::bioconductor-complexheatmap conda-forge::r-optparse conda-forge::r-ggplot2 conda-forge::r-rcolorbrewer conda-forge::r-pheatmap" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' : - 'quay.io/biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }" + 'biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }" input: tuple val(meta), path(counts) diff --git a/modules/local/frip_score.nf b/modules/local/frip_score.nf index cf0c1ce8..77f856bc 100644 --- a/modules/local/frip_score.nf +++ b/modules/local/frip_score.nf @@ -2,10 +2,10 @@ process FRIP_SCORE { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bedtools=2.30.0 bioconda::samtools=1.15.1" : null) + conda "bioconda::bedtools=2.30.0 bioconda::samtools=1.15.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0': - 'quay.io/biocontainers/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0' }" + 'biocontainers/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0' }" input: tuple val(meta), path(bam), path(peak) diff --git a/modules/local/genome_blacklist_regions.nf b/modules/local/genome_blacklist_regions.nf index 608e7788..74a7936f 100644 --- a/modules/local/genome_blacklist_regions.nf +++ b/modules/local/genome_blacklist_regions.nf @@ -1,10 +1,10 @@ process GENOME_BLACKLIST_REGIONS { tag "$sizes" - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': - 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + 'biocontainers/bedtools:2.30.0--hc088bd4_0' }" input: path sizes diff --git a/modules/local/get_autosomes.nf b/modules/local/get_autosomes.nf index ead7c2e3..ee40ecd1 100644 --- a/modules/local/get_autosomes.nf +++ b/modules/local/get_autosomes.nf @@ -1,10 +1,10 @@ process GET_AUTOSOMES { tag "$fai" - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path fai diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed.nf index c04515bb..059b5d1c 100644 --- a/modules/local/gtf2bed.nf +++ b/modules/local/gtf2bed.nf @@ -2,10 +2,10 @@ process GTF2BED { tag "$gtf" label 'process_low' - conda (params.enable_conda ? 
"conda-forge::perl=5.26.2" : null) + conda "conda-forge::perl=5.26.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/perl:5.26.2': - 'quay.io/biocontainers/perl:5.26.2' }" + 'biocontainers/perl:5.26.2' }" input: path gtf diff --git a/modules/local/igv.nf b/modules/local/igv.nf index 542d6cc2..d060d0e7 100644 --- a/modules/local/igv.nf +++ b/modules/local/igv.nf @@ -1,12 +1,13 @@ process IGV { - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3': - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path fasta + path fai path ("${bigwig_library_publish_dir}/*") path ("${peak_library_publish_dir}/*") path ("${consensus_library_publish_dir}/*") @@ -25,6 +26,7 @@ process IGV { path "*files.txt" , emit: txt path "*.xml" , emit: xml path fasta , emit: fasta + path fai , emit: fai path "versions.yml", emit: versions when: diff --git a/modules/local/macs2_consensus.nf b/modules/local/macs2_consensus.nf index b3178ee2..2e2ca7e8 100644 --- a/modules/local/macs2_consensus.nf +++ b/modules/local/macs2_consensus.nf @@ -2,10 +2,10 @@ process MACS2_CONSENSUS { tag "$meta.id" label 'process_long' - conda (params.enable_conda ? "conda-forge::biopython conda-forge::r-optparse=1.7.1 conda-forge::r-upsetr=1.4.0 bioconda::bedtools=2.30.0" : null) + conda "conda-forge::biopython conda-forge::r-optparse=1.7.1 conda-forge::r-upsetr=1.4.0 bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0': - 'quay.io/biocontainers/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0' }" + 'biocontainers/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0' }" input: tuple val(meta), path(peaks) @@ -23,12 +23,12 @@ process MACS2_CONSENSUS { task.ext.when == null || task.ext.when script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak' - def mergecols = is_narrow_peak ? (2..10).join(',') : (2..9).join(',') - def collapsecols = is_narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',') - def expandparam = is_narrow_peak ? '--is_narrow_peak' : '' + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak' + def mergecols = is_narrow_peak ? (2..10).join(',') : (2..9).join(',') + def collapsecols = is_narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',') + def expandparam = is_narrow_peak ? '--is_narrow_peak' : '' """ sort -T '.' -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 3f6a8830..6064fd7d 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? 
'bioconda::multiqc=1.13' : null) + conda 'bioconda::multiqc=1.13' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.13--pyhdfd78af_0' }" input: path multiqc_config diff --git a/modules/local/multiqc_custom_peaks.nf b/modules/local/multiqc_custom_peaks.nf index 9936e883..7972af76 100644 --- a/modules/local/multiqc_custom_peaks.nf +++ b/modules/local/multiqc_custom_peaks.nf @@ -1,9 +1,10 @@ process MULTIQC_CUSTOM_PEAKS { tag "$meta.id" - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(peak), path(frip) diff --git a/modules/local/plot_homer_annotatepeaks.nf b/modules/local/plot_homer_annotatepeaks.nf index ff2a04fb..ef4b1ce6 100644 --- a/modules/local/plot_homer_annotatepeaks.nf +++ b/modules/local/plot_homer_annotatepeaks.nf @@ -1,10 +1,10 @@ process PLOT_HOMER_ANNOTATEPEAKS { label 'process_medium' - conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': - 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + 'biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" input: path annos diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_macs2_qc.nf index 8f6d865e..a2c39b02 100644 --- a/modules/local/plot_macs2_qc.nf +++ b/modules/local/plot_macs2_qc.nf @@ -1,10 +1,10 @@ process PLOT_MACS2_QC { label 'process_medium' - conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': - 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + 'biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" input: path peaks diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index 38e9d9f5..fb76d767 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,10 +1,11 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" + label 'process_single' - conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + conda "conda-forge::python=3.8.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'quay.io/biocontainers/python:3.8.3' }" + 'biocontainers/python:3.8.3' }" input: path samplesheet @@ -17,10 +18,11 @@ process SAMPLESHEET_CHECK { task.ext.when == null || task.ext.when script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ + def args = task.ext.args ?: '' """ check_samplesheet.py \\ $samplesheet \\ - samplesheet.valid.csv + $args cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf index f1988bea..bd39cd07 100644 --- a/modules/local/star_align.nf +++ b/modules/local/star_align.nf @@ -3,10 +3,10 @@ process STAR_ALIGN { label 'process_high' // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? "bioconda::star=2.6.1d" : null) + conda "bioconda::star=2.6.1d" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/star:2.6.1d--0' : - 'quay.io/biocontainers/star:2.6.1d--0' }" + 'biocontainers/star:2.6.1d--0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/star_genomegenerate.nf b/modules/local/star_genomegenerate.nf index d2192297..a36af0a8 100644 --- a/modules/local/star_genomegenerate.nf +++ b/modules/local/star_genomegenerate.nf @@ -3,10 +3,10 @@ process STAR_GENOMEGENERATE { label 'process_high' // Note: 2.7X indices incompatible with AWS iGenomes. - conda (params.enable_conda ? "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" : null) + conda "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' : - 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" + 'biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" input: path fasta diff --git a/modules/local/tss_extract.nf b/modules/local/tss_extract.nf index 2889d055..e685256c 100644 --- a/modules/local/tss_extract.nf +++ b/modules/local/tss_extract.nf @@ -1,9 +1,9 @@ process TSS_EXTRACT { - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: path bed diff --git a/modules/nf-core/ataqv/ataqv/main.nf b/modules/nf-core/ataqv/ataqv/main.nf index 9cba912f..3a35b14d 100644 --- a/modules/nf-core/ataqv/ataqv/main.nf +++ b/modules/nf-core/ataqv/ataqv/main.nf @@ -2,10 +2,10 @@ process ATAQV_ATAQV { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::ataqv=1.3.0" : null) + conda 'bioconda::ataqv=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ataqv:1.3.0--py39hccc85d7_2' : - 'quay.io/biocontainers/ataqv:1.3.0--py39hccc85d7_2' }" + 'https://depot.galaxyproject.org/singularity/ataqv:1.3.1--py310ha155cf9_1' : + 'biocontainers/ataqv:1.3.1--py310ha155cf9_1' }" input: tuple val(meta), path(bam), path(bai), path(peak_file) diff --git a/modules/nf-core/ataqv/ataqv/meta.yml b/modules/nf-core/ataqv/ataqv/meta.yml index 0a15517b..86f7d845 100644 --- a/modules/nf-core/ataqv/ataqv/meta.yml +++ b/modules/nf-core/ataqv/ataqv/meta.yml @@ -1,6 +1,8 @@ name: ataqv_ataqv description: ataqv function of a corresponding ataqv tool keywords: + - ATAC-seq + - qc - ataqv tools: - ataqv: @@ -8,7 +10,7 @@ tools: homepage: https://github.com/ParkerLab/ataqv/blob/master/README.rst documentation: https://github.com/ParkerLab/ataqv/blob/master/README.rst tool_dev_url: https://github.com/ParkerLab/ataqv - doi: "https://doi.org/10.1016/j.cels.2020.02.009" + doi: "10.1016/j.cels.2020.02.009" licence: ["GPL v3"] input: diff --git a/modules/nf-core/ataqv/mkarv/main.nf b/modules/nf-core/ataqv/mkarv/main.nf index 3a0550d4..8722be5d 100644 --- a/modules/nf-core/ataqv/mkarv/main.nf +++ b/modules/nf-core/ataqv/mkarv/main.nf @@ -1,13 +1,13 @@ process ATAQV_MKARV { label 'process_medium' - conda (params.enable_conda ? "bioconda::ataqv=1.3.0" : null) + conda 'bioconda::ataqv=1.3.1' container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ataqv:1.3.0--py39hccc85d7_2': - 'quay.io/biocontainers/ataqv:1.3.0--py39hccc85d7_2' }" + 'https://depot.galaxyproject.org/singularity/ataqv:1.3.1--py310ha155cf9_1': + 'biocontainers/ataqv:1.3.1--py310ha155cf9_1' }" input: - path json + path "jsons/*" output: path "html" , emit: html @@ -24,7 +24,7 @@ process ATAQV_MKARV { --concurrency $task.cpus \\ --force \\ ./html/ \\ - ${json.join(' ')} + jsons/* cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/ataqv/mkarv/meta.yml b/modules/nf-core/ataqv/mkarv/meta.yml index 8da66c61..6ad6d69e 100644 --- a/modules/nf-core/ataqv/mkarv/meta.yml +++ b/modules/nf-core/ataqv/mkarv/meta.yml @@ -1,14 +1,19 @@ name: "ataqv_mkarv" description: mkarv function of a corresponding ataqv tool keywords: + - ataqv + - ATAC-seq + - qc + - ataqv - mkarv + tools: - "ataqv": description: "ataqv is a toolkit for measuring and comparing ATAC-seq results. It was written to help understand how well ATAC-seq assays have worked, and to make it easier to spot differences that might be caused by library prep or sequencing." 
homepage: "https://github.com/ParkerLab/ataqv/blob/master/README.rst" documentation: "https://github.com/ParkerLab/ataqv/blob/master/README.rst" tool_dev_url: "https://github.com/ParkerLab/ataqv" - doi: "" + licence: "['GPL v3']" input: diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index 909119d2..a77114d2 100644 --- a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -2,10 +2,10 @@ process BOWTIE2_ALIGN { tag "$meta.id" label "process_high" - conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" : null) - container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? - "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0" : - "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0" }" + conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" input: tuple val(meta) , path(reads) @@ -14,10 +14,10 @@ process BOWTIE2_ALIGN { val sort_bam output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true - path "versions.yml" , emit: versions + tuple val(meta), path("*.{bam,sam}"), emit: aligned + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq, optional:true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -38,10 +38,12 @@ process BOWTIE2_ALIGN { } def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` - [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 bowtie2 \\ @@ -51,7 +53,7 @@ process BOWTIE2_ALIGN { $unaligned \\ $args \\ 2> ${prefix}.bowtie2.log \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.${extension} - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz @@ -68,4 +70,25 @@ process BOWTIE2_ALIGN { pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) END_VERSIONS """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? 
(args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + + """ + touch ${prefix}.${extension} + touch ${prefix}.bowtie2.log + touch ${prefix}.unmapped_1.fastq.gz + touch ${prefix}.unmapped_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + } diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml index c8e9a001..60d04c12 100644 --- a/modules/nf-core/bowtie2/align/meta.yml +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -46,10 +46,10 @@ input: description: use samtools sort (true) or samtools view (false) pattern: "true or false" output: - - bam: + - aligned: type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" + description: Output BAM/SAM file containing read alignments + pattern: "*.{bam,sam}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/bowtie2/build/main.nf b/modules/nf-core/bowtie2/build/main.nf index 218c174a..069d9c12 100644 --- a/modules/nf-core/bowtie2/build/main.nf +++ b/modules/nf-core/bowtie2/build/main.nf @@ -2,10 +2,10 @@ process BOWTIE2_BUILD { tag "$fasta" label 'process_high' - conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + conda "bioconda::bowtie2=2.4.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : - 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" + 'biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" input: tuple val(meta), path(fasta) @@ -27,4 +27,16 @@ process BOWTIE2_BUILD { bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') END_VERSIONS """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf index 6d70fc15..8d2e56d9 100644 --- a/modules/nf-core/bwa/index/main.nf +++ b/modules/nf-core/bwa/index/main.nf @@ -2,10 +2,10 @@ process BWA_INDEX { tag "$fasta" label 'process_single' - conda (params.enable_conda ? "bioconda::bwa=0.7.17" : null) + conda "bioconda::bwa=0.7.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : - 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + 'biocontainers/bwa:0.7.17--hed695b0_7' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/bwa/mem/main.nf b/modules/nf-core/bwa/mem/main.nf index 7eb9d5c2..d2f85daf 100644 --- a/modules/nf-core/bwa/mem/main.nf +++ b/modules/nf-core/bwa/mem/main.nf @@ -2,10 +2,10 @@ process BWA_MEM { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" : null) + conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : - 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" input: tuple val(meta), path(reads) @@ -25,7 +25,7 @@ process BWA_MEM { def prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` bwa mem \\ $args \\ diff --git a/modules/nf-core/chromap/chromap/main.nf b/modules/nf-core/chromap/chromap/main.nf index 5425f402..a39302e3 100644 --- a/modules/nf-core/chromap/chromap/main.nf +++ b/modules/nf-core/chromap/chromap/main.nf @@ -2,15 +2,15 @@ process CHROMAP_CHROMAP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::chromap=0.2.1 bioconda::samtools=1.16.1" : null) + conda "bioconda::chromap=0.2.4 bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:25259bafb105193269a9fd7595434c6fbddd4d3b-0' : - 'quay.io/biocontainers/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:25259bafb105193269a9fd7595434c6fbddd4d3b-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:5b2e433ab8b3d1ef098fc944b567fd98caa23f56-0' : + 'biocontainers/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:5b2e433ab8b3d1ef098fc944b567fd98caa23f56-0' }" input: tuple val(meta), path(reads) tuple val(meta2), path(fasta) - tuple val(meta2), path(index) + tuple val(meta3), path(index) path barcodes path whitelist path chr_order diff --git a/modules/nf-core/chromap/chromap/meta.yml b/modules/nf-core/chromap/chromap/meta.yml index 7583cc33..05f70cf0 100644 --- a/modules/nf-core/chromap/chromap/meta.yml +++ b/modules/nf-core/chromap/chromap/meta.yml @@ -20,7 +20,7 @@ tools: homepage: https://github.com/haowenz/chromap documentation: https://github.com/haowenz/chromap tool_dev_url: https://github.com/haowenz/chromap - doi: "" + licence: ["GPL v3"] input: - meta: @@ -36,12 +36,17 @@ input: - meta2: type: map description: | - Groovy Map containing sample information + Groovy Map containing information for the fasta e.g. [ id:'test' ] - fasta: type: file description: | The fasta reference file. + - meta3: + type: map + description: | + Groovy Map containing information for the index + e.g. [ id:'test' ] - index: type: file description: | diff --git a/modules/nf-core/chromap/index/main.nf b/modules/nf-core/chromap/index/main.nf index c25bf9e5..fb3773f8 100644 --- a/modules/nf-core/chromap/index/main.nf +++ b/modules/nf-core/chromap/index/main.nf @@ -2,10 +2,10 @@ process CHROMAP_INDEX { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? "bioconda::chromap=0.2.1" : null) + conda "bioconda::chromap=0.2.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/chromap:0.2.1--hd03093a_0' : - 'quay.io/biocontainers/chromap:0.2.1--hd03093a_0' }" + 'https://depot.galaxyproject.org/singularity/chromap:0.2.4--hd03093a_0' : + 'biocontainers/chromap:0.2.4--hd03093a_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/chromap/index/meta.yml b/modules/nf-core/chromap/index/meta.yml index d14694b2..39c5459b 100644 --- a/modules/nf-core/chromap/index/meta.yml +++ b/modules/nf-core/chromap/index/meta.yml @@ -11,7 +11,7 @@ tools: homepage: https://github.com/haowenz/chromap documentation: https://github.com/haowenz/chromap tool_dev_url: https://github.com/haowenz/chromap - doi: "" + licence: ["GPL v3"] input: diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index cebb6e05..ebc87273 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? 'bioconda::multiqc=1.13' : null) + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml index 60b546a0..c32657de 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/meta.yml @@ -1,7 +1,9 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json name: custom_dumpsoftwareversions description: Custom module used to dump software versions within the nf-core pipeline template keywords: - custom + - dump - version tools: - custom: diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf index 9f1984b0..060a2e88 100644 --- a/modules/nf-core/custom/getchromsizes/main.nf +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -2,10 +2,10 @@ process CUSTOM_GETCHROMSIZES { tag "$fasta" label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'biocontainers/samtools:1.16.1--h6899075_1' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/deeptools/computematrix/main.nf b/modules/nf-core/deeptools/computematrix/main.nf index 96dfef3c..e77e2839 100644 --- a/modules/nf-core/deeptools/computematrix/main.nf +++ b/modules/nf-core/deeptools/computematrix/main.nf @@ -2,10 +2,10 @@ process DEEPTOOLS_COMPUTEMATRIX { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + conda "bioconda::deeptools=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'biocontainers/deeptools:3.5.1--py_0' }" input: tuple val(meta), path(bigwig) diff --git a/modules/nf-core/deeptools/computematrix/meta.yml b/modules/nf-core/deeptools/computematrix/meta.yml index eaa990dd..8a148f5d 100644 --- a/modules/nf-core/deeptools/computematrix/meta.yml +++ b/modules/nf-core/deeptools/computematrix/meta.yml @@ -8,7 +8,6 @@ keywords: tools: - deeptools: description: A set of user-friendly tools for normalization and visualization of deep-sequencing data - homepage: documentation: https://deeptools.readthedocs.io/en/develop/index.html tool_dev_url: https://github.com/deeptools/deepTools doi: "10.1093/nar/gku365" diff --git a/modules/nf-core/deeptools/plotfingerprint/main.nf b/modules/nf-core/deeptools/plotfingerprint/main.nf index 83613be7..3159e941 100644 --- a/modules/nf-core/deeptools/plotfingerprint/main.nf +++ b/modules/nf-core/deeptools/plotfingerprint/main.nf @@ -2,10 +2,10 @@ process DEEPTOOLS_PLOTFINGERPRINT { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + conda "bioconda::deeptools=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'biocontainers/deeptools:3.5.1--py_0' }" input: tuple val(meta), path(bams), path(bais) diff --git a/modules/nf-core/deeptools/plotfingerprint/meta.yml b/modules/nf-core/deeptools/plotfingerprint/meta.yml index 07c25748..29ddf893 100644 --- a/modules/nf-core/deeptools/plotfingerprint/meta.yml +++ b/modules/nf-core/deeptools/plotfingerprint/meta.yml @@ -8,7 +8,6 @@ keywords: tools: - deeptools: description: A set of user-friendly tools for normalization and visualization of deep-sequencing data - homepage: documentation: https://deeptools.readthedocs.io/en/develop/index.html tool_dev_url: https://github.com/deeptools/deepTools doi: "10.1093/nar/gku365" diff --git a/modules/nf-core/deeptools/plotheatmap/main.nf b/modules/nf-core/deeptools/plotheatmap/main.nf index 1e402e39..58309eea 100644 --- a/modules/nf-core/deeptools/plotheatmap/main.nf +++ b/modules/nf-core/deeptools/plotheatmap/main.nf @@ -2,10 +2,10 @@ process DEEPTOOLS_PLOTHEATMAP { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + conda "bioconda::deeptools=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'biocontainers/deeptools:3.5.1--py_0' }" input: tuple val(meta), path(matrix) diff --git a/modules/nf-core/deeptools/plotheatmap/meta.yml b/modules/nf-core/deeptools/plotheatmap/meta.yml index ea206fb6..9296f503 100644 --- a/modules/nf-core/deeptools/plotheatmap/meta.yml +++ b/modules/nf-core/deeptools/plotheatmap/meta.yml @@ -8,7 +8,6 @@ keywords: tools: - deeptools: description: A set of user-friendly tools for normalization and visualization of deep-sequencing data - homepage: documentation: https://deeptools.readthedocs.io/en/develop/index.html tool_dev_url: https://github.com/deeptools/deepTools doi: "10.1093/nar/gku365" diff --git a/modules/nf-core/deeptools/plotprofile/main.nf b/modules/nf-core/deeptools/plotprofile/main.nf index d83a9493..b1ab454d 100644 --- a/modules/nf-core/deeptools/plotprofile/main.nf +++ b/modules/nf-core/deeptools/plotprofile/main.nf @@ -2,10 +2,10 @@ process DEEPTOOLS_PLOTPROFILE { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + conda "bioconda::deeptools=3.5.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : - 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + 'biocontainers/deeptools:3.5.1--py_0' }" input: tuple val(meta), path(matrix) diff --git a/modules/nf-core/deeptools/plotprofile/meta.yml b/modules/nf-core/deeptools/plotprofile/meta.yml index 795fda44..44612ea9 100644 --- a/modules/nf-core/deeptools/plotprofile/meta.yml +++ b/modules/nf-core/deeptools/plotprofile/meta.yml @@ -8,7 +8,6 @@ keywords: tools: - deeptools: description: A set of user-friendly tools for normalization and visualization of deep-sequencing data - homepage: documentation: https://deeptools.readthedocs.io/en/develop/index.html tool_dev_url: https://github.com/deeptools/deepTools doi: "10.1093/nar/gku365" diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 47fd0e58..249f9064 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -2,10 +2,10 @@ process FASTQC { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + conda "bioconda::fastqc=0.11.9" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" + 'biocontainers/fastqc:0.11.9--0' }" input: tuple val(meta), path(reads) @@ -29,7 +29,11 @@ process FASTQC { printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf index 7c575c97..f4472b0e 100644 --- a/modules/nf-core/gffread/main.nf +++ b/modules/nf-core/gffread/main.nf @@ -2,10 +2,10 @@ process GFFREAD { tag "$gff" label 'process_low' - conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null) + conda "bioconda::gffread=0.12.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : - 'quay.io/biocontainers/gffread:0.12.1--h8b12597_0' }" + 'biocontainers/gffread:0.12.1--h8b12597_0' }" input: path gff diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index fa6ba26a..73bf08cd 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -2,10 +2,10 @@ process GUNZIP { tag "$archive" label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) @@ -21,10 +21,14 @@ process GUNZIP { def args = task.ext.args ?: '' gunzip = archive.toString() - '.gz' """ - gunzip \\ - -f \\ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ $args \\ - $archive + $archive \\ + > $gunzip cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4d2ebc84..4cdcdf4c 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -3,31 +3,32 @@ description: Compresses and decompresses files. keywords: - gunzip - compression + - decompression tools: - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] input: - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" + type: file + description: File to be compressed/uncompressed + pattern: "*.*" output: - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" + type: file + description: Compressed/uncompressed file + pattern: "*.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/homer/annotatepeaks/main.nf b/modules/nf-core/homer/annotatepeaks/main.nf index 9056a5ab..cfc140f5 100644 --- a/modules/nf-core/homer/annotatepeaks/main.nf +++ b/modules/nf-core/homer/annotatepeaks/main.nf @@ -3,10 +3,10 @@ process HOMER_ANNOTATEPEAKS { label 'process_medium' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::homer=4.11" : null) + conda "bioconda::homer=4.11" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/homer:4.11--pl526hc9558a2_3' : - 'quay.io/biocontainers/homer:4.11--pl526hc9558a2_3' }" + 'biocontainers/homer:4.11--pl526hc9558a2_3' }" input: tuple val(meta), path(peak) diff --git a/modules/nf-core/khmer/uniquekmers/main.nf b/modules/nf-core/khmer/uniquekmers/main.nf index 8ad89a62..9576034f 100644 --- a/modules/nf-core/khmer/uniquekmers/main.nf +++ b/modules/nf-core/khmer/uniquekmers/main.nf @@ -2,10 +2,10 @@ process KHMER_UNIQUEKMERS { tag "$fasta" label 'process_low' - conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null) + conda "bioconda::khmer=3.0.0a3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2' : - 'quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2' }" + 'biocontainers/khmer:3.0.0a3--py37haa7609a_2' }" input: path fasta diff --git a/modules/nf-core/khmer/uniquekmers/meta.yml b/modules/nf-core/khmer/uniquekmers/meta.yml index 31405cc1..f9f63972 100644 --- a/modules/nf-core/khmer/uniquekmers/meta.yml +++ b/modules/nf-core/khmer/uniquekmers/meta.yml @@ -1,5 +1,5 @@ name: "khmer_uniquekmers" -description: +description: In-memory nucleotide sequence k-mer counting, filtering, graph traversal and more keywords: - khmer - k-mer diff --git a/modules/nf-core/macs2/callpeak/main.nf b/modules/nf-core/macs2/callpeak/main.nf index 9aaf97a9..cbef7838 100644 --- a/modules/nf-core/macs2/callpeak/main.nf +++ b/modules/nf-core/macs2/callpeak/main.nf @@ -2,10 +2,10 @@ process MACS2_CALLPEAK { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::macs2=2.2.7.1" : null) + conda "bioconda::macs2=2.2.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/macs2:2.2.7.1--py38h4a8c8d9_3' : - 'quay.io/biocontainers/macs2:2.2.7.1--py38h4a8c8d9_3' }" + 'biocontainers/macs2:2.2.7.1--py38h4a8c8d9_3' }" input: tuple val(meta), path(ipbam), path(controlbam) diff --git a/modules/nf-core/macs2/callpeak/meta.yml b/modules/nf-core/macs2/callpeak/meta.yml index 982bc5b2..6e2bc1db 100644 --- a/modules/nf-core/macs2/callpeak/meta.yml +++ b/modules/nf-core/macs2/callpeak/meta.yml @@ -8,10 +8,10 @@ keywords: tools: - macs2: description: Model Based Analysis for ChIP-Seq data - homepage: None + documentation: https://docs.csc.fi/apps/macs2/ tool_dev_url: https://github.com/macs3-project/MACS - doi: "https://doi.org/10.1101/496521" + doi: "10.1101/496521" licence: ["BSD"] input: diff --git a/modules/nf-core/picard/collectmultiplemetrics/main.nf b/modules/nf-core/picard/collectmultiplemetrics/main.nf index 63f4e872..91fe9170 100644 --- a/modules/nf-core/picard/collectmultiplemetrics/main.nf +++ b/modules/nf-core/picard/collectmultiplemetrics/main.nf @@ -1,16 +1,16 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: - tuple val(meta), path(bam) - path fasta - path fai + tuple val(meta) , path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*_metrics"), emit: metrics @@ -24,15 +24,15 @@ process PICARD_COLLECTMULTIPLEMETRICS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ CollectMultipleMetrics \\ $args \\ --INPUT $bam \\ diff --git a/modules/nf-core/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/picard/collectmultiplemetrics/meta.yml index c11b02cf..22656080 100644 --- a/modules/nf-core/picard/collectmultiplemetrics/meta.yml +++ b/modules/nf-core/picard/collectmultiplemetrics/meta.yml @@ -23,11 +23,25 @@ input: e.g. [ id:'test', single_end:false ] - bam: type: file - description: BAM file - pattern: "*.{bam}" + description: SAM/BAM/CRAM file + pattern: "*.{sam,bam,cram}" + - bai: + type: file + description: Optional SAM/BAM/CRAM file index + pattern: "*.{sai,bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] - fasta: type: file description: Genome fasta file + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] - fai: type: file description: Index of FASTA file. Only needed when fasta is supplied. diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf index d1f3aaa1..facd7efb 100644 --- a/modules/nf-core/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -2,15 +2,15 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(bam) - path fasta - path fai + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*.bam") , emit: bam @@ -24,15 +24,15 @@ process PICARD_MARKDUPLICATES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
} else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ MarkDuplicates \\ $args \\ --INPUT $bam \\ diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml index 3f2357bb..f7693d2f 100644 --- a/modules/nf-core/picard/markduplicates/meta.yml +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -25,10 +25,20 @@ input: type: file description: BAM file pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: type: file description: Reference genome fasta file pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fai: type: file description: Reference genome fasta index @@ -58,3 +68,4 @@ output: authors: - "@drpatelh" - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/picard/mergesamfiles/main.nf b/modules/nf-core/picard/mergesamfiles/main.nf index cccf4d3d..fc23ea01 100644 --- a/modules/nf-core/picard/mergesamfiles/main.nf +++ b/modules/nf-core/picard/mergesamfiles/main.nf @@ -2,10 +2,10 @@ process PICARD_MERGESAMFILES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(bams) @@ -21,16 +21,16 @@ process PICARD_MERGESAMFILES { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def bam_files = bams.sort() - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard MergeSamFiles] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } if (bam_files.size() > 1) { """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ MergeSamFiles \\ $args \\ ${'--INPUT '+bam_files.join(' --INPUT ')} \\ diff --git a/modules/nf-core/preseq/lcextrap/main.nf b/modules/nf-core/preseq/lcextrap/main.nf index a98a922c..12546f0a 100644 --- a/modules/nf-core/preseq/lcextrap/main.nf +++ b/modules/nf-core/preseq/lcextrap/main.nf @@ -3,10 +3,10 @@ process PRESEQ_LCEXTRAP { label 'process_single' label 'error_ignore' - conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null) + conda "bioconda::preseq=3.1.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/preseq:3.1.2--h445547b_2': - 'quay.io/biocontainers/preseq:3.1.2--h445547b_2' }" + 'biocontainers/preseq:3.1.2--h445547b_2' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/preseq/lcextrap/meta.yml b/modules/nf-core/preseq/lcextrap/meta.yml index f1be05a2..1391961c 100755 --- a/modules/nf-core/preseq/lcextrap/meta.yml +++ b/modules/nf-core/preseq/lcextrap/meta.yml @@ -10,7 +10,7 @@ tools: homepage: http://smithlabresearch.org/software/preseq/ documentation: http://smithlabresearch.org/wp-content/uploads/manual.pdf tool_dev_url: https://github.com/smithlabcode/preseq - doi: "" + licence: ["GPL"] input: diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index 33ee499a..eb7e72fc 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index 95269063..954225df 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -14,7 +14,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf index 19330e53..a257d700 100644 --- a/modules/nf-core/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml index 3710ab88..dda87e1e 100644 --- a/modules/nf-core/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -15,7 +15,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 8577dc9d..0b20aa4b 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index e5cadbc2..8bd2fa6f 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf index ac43e67a..2b7753fd 100644 --- a/modules/nf-core/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(bam) @@ -23,7 +23,13 @@ process SAMTOOLS_SORT { def prefix = task.ext.prefix ?: "${meta.id}" if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + samtools sort \\ + $args \\ + -@ $task.cpus \\ + -o ${prefix}.bam \\ + -T $prefix \\ + $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml index 09289751..07328431 100644 --- a/modules/nf-core/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf index 5d2fd68a..4a2607de 100644 --- a/modules/nf-core/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -2,14 +2,14 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.16.1" : null) + conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : - 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" input: tuple val(meta), path(input), path(input_index) - path fasta + tuple val(meta2), path(fasta) output: tuple val(meta), path("*.stats"), emit: stats diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml index cac50b1c..90e6345f 100644 --- a/modules/nf-core/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -13,7 +13,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -23,16 +23,21 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] - fasta: - type: optional file - description: Reference file the CRAM was created with + type: file + description: Reference file the CRAM was created with (optional) pattern: "*.{fasta,fa}" output: - meta: @@ -51,3 +56,4 @@ output: authors: - "@drpatelh" - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/subread/featurecounts/main.nf b/modules/nf-core/subread/featurecounts/main.nf index 18e2a92b..a524b92f 100644 --- a/modules/nf-core/subread/featurecounts/main.nf +++ b/modules/nf-core/subread/featurecounts/main.nf @@ -2,10 +2,10 @@ process SUBREAD_FEATURECOUNTS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::subread=2.0.1" : null) + conda "bioconda::subread=2.0.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/subread:2.0.1--hed695b0_0' : - 'quay.io/biocontainers/subread:2.0.1--hed695b0_0' }" + 'biocontainers/subread:2.0.1--hed695b0_0' }" input: tuple val(meta), path(bams), path(annotation) diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf index 5b45e4d7..dcb77ae7 100644 --- a/modules/nf-core/trimgalore/main.nf +++ b/modules/nf-core/trimgalore/main.nf @@ -2,22 +2,21 @@ process TRIMGALORE { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::trim-galore=0.6.7' : null) + conda "bioconda::trim-galore=0.6.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : - 'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }" + 'biocontainers/trim-galore:0.6.7--hdfd78af_0' }" input: tuple val(meta), path(reads) output: - tuple val(meta), path("*{trimmed,val}*.fq.gz"), emit: reads - tuple val(meta), path("*report.txt") , emit: log - path "versions.yml" , emit: versions - - tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true - tuple val(meta), path("*.html") , emit: html , optional: true - tuple val(meta), path("*.zip") , emit: zip , optional: true + tuple val(meta), path("*{3prime,5prime,trimmed,val}*.fq.gz"), emit: reads + tuple val(meta), path("*report.txt") , emit: log , optional: true + tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true + tuple val(meta), path("*.html") , emit: html , optional: true + tuple val(meta), path("*.zip") , emit: zip , optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -38,10 +37,12 @@ process TRIMGALORE { // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" if (meta.single_end) { + def args_list = args.split("\\s(?=--)").toList() + args_list.removeAll { it.toLowerCase().contains('_r2 ') } """ [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz trim_galore \\ - $args \\ + ${args_list.join(' ')} \\ --cores $cores \\ --gzip \\ ${prefix}.fastq.gz diff --git a/modules/nf-core/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml index 439f566d..f84c4d77 100644 --- a/modules/nf-core/trimgalore/meta.yml +++ b/modules/nf-core/trimgalore/meta.yml @@ -36,7 +36,7 @@ output: description: | List of input adapter trimmed FastQ files of size 1 and 2 for single-end and paired-end data, respectively. - pattern: "*.{fq.gz}" + pattern: "*{3prime,5prime,trimmed,val}*.fq.gz" - unpaired: type: file description: | diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf index a2979e04..06bb4709 100644 --- a/modules/nf-core/ucsc/bedgraphtobigwig/main.nf +++ b/modules/nf-core/ucsc/bedgraphtobigwig/main.nf @@ -3,10 +3,10 @@ process UCSC_BEDGRAPHTOBIGWIG { label 'process_single' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::ucsc-bedgraphtobigwig=377" : null) + conda "bioconda::ucsc-bedgraphtobigwig=445" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:377--h446ed27_1' : - 'quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h446ed27_1' }" + 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:445--h954228d_0' : + 'biocontainers/ucsc-bedgraphtobigwig:445--h954228d_0' }" input: tuple val(meta), path(bedgraph) @@ -22,7 +22,7 @@ process UCSC_BEDGRAPHTOBIGWIG { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def VERSION = '445' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ bedGraphToBigWig \\ $bedgraph \\ @@ -34,4 +34,16 @@ process UCSC_BEDGRAPHTOBIGWIG { ucsc: $VERSION END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '445' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.bigWig + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ } diff --git a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml index 1be1a3b7..416c91e0 100755 --- a/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml +++ b/modules/nf-core/ucsc/bedgraphtobigwig/meta.yml @@ -3,13 +3,14 @@ description: Convert a bedGraph file to bigWig format. keywords: - bedgraph - bigwig + - ucsc + - bedgraphtobigwig + - converter tools: - ucsc: description: Convert a bedGraph file to bigWig format. - homepage: None - documentation: None - tool_dev_url: None - doi: "" + homepage: http://hgdownload.cse.ucsc.edu/admin/exe/ + documentation: https://genome.ucsc.edu/goldenPath/help/bigWig.html licence: ["varies; see http://genome.ucsc.edu/license"] input: diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf index 22a405b9..2f94fa93 100644 --- a/modules/nf-core/umitools/extract/main.nf +++ b/modules/nf-core/umitools/extract/main.nf @@ -1,11 +1,12 @@ process UMITOOLS_EXTRACT { tag "$meta.id" - label "process_low" + label "process_single" + label "process_long" - conda (params.enable_conda ? "bioconda::umi_tools=1.1.2" : null) + conda "bioconda::umi_tools=1.1.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.2--py38h4a8c8d9_0' : - 'quay.io/biocontainers/umi_tools:1.1.2--py38h4a8c8d9_0' }" + 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' : + 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }" input: tuple val(meta), path(reads) diff --git a/modules/nf-core/umitools/extract/meta.yml b/modules/nf-core/umitools/extract/meta.yml index 7fc23f72..db64a0f8 100644 --- a/modules/nf-core/umitools/extract/meta.yml +++ b/modules/nf-core/umitools/extract/meta.yml @@ -5,42 +5,42 @@ keywords: - extract tools: - umi_tools: - description: > - UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) - and single cell RNA-Seq cell barcodes - documentation: https://umi-tools.readthedocs.io/en/latest/ - license: ["MIT"] + description: > + UMI-tools contains tools for dealing with Unique Molecular Identifiers (UMIs)/Random Molecular Tags (RMTs) + and single cell RNA-Seq cell barcodes + documentation: https://umi-tools.readthedocs.io/en/latest/ + license: ["MIT"] input: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: list - description: | - List of input FASTQ files whose UMIs will be extracted. + type: list + description: | + List of input FASTQ files whose UMIs will be extracted. output: - meta: - type: map - description: | - Groovy Map containing sample information + type: map + description: | + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - reads: - type: file - description: > - Extracted FASTQ files. | - For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + type: file + description: > + Extracted FASTQ files. | + For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. - pattern: "*.{fastq.gz}" + pattern: "*.{fastq.gz}" - log: - type: file - description: Logfile for umi_tools - pattern: "*.{log}" + type: file + description: Logfile for umi_tools + pattern: "*.{log}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf index 71eea7b2..8cd1856c 100644 --- a/modules/nf-core/untar/main.nf +++ b/modules/nf-core/untar/main.nf @@ -2,17 +2,17 @@ process UNTAR { tag "$archive" label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" + 'nf-core/ubuntu:20.04' }" input: tuple val(meta), path(archive) output: - tuple val(meta), path("$untar"), emit: untar - path "versions.yml" , emit: versions + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,31 +20,29 @@ process UNTAR { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - untar = archive.toString() - '.tar.gz' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) """ - mkdir output + mkdir $prefix ## Ensures --strip-components only applied when top level of tar contents is a directory - ## If just files or multiple directories, place all in output - if [[ \$(tar -tzf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then tar \\ - -C output --strip-components 1 \\ - -xzvf \\ + -C $prefix --strip-components 1 \\ + -xavf \\ $args \\ $archive \\ $args2 else tar \\ - -C output \\ - -xzvf \\ + -C $prefix \\ + -xavf \\ $args \\ $archive \\ $args2 fi - mv output ${untar} - cat <<-END_VERSIONS > versions.yml "${task.process}": untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') @@ -52,9 +50,10 @@ process UNTAR { """ stub: - untar = archive.toString() - '.tar.gz' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) """ - touch $untar + mkdir $prefix + touch ${prefix}/file.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml index ea7a3f38..db241a6e 100644 --- a/modules/nf-core/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -3,6 +3,7 @@ description: Extract files. keywords: - untar - uncompress + - extract tools: - untar: description: | diff --git a/nextflow.config b/nextflow.config index 8495b492..251cf88b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,12 +14,15 @@ params { fragment_size = 200 fingerprint_bins = 500000 read_length = null + with_control = false + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' igenomes_ignore = false save_reference = false + ataqv_mito_reference = null // Options: Trimming clip_r1 = null @@ -29,6 +32,7 @@ params { trim_nextseq = null skip_trimming = false save_trimmed = false + min_trimmed_reads = 10000 // Options: Alignment aligner = 'bwa' @@ -79,7 +83,6 @@ params { // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -87,18 +90,15 @@ params { monochrome_logs = false hook_url = null help = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' - enable_conda = false + version = false // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null // Max resource options // Defaults only, expecting to be overwritten @@ -106,6 +106,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -127,18 +134,21 @@ try { // } profiles { - debug { process.beforeScript = 'echo $HOSTNAME' } + debug { + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + } conda { - params.enable_conda = true conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = 
false charliecloud.enabled = false + apptainer.enabled = false } mamba { - params.enable_conda = true conda.enabled = true conda.useMamba = true docker.enabled = false @@ -146,51 +156,88 @@ profiles { podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } docker { docker.enabled = true docker.userEmulation = true + conda.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false + } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { singularity.enabled = true singularity.autoMounts = true + conda.enabled = false docker.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } podman { podman.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false shifter.enabled = false charliecloud.enabled = false + apptainer.enabled = false } shifter { shifter.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false charliecloud.enabled = false + apptainer.enabled = false } charliecloud { charliecloud.enabled = true + conda.enabled = false docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false + apptainer.enabled = false + } + apptainer { + apptainer.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false } gitpod { executor.name = 'local' executor.cpus = 16 executor.memory = 60.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_controls { includeConfig 'conf/test_controls.config' } + test_full { includeConfig 'conf/test_full.config' } +} + +// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile +// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled +// Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' +docker.registry = 'quay.io' +podman.registry = 'quay.io' +singularity.registry = 'quay.io' + +// Nextflow plugins +plugins { + id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -217,29 +264,29 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { name = 'nf-core/atacseq' - author = 'Patel H, Langer B, Espinosa-Carrasco J, Syme R' + author = """Patel H, Langer B, Espinosa-Carrasco J, Syme R""" homePage = 
'https://github.com/nf-core/atacseq' - description = 'ATACSeq peak-calling and differential analysis pipeline.' + description = """ATACSeq peak-calling and differential analysis pipeline.""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '2.0' + nextflowVersion = '!>=23.04.0' + version = '2.1.0' doi = 'https://doi.org/10.5281/zenodo.2634132' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 41d5c76e..fd39fea4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -15,11 +15,11 @@ "input": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row. See [usage docs](https://nf-co.re/atacseq/docs/usage#introduction).", + "help_text": "You will need to create a samplesheet with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row. See [usage docs](https://nf-co.re/atacseq/docs/usage#introduction).", "fa_icon": "fas fa-file-csv" }, "fragment_size": { @@ -35,10 +35,17 @@ }, "read_length": { "type": "integer", - "description": "Read length used to calculate MACS2 genome size for peak calling if `--macs_gsize` isn't provided.", + "description": "Read length used to calculate or retrieve pre-computed MACS2 genome size for peak calling if `--macs_gsize` isn't provided.", "fa_icon": "fas fa-chart-area", + "help_text": "Read length together with the genome fasta is used to calculate the MACS2 genome size using the `khmer` program as explained [here](https://deeptools.readthedocs.io/en/develop/content/feature/effectiveGenomeSize.html#effective-genome-size). For all the genomes present in `igenomes.config` the genome size has already been precomputed, and the read length is then used to retrieve the corresponding value.", "enum": [50, 75, 100, 150, 200] }, + "with_control": { + "type": "boolean", + "description": "Use controls.", + "help_text": "Use this to indicate that your samplesheet lists controls.", + "fa_icon": "fas fa-check-square" + }, "outdir": { "type": "string", "format": "directory-path", @@ -74,6 +81,7 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", @@ -83,6 +91,7 @@ "gtf": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.gtf(\\.gz)?$", "description": "Path to GTF annotation file.", @@ -92,6 +101,7 @@ "gff": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.gff(\\.gz)?$", "fa_icon": "fas fa-code-branch", @@ -100,30 +110,36 @@ }, "bwa_index": { "type": "string", + "format": "path", + "exists": true, "description": "Path to directory or tar.gz archive for pre-built BWA index.", "fa_icon": "fas fa-bezier-curve" }, "bowtie2_index": { "type": "string", "format": "path", + "exists": true, "fa_icon": "fas fa-bezier-curve", "description": "Path to directory or tar.gz archive for pre-built Bowtie2 index."
}, "chromap_index": { "type": "string", "format": "path", + "exists": true, "fa_icon": "fas fa-bezier-curve", "description": "Path to directory or tar.gz archive for pre-built Chromap index." }, "star_index": { "type": "string", "format": "path", + "exists": true, "fa_icon": "fas fa-bezier-curve", "description": "Path to directory or tar.gz archive for pre-built STAR index." }, "gene_bed": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.bed(\\.gz)?$", "fa_icon": "fas fa-procedures", @@ -132,6 +148,7 @@ "tss_bed": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.bed(\\.gz)?$", "fa_icon": "fas fa-procedures", @@ -145,6 +162,8 @@ }, "blacklist": { "type": "string", + "format": "path", + "exists": true, "description": "Path to blacklist regions in BED format, used for filtering alignments.", "help_text": "If provided, alignments that overlap with the regions in this file will be filtered out (see [ENCODE blacklists](https://sites.google.com/site/anshulkundaje/projects/blacklists)). The file should be in BED format. Blacklisted regions for *GRCh37*, *GRCh38*, *GRCm38*, *hg19*, *hg38*, *mm10* are bundled with the pipeline in the [`blacklists`](../assets/blacklists/) directory, and as such will be automatically used if any of those genomes are specified with the `--genome` parameter.", "fa_icon": "fas fa-book-dead" @@ -179,9 +198,17 @@ "keep_mito": { "type": "boolean", "description": "Reads mapping to mitochondrial contig are not filtered from alignments.", + "fa_icon": "fas fa-cart-arrow-down", "default": false + }, + "ataqv_mito_reference": { + "type": "string", + "description": "Sets the value of the ataqv --mitochondrial-reference-name argument.", + "help_text": "By default this takes the value of the mito_name parameter, if set. However, some plants and algae have chloroplast genomes in addition to a mitochondrial genome, so mito_name can contain multiple names separated by a | symbol, which breaks ataqv. In these cases this parameter can be used to override that value.", + "fa_icon": "fas fa-signature" } - } + }, + "required": ["fasta"] }, "adapter_trimming_options": { "title": "Adapter trimming options", @@ -215,6 +242,12 @@ "help_text": "This enables the option Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases.", "fa_icon": "fas fa-cut" }, + "min_trimmed_reads": { + "type": "integer", + "default": 10000, + "fa_icon": "fas fa-hand-paper", + "description": "Minimum number of trimmed reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low."
+ }, "skip_trimming": { "type": "boolean", "description": "Skip the adapter trimming step.", @@ -278,6 +311,8 @@ }, "bamtools_filter_pe_config": { "type": "string", + "format": "path", + "exists": true, "default": "$projectDir/assets/bamtools_filter_pe.json", "hidden": true, "description": "BAMTools JSON file with custom filters for paired-end data.", @@ -285,6 +320,8 @@ }, "bamtools_filter_se_config": { "type": "string", + "format": "path", + "exists": true, "default": "$projectDir/assets/bamtools_filter_se.json", "hidden": true, "description": "BAMTools JSON file with custom filters for single-end data.", @@ -501,7 +538,7 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", + "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" } @@ -520,6 +557,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -568,33 +611,35 @@ "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", + "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true }, "multiqc_config": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", "hidden": true }, "multiqc_logo": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file", "fa_icon": "fas fa-image", "hidden": true }, "multiqc_methods_description": { "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", "description": "Custom MultiQC yaml file containing HTML including a methods description.", "fa_icon": "fas fa-cog" }, - "tracedir": { - "type": "string", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", - "hidden": true - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -602,18 +647,28 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "show_hidden_params": { + "validationShowHiddenParams": { "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", + "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, - "enable_conda": { + "validationFailUnrecognisedParams": { "type": "boolean", - "description": "Run this workflow with Conda. 
You can also use '-profile conda' instead of providing this parameter.", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters fails when an unrecognised parameter is found.", + "hidden": true, + "help_text": "By default, when an unrecognised parameter is found, it returns a warning." + }, + "validationLenientMode": { + "type": "boolean", + "fa_icon": "far fa-check-circle", + "description": "Validation of parameters in lenient mode.", + "default": false, "hidden": true, - "fa_icon": "fas fa-bacon" + "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." } } } diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 13bcd5a3..1d832adc 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -6,8 +6,10 @@ include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' workflow INPUT_CHECK { take: - samplesheet // file: /path/to/samplesheet.csv - seq_center // string: sequencing center for read group + samplesheet // file: /path/to/samplesheet.csv + seq_center // string: sequencing center for read group + with_control // boolean: samplesheet contains controls + main: SAMPLESHEET_CHECK ( samplesheet ) @@ -26,10 +28,11 @@ def create_fastq_channel(LinkedHashMap row, String seq_center) { def meta = [:] meta.id = row.sample meta.single_end = row.single_end.toBoolean() + meta.control = row.control - def read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\'" + def read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id - ~/_T\d+$/}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\'" if (seq_center) { - read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\\tCN:${seq_center}\'" + read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id - ~/_T\d+$/}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\\tCN:${seq_center}\'" } meta.read_group = read_group diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index a4825b8e..d6dbffcf 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -44,7 +44,7 @@ workflow PREPARE_GENOME { ch_fasta = GUNZIP_FASTA ( [ [:], params.fasta ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { - ch_fasta = file(params.fasta) + ch_fasta = Channel.value(file(params.fasta)) } // @@ -55,14 +55,14 @@ workflow PREPARE_GENOME { ch_gtf = GUNZIP_GTF ( [ [:], params.gtf ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) } else { - ch_gtf = file(params.gtf) + ch_gtf = Channel.value(file(params.gtf)) } } else if (params.gff) { if (params.gff.endsWith('.gz')) { ch_gff = GUNZIP_GFF ( [ [:], params.gff ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) } else { - ch_gff = file(params.gff) + ch_gff = Channel.value(file(params.gff)) } ch_gtf = GFFREAD ( ch_gff ).gtf ch_versions = ch_versions.mix(GFFREAD.out.versions) @@ -77,7 +77,7 @@ workflow PREPARE_GENOME { ch_blacklist = GUNZIP_BLACKLIST ( [ [:], params.blacklist ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions) } else { - ch_blacklist = Channel.of(params.blacklist) + ch_blacklist = Channel.value(file(params.blacklist)) } } @@ -104,7 +104,7 @@ workflow PREPARE_GENOME { ch_gene_bed = GUNZIP_GENE_BED ( [ [:], params.gene_bed ]
).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) } else { - ch_gene_bed = file(params.gene_bed) + ch_gene_bed = Channel.value(file(params.gene_bed)) } } @@ -116,14 +116,15 @@ workflow PREPARE_GENOME { ch_tss_bed = GUNZIP_TSS_BED ( [ [:], params.tss_bed ] ).gunzip.map{ it[1] } ch_versions = ch_versions.mix(GUNZIP_TSS_BED.out.versions) } else { - ch_tss_bed = file(params.tss_bed) + ch_tss_bed = Channel.value(file(params.tss_bed)) } } // // Create chromosome sizes file // - ch_chrom_sizes = CUSTOM_GETCHROMSIZES ( [ [:], ch_fasta ] ).sizes.map{ it[1] } + CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } ) + ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map{ it[1] } ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) @@ -145,7 +146,7 @@ workflow PREPARE_GENOME { GENOME_BLACKLIST_REGIONS ( ch_chrom_sizes, ch_blacklist.ifEmpty([]), - params.mito_name, + params.mito_name ?: '', params.keep_mito ) ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed @@ -164,7 +165,7 @@ workflow PREPARE_GENOME { ch_bwa_index = [ [:], file(params.bwa_index) ] } } else { - ch_bwa_index = BWA_INDEX ( [ [:], ch_fasta ] ).index + ch_bwa_index = BWA_INDEX ( ch_fasta.map { [ [:], it ] } ).index ch_versions = ch_versions.mix(BWA_INDEX.out.versions) } } @@ -182,7 +183,7 @@ workflow PREPARE_GENOME { ch_bowtie2_index = [ [:], file(params.bowtie2_index) ] } } else { - ch_bowtie2_index = BOWTIE2_BUILD ( [ [:], ch_fasta ] ).index + ch_bowtie2_index = BOWTIE2_BUILD ( ch_fasta.map { [ [:], it ] } ).index ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) } } @@ -200,7 +201,7 @@ workflow PREPARE_GENOME { ch_chromap_index = [ [:], file(params.chromap_index) ] } } else { - ch_chromap_index = CHROMAP_INDEX ( [ [:], ch_fasta ] ).index + ch_chromap_index = CHROMAP_INDEX ( ch_fasta.map { [ [:], it ] } ).index ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions) } } @@ -215,7 +216,7 @@ workflow PREPARE_GENOME { ch_star_index = UNTAR_STAR_INDEX ( [ [:], params.star_index ] ).untar.map{ it[1] } ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) } else { - ch_star_index = file(params.star_index) + ch_star_index = Channel.value(file(params.star_index)) } } else { ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf index 9cb24cdc..6e3df332 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/main.nf +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -9,9 +9,9 @@ include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' workflow BAM_MARKDUPLICATES_PICARD { take: - ch_bam // channel: [ val(meta), [ bam ] ] - ch_fasta // channel: [ fasta ] - ch_fai // channel: [ fai ] + ch_bam // channel: [ val(meta), path(bam) ] + ch_fasta // channel: [ path(fasta) ] + ch_fai // channel: [ path(fai) ] main: @@ -23,7 +23,7 @@ workflow BAM_MARKDUPLICATES_PICARD { SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - PICARD_MARKDUPLICATES.out.bam + ch_bam_bai = PICARD_MARKDUPLICATES.out.bam .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) .map { @@ -34,20 +34,19 @@ workflow BAM_MARKDUPLICATES_PICARD { [ meta, bam, csi ] } } - .set { ch_bam_bai } BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) 
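    // ch_bam_bai pairs each deduplicated BAM with whichever index SAMTOOLS_INDEX produced (the BAI when present, otherwise the CSI, which samtools writes for very long reference sequences), so BAM_STATS_SAMTOOLS above always receives [ meta, bam, index ] tuples.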
emit: - bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ bam ] ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml index fdd9f8d1..d5e71609 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: "bam_markduplicates_picard" description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats keywords: @@ -14,48 +15,47 @@ modules: - samtools/flagstat input: - - meta: - type: map + - ch_bam: description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: file - description: Reference genome fasta file - pattern: "*.{fasta,fa}" - + BAM/CRAM/SAM file + Structure: [ val(meta), path(bam) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] + - ch_fai: + description: | + Index of the reference genome fasta file + Structure: [ path(fai) ] output: - - meta: - type: map + - bam: description: | - Groovy Map containing sample information - e.g.
[ id:'test' ] + processed BAM/CRAM/SAM file + Structure: [ val(meta), path(bam) ] - bai: - type: file - description: BAM/CRAM/SAM samtools index - pattern: "*.{bai,crai,sai}" + description: | + BAM/CRAM/SAM samtools index + Structure: [ val(meta), path(bai) ] - csi: - type: file - description: CSI samtools index - pattern: "*.csi" + description: | + CSI samtools index + Structure: [ val(meta), path(csi) ] - stats: - type: file - description: File containing samtools stats output + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] - flagstat: - type: file - description: File containing samtools flagstat output + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] - idxstats: - type: file - description: File containing samtools idxstats output - pattern: "*.{idxstats}" + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + description: | + Files containing software versions + Structure: [ path(versions.yml) ] authors: - "@dmarron" - "@drpatelh" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf index 617871fe..fc1c652b 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -9,7 +9,7 @@ include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' workflow BAM_SORT_STATS_SAMTOOLS { take: ch_bam // channel: [ val(meta), [ bam ] ] - ch_fasta // channel: [ fasta ] + ch_fasta // channel: [ val(meta), path(fasta) ] main: diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml index 131065be..8dfbd58d 100644 --- a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: bam_sort_stats_samtools description: Sort SAM/BAM/CRAM file keywords: diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf index cfcc48dd..44d4c010 100644 --- a/subworkflows/nf-core/bam_stats_samtools/main.nf +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -8,25 +8,25 @@ include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/m workflow BAM_STATS_SAMTOOLS { take: - bam_bai // channel: [ val(meta), [ bam/cram ], [bai/csi] ] - fasta // channel: [ fasta ] + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] main: ch_versions = Channel.empty() - SAMTOOLS_STATS ( bam_bai, fasta ) + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) - SAMTOOLS_FLAGSTAT ( bam_bai ) + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) - SAMTOOLS_IDXSTATS ( bam_bai ) + SAMTOOLS_IDXSTATS ( ch_bam_bai ) ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + 
flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml index 5252b0e4..b05086bc 100644 --- a/subworkflows/nf-core/bam_stats_samtools/meta.yml +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: bam_stats_samtools description: Produces comprehensive statistics from SAM/BAM/CRAM file keywords: @@ -11,44 +12,30 @@ modules: - samtools/idxstats - samtools/flagstat input: - - meta: - type: map + - ch_bam_bai: description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" - - fasta: - type: file - description: Reference genome fasta file - pattern: "*.{fasta,fa}" -output: - - meta: - type: map + The input channel containing the BAM/CRAM and its index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] + Reference genome fasta file + Structure: [ path(fasta) ] +output: - stats: - type: file - description: File containing samtools stats output - pattern: "*.{stats}" + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] - idxstats: - type: file - description: File containing samtools idxstats output - pattern: "*.{idxstats}" + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + description: | + Files containing software versions + Structure: [ path(versions.yml) ] authors: - "@drpatelh" diff --git a/subworkflows/nf-core/fastq_align_bowtie2/main.nf b/subworkflows/nf-core/fastq_align_bowtie2/main.nf index eaf4ac5e..ba4420f7 100644 --- a/subworkflows/nf-core/fastq_align_bowtie2/main.nf +++ b/subworkflows/nf-core/fastq_align_bowtie2/main.nf @@ -21,18 +21,18 @@ workflow FASTQ_ALIGN_BOWTIE2 { // Map reads with Bowtie2 // BOWTIE2_ALIGN ( ch_reads, ch_index, save_unaligned, sort_bam ) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) // // Sort, index BAM file and run samtools stats, flagstat and idxstats // - BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.bam, ch_fasta ) + BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.aligned, ch_fasta ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) emit: - bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), bam ] - log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] - fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + bam_orig = BOWTIE2_ALIGN.out.aligned // channel: [ val(meta), aligned ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] +
fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] diff --git a/subworkflows/nf-core/fastq_align_bowtie2/meta.yml b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml index ad378077..4900670f 100644 --- a/subworkflows/nf-core/fastq_align_bowtie2/meta.yml +++ b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: fastq_align_bowtie2 description: Align reads to a reference genome using bowtie2 then sort with samtools keywords: @@ -33,9 +34,10 @@ input: Save reads that do not map to the reference (true) or discard them (false) (default: false) - sort_bam: + type: boolean description: | Save reads that do not map to the reference (true) or discard them (false) - (default: false) + default: false - ch_fasta: type: file description: Reference fasta file diff --git a/subworkflows/nf-core/fastq_align_bwa/main.nf b/subworkflows/nf-core/fastq_align_bwa/main.nf index 4a0cf732..4ce4f886 100644 --- a/subworkflows/nf-core/fastq_align_bwa/main.nf +++ b/subworkflows/nf-core/fastq_align_bwa/main.nf @@ -7,10 +7,10 @@ include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' workflow FASTQ_ALIGN_BWA { take: - ch_reads // channel (mandatory): [ val(meta), [ reads ] ] - ch_index // channel (mandatory): [ val(meta2, [ index ] ] - sort_bam // val - ch_fasta // channel (optional) : /path/to/reference.fasta + ch_reads // channel (mandatory): [ val(meta), [ path(reads) ] ] + ch_index // channel (mandatory): [ val(meta2), path(index) ] + val_sort_bam // boolean (mandatory): true or false + ch_fasta // channel (optional) : [ path(fasta) ] main: ch_versions = Channel.empty() @@ -18,24 +18,26 @@ workflow FASTQ_ALIGN_BWA { // // Map reads with BWA // - BWA_MEM ( ch_reads, ch_index, sort_bam ) + + BWA_MEM ( ch_reads, ch_index, val_sort_bam ) ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) // // Sort, index BAM file and run samtools stats, flagstat and idxstats // + BAM_SORT_STATS_SAMTOOLS ( BWA_MEM.out.bam, ch_fasta ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) emit: - bam_orig = BWA_MEM.out.bam // channel: [ val(meta), bam ] + bam_orig = BWA_MEM.out.bam // channel: [ val(meta), path(bam) ] - bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), path(bam) ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), path(bai) ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), path(csi) ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff 
--git a/subworkflows/nf-core/fastq_align_bwa/meta.yml b/subworkflows/nf-core/fastq_align_bwa/meta.yml index 894e5dda..548fec3f 100644 --- a/subworkflows/nf-core/fastq_align_bwa/meta.yml +++ b/subworkflows/nf-core/fastq_align_bwa/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: fastq_align_bwa description: Align reads to a reference genome using bwa then sort with samtools keywords: @@ -12,67 +13,58 @@ modules: - samtools/stats - samtools/idxstats - samtools/flagstat -# TODO Update when we decide on a standard for subworkflow docs input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - ch_reads: - type: file description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] + Structure: [ val(meta), [ path(reads) ] ] - ch_index: - type: file - description: BWA genome index files - pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" - - sort_bam: + description: | + BWA genome index files + Structure: [ val(meta2), path(index) ] + - val_sort_bam: type: boolean description: If true bwa modules sort resulting bam files - pattern: "true or false" + pattern: "true|false" - ch_fasta: type: file description: | - Structure: [val(meta2), path(fasta)] - Reference fasta file - pattern: "*.{fasta,fa}" + Optional reference fasta file. This only needs to be given if val_sort_bam = true + Structure: [ path(fasta) ] -# TODO Update when we decide on a standard for subworkflow docs output: - bam_orig: - type: file - description: BAM file produced by bwa - pattern: "*.bam" + description: | + BAM file produced by bwa + Structure: [ val(meta), path(bam) ] - bam: - type: file - description: BAM file ordered by samtools - pattern: "*.bam" + description: | + BAM file ordered by samtools + Structure: [ val(meta), path(bam) ] - bai: - type: file - description: BAM index (currently only for snapaligner) - pattern: "*.bai" + description: | + BAI index of the ordered BAM file + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI index of the ordered BAM file + Structure: [ val(meta), path(csi) ] - stats: - type: file - description: File containing samtools stats output - pattern: "*.{stats}" + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] - idxstats: - type: file - description: File containing samtools idxstats output - pattern: "*.{idxstats}" + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + description: | + Files containing software versions + Structure: [ path(versions.yml) ] authors: - "@JoseEspinosa" diff --git a/subworkflows/nf-core/fastq_align_chromap/main.nf b/subworkflows/nf-core/fastq_align_chromap/main.nf index 9d706c98..26b6a4be 100644 --- a/subworkflows/nf-core/fastq_align_chromap/main.nf +++ b/subworkflows/nf-core/fastq_align_chromap/main.nf @@ -27,7 +27,7 @@ workflow FASTQ_ALIGN_CHROMAP { // // Sort, index BAM file and run samtools stats, flagstat and 
idxstats // - BAM_SORT_STATS_SAMTOOLS(CHROMAP_CHROMAP.out.bam, ch_fasta.map { it[1] }) + BAM_SORT_STATS_SAMTOOLS(CHROMAP_CHROMAP.out.bam, ch_fasta) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) emit: diff --git a/subworkflows/nf-core/fastq_align_chromap/meta.yml b/subworkflows/nf-core/fastq_align_chromap/meta.yml index 6d701bce..d5f59cac 100644 --- a/subworkflows/nf-core/fastq_align_chromap/meta.yml +++ b/subworkflows/nf-core/fastq_align_chromap/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: "fastq_align_chromap" description: Align high throughput chromatin profiles using Chromap then sort with samtools keywords: diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf index f3313256..db2e5b32 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf @@ -23,20 +23,21 @@ def getTrimGaloreReadsAfterFiltering(log_file) { workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { take: - reads // channel: [ val(meta), [ reads ] ] - skip_fastqc // boolean: true/false - with_umi // boolean: true/false - skip_umi_extract // boolean: true/false - skip_trimming // boolean: true/false - umi_discard_read // integer: 0, 1 or 2 + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + with_umi // boolean: true/false + skip_umi_extract // boolean: true/false + skip_trimming // boolean: true/false + umi_discard_read // integer: 0, 1 or 2 + min_trimmed_reads // integer: > 0 main: - ch_versions = Channel.empty() fastqc_html = Channel.empty() fastqc_zip = Channel.empty() if (!skip_fastqc) { - FASTQC ( reads ).html.set { fastqc_html } + FASTQC (reads) + fastqc_html = FASTQC.out.html fastqc_zip = FASTQC.out.zip ch_versions = ch_versions.mix(FASTQC.out.versions.first()) } @@ -44,8 +45,8 @@ workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { umi_reads = reads umi_log = Channel.empty() if (with_umi && !skip_umi_extract) { - - UMITOOLS_EXTRACT ( reads ).reads.set { umi_reads } + UMITOOLS_EXTRACT (reads) + umi_reads = UMITOOLS_EXTRACT.out.reads umi_log = UMITOOLS_EXTRACT.out.log ch_versions = ch_versions.mix(UMITOOLS_EXTRACT.out.versions.first()) @@ -54,24 +55,22 @@ workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { UMITOOLS_EXTRACT .out .reads - .map { meta, reads -> - if (!meta.single_end) { - meta['single_end'] = true - reads = reads[umi_discard_read % 2] - } - return [ meta, reads ] + .map { + meta, reads -> + meta.single_end ? 
[ meta, reads ] : [ meta + ['single_end': true], reads[umi_discard_read % 2] ] } .set { umi_reads } } } - trim_reads = umi_reads - trim_unpaired = Channel.empty() - trim_html = Channel.empty() - trim_zip = Channel.empty() - trim_log = Channel.empty() + trim_reads = umi_reads + trim_unpaired = Channel.empty() + trim_html = Channel.empty() + trim_zip = Channel.empty() + trim_log = Channel.empty() + trim_read_count = Channel.empty() if (!skip_trimming) { - TRIMGALORE ( umi_reads ).reads.set { trim_reads } + TRIMGALORE (umi_reads) trim_unpaired = TRIMGALORE.out.unpaired trim_html = TRIMGALORE.out.html trim_zip = TRIMGALORE.out.zip @@ -79,20 +78,31 @@ workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) // - // Filter empty FastQ files after adapter trimming + // Filter FastQ files based on minimum trimmed read count after adapter trimming // - trim_reads - .join(trim_log) + TRIMGALORE + .out + .reads + .join(trim_log, remainder: true) .map { meta, reads, trim_log -> - if (!meta.single_end) { - trim_log = trim_log[-1] - } - if (getTrimGaloreReadsAfterFiltering(trim_log) > 0) { - [ meta, reads ] + if (trim_log) { + num_reads = getTrimGaloreReadsAfterFiltering(meta.single_end ? trim_log : trim_log[-1]) + [ meta, reads, num_reads ] + } else { + [ meta, reads, min_trimmed_reads.toFloat() + 1 ] } } + .set { ch_num_trimmed_reads } + + ch_num_trimmed_reads + .filter { meta, reads, num_reads -> num_reads >= min_trimmed_reads.toFloat() } + .map { meta, reads, num_reads -> [ meta, reads ] } .set { trim_reads } + + ch_num_trimmed_reads + .map { meta, reads, num_reads -> [ meta, num_reads ] } + .set { trim_read_count } } emit: @@ -107,6 +117,7 @@ workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { trim_html // channel: [ val(meta), [ html ] ] trim_zip // channel: [ val(meta), [ zip ] ] trim_log // channel: [ val(meta), [ txt ] ] + trim_read_count // channel: [ val(meta), val(count) ] versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml index 02a02a6a..3b1a675c 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/meta.yml @@ -1,3 +1,4 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json name: "fastq_fastqc_umitools_trimgalore" description: Read QC, UMI extraction and trimming keywords: @@ -42,15 +43,19 @@ input: type: integer description: | Discard R1 / R2 if required + - min_trimmed_reads: + type: integer + description: | + Inputs with fewer than this reads will be filtered out of the "reads" output channel output: - reads: - type: file - description: > - Extracted FASTQ files. | - For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | - For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. - pattern: "*.{fastq.gz}" + type: file + description: > + Extracted FASTQ files. | + For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. | + For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz. + pattern: "*.{fastq.gz}" - fastqc_html: type: file description: FastQC report @@ -80,6 +85,9 @@ output: type: file description: Trim Galore! 
trimming report pattern: "*_{report.txt}" + - trim_read_count: + type: integer + description: Number of reads remaining after trimming for all input samples - versions: type: file description: File containing software versions diff --git a/tower.yml b/tower.yml index b5156fe5..c8bb670c 100644 --- a/tower.yml +++ b/tower.yml @@ -1,6 +1,8 @@ reports: multiqc_report.html: display: "MultiQC HTML report" + samplesheet.csv: + display: "Auto-created samplesheet with collated metadata and FASTQ paths" macs2_peak.mLb.clN.plots.pdf: display: "Merged library all samples MACS2 peak QC PDF plots" macs2_peak.mRp.clN.plots.pdf: diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index c370ff29..c76ac139 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -1,31 +1,29 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS + PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -def valid_params = [ - aligners : [ 'bwa', 'bowtie2', 'chromap', 'star' ] -] +include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation // Validate input parameters -WorkflowAtacseq.initialise(params, log, valid_params) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.input, params.multiqc_config, - params.fasta, - params.gtf, params.gff, params.gene_bed, params.tss_bed, - params.bwa_index, params.bowtie2_index, params.chromap_index, params.star_index, - params.blacklist, - params.bamtools_filter_pe_config, params.bamtools_filter_se_config -] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } +WorkflowAtacseq.initialise(params, log) // Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } +ch_input = file(params.input) + +// Check ataqv_mito_reference parameter +ataqv_mito_reference = params.ataqv_mito_reference +if (!params.ataqv_mito_reference && params.mito_name) { + ataqv_mito_reference = params.mito_name +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -34,14 +32,14 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample */ ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) // JSON files required by BAMTools for alignment filtering -ch_bamtools_filter_se_config = file(params.bamtools_filter_se_config, checkIfExists: true) -ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config, checkIfExists: true) +ch_bamtools_filter_se_config = file(params.bamtools_filter_se_config) +ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config) // Header files for MultiQC ch_multiqc_merged_library_peak_count_header = file("$projectDir/assets/multiqc/merged_library_peak_count_header.txt", checkIfExists: true) @@ -137,9 +135,13 @@ workflow ATACSEQ { // INPUT_CHECK ( ch_input, - params.seq_center + params.seq_center, + params.with_control ) ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") + // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ + // ! There is currently no tooling to help you write a sample sheet schema // // SUBWORKFLOW: Read QC and trim adapters @@ -150,7 +152,8 @@ workflow ATACSEQ { false, false, params.skip_trimming, - 0 + 0, + params.min_trimmed_reads ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) @@ -168,6 +171,9 @@ workflow ATACSEQ { PREPARE_GENOME.out.bwa_index, false, PREPARE_GENOME.out.fasta + .map { + [ [:], it ] + } ) ch_genome_bam = FASTQ_ALIGN_BWA.out.bam ch_genome_bam_index = FASTQ_ALIGN_BWA.out.bai @@ -187,6 +193,9 @@ workflow ATACSEQ { params.save_unaligned, false, PREPARE_GENOME.out.fasta + .map { + [ [:], it ] + } ) ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam ch_genome_bam_index = FASTQ_ALIGN_BOWTIE2.out.bai @@ -200,38 +209,8 @@ workflow ATACSEQ { // SUBWORKFLOW: Alignment with CHROMAP & BAM QC // if (params.aligner == 'chromap') { - - // Filter out paired-end reads until the issue below is fixed - // https://github.com/nf-core/chipseq/issues/291 - FASTQ_FASTQC_UMITOOLS_TRIMGALORE - .out - .reads - .branch { - meta, reads -> - single_end: meta.single_end - return [ meta, reads ] - paired_end: !meta.single_end - return [ meta, reads ] - } - .set { ch_reads_chromap } - - ch_reads_chromap - .paired_end - .collect() - .map { - it -> - def count = it.size() - if (count > 0) { - log.warn "=============================================================================\n" + - " Paired-end files produced by chromap cannot be used by some downstream tools due to the issue below:\n" + - " https://github.com/nf-core/chipseq/issues/291\n" + - " They will be excluded from the analysis. 
Consider using a different aligner\n" + - "===================================================================================" - } - } - FASTQ_ALIGN_CHROMAP ( - ch_reads_chromap.single_end, + FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, PREPARE_GENOME.out.chromap_index, PREPARE_GENOME.out.fasta .map { @@ -258,7 +237,10 @@ workflow ATACSEQ { ALIGN_STAR ( FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.reads, PREPARE_GENOME.out.star_index, - PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fasta + .map { + [ [:], it ] + }, params.seq_center ?: '' ) ch_genome_bam = ALIGN_STAR.out.bam @@ -299,8 +281,16 @@ workflow ATACSEQ { // MERGED_LIBRARY_MARKDUPLICATES_PICARD ( PICARD_MERGESAMFILES_LIBRARY.out.bam, - PREPARE_GENOME.out.fasta, + PREPARE_GENOME + .out + .fasta + .map { + [ [:], it ] + }, PREPARE_GENOME.out.fai + .map { + [ [:], it ] + } ) ch_versions = ch_versions.mix(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.versions) @@ -310,7 +300,12 @@ workflow ATACSEQ { MERGED_LIBRARY_FILTER_BAM ( MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bam.join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0]), PREPARE_GENOME.out.filtered_bed.first(), - PREPARE_GENOME.out.fasta, + PREPARE_GENOME + .out + .fasta + .map { + [ [:], it ] + }, ch_bamtools_filter_se_config, ch_bamtools_filter_pe_config ) @@ -334,9 +329,24 @@ workflow ATACSEQ { ch_picardcollectmultiplemetrics_multiqc = Channel.empty() if (!params.skip_picard_metrics) { MERGED_LIBRARY_PICARD_COLLECTMULTIPLEMETRICS ( - MERGED_LIBRARY_FILTER_BAM.out.bam, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.fai, + MERGED_LIBRARY_FILTER_BAM + .out + .bam + .map { + [ it[0], it[1], [] ] + }, + PREPARE_GENOME + .out + .fasta + .map { + [ [:], it ] + }, + PREPARE_GENOME + .out + .fai + .map { + [ [:], it ] + } ) ch_picardcollectmultiplemetrics_multiqc = MERGED_LIBRARY_PICARD_COLLECTMULTIPLEMETRICS.out.metrics ch_versions = ch_versions.mix(MERGED_LIBRARY_PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) @@ -365,13 +375,31 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(MERGED_LIBRARY_BIGWIG_PLOT_DEEPTOOLS.out.versions) } - // Create channels: [ meta, [bam], [bai] ] + // Create channels: [ meta, [bam], [bai] ] or [ meta, [ bam, control_bam ] [ bai, control_bai ] ] MERGED_LIBRARY_FILTER_BAM .out .bam .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0]) .set { ch_bam_bai } + if (params.with_control) { + ch_bam_bai + .map { + meta, bam, bai -> + meta.control ? null : [ meta.id, [ bam ] , [ bai ] ] + } + .set { ch_control_bam_bai } + + ch_bam_bai + .map { + meta, bam, bai -> + meta.control ? 
[ meta.control, meta, [ bam ], [ bai ] ] : null + } + .combine(ch_control_bam_bai, by: 0) + .map { it -> [ it[1] , it[2] + it[4], it[3] + it[5] ] } + .set { ch_bam_bai } + } + // // MODULE: deepTools plotFingerprint QC // @@ -384,13 +412,22 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(MERGED_LIBRARY_DEEPTOOLS_PLOTFINGERPRINT.out.versions.first()) } - // Create channels: [ meta, bam, ([] for control_bam) ] - ch_bam_bai - .map { - meta, bam, bai -> - [ meta , bam, [] ] - } - .set { ch_bam_library } + // Create channel: [ val(meta), bam, control_bam ] + if (params.with_control) { + ch_bam_bai + .map { + meta, bams, bais -> + [ meta , bams[0], bams[1] ] + } + .set { ch_bam_library } + } else { + ch_bam_bai + .map { + meta, bam, bai -> + [ meta , bam, [] ] + } + .set { ch_bam_library } + } // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC @@ -451,7 +488,7 @@ workflow ATACSEQ { MERGED_LIBRARY_ATAQV_ATAQV ( ch_bam_peaks, 'NA', - params.mito_name, + ataqv_mito_reference ?: '', PREPARE_GENOME.out.tss_bed, [], PREPARE_GENOME.out.autosomes @@ -490,6 +527,7 @@ workflow ATACSEQ { meta, bam -> def meta_clone = meta.clone() meta_clone.id = meta_clone.id - ~/_REP\d+$/ + meta_clone.control = meta_clone.control ? meta_clone.control - ~/_REP\d+$/ : "" [ meta_clone.id, meta_clone, bam ] } .groupTuple() @@ -514,8 +552,16 @@ workflow ATACSEQ { // MERGED_REPLICATE_MARKDUPLICATES_PICARD ( PICARD_MERGESAMFILES_REPLICATE.out.bam, - PREPARE_GENOME.out.fasta, + PREPARE_GENOME + .out + .fasta + .map { + [ [:], it ] + }, PREPARE_GENOME.out.fai + .map { + [ [:], it ] + } ) ch_markduplicates_replicate_stats = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.stats ch_markduplicates_replicate_flagstat = MERGED_REPLICATE_MARKDUPLICATES_PICARD.out.flagstat @@ -533,15 +579,36 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(MERGED_REPLICATE_BAM_TO_BIGWIG.out.versions) // Create channels: [ meta, bam, ([] for control_bam) ] - MERGED_REPLICATE_MARKDUPLICATES_PICARD - .out - .bam - .map { - meta, bam -> - [ meta , bam, [] ] - } - .set { ch_bam_replicate } - + if (params.with_control) { + MERGED_REPLICATE_MARKDUPLICATES_PICARD + .out + .bam + .map { + meta, bam -> + meta.control ? null : [ meta.id, bam ] + } + .set { ch_bam_merged_control } + + MERGED_REPLICATE_MARKDUPLICATES_PICARD + .out + .bam + .map { + meta, bam -> + meta.control ? 
[ meta.control, meta, bam ] : null + } + .combine( ch_bam_merged_control, by: 0) + .map { it -> [ it[1] , it[2], it[3] ] } + .set { ch_bam_replicate } + } else { + MERGED_REPLICATE_MARKDUPLICATES_PICARD + .out + .bam + .map { + meta, bam -> + [ meta , bam, [] ] + } + .set { ch_bam_replicate } + } // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC // @@ -593,6 +660,7 @@ workflow ATACSEQ { if (!params.skip_igv) { IGV ( PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai, MERGED_LIBRARY_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks.collect{it[1]}.ifEmpty([]), ch_macs2_consensus_library_bed.collect{it[1]}.ifEmpty([]), @@ -633,8 +701,8 @@ workflow ATACSEQ { workflow_summary = WorkflowAtacseq.paramsSummaryMultiqc(workflow, summary_params) ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowAtacseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description) - ch_methods_description = Channel.value(methods_description) + methods_description = WorkflowAtacseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + ch_methods_description = Channel.value(methods_description) MULTIQC ( ch_multiqc_config, @@ -701,7 +769,7 @@ workflow.onComplete { } if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) + NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) } NfcoreTemplate.summary(workflow, params, log)