From c076b4dd96f3cab445f879d394c75986b410839a Mon Sep 17 00:00:00 2001
From: Charlotte Capitanchik
Date: Wed, 12 Jul 2023 17:57:49 +0900
Subject: [PATCH] NEW MODULE: PURECLIP (#3624)

* initial commit

* module version 1

* full testing

* linting

* fix version yaml

* fix version yaml command

* fix version yml maybe this time please

* fix version yml for real this time

* try again versions yaml

* fix version yml

* vyml

* version yaml

* vyml

* update ref to have meta
---
 modules/nf-core/pureclip/main.nf              | 69 +++++++++++++++
 modules/nf-core/pureclip/meta.yml             | 83 +++++++++++++++++++
 tests/config/pytest_modules.yml               |  4 +
 tests/modules/nf-core/pureclip/main.nf        | 45 ++++++++++
 .../modules/nf-core/pureclip/nextflow.config  |  5 ++
 tests/modules/nf-core/pureclip/test.yml       | 17 ++++
 6 files changed, 223 insertions(+)
 create mode 100644 modules/nf-core/pureclip/main.nf
 create mode 100644 modules/nf-core/pureclip/meta.yml
 create mode 100644 tests/modules/nf-core/pureclip/main.nf
 create mode 100644 tests/modules/nf-core/pureclip/nextflow.config
 create mode 100644 tests/modules/nf-core/pureclip/test.yml

diff --git a/modules/nf-core/pureclip/main.nf b/modules/nf-core/pureclip/main.nf
new file mode 100644
index 00000000000..1beb198fe4e
--- /dev/null
+++ b/modules/nf-core/pureclip/main.nf
@@ -0,0 +1,69 @@
+process PURECLIP {
+    tag "$meta.id"
+    label 'process_high'
+
+    conda "bioconda::pureclip=1.3.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/pureclip:1.3.1--0':
+        'biocontainers/pureclip:1.3.1--0' }"
+
+    input:
+    tuple val(meta), path(ipbam), path(controlbam)
+    tuple val(meta), path(ipbai), path(controlbai)
+    tuple val(meta2), path(genome_fasta)
+    val input_control
+
+    output:
+    tuple val(meta), path("${crosslinks_output_name}"), emit: crosslinks
+    tuple val(meta), path("${peaks_output_name}")     , emit: peaks
+    path "versions.yml"                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // No `def` on purpose: the output block resolves these two names.
+    crosslinks_output_name = "${prefix}_pureclip_crosslinks.bed"
+    peaks_output_name = "${prefix}_pureclip_peaks.bed"
+
+    // The control BAM/BAI are always staged as inputs, but they are only
+    // handed to PureCLIP when input_control is true.
+    def control_bam = input_control ? "-ibam $controlbam" : ""
+    def control_bai = input_control ? "-ibai $controlbai" : ""
+
+    """
+    pureclip \
+        -i $ipbam \
+        -bai $ipbai \
+        -g $genome_fasta \
+        -nt ${task.cpus} \
+        -o $crosslinks_output_name \
+        -or $peaks_output_name \
+        ${control_bam} \
+        ${control_bai} \
+        ${args}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pureclip: \$(echo \$(pureclip --version 2>&1) | sed 's/^.*pureclip //; s/Using.*\$//; s/version: //; s/ Seq.*//' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // The output block needs these names in stub runs as well.
+    crosslinks_output_name = "${prefix}_pureclip_crosslinks.bed"
+    peaks_output_name = "${prefix}_pureclip_peaks.bed"
+
+    """
+    touch ${crosslinks_output_name}
+    touch ${peaks_output_name}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pureclip: \$(echo \$(pureclip --version 2>&1) | sed 's/^.*pureclip //; s/Using.*\$//; s/version: //; s/ Seq.*//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/pureclip/meta.yml b/modules/nf-core/pureclip/meta.yml
new file mode 100644
index 00000000000..16033b32a4e
--- /dev/null
+++ b/modules/nf-core/pureclip/meta.yml
@@ -0,0 +1,83 @@
+---
+name: "pureclip"
+description: PureCLIP is a tool to detect protein-RNA interaction footprints from single-nucleotide CLIP-seq data, such as iCLIP and eCLIP.
+keywords:
+  - iCLIP
+  - eCLIP
+  - CLIP
+tools:
+  - "pureclip":
+      description: "PureCLIP is a tool to detect protein-RNA interaction footprints from single-nucleotide CLIP-seq data, such as iCLIP and eCLIP."
+      homepage: "https://github.com/skrakau/PureCLIP"
+      documentation: "https://pureclip.readthedocs.io/en/latest/GettingStarted/index.html"
+      tool_dev_url: "https://github.com/skrakau/PureCLIP"
+      doi: "10.1186/s13059-017-1364-2"
+      licence: ["GPL v3"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g. `[ id:'test', single_end:false ]`
+
+  - ipbam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file of the IP sample
+      pattern: "*.{bam,cram,sam}"
+
+  - controlbam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file of the input control (only passed to PureCLIP when input_control is true)
+      pattern: "*.{bam,cram,sam}"
+
+  - ipbai:
+      type: file
+      description: BAM index of the IP sample
+      pattern: "*.{bai}"
+
+  - controlbai:
+      type: file
+      description: BAM index of the input control (only passed to PureCLIP when input_control is true)
+      pattern: "*.{bai}"
+
+  - input_control:
+      type: boolean
+      description: Whether to run PureCLIP with an input control
+
+  - genome_fasta:
+      type: file
+      description: FASTA file of reference genome
+      pattern: "*.{fa,fasta,fa.gz,fasta.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+  - crosslinks:
+      type: file
+      description: Bed file of crosslinks
+      pattern: "*.{bed}"
+
+  - peaks:
+      type: file
+      description: Bed file of peaks
+      pattern: "*.{bed}"
+
+authors:
+  - "@charlotteanne"
+  - "@marcjones"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index d0e3790e851..7b39824656f 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -2891,6 +2891,10 @@ proteus/readproteingroups:
   - modules/nf-core/proteus/readproteingroups/**
   - tests/modules/nf-core/proteus/readproteingroups/**
 
+pureclip:
+  - modules/nf-core/pureclip/**
+  - tests/modules/nf-core/pureclip/**
+
 purecn/coverage:
   - modules/nf-core/purecn/coverage/**
   - tests/modules/nf-core/purecn/coverage/**
diff --git a/tests/modules/nf-core/pureclip/main.nf b/tests/modules/nf-core/pureclip/main.nf
new file mode 100644
index 00000000000..27139984fee
--- /dev/null
+++ b/tests/modules/nf-core/pureclip/main.nf
@@ -0,0 +1,45 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { PURECLIP } from '../../../../modules/nf-core/pureclip/main.nf'
+
+workflow test_pureclip_no_ctrl {
+    input_bam = [ [ id:'test', single_end:false ], // meta map
+                [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ],
+                [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) ]] //dummy file
+
+    input_bai = [ [ id:'test', single_end:false ], // meta map
+                [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ],
+                [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true) ]] //dummy file
+
+    fasta = [ [ id:'test', single_end:false ],
+            [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]]
+
+    PURECLIP (
+        input_bam,
+        input_bai,
+        fasta,
+        false
+    )
+}
+
+workflow test_pureclip_input_ctrl {
+    input_bam = [ [ id:'test', single_end:false ], // meta map
+                [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) ],
+                [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true) ]]
+
+    input_bai = [ [ id:'test', single_end:false ], // meta map
+                [ file( params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true) ],
+                [ file( params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true) ]]
+
+    fasta = [ [ id:'test', single_end:false ],
+            [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]]
+
+    PURECLIP (
+        input_bam,
+        input_bai,
+        fasta,
+        true
+    )
+}
diff --git a/tests/modules/nf-core/pureclip/nextflow.config b/tests/modules/nf-core/pureclip/nextflow.config
new file mode 100644
index 00000000000..50f50a7a357
--- /dev/null
+++ b/tests/modules/nf-core/pureclip/nextflow.config
@@ -0,0 +1,5 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+}
diff --git a/tests/modules/nf-core/pureclip/test.yml b/tests/modules/nf-core/pureclip/test.yml
new file mode 100644
index 00000000000..055f8bfb24e
--- /dev/null
+++ b/tests/modules/nf-core/pureclip/test.yml
@@ -0,0 +1,17 @@
+- name: pureclip test_pureclip_no_ctrl
+  command: nextflow run ./tests/modules/nf-core/pureclip -entry test_pureclip_no_ctrl -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/pureclip/nextflow.config
+  tags:
+    - pureclip
+  files:
+    - path: output/pureclip/test_pureclip_crosslinks.bed
+    - path: output/pureclip/test_pureclip_peaks.bed
+    - path: output/pureclip/versions.yml
+
+- name: pureclip test_pureclip_input_ctrl
+  command: nextflow run ./tests/modules/nf-core/pureclip -entry test_pureclip_input_ctrl -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/pureclip/nextflow.config
+  tags:
+    - pureclip
+  files:
+    - path: output/pureclip/test_pureclip_crosslinks.bed
+    - path: output/pureclip/test_pureclip_peaks.bed
+    - path: output/pureclip/versions.yml