Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implementation of goleft/indexcov #1312

Open
wants to merge 11 commits into
base: dev
Choose a base branch
from
26 changes: 26 additions & 0 deletions conf/modules/goleft.indexcov.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.prefix = File name prefix for output files.
ext.when = When to run the module.
----------------------------------------------------------------------------------------
*/


// Per-module options for GOLEFT_INDEXCOV.
// The selector is only registered when 'indexcov' is one of the comma-separated
// entries of params.tools.
process {
    if (params.tools && params.tools.split(',').contains('indexcov')) {
        withName: 'GOLEFT_INDEXCOV' {
            // Fixed output prefix, evaluated lazily as a closure.
            ext.prefix = { "indexcov" }
            // No extra command-line arguments by default.
            ext.args   = { "" }
            // Intended to disable the module when params.wes is set.
            ext.when   = { !params.wes }
            // NOTE(review): GOLEFT_INDEXCOV emits its files as "${prefix}/*";
            // confirm that the "*.png" glob matches those nested paths as intended.
            publishDir = [
                path    : { "${params.outdir}/variant_calling/indexcov" },
                mode    : params.publish_dir_mode,
                pattern : "*.png"
            ]
        }
    }
}
24 changes: 24 additions & 0 deletions conf/modules/indexcov_reindex.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.prefix = File name prefix for output files.
ext.when = When to run the module.
----------------------------------------------------------------------------------------
*/


// Per-module options for SAMTOOLS_INDEX_FOR_INDEXCOV.
// The selector is only registered when 'indexcov' is one of the comma-separated
// entries of params.tools.
process {
    if (params.tools && params.tools.split(',').contains('indexcov')) {
        withName: 'SAMTOOLS_INDEX_FOR_INDEXCOV' {
            // Per-sample prefix taken from the sample's meta map.
            ext.prefix = { "${meta.id}" }
            // Extra options handed to the module through task.ext.args
            // (presumably samtools read-flag / mapping-quality filters — confirm).
            ext.args   = { "-F 3844 -q 30" }
            // Intended to disable the module when params.wes is set.
            ext.when   = { !params.wes }
            // Intermediate files: never copied to the results directory.
            publishDir = [
                enabled : false
            ]
        }
    }
}
4 changes: 2 additions & 2 deletions conf/test_full_germline.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/sarek/testdata/csv/NA12878_WGS_30x_full_test.csv'

// Other params
tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep'
split_fastq = 50000000
tools = 'strelka,freebayes,haplotypecaller,deepvariant,manta,tiddit,cnvkit,vep,indexcov'
split_fastq = 50000000
}
51 changes: 51 additions & 0 deletions modules/local/goleft/indexcov/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Runs a single `goleft indexcov` command over all files staged in `bams` and
// writes the results into a directory named after `prefix`.
// NOTE(review): this process has no `when:` section, so the `ext.when` closure
// configured for GOLEFT_INDEXCOV does not appear to be consulted here — confirm.
process GOLEFT_INDEXCOV {
label 'process_single'

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/goleft:0.2.4--h9ee0642_1':
'biocontainers/goleft:0.2.4--h9ee0642_1' }"

input:
// meta : map whose `id` is used as the fallback value for `prefix`
val(meta)
// bams : staged files; only names ending in ".bam" or ".crai" are put on the command line
path(bams)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the tool run on all bams for all samples in one job?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nevermind I see it further down. The docs say 30sec per 30 WGS 🤯 . Any idea how it scales with number of genomes?

// fasta : staged, but not referenced by the script below
path(fasta)
// fai   : passed to `goleft indexcov --fai`
path(fai)
output:
// Everything created inside the "${prefix}" directory, plus the versions file.
path("${prefix}/*"),emit:output
path("versions.yml"),emit:versions
script:
def args = task.ext.args ?: ''
// `prefix` is assigned without `def`; the output declaration above also refers to it.
prefix = task.ext.prefix ?: "${meta.id}"
// Command-line inputs: all ".bam" files first, then all ".crai" files.
def input_files = bams.findAll{it.name.endsWith(".bam")} + bams.findAll{it.name.endsWith(".crai")}
// --extranormalize is added only when at least one ".crai" file is present.
def extranormalize = input_files.any{it.name.endsWith(".crai")} ? " --extranormalize " : ""
"""
goleft indexcov \\
--fai "${fai}" \\
--directory "${prefix}" \\
${extranormalize} \\
$args \\
${input_files.join(" ")}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
goleft: \$(goleft --version 2>&1 | head -n 1 | sed 's/^.*goleft Version: //')
END_VERSIONS
"""
Comment on lines +2 to +33
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could you indent so it is similar to the other modules?


stub:
// Stub run: creates the output directory with one placeholder file and the versions file.
// `args` is assigned here but not used by the stub script.
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
"""
mkdir "${prefix}"
touch "${prefix}/${prefix}-indexcov.bed.gz"

cat <<-END_VERSIONS > versions.yml
"${task.process}":
goleft: \$(goleft --version 2>&1 | head -n 1 | sed 's/^.*goleft Version: //')
END_VERSIONS
"""
}




57 changes: 57 additions & 0 deletions modules/local/samtools_index_for_indexcov/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Produces, for one sample, a header-only BAM plus a BAI index built from the
// reads selected by `task.ext.args`, for use by goleft indexcov.
//
// Inputs:
//   meta, input, bai : sample meta map, alignment file and its index
//                      (`bai` is staged but not referenced by the script)
//   fasta            : reference passed to `samtools view --reference`
//   fai              : staged but not referenced by the script
// Outputs:
//   output   : tuple ("<prefix>.indexcov.bam", "<prefix>.indexcov.bam.bai")
//   versions : versions.yml with the samtools version
//
// `prefix` is task.ext.prefix when set, otherwise meta.id, so the file names are
// unchanged under the shipped configuration (ext.prefix = "${meta.id}").
process SAMTOOLS_INDEX_FOR_INDEXCOV {
    tag "$meta.id"
    label 'process_low'

    conda "bioconda::samtools=1.17"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
        'biocontainers/samtools:1.17--h00cdaf9_0' }"

    input:
    tuple val(meta), path(input), path(bai)
    path(fasta)
    path(fai)

    output:
    tuple path("${prefix}.indexcov.bam"), path("${prefix}.indexcov.bam.bai"), emit: output
    path "versions.yml"                                                     , emit: versions

    // Without this guard the `ext.when` closure set in the module config is never
    // evaluated and the process would run even when it is meant to be skipped.
    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // No `def`: the output declaration above must be able to see `prefix`.
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    # write BAM header only
    samtools view --header-only -O BAM \\
        --threads ${task.cpus} \\
        -o "${prefix}.indexcov.bam" \\
        --reference "${fasta}" \\
        "${input}"

    # create index without writing BAM (redirecting to /dev/null)
    samtools view ${args} --uncompressed \\
        --threads ${task.cpus} \\
        -o "/dev/null##idx##${prefix}.indexcov.bam.bai" \\
        --write-index \\
        -O BAM \\
        --reference "${fasta}" \\
        "${input}"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch "${prefix}.indexcov.bam"
    touch "${prefix}.indexcov.bam.bai"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
    END_VERSIONS
    """
}
2 changes: 2 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ includeConfig 'conf/modules/cnvkit.config'
includeConfig 'conf/modules/controlfreec.config'
includeConfig 'conf/modules/deepvariant.config'
includeConfig 'conf/modules/freebayes.config'
includeConfig 'conf/modules/indexcov_reindex.config'
includeConfig 'conf/modules/goleft.indexcov.config'
includeConfig 'conf/modules/haplotypecaller.config'
includeConfig 'conf/modules/joint_germline.config'
includeConfig 'conf/modules/manta.config'
Expand Down
2 changes: 1 addition & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@
"fa_icon": "fas fa-toolbox",
"description": "Tools to use for duplicate marking, variant calling and/or for annotation.",
"help_text": "Multiple tools separated with commas.\n\n**Variant Calling:**\n\nGermline variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: DeepVariant, FreeBayes, GATK HaplotypeCaller, mpileup, Sentieon Haplotyper, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit\n\nTumor-only somatic variant calling can currently be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, mpileup, Mutect2, Strelka\n- Structural Variants: Manta, TIDDIT\n- Copy-number: CNVKit, ControlFREEC\n\nSomatic variant calling can currently only be performed with the following variant callers:\n- SNPs/Indels: FreeBayes, Mutect2, Strelka2\n- Structural variants: Manta, TIDDIT\n- Copy-Number: ASCAT, CNVKit, Control-FREEC\n- Microsatellite Instability: MSIsensorpro\n\n> **NB** Mutect2 for somatic variant calling cannot be combined with `--no_intervals`\n\n**Annotation:**\n \n- snpEff, VEP, merge (both consecutively).\n\n> **NB** As Sarek will use bgzip and tabix to compress and index VCF files annotated, it expects VCF files to be sorted when starting from `--step annotate`.",
"pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|sentieon_dedup|snpeff|strelka|tiddit|vep)?,?)*(?<!,)$"
"pattern": "^((ascat|bcfann|cnvkit|controlfreec|deepvariant|freebayes|haplotypecaller|sentieon_dnascope|sentieon_haplotyper|manta|merge|mpileup|msisensorpro|mutect2|sentieon_dedup|snpeff|strelka|tiddit|vep|indexcov)?,?)*(?<!,)$"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need alphabetical sorting or Maxime won't let it pass 😄

},
"skip_tools": {
"type": "string",
Expand Down
29 changes: 29 additions & 0 deletions subworkflows/local/bam_goleft_indexcov/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//
// INDEXCOV : call large CNVs using goleft indexcov
//
// For all modules here:
// A when clause condition is defined in conf/modules/goleft.indexcov.config and conf/modules/indexcov_reindex.config to determine if each module should be run

include { SAMTOOLS_INDEX_FOR_INDEXCOV } from '../../../modules/local/samtools_index_for_indexcov/main'
include { GOLEFT_INDEXCOV } from '../../../modules/local/goleft/indexcov/main'


// Subworkflow: passes the `bam` channel through SAMTOOLS_INDEX_FOR_INDEXCOV, then hands
// all of that process's outputs, pooled into one list, to GOLEFT_INDEXCOV.
// Only the combined `versions` channel is emitted.
workflow BAM_GOLEFT_INDEXCOV {
take:
bam // channel: [mandatory] [ meta, bam, bai ]
fasta // channel: [mandatory] [ fasta ]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
fasta // channel: [mandatory] [ fasta ]
fasta // channel: [mandatory] [ fasta ]

fai // channel: [mandatory] [ fai ]

main:
versions = Channel.empty()

samtools_index = SAMTOOLS_INDEX_FOR_INDEXCOV(bam, fasta, fai)
// Only the first versions.yml emitted by the indexing process is kept.
versions = versions.mix(samtools_index.versions.first())

// flatten().collect() merges every emitted (bam, bai) tuple into a single list.
// [id:"PREFIX"] is a placeholder meta map: GOLEFT_INDEXCOV reads meta.id only
// when task.ext.prefix is not set.
indexcov_ch = GOLEFT_INDEXCOV([id:"PREFIX"], samtools_index.output.flatten().collect(), fasta, fai)
versions = versions.mix(indexcov_ch.versions)

emit:
versions
}

12 changes: 12 additions & 0 deletions subworkflows/local/bam_variant_calling_germline_all/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ include { BAM_VARIANT_CALLING_SINGLE_STRELKA } from '../bam_variant_calling
include { BAM_VARIANT_CALLING_SINGLE_TIDDIT } from '../bam_variant_calling_single_tiddit/main'
include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main'
include { VCF_VARIANT_FILTERING_GATK } from '../vcf_variant_filtering_gatk/main'
include { BAM_GOLEFT_INDEXCOV } from '../bam_goleft_indexcov/main'



Expand Down Expand Up @@ -190,6 +191,17 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL {
versions = versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions)
}

// INDEXCOV
if (tools.split(',').contains('indexcov')) {
BAM_GOLEFT_INDEXCOV (
cram,
fasta,
fasta_fai
)
versions = versions.mix(BAM_GOLEFT_INDEXCOV.out.versions)
}


// SENTIEON DNASCOPE
if (tools.split(',').contains('sentieon_dnascope')) {
BAM_VARIANT_CALLING_SENTIEON_DNASCOPE(
Expand Down
Loading