Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sentieon dedup #3224

Merged
merged 34 commits into from
Mar 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
5d5c6f4
Fixing stub
asp8200 Mar 30, 2023
7374179
Adding Sentieon-Dedup module and test
asp8200 Mar 30, 2023
add64e5
Skipping conda for all tests tagged sentieon
asp8200 Mar 30, 2023
2ea37fe
Adding test for sentieon-dedup
asp8200 Mar 30, 2023
86098e4
Not running conda test for sentieon modules
asp8200 Mar 30, 2023
61cf873
WIP: Trying to hardcode output to cram
asp8200 Mar 30, 2023
ee789cf
Fixing suffix
asp8200 Mar 30, 2023
58c5c76
WIP: Fetch md5sums
asp8200 Mar 30, 2023
c807d54
Updating md5sums
asp8200 Mar 30, 2023
bd0f2e2
Removing outdated comments
asp8200 Mar 30, 2023
e057a4b
Adding extra args for driver, LocusCollector and Dedup
asp8200 Mar 30, 2023
edd38c7
yml-doc for Dedup-module
asp8200 Mar 30, 2023
da64b21
Adding updating for rmdup in test-config
asp8200 Mar 30, 2023
0931c74
WIP: Adding test for dedup rmdup
asp8200 Mar 30, 2023
61538ca
Removing unwanted tags
asp8200 Mar 30, 2023
58005f8
Trying to run two dedup-tests
asp8200 Mar 30, 2023
cfe63c5
Adding test of sentieon dedup with removal of reads
asp8200 Mar 30, 2023
6181cac
Re-adding secrets
asp8200 Mar 30, 2023
95af42e
Removing option rmdup from test yaml
asp8200 Mar 30, 2023
8c36d9a
Update tests/modules/nf-core/sentieon/dedup/nextflow.config
asp8200 Mar 30, 2023
e6cd688
Update modules/nf-core/sentieon/dedup/main.nf
asp8200 Mar 30, 2023
41aa5f2
Update modules/nf-core/sentieon/dedup/main.nf
asp8200 Mar 30, 2023
208e3b0
Update modules/nf-core/sentieon/bwamem/main.nf
asp8200 Mar 30, 2023
9087f69
Update modules/nf-core/sentieon/dedup/main.nf
asp8200 Mar 30, 2023
9729998
Update modules/nf-core/sentieon/dedup/main.nf
asp8200 Mar 30, 2023
dd2e387
Update modules/nf-core/sentieon/dedup/main.nf
asp8200 Mar 30, 2023
77e34ca
Update modules/nf-core/sentieon/dedup/main.nf
asp8200 Mar 30, 2023
e6f4cad
Update modules/nf-core/sentieon/dedup/main.nf
asp8200 Mar 30, 2023
c5a18fc
Adding prefix for stub
asp8200 Mar 30, 2023
12c564a
fix
asp8200 Mar 30, 2023
d26f187
fix
asp8200 Mar 30, 2023
a00e480
fix
asp8200 Mar 30, 2023
b26cbb5
Updating md5sums
asp8200 Mar 30, 2023
cbba4dd
Merge branch 'master' into sentieon_dedup
asp8200 Mar 30, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/pytest-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ jobs:
tags: sentieon/bwaindex
- profile: "conda"
tags: sentieon/bwamem
- profile: "conda"
tags: sentieon/dedup
- profile: "conda"
tags: universc
- profile: "singularity"
Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/sentieon/bwamem/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ process SENTIEON_BWAMEM {

script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: ''
def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: ''
Expand Down Expand Up @@ -59,6 +58,7 @@ process SENTIEON_BWAMEM {
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
asp8200 marked this conversation as resolved.
Show resolved Hide resolved
touch ${prefix}.bam
touch ${prefix}.bam.bai
Expand Down
4 changes: 2 additions & 2 deletions modules/nf-core/sentieon/bwamem/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ input:
pattern: "*.{fa,fasta}"
- fasta_fai:
type: file
description: The index of the FASTA reference. Needed when the argument `--sorted` is used
description: The index of the FASTA reference.
pattern: "*.fai"
output:
- meta:
Expand All @@ -49,7 +49,7 @@ output:
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM file. If the BAM file is not indexed, this tool will run samtools index before extracting reads.
description: BAM file.
pattern: "*.bam"
- bai:
description: BAI file
Expand Down
75 changes: 75 additions & 0 deletions modules/nf-core/sentieon/dedup/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Runs Sentieon LocusCollector followed by Sentieon Dedup on the supplied
// BAM file(s). LocusCollector writes a score file which Dedup then reads to
// produce the deduplicated alignment file and a metrics file.
//
// Optional `task.ext` settings read by this process:
//   ext.args    extra arguments for the first `sentieon driver` call (LocusCollector)
//   ext.args2   extra arguments for `--algo LocusCollector`
//   ext.args3   extra arguments for the second `sentieon driver` call (Dedup)
//   ext.args4   extra arguments for `--algo Dedup`
//   ext.prefix  basename of all output files (default: meta.id)
//   ext.suffix  extension of the alignment output, ".cram" (default) or ".bam"
//   ext.sentieon_auth_mech_base64 / ext.sentieon_auth_data_base64
//               base64-encoded values exported as SENTIEON_AUTH_MECH / SENTIEON_AUTH_DATA
process SENTIEON_DEDUP {
    tag "$meta.id"
    label 'process_medium'
    label 'sentieon'

    // Base64-encoded license string; decoded into SENTIEON_LICENSE inside the script.
    secret 'SENTIEON_LICENSE_BASE64'

    // Exit if running this module with -profile conda / -profile mamba
    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
        exit 1, "Sentieon modules does not support Conda. Please use Docker / Singularity / Podman instead."
    }

    container 'nfcore/sentieon:202112.06'

    input:
    tuple val(meta), path(bam), path(bai)
    path fasta
    path fasta_fai

    output:
    tuple val(meta), path("*.cram"), emit: cram, optional: true
    tuple val(meta), path("*.crai"), emit: crai   // Sentieon will generate a .crai AND a .bai no matter which output file type is chosen.
    tuple val(meta), path("*.bam"), emit: bam, optional: true
    tuple val(meta), path("*.bai"), emit: bai
    tuple val(meta), path("*.score"), emit: score
    tuple val(meta), path("*.metrics"), emit: metrics
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    def args3 = task.ext.args3 ?: ''
    def args4 = task.ext.args4 ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def suffix = task.ext.suffix ?: ".cram" // The suffix should be either ".cram" or ".bam".
    def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: ''
    def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: ''
    // One "-i <file>" option per input alignment file.
    def input_list = bam.collect{"-i $it"}.join(' ')

    """
    export SENTIEON_LICENSE=\$(echo -n "\$SENTIEON_LICENSE_BASE64" | base64 -d)

    # Quoted -n tests: the branch is only taken when both values are non-empty.
    if [ -n "${sentieon_auth_mech_base64}" ] && [ -n "${sentieon_auth_data_base64}" ]; then
        # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license.
        export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
        export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
        echo "Decoded and exported Sentieon test-license system environment variables"
    fi

    sentieon driver $args $input_list -r ${fasta} --algo LocusCollector $args2 --fun score_info ${prefix}.score
    sentieon driver $args3 -t $task.cpus $input_list -r ${fasta} --algo Dedup $args4 --score_info ${prefix}.score --metrics ${prefix}.metrics ${prefix}${suffix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The `crai` and `bai` outputs are both mandatory, so the stub has to create
    // a file for each of them; otherwise a stub run fails on a missing output.
    // NOTE(review): the `.cram.bai` name is chosen only to match the "*.bai"
    // output pattern - confirm it mirrors what Sentieon actually writes.
    """
    touch ${prefix}.cram
    touch ${prefix}.cram.crai
    touch ${prefix}.cram.bai
    touch ${prefix}.metrics
    touch ${prefix}.score

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
    END_VERSIONS
    """
}
72 changes: 72 additions & 0 deletions modules/nf-core/sentieon/dedup/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Module description for SENTIEON_DEDUP (LocusCollector + Dedup).
name: sentieon_dedup
description: Runs the sentieon tool LocusCollector followed by Dedup. LocusCollector collects read information that is used by Dedup which in turn marks or removes duplicate reads.
keywords:
  - dedup
  - duplicates
  - bam
  - cram
  - sentieon
tools:
  - sentieon:
      description: |
        Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads.
        Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system.
      homepage: https://www.sentieon.com/
      documentation: https://www.sentieon.com/
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information.
        e.g. [ id:'test', single_end:false ]
  - bam:
      type: file
      description: BAM file.
      pattern: "*.bam"
  - bai:
      type: file
      description: BAI file
      pattern: "*.bai"
  - fasta:
      type: file
      description: Genome fasta file
      pattern: "*.{fa,fasta}"
  - fasta_fai:
      type: file
      description: The index of the FASTA reference.
      pattern: "*.fai"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information.
        e.g. [ id:'test', single_end:false ]
  - cram:
      type: file
      description: CRAM file
      pattern: "*.cram"
  - crai:
      type: file
      description: CRAM index file
      pattern: "*.crai"
  - bam:
      type: file
      description: BAM file.
      pattern: "*.bam"
  - bai:
      type: file
      description: BAI file
      pattern: "*.bai"
  - score:
      type: file
      description: The score file indicates which reads LocusCollector finds are likely duplicates.
      pattern: "*.score"
  - metrics:
      type: file
      description: Output file containing the metrics data from Dedup.
      pattern: "*.metrics"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@asp8200"
6 changes: 5 additions & 1 deletion tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2921,7 +2921,11 @@ sentieon/bwaindex:

sentieon/bwamem:
- modules/nf-core/sentieon/bwamem/**
- tests/modules/nf-core/sentieon/bwameminde/**
- tests/modules/nf-core/sentieon/bwamem/**

sentieon/dedup:
- modules/nf-core/sentieon/dedup/**
- tests/modules/nf-core/sentieon/dedup/**

seqkit/pair:
- modules/nf-core/seqkit/pair/**
Expand Down
34 changes: 34 additions & 0 deletions tests/modules/nf-core/sentieon/dedup/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { SENTIEON_DEDUP as SENTIEON_DEDUP_MARK } from '../../../../../modules/nf-core/sentieon/dedup/main.nf'
include { SENTIEON_DEDUP as SENTIEON_DEDUP_REMOVE } from '../../../../../modules/nf-core/sentieon/dedup/main.nf'

workflow test_dedup_mark_duplicate_reads {

    // Shortcuts into the sarscov2 section of the shared test-data map.
    genome_data   = params.test_data['sarscov2']['genome']
    illumina_data = params.test_data['sarscov2']['illumina']

    // Reference genome and its index.
    reference       = file(genome_data['genome_fasta'], checkIfExists: true)
    reference_index = file(genome_data['genome_fasta_fai'], checkIfExists: true)

    // Input tuple: [ meta, bam, bai ]
    sample_meta  = [ id: 'test' ]
    sorted_bam   = file(illumina_data['test_single_end_sorted_bam'], checkIfExists: true)
    sorted_bai   = file(illumina_data['test_single_end_sorted_bam_bai'], checkIfExists: true)
    dedup_input  = [ sample_meta, sorted_bam, sorted_bai ]

    // Runs the module under the SENTIEON_DEDUP_MARK alias.
    SENTIEON_DEDUP_MARK ( dedup_input, reference, reference_index )
}

workflow test_dedup_remove_duplicate_reads {

    // Shortcuts into the sarscov2 section of the shared test-data map.
    genome_data   = params.test_data['sarscov2']['genome']
    illumina_data = params.test_data['sarscov2']['illumina']

    // Reference genome and its index.
    reference       = file(genome_data['genome_fasta'], checkIfExists: true)
    reference_index = file(genome_data['genome_fasta_fai'], checkIfExists: true)

    // Input tuple: [ meta, bam, bai ]
    sample_meta  = [ id: 'test' ]
    sorted_bam   = file(illumina_data['test_single_end_sorted_bam'], checkIfExists: true)
    sorted_bai   = file(illumina_data['test_single_end_sorted_bam_bai'], checkIfExists: true)
    dedup_input  = [ sample_meta, sorted_bam, sorted_bai ]

    // Runs the module under the SENTIEON_DEDUP_REMOVE alias.
    SENTIEON_DEDUP_REMOVE ( dedup_input, reference, reference_index )
}
14 changes: 14 additions & 0 deletions tests/modules/nf-core/sentieon/dedup/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Process-scope settings for the sentieon/dedup module tests.
process {

// Publish into a sub-directory of params.outdir named after the process:
// last ':'-separated part of the process name, then its first '_'-separated
// token, lower-cased (e.g. SENTIEON_DEDUP_MARK -> "sentieon").
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

// Pass the base64-encoded auth values from Nextflow secrets to every process
// labelled 'sentieon'.
withLabel: 'sentieon' {
ext.sentieon_auth_mech_base64 = secrets.SENTIEON_AUTH_MECH_BASE64
ext.sentieon_auth_data_base64 = secrets.SENTIEON_AUTH_DATA_BASE64
}

// Only the SENTIEON_DEDUP_REMOVE alias gets '--rmdup' as ext.args4.
withName: 'SENTIEON_DEDUP_REMOVE' {
ext.args4 = '--rmdup'
}

}
30 changes: 30 additions & 0 deletions tests/modules/nf-core/sentieon/dedup/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Test specification for the two sentieon/dedup test workflows.
# versions.yml is listed without an md5sum in both tests.

# Entry test_dedup_mark_duplicate_reads.
- name: sentieon test_dedup_mark_duplicate_reads
command: nextflow run ./tests/modules/nf-core/sentieon/dedup -entry test_dedup_mark_duplicate_reads -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/sentieon/dedup/nextflow.config
tags:
- sentieon
- sentieon/dedup
files:
- path: ./output/sentieon/test.cram
md5sum: 6abb0fc667edc1ce61b67929c1ec0881
- path: ./output/sentieon/test.cram.crai
md5sum: f8da02b770410adb02561aa69a46cc15
- path: ./output/sentieon/test.metrics
md5sum: aba47c5fed7143500d91cc39df1e2eb2
- path: ./output/sentieon/test.score
md5sum: 4a36fe59dc6865cd5ee9e7ecd936106e
- path: ./output/sentieon/versions.yml
# Entry test_dedup_remove_duplicate_reads.
# The test.score md5sum is the same as in the first test.
- name: sentieon test_dedup_remove_duplicate_reads
command: nextflow run ./tests/modules/nf-core/sentieon/dedup -entry test_dedup_remove_duplicate_reads -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/sentieon/dedup/nextflow.config
tags:
- sentieon
- sentieon/dedup
files:
- path: ./output/sentieon/test.cram
md5sum: a605337533680a85453df5e34900f833
- path: ./output/sentieon/test.cram.crai
md5sum: ac694a8870a098b8f81a87ef3f5bd590
- path: ./output/sentieon/test.metrics
md5sum: da2dd03dca502c3fbe23f655e44963f3
- path: ./output/sentieon/test.score
md5sum: 4a36fe59dc6865cd5ee9e7ecd936106e
- path: ./output/sentieon/versions.yml