From 09cb221996275e1e393083344a5eccf3b4658e73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Tue, 18 Jun 2024 17:35:25 +0200 Subject: [PATCH 1/9] wittyer removed for now --- README.md | 18 +- bin/add_svtype.py | 4 +- bin/reclassfy_svaba.py | 102 +++++ conf/modules.config | 20 +- modules.json | 111 ++++-- modules/local/bcftools_fill_from_fasta.nf | 62 ++++ modules/local/happy_ftx.nf | 65 ---- modules/local/vcfdist.nf | 42 --- modules/nf-core/wittyer/README.md | 28 -- modules/nf-core/wittyer/environment.yml | 7 - modules/nf-core/wittyer/main.nf | 69 ---- modules/nf-core/wittyer/meta.yml | 66 ---- modules/nf-core/wittyer/tests/main.nf.test | 100 ----- .../nf-core/wittyer/tests/main.nf.test.snap | 347 ------------------ modules/nf-core/wittyer/tests/tags.yml | 2 - subworkflows/local/prepare_vcfs_test.nf | 14 + subworkflows/local/sv_germline_benchmark.nf | 33 +- 17 files changed, 296 insertions(+), 794 deletions(-) create mode 100644 bin/reclassfy_svaba.py create mode 100644 modules/local/bcftools_fill_from_fasta.nf delete mode 100644 modules/local/happy_ftx.nf delete mode 100644 modules/local/vcfdist.nf delete mode 100644 modules/nf-core/wittyer/README.md delete mode 100644 modules/nf-core/wittyer/environment.yml delete mode 100644 modules/nf-core/wittyer/main.nf delete mode 100644 modules/nf-core/wittyer/meta.yml delete mode 100644 modules/nf-core/wittyer/tests/main.nf.test delete mode 100644 modules/nf-core/wittyer/tests/main.nf.test.snap delete mode 100644 modules/nf-core/wittyer/tests/tags.yml diff --git a/README.md b/README.md index 081fc58..82d16fc 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,23 @@ 1. Standardization of SVs in test VCF files 2. Normalization of SVs in test VCF files 3. Normalization of SVs in truth VCF files -4. SV stats and histograms +4. SV stats and histograms (Survivor) + +5. Germline benchmarking of small variants + - Tools: + Happy + RTGtools 5. Germline benchmarking of SVs -6. 
Somatic benchmarking of SVs + - Tools: + Truvari + Svbenchmark + Wittyer: Only works with Truth files annotated with SVTYPE and SVLEN + +6. Somatic benchmarking of small variants + - Tools: + Happy + RTGtools + 7. Final report and comparisons ## Usage diff --git a/bin/add_svtype.py b/bin/add_svtype.py index 1ff6af9..6bea45a 100755 --- a/bin/add_svtype.py +++ b/bin/add_svtype.py @@ -18,8 +18,8 @@ -in_vcf = pysam.VariantFile(args.graph) -out_name = os.path.basename(args.graph) +in_vcf = pysam.VariantFile(args.input) +out_name = os.path.basename(args.input) if out_name.endswith('.gz'): out_name = out_name[:-3] if out_name.endswith('.vcf'): diff --git a/bin/reclassfy_svaba.py b/bin/reclassfy_svaba.py new file mode 100644 index 0000000..afe7f1c --- /dev/null +++ b/bin/reclassfy_svaba.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +import re +import sys +import os + +#make mates dictionary given list input of non-comment lines +def makeMateDict(m): + d = {} + for index1, line1 in enumerate(m): + id1 = line1.split('\t')[2] + numMate = re.search(r':(\d)',id1).group(1) + origId = re.search(r'(\d+):',id1).group(1) + if int(numMate) == 1: + for index2, line2 in enumerate(m): + #never start from beginning of file + if index2 <= index1: + continue + # print str(index1) + " : " + str(index2) + id2 = line2.split('\t')[2] + duplicateId = re.search(r'(\d+):',id2).group(1) + duplicateNumMate = re.search(r':(\d)',id2).group(1) + if duplicateId == origId and int(duplicateNumMate) == 2: + d[line1] = line2 + break + return d + +def classify(line, ALT_INDEX, mdict): + #get alt, chrom1, chrom2, position (pos), id, old SVTYPE (should be BND if virgin svaba vcf) from line + s = line.split("\t") + alt = s[ALT_INDEX] + chrom1 = s[0] + pos = int(s[1]) + id=s[2] + + if int(re.search(r':(\d)',id).group(1)) != 1: + return "NONE" + + mateLine = mdict[line].split('\t') + mateChrom = mateLine[0] + mateAlt = mateLine[ALT_INDEX] + + oldType = re.search(r'SVTYPE=(.+?)(\s+?|:)',line).group(1) + + # get 
new type + if oldType == 'BND' and chrom1 == mateChrom: + INV_PATTERN_1 = re.compile(r'\D\].+\]') + INV_PATTERN_2 = re.compile(r'\[.+\[\D') + if INV_PATTERN_1.match(alt) and INV_PATTERN_1.match(mateAlt): + return "INV" + if INV_PATTERN_2.match(alt) and INV_PATTERN_2.match(mateAlt): + return "INV" + + # DEL + DEL_PATTERN_THIS = re.compile(r'\D\[.+\[') + DEL_PATTERN_MATE = re.compile(r'\].+\]\D') + if DEL_PATTERN_THIS.match(alt) and DEL_PATTERN_MATE.match(mateAlt): + return "DEL" + + # INS + INS_PATTERN_THIS = re.compile(r'\D\].+\]') + INS_PATTERN_MATE = re.compile(r'\[.+\[\D') + if INS_PATTERN_THIS.match(alt) and INS_PATTERN_MATE.match(mateAlt): + return "DUP/INS" + + return 'BND' + +if __name__ == "__main__": + file = sys.argv[1] + if not os.path.exists(file): + raise IOError(file) + alt_index = -1 + #generate mate:mate dictionary + #load file into ram + vcf_file=[] + with open (file, 'r') as f: + for line in f: + if line.startswith('#'): + continue + vcf_file.append(line) + matesDict = makeMateDict(vcf_file) + with open(file, "r") as f: + for line in f: + # print comments + if line.startswith("##"): + sys.stdout.write(line) + continue + # header contains indexes + if line.startswith('#'): + split = line.split("\t") + for index, val in enumerate(split): + if val == "ALT": + alt_index = index + break + sys.stdout.write(line) + continue + if alt_index == -1: + print "ERROR: NO ALT INDEX FOUND" + exit(1) + newType = classify(line, alt_index, matesDict) + if newType != "NONE": + newLine = re.sub(r'SVTYPE=BND',"SVTYPE="+newType,line) + sys.stdout.write(newLine) \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index c6f6fe6..6e552ed 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,12 +27,21 @@ process { } withName: "BCFTOOLS_NORM" { ext.prefix = { vcf.baseName - ".vcf" + ".norm"} - ext.args = {"--output-type z -N -m-any -c s" } + ext.args = {"--output-type z -m-any -c w" } publishDir = [ path: { "${params.outdir}/test" 
}, enabled: false ] } + withName: "BCFTOOLS_FILL_FROM_FASTA" { + ext.prefix = { vcf.baseName - ".vcf" + ".fill"} + ext.args = {"--output-type z" } + publishDir = [ + path: { "${params.outdir}/test" }, + enabled: false + ] + } + withName: "BCFTOOLS_DEDUP" { ext.prefix = { vcf.baseName - ".vcf" + ".dedup"} ext.args = {"--output-type z --rm-du exact -c s" } @@ -198,7 +207,7 @@ process { } withName: WITTYER { ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} - ext.args = {"-em cts"} + ext.args = {""} ext.when = { params.method.split(',').contains('wittyer') } publishDir = [ path: {"${params.outdir}/${meta.id}/wittyer_bench"}, @@ -260,6 +269,13 @@ process { ext.prefix = {input.toString() - ".vcf.gz"} } + withName: TABIX_BGZIP_TRUTH{ + ext.prefix = {input.toString() - ".vcf.gz"} + } + + withName: TABIX_BGZIP_QUERY{ + ext.prefix = {input.toString() - ".vcf.gz"} + } withName: SURVIVOR_MERGE { ext.prefix = {"${meta.id}.${meta.vartype}.${meta.tag}"} publishDir = [ diff --git a/modules.json b/modules.json index c7484f6..207c1a6 100644 --- a/modules.json +++ b/modules.json @@ -8,122 +8,163 @@ "bcftools/filter": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/norm": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/query": { "branch": "master", "git_sha": "a5ba4d59c2b248c0379b0f8aeb4e7e754566cd1f", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/reheader": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "487d92367b4d7bb9f1ca694bf72736be90720b15", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/stats": { "branch": "master", "git_sha": 
"44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "happy/happy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "happy/prepy": { "branch": "master", "git_sha": "01b55f288c25490236af1cd044c9bca17598ecfe", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "happy/sompy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "manta/convertinversion": { "branch": "master", "git_sha": "1e2b7fb7106852388610c0360d234b0829eb980e", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "rtgtools/format": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "rtgtools/vcfeval": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "survivor/filter": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "survivor/merge": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "survivor/stats": { "branch": "master", "git_sha": "398375d72766cac92c4ffcf6927cac2d65310cfd", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "svanalyzer/svbenchmark": { "branch": "master", "git_sha": 
"8a2dd9e84cde68ebba45fb2c602c312c999c02a0", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "svync": { "branch": "master", "git_sha": "3df1a372b844c98c31ec3446faac7c75e64518a3", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/bgzip": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/tabix": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "truvari/bench": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] - }, - "wittyer": { - "branch": "master", - "git_sha": "61f2ea506bd87ef436b0086f91a07abc6035fcd0", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -132,20 +173,26 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/local/bcftools_fill_from_fasta.nf b/modules/local/bcftools_fill_from_fasta.nf new file mode 100644 index 0000000..8cdaedc --- /dev/null +++ b/modules/local/bcftools_fill_from_fasta.nf @@ -0,0 +1,62 @@ +process BCFTOOLS_FILL_FROM_FASTA { + tag "$meta.id" + label 'process_medium' + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + + """ + bcftools +fill-from-fasta \\ + ${vcf} \\ + $args \\ + --output ${prefix}.${extension}\\ + --threads $task.cpus \\ + -- \\ + -c REF \\ + --fasta ${fasta} \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf.gz" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/happy_ftx.nf b/modules/local/happy_ftx.nf deleted file mode 100644 index 425f528..0000000 --- a/modules/local/happy_ftx.nf +++ /dev/null @@ -1,65 +0,0 @@ -process HAPPY_FTX { - tag "$meta.id" - label 'process_medium' - - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hap.py:0.3.14--py27h5c5a3ab_0': - 'biocontainers/hap.py:0.3.14--py27h5c5a3ab_0' }" - - input: - tuple val(meta), path(input_vcf), path(regions_bed), path(targets_bed) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fasta_fai) - tuple val(meta4), path(bams) - - output: - tuple val(meta), path('*.csv') , emit: features - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def regions = regions_bed ? "-R ${regions_bed}" : "" - def targets = targets_bed ? "-T ${targets_bed}" : "" - def bams = bams ? "--bam ${bams}" : "" - def features = meta.id.contains("mutect2") ? "generic" : - meta.id.contains("strelka") ? "hcc.strelka.${meta.vartype}" : - meta.id.contains("varscan") ? "hcc.varsacan2.${meta.vartype}" : - meta.id.contains("pisces") ? "hcc.pisces.${meta.vartype}" : - "generic" - - def VERSION = '0.3.14' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
- """ - ftx.py \\ - --feature-table $features \\ - ${args} \\ - --reference ${fasta} \\ - ${regions} \\ - ${targets} \\ - $bams \\ - -o ${prefix}.${meta.vartype} \\ - ${input_vcf} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hap.py: $VERSION - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def VERSION = '0.3.14' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - touch ${prefix}.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - hap.py: $VERSION - END_VERSIONS - """ -} diff --git a/modules/local/vcfdist.nf b/modules/local/vcfdist.nf deleted file mode 100644 index 2149f72..0000000 --- a/modules/local/vcfdist.nf +++ /dev/null @@ -1,42 +0,0 @@ -process VCFDIST { - tag "$meta.id" - label 'process_single' - - conda "" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'docker://timd1/vcfdist:v2.3.2' : - 'timd1/vcfdist:v2.3.2' }" - - input: - tuple val(meta),path(vcf), path(tbi), path(truth_vcf), path(truth_tbi), path(bed) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - - output: - tuple val(meta), path("*.tsv,vcf"), emit: bench - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def regions = bed ? 
"-b $bed" : "" - - """ - vcfdist \\ - ${vcf} \\ - ${truth_vcf} \\ - $fasta \\ - -p ${prefix} \\ - ${regions} \\ - ${args} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - vcfdist: \$(echo \$(vcfdist --version 2>&1) | sed 's/^.*vcfdist v//') - END_VERSIONS - """ - -} diff --git a/modules/nf-core/wittyer/README.md b/modules/nf-core/wittyer/README.md deleted file mode 100644 index d2ba0e0..0000000 --- a/modules/nf-core/wittyer/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# Updating the docker container and making a new module release - -witty.er is a commercial tool from Illumina. The container provided for the witty.er nf-core module is not provided nor supported by Illumina. Updating the witty.er versions in the container and pushing the update to Dockerhub needs to be done manually. - -1. Navigate to the witty.er github repository. - [witty.er](https://github.com/Illumina/witty.er) -2. Download the latest release. - ```bash - wget https://github.com/Illumina/witty.er/archive/refs/tags/.tar.gz - ``` -3. Uncompress the released package. - ```bash - tar -xvf .tar.gz - ``` -4. Change to the uncompressed directory. -5. Build docker image using provided Dockerfile. - - ```bash - docker build -t wittyer: --platform linux/amd64 . - ``` - -6. Access rights are needed to push the container to the Dockerhub nfcore organization, please ask a core team member to do so. - - ```bash - docker tag wittyer: quay.io/nf-core/wittyer: - docker push quay.io/nf-core/wittyer: - ``` - -7. Make the image public. 
diff --git a/modules/nf-core/wittyer/environment.yml b/modules/nf-core/wittyer/environment.yml deleted file mode 100644 index f8378df..0000000 --- a/modules/nf-core/wittyer/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: wittyer -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - tabix diff --git a/modules/nf-core/wittyer/main.nf b/modules/nf-core/wittyer/main.nf deleted file mode 100644 index c2b943f..0000000 --- a/modules/nf-core/wittyer/main.nf +++ /dev/null @@ -1,69 +0,0 @@ -process WITTYER { - tag "$meta.id" - label 'process_single' - - container "nf-core/wittyer:0.3.3.0" - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "WITTYER module does not support Conda. Please use Docker / Singularity / Podman instead." - } - - input: - tuple val(meta), path(query_vcf), path(query_vcf_index), path(truth_vcf), path(truth_vcf_index), path(bed) - - output: - tuple val(meta), path("*.json") , emit: report - tuple val(meta), path("*.vcf.gz") , emit: bench_vcf - tuple val(meta), path("*.vcf.gz.tbi") , emit: bench_vcf_tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def regions = bed ? "--includeBed=$bed" : "" - if ("$truth_vcf" == "${prefix}.vcf.gz") error "Input and output names are the same, set prefix in module configuration to disambiguate!" - if ("$query_vcf" == "${prefix}.vcf.gz") error "Input and output names are the same, set prefix in module configuration to disambiguate!" - if ("$query_vcf_index" == "${prefix}.vcf.gz.tbi") error "Input and output names are the same, set prefix in module configuration to disambiguate!" - if ("$query_vcf_index" == "${prefix}.vcf.gz.tbi") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
- - // dotnet /opt/Wittyer/Wittyer.dll might need to be replaced with new docker image - """ - mkdir bench - - dotnet /opt/Wittyer/Wittyer.dll \\ - --truthVcf=${truth_vcf} \\ - --inputVcf=${query_vcf} \\ - --outputDirectory=bench \\ - ${regions} \\ - ${args} - - mv bench/Wittyer.Stats.json ${prefix}.json - mv bench/*.vcf.gz ${prefix}.vcf.gz - mv bench/*.vcf.gz.tbi ${prefix}.vcf.gz.tbi - - rm -rf bench - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wittyer: \$(dotnet /opt/Wittyer/Wittyer.dll --version |& sed '1!d ; s/witty.er //') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.json - echo "" | gzip > ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wittyer: \$(dotnet /opt/Wittyer/Wittyer.dll --version |& sed '1!d ; s/witty.er //') - END_VERSIONS - """ -} diff --git a/modules/nf-core/wittyer/meta.yml b/modules/nf-core/wittyer/meta.yml deleted file mode 100644 index 097f90f..0000000 --- a/modules/nf-core/wittyer/meta.yml +++ /dev/null @@ -1,66 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: wittyer -description: A large variant benchmarking tool analogous to hap.py for small variants. -keywords: - - structural-variants - - benchmarking - - vcf -tools: - - wittyer: - description: "Illumina tool for large variant benchmarking" - homepage: "https://github.com/Illumina/witty.er" - documentation: "https://github.com/Illumina/witty.er" - tool_dev_url: "https://github.com/Illumina/witty.er" - licence: ["BSD-2"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'sample1', single_end:false ]` - - query_vcf: - type: file - description: A VCF with called variants to benchmark against the standard - pattern: "*.{vcf,vcf.gz}" - - query_vcf_index: - type: file - description: The index of the called VCF (optional) - pattern: "*.tbi" - - truth_vcf: - type: file - description: A standard VCF to compare against - pattern: "*.{vcf,vcf.gz}" - - truth_vcf_index: - type: file - description: The index of the standard VCF (optional) - pattern: "*.tbi" - - bed: - type: file - description: A BED file specifying regions to be included in the analysis (optional) - pattern: "*.bed" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - report: - type: file - description: Detailed per-sample-pair, per-svtype, per-bin stats - pattern: "*.json" - - bench_vcf: - type: file - description: Updated query and truth entries merged into one file - pattern: "*.vcf.gz" - - bench_vcf_tbi: - type: file - description: Index of merged query and truth entries VCF file - pattern: "*.vcf.gz.tbi" -authors: - - "@famosab" -maintainers: - - "@famosab" diff --git a/modules/nf-core/wittyer/tests/main.nf.test b/modules/nf-core/wittyer/tests/main.nf.test deleted file mode 100644 index 3c23ffe..0000000 --- a/modules/nf-core/wittyer/tests/main.nf.test +++ /dev/null @@ -1,100 +0,0 @@ -nextflow_process { - - name "Test Process WITTYER" - script "../main.nf" - process "WITTYER" - - tag "modules" - tag "modules_nfcore" - tag "wittyer" - - test("human - simulatedSV - vcf_gz") { - - when { - process { - """ - input[0] = [ - [ id:'test', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['simulated_sv'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['simulated_sv_tbi'], checkIfExists: true), - 
file(params.test_data['homo_sapiens']['illumina']['simulated_sv2'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['simulated_sv2_tbi'], checkIfExists: true), - [] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(process.out.bench_vcf).match("bench_vcf") }, - { assert snapshot(process.out.bench_vcf_tbi).match("bench_vcf_tbi") }, - { assert snapshot(process.out.report).match("report") }, - { assert snapshot(process.out.version).match("version") } - ) - } - - } - - test("human - simulatedSV - vcf_gz - bed") { - - when { - process { - """ - input[0] = [ - [ id:'test_bed', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['simulated_sv'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['simulated_sv_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['simulated_sv2'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['simulated_sv2_tbi'], checkIfExists: true), - file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true) - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(process.out.bench_vcf).match("bed_bench_vcf") }, - { assert snapshot(process.out.bench_vcf_tbi).match("bed_bench_vcf_tbi") }, - { assert snapshot(process.out.report).match("bed_report") }, - { assert snapshot(process.out.version).match("bed_version") } - ) - } - - } - - test("human - simulatedSV - vcf_gz - stub") { - - options "-stub" - - when { - process { - """ - input[0] = [ - [ id:'test_stub', single_end:false ], // meta map - file(params.test_data['homo_sapiens']['illumina']['simulated_sv'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['simulated_sv_tbi'], checkIfExists: true), - 
file(params.test_data['homo_sapiens']['illumina']['simulated_sv2'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['simulated_sv2_tbi'], checkIfExists: true), - [] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() }, - { assert snapshot(process.out.version).match("stub_version") } - ) - } - - } - -} diff --git a/modules/nf-core/wittyer/tests/main.nf.test.snap b/modules/nf-core/wittyer/tests/main.nf.test.snap deleted file mode 100644 index a25d824..0000000 --- a/modules/nf-core/wittyer/tests/main.nf.test.snap +++ /dev/null @@ -1,347 +0,0 @@ -{ - "human - simulatedSV - vcf_gz - bed": { - "content": [ - { - "0": [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.json:md5,c6515ada81b5ccf5aa5b4f1268da2800" - ] - ], - "1": [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.vcf.gz:md5,7e5f24415c80ca986e81be90f831e000" - ] - ], - "2": [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.vcf.gz.tbi:md5,e4de1e1d27208b56f5a7bfbe31542240" - ] - ], - "3": [ - "versions.yml:md5,4a5148f206a3b12f0ebe87e81cedc31a" - ], - "bench_vcf": [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.vcf.gz:md5,7e5f24415c80ca986e81be90f831e000" - ] - ], - "bench_vcf_tbi": [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.vcf.gz.tbi:md5,e4de1e1d27208b56f5a7bfbe31542240" - ] - ], - "report": [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.json:md5,c6515ada81b5ccf5aa5b4f1268da2800" - ] - ], - "versions": [ - "versions.yml:md5,4a5148f206a3b12f0ebe87e81cedc31a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:41:49.782336703" - }, - "stub_version": { - "content": null, - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:57.223103062" - }, - "human - simulatedSV - vcf_gz": { - "content": [ - { - "0": [ - [ - 
{ - "id": "test", - "single_end": false - }, - "test.json:md5,fb70eac691c1067167091ab2d3b12de3" - ] - ], - "1": [ - [ - { - "id": "test", - "single_end": false - }, - "test.vcf.gz:md5,ff56c3084a59507362f6b7b7dc46ffdc" - ] - ], - "2": [ - [ - { - "id": "test", - "single_end": false - }, - "test.vcf.gz.tbi:md5,b9b448e5f11eebbcfeb9a123e838caa4" - ] - ], - "3": [ - "versions.yml:md5,4a5148f206a3b12f0ebe87e81cedc31a" - ], - "bench_vcf": [ - [ - { - "id": "test", - "single_end": false - }, - "test.vcf.gz:md5,ff56c3084a59507362f6b7b7dc46ffdc" - ] - ], - "bench_vcf_tbi": [ - [ - { - "id": "test", - "single_end": false - }, - "test.vcf.gz.tbi:md5,b9b448e5f11eebbcfeb9a123e838caa4" - ] - ], - "report": [ - [ - { - "id": "test", - "single_end": false - }, - "test.json:md5,fb70eac691c1067167091ab2d3b12de3" - ] - ], - "versions": [ - "versions.yml:md5,4a5148f206a3b12f0ebe87e81cedc31a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:41:44.466462714" - }, - "bench_vcf": { - "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.vcf.gz:md5,ff56c3084a59507362f6b7b7dc46ffdc" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:47.523573536" - }, - "bench_vcf_tbi": { - "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.vcf.gz.tbi:md5,b9b448e5f11eebbcfeb9a123e838caa4" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:47.532504108" - }, - "report": { - "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.json:md5,fb70eac691c1067167091ab2d3b12de3" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:47.537692046" - }, - "bed_report": { - "content": [ - [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.json:md5,c6515ada81b5ccf5aa5b4f1268da2800" - ] - ] - ], - "meta": { - 
"nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:52.863563564" - }, - "bed_version": { - "content": null, - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:52.868467701" - }, - "version": { - "content": null, - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:47.542666285" - }, - "human - simulatedSV - vcf_gz - stub": { - "content": [ - { - "0": [ - [ - { - "id": "test_stub", - "single_end": false - }, - "test_stub.json:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test_stub", - "single_end": false - }, - "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "2": [ - [ - { - "id": "test_stub", - "single_end": false - }, - "test_stub.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "3": [ - "versions.yml:md5,4a5148f206a3b12f0ebe87e81cedc31a" - ], - "bench_vcf": [ - [ - { - "id": "test_stub", - "single_end": false - }, - "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "bench_vcf_tbi": [ - [ - { - "id": "test_stub", - "single_end": false - }, - "test_stub.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "report": [ - [ - { - "id": "test_stub", - "single_end": false - }, - "test_stub.json:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "versions": [ - "versions.yml:md5,4a5148f206a3b12f0ebe87e81cedc31a" - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:57.215084162" - }, - "bed_bench_vcf": { - "content": [ - [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.vcf.gz:md5,7e5f24415c80ca986e81be90f831e000" - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:52.852045028" - }, - "bed_bench_vcf_tbi": { - "content": [ - [ - [ - { - "id": "test_bed", - "single_end": false - }, - "test_bed.vcf.gz.tbi:md5,e4de1e1d27208b56f5a7bfbe31542240" - ] - ] 
- ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-03-20T09:36:52.857651771" - } -} \ No newline at end of file diff --git a/modules/nf-core/wittyer/tests/tags.yml b/modules/nf-core/wittyer/tests/tags.yml deleted file mode 100644 index 177db94..0000000 --- a/modules/nf-core/wittyer/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -wittyer: - - "modules/nf-core/wittyer/**" diff --git a/subworkflows/local/prepare_vcfs_test.nf b/subworkflows/local/prepare_vcfs_test.nf index 9e28815..fe1a875 100644 --- a/subworkflows/local/prepare_vcfs_test.nf +++ b/subworkflows/local/prepare_vcfs_test.nf @@ -10,6 +10,7 @@ include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' include { HAPPY_PREPY } from '../../modules/nf-core/happy/prepy/main' addParams( options: params.options ) include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm' addParams( options: params.options ) include { TABIX_TABIX as TABIX_TABIX_1 } from '../../modules/nf-core/tabix/tabix' addParams( options: params.options ) +include { TABIX_TABIX as TABIX_TABIX_2 } from '../../modules/nf-core/tabix/tabix' addParams( options: params.options ) include { TABIX_TABIX as TABIX_TABIX_3 } from '../../modules/nf-core/tabix/tabix' addParams( options: params.options ) include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_1 } from '../../modules/nf-core/tabix/bgziptabix' addParams( options: params.options ) include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_2 } from '../../modules/nf-core/tabix/bgziptabix' addParams( options: params.options ) @@ -19,6 +20,7 @@ include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_CONTIGS } from '../../modules/nf-core/b include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_SNV } from '../../modules/nf-core/bcftools/view' addParams( options: params.options ) include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_INDEL } from '../../modules/nf-core/bcftools/view' addParams( options: params.options ) include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_TEST } from 
'../../modules/nf-core/bcftools/reheader' addParams( options: params.options ) +include { BCFTOOLS_FILL_FROM_FASTA } from '../../modules/local/bcftools_fill_from_fasta.nf' addParams( options: params.options ) workflow PREPARE_VCFS_TEST { @@ -129,6 +131,18 @@ workflow PREPARE_VCFS_TEST { } + BCFTOOLS_FILL_FROM_FASTA( + vcf_ch, + fasta + ) + + TABIX_TABIX_2( + BCFTOOLS_FILL_FROM_FASTA.out.vcf + ) + + BCFTOOLS_FILL_FROM_FASTA.out.vcf.join(TABIX_TABIX_2.out.tbi, by:0) + .set{vcf_ch} + // somatic spesific preperations vcf_ch.branch{ sv: it[0].vartype == "sv" diff --git a/subworkflows/local/sv_germline_benchmark.nf b/subworkflows/local/sv_germline_benchmark.nf index 945ec18..0377eae 100644 --- a/subworkflows/local/sv_germline_benchmark.nf +++ b/subworkflows/local/sv_germline_benchmark.nf @@ -7,14 +7,12 @@ params.options = [:] include { TRUVARI_PHAB } from '../../modules/local/truvari_phab' addParams( options: params.options ) include { TRUVARI_BENCH } from '../../modules/nf-core/truvari/bench' addParams( options: params.options ) include { SVANALYZER_SVBENCHMARK } from '../../modules/nf-core/svanalyzer/svbenchmark' addParams( options: params.options ) -include { WITTYER } from '../../modules/nf-core/wittyer' addParams( options: params.options ) -include { VCFDIST } from '../../modules/local/vcfdist' addParams( options: params.options ) workflow SV_GERMLINE_BENCHMARK { take: input_ch // channel: [val(meta),test_vcf,test_index,truth_vcf,truth_index, bed] - fasta // reference channel [val(meta), ref.fa] - fai // reference channel [val(meta), ref.fa.fai] + fasta // reference channel [val(meta), ref.fa] + fai // reference channel [val(meta), ref.fa.fai] main: @@ -26,7 +24,7 @@ workflow SV_GERMLINE_BENCHMARK { if (params.method.contains('truvari')){ - if(params.harmonize){ + if(params.sv_standardization.contains('harmonize')){ // // TRUVARI: TRUVARI_PHAB // @@ -107,31 +105,6 @@ workflow SV_GERMLINE_BENCHMARK { } - if (params.method.contains('wittyer')){ - - // - // MODULE: 
WITTYER - // - // BIG Advantage: reports by variant type - // Able to report CNV - WITTYER( - input_ch - ) - versions = versions.mix(WITTYER.out.versions) - } - - if (params.method.contains('vcfdist')){ - // - // MODULE: VCFDIST - // - VCFDIST( - input_ch, - fasta, - fai - ) - versions = versions.mix(VCFDIST.out.versions) - } - emit: tagged_variants summary_reports From 7f1bab1583bb849c9f0c6145b0b2d50f897e963d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Wed, 19 Jun 2024 10:19:49 +0200 Subject: [PATCH 2/9] fix module.config --- bin/reclassfy_svaba.py | 102 ----------------------------------------- conf/modules.config | 28 +---------- 2 files changed, 1 insertion(+), 129 deletions(-) delete mode 100644 bin/reclassfy_svaba.py diff --git a/bin/reclassfy_svaba.py b/bin/reclassfy_svaba.py deleted file mode 100644 index afe7f1c..0000000 --- a/bin/reclassfy_svaba.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python -import re -import sys -import os - -#make mates dictionary given list input of non-comment lines -def makeMateDict(m): - d = {} - for index1, line1 in enumerate(m): - id1 = line1.split('\t')[2] - numMate = re.search(r':(\d)',id1).group(1) - origId = re.search(r'(\d+):',id1).group(1) - if int(numMate) == 1: - for index2, line2 in enumerate(m): - #never start from beginning of file - if index2 <= index1: - continue - # print str(index1) + " : " + str(index2) - id2 = line2.split('\t')[2] - duplicateId = re.search(r'(\d+):',id2).group(1) - duplicateNumMate = re.search(r':(\d)',id2).group(1) - if duplicateId == origId and int(duplicateNumMate) == 2: - d[line1] = line2 - break - return d - -def classify(line, ALT_INDEX, mdict): - #get alt, chrom1, chrom2, position (pos), id, old SVTYPE (should be BND if virgin svaba vcf) from line - s = line.split("\t") - alt = s[ALT_INDEX] - chrom1 = s[0] - pos = int(s[1]) - id=s[2] - - if int(re.search(r':(\d)',id).group(1)) != 1: - return "NONE" - - mateLine = mdict[line].split('\t') - mateChrom = 
mateLine[0] - mateAlt = mateLine[ALT_INDEX] - - oldType = re.search(r'SVTYPE=(.+?)(\s+?|:)',line).group(1) - - # get new type - if oldType == 'BND' and chrom1 == mateChrom: - INV_PATTERN_1 = re.compile(r'\D\].+\]') - INV_PATTERN_2 = re.compile(r'\[.+\[\D') - if INV_PATTERN_1.match(alt) and INV_PATTERN_1.match(mateAlt): - return "INV" - if INV_PATTERN_2.match(alt) and INV_PATTERN_2.match(mateAlt): - return "INV" - - # DEL - DEL_PATTERN_THIS = re.compile(r'\D\[.+\[') - DEL_PATTERN_MATE = re.compile(r'\].+\]\D') - if DEL_PATTERN_THIS.match(alt) and DEL_PATTERN_MATE.match(mateAlt): - return "DEL" - - # INS - INS_PATTERN_THIS = re.compile(r'\D\].+\]') - INS_PATTERN_MATE = re.compile(r'\[.+\[\D') - if INS_PATTERN_THIS.match(alt) and INS_PATTERN_MATE.match(mateAlt): - return "DUP/INS" - - return 'BND' - -if __name__ == "__main__": - file = sys.argv[1] - if not os.path.exists(file): - raise IOError(file) - alt_index = -1 - #generate mate:mate dictionary - #load file into ram - vcf_file=[] - with open (file, 'r') as f: - for line in f: - if line.startswith('#'): - continue - vcf_file.append(line) - matesDict = makeMateDict(vcf_file) - with open(file, "r") as f: - for line in f: - # print comments - if line.startswith("##"): - sys.stdout.write(line) - continue - # header contains indexes - if line.startswith('#'): - split = line.split("\t") - for index, val in enumerate(split): - if val == "ALT": - alt_index = index - break - sys.stdout.write(line) - continue - if alt_index == -1: - print "ERROR: NO ALT INDEX FOUND" - exit(1) - newType = classify(line, alt_index, matesDict) - if newType != "NONE": - newLine = re.sub(r'SVTYPE=BND',"SVTYPE="+newType,line) - sys.stdout.write(newLine) \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index 6e552ed..1c0f352 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -205,25 +205,7 @@ process { mode: params.publish_dir_mode ] } - withName: WITTYER { - ext.prefix = 
{"${meta.id}.${params.sample}.${meta.vartype}"} - ext.args = {""} - ext.when = { params.method.split(',').contains('wittyer') } - publishDir = [ - path: {"${params.outdir}/${meta.id}/wittyer_bench"}, - pattern: "*{json,vcf.gz.tbi,vcf.gz}", - mode: params.publish_dir_mode - ] - } - withName: VCFDIST { - ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} - ext.args = {"-v 0"} - publishDir = [ - path: {"${params.outdir}/${meta.id}/vcfdist_bench"}, - pattern: "*{.vcf,tsv}", - mode: params.publish_dir_mode - ] - } + withName: BAMSURGEON_EVALUATOR { ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} publishDir = [ @@ -268,14 +250,6 @@ process { withName: TABIX_BGZIP_BENCH{ ext.prefix = {input.toString() - ".vcf.gz"} } - - withName: TABIX_BGZIP_TRUTH{ - ext.prefix = {input.toString() - ".vcf.gz"} - } - - withName: TABIX_BGZIP_QUERY{ - ext.prefix = {input.toString() - ".vcf.gz"} - } withName: SURVIVOR_MERGE { ext.prefix = {"${meta.id}.${meta.vartype}.${meta.tag}"} publishDir = [ From c8bc4d155cc9de19c4b9ab2f8e89329da6ea95ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Wed, 19 Jun 2024 10:24:20 +0200 Subject: [PATCH 3/9] prettier --- README.md | 32 ++++++++-------- modules.json | 106 +++++++++++++-------------------------------------- 2 files changed, 44 insertions(+), 94 deletions(-) diff --git a/README.md b/README.md index 82d16fc..3a3df39 100644 --- a/README.md +++ b/README.md @@ -37,21 +37,23 @@ 4. SV stats and histograms (Survivor) 5. Germline benchmarking of small variants - - Tools: - Happy - RTGtools -5. Germline benchmarking of SVs - - Tools: - Truvari - Svbenchmark - Wittyer: Only works with Truth files annotated with SVTYPE and SVLENGHT - -6. Somatic benchmarking of small variants - - Tools: - Happy - RTGtools - -7. Final report and comparisons + - Tools: + Happy + RTGtools +6. 
Germline benchmarking of SVs + + - Tools: + Truvari + Svbenchmark + Wittyer: Only works with Truth files annotated with SVTYPE and SVLENGHT + +7. Somatic benchmarking of small variants + + - Tools: + Happy + RTGtools + +8. Final report and comparisons ## Usage diff --git a/modules.json b/modules.json index 207c1a6..a888d8f 100644 --- a/modules.json +++ b/modules.json @@ -8,163 +8,117 @@ "bcftools/filter": { "branch": "master", "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/norm": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/query": { "branch": "master", "git_sha": "a5ba4d59c2b248c0379b0f8aeb4e7e754566cd1f", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/reheader": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/sort": { "branch": "master", "git_sha": "487d92367b4d7bb9f1ca694bf72736be90720b15", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/stats": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/view": { "branch": "master", "git_sha": "1013101da4252623fd7acf19cc581bae91d4f839", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "happy/happy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "happy/prepy": { "branch": "master", "git_sha": "01b55f288c25490236af1cd044c9bca17598ecfe", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "happy/sompy": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + 
"installed_by": ["modules"] }, "manta/convertinversion": { "branch": "master", "git_sha": "1e2b7fb7106852388610c0360d234b0829eb980e", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "rtgtools/format": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "rtgtools/vcfeval": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "survivor/filter": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "survivor/merge": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "survivor/stats": { "branch": "master", "git_sha": "398375d72766cac92c4ffcf6927cac2d65310cfd", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "svanalyzer/svbenchmark": { "branch": "master", "git_sha": "8a2dd9e84cde68ebba45fb2c602c312c999c02a0", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "svync": { "branch": "master", "git_sha": "3df1a372b844c98c31ec3446faac7c75e64518a3", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/bgzip": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/tabix": { "branch": "master", "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, 
"truvari/bench": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -173,26 +127,20 @@ "utils_nextflow_pipeline": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "92de218a329bfc9a9033116eb5f65fd270e72ba3", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfvalidation_plugin": { "branch": "master", "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From 041ef22b93942077396818277efb67582a13c4d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Thu, 20 Jun 2024 11:37:10 +0200 Subject: [PATCH 4/9] wittyer latest version added --- assets/samplesheet_HG002_hg37_full.csv | 6 + assets/samplesheet_HG002_hg38_full.csv | 5 + bin/merge_reports.py | 38 +- conf/modules.config | 23 +- conf/test.config | 3 +- conf/test_full.config | 1 - conf/test_hg19.config | 1 - conf/test_hg37.config | 1 - conf/test_hg37_full.config | 43 +++ conf/test_hg38.config | 3 +- modules.json | 5 + modules/local/bcftools_fill_from_fasta.nf | 62 ---- modules/nf-core/wittyer/Dockerfile | 12 + modules/nf-core/wittyer/README.md | 29 ++ modules/nf-core/wittyer/environment.yml | 7 + modules/nf-core/wittyer/main.nf | 70 ++++ modules/nf-core/wittyer/meta.yml | 60 ++++ modules/nf-core/wittyer/tests/main.nf.test | 145 ++++++++ .../nf-core/wittyer/tests/main.nf.test.snap | 329 ++++++++++++++++++ modules/nf-core/wittyer/tests/tags.yml | 2 + modules/nf-core/wittyer/tests/test.config | 8 + nextflow_schema.json | 5 - subworkflows/local/prepare_vcfs_test.nf | 14 - subworkflows/local/sv_germline_benchmark.nf | 30 +- 24 files changed, 800 insertions(+), 102 deletions(-) create 
mode 100644 assets/samplesheet_HG002_hg37_full.csv create mode 100644 assets/samplesheet_HG002_hg38_full.csv create mode 100644 conf/test_hg37_full.config delete mode 100644 modules/local/bcftools_fill_from_fasta.nf create mode 100644 modules/nf-core/wittyer/Dockerfile create mode 100644 modules/nf-core/wittyer/README.md create mode 100644 modules/nf-core/wittyer/environment.yml create mode 100644 modules/nf-core/wittyer/main.nf create mode 100644 modules/nf-core/wittyer/meta.yml create mode 100644 modules/nf-core/wittyer/tests/main.nf.test create mode 100644 modules/nf-core/wittyer/tests/main.nf.test.snap create mode 100644 modules/nf-core/wittyer/tests/tags.yml create mode 100644 modules/nf-core/wittyer/tests/test.config diff --git a/assets/samplesheet_HG002_hg37_full.csv b/assets/samplesheet_HG002_hg37_full.csv new file mode 100644 index 0000000..0a06623 --- /dev/null +++ b/assets/samplesheet_HG002_hg37_full.csv @@ -0,0 +1,6 @@ +test_vcf,caller,vartype,pctsize,pctseq,pctovl,refdist,chunksize,normshift,normdist,normsizediff,maxdist +/Users/w620-admin/Desktop/nf-core/dataset/hg37/dragen_paper/HG002_delly_SV_hg19.vcf.gz,delly,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000 +/Users/w620-admin/Desktop/nf-core/dataset/hg37/dragen_paper/HG002_DRAGEN_SV_hg19.vcf.gz,dragen,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000 +/Users/w620-admin/Desktop/nf-core/dataset/hg37/dragen_paper/HG002_lumpy_SV_hg19.sorted.vcf.gz,lumpy,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000 +/Users/w620-admin/Desktop/nf-core/dataset/hg37/dragen_paper/HG002_manta_SV_hg19_genotype.vcf.gz,manta,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000 + diff --git a/assets/samplesheet_HG002_hg38_full.csv b/assets/samplesheet_HG002_hg38_full.csv new file mode 100644 index 0000000..c1233f2 --- /dev/null +++ b/assets/samplesheet_HG002_hg38_full.csv @@ -0,0 +1,5 @@ +test_vcf,caller,vartype,pctsize,pctseq,pctovl,refdist,chunksize,normshift,normdist,normsizediff,maxdist 
+/Users/w620-admin/Desktop/nf-core/dataset/hg38/GIAB_GRCh38_SVs_06252018/ajtrio.lumpy.svtyper.HG002.md.sorted.recal.vcf.gz,lumpy,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000 +/Users/w620-admin/Desktop/nf-core/dataset/hg38/GIAB_GRCh38_SVs_06252018/manta.HG002.vcf.gz,manta,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000 +/Users/w620-admin/Desktop/nf-core/dataset/wittyer_test_cases/HG002.sv.with.corr.vcf.gz,unknown,sv,0.3,0,0,100000,100000,0.3,0.3,0.3,100000 + diff --git a/bin/merge_reports.py b/bin/merge_reports.py index 885f48b..5291ee3 100755 --- a/bin/merge_reports.py +++ b/bin/merge_reports.py @@ -11,7 +11,7 @@ def parse_args(args=None): Description = "Merges svbenchmark or truvari bench reports from multiple samples" - Epilog = "Example usage: python merge_reports.py file1 file2 file3 -o merged_table.csv -b truvari/svbenchmark/happy/sompy -v snv/indel -a germline/somatic " + Epilog = "Example usage: python merge_reports.py file1 file2 file3 -o merged_table.csv -b truvari/svbenchmark/wittyer/happy/sompy -v snv/indel -a germline/somatic " parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument("inputs", nargs="+", help="List of files to merge") @@ -98,6 +98,37 @@ def get_truvari_resuls(file_paths): return merged_df +def get_wittyer_resuls(file_paths): + # Initialize an empty DataFrame to store the merged data + merged_df = pd.DataFrame() + + # Iterate over each table file + for file in file_paths: + # Read the json into a DataFrame + filename = os.path.basename(file) + with open(file, 'r') as f: + data = pd.read_json(f) + + relevant_data = [] + for sample in data['PerSampleStats']: + for stats in sample['OverallStats']: + relevant_data.append({ + "Tool": filename.split(".")[0], + "StatsType": stats["StatsType"], + "TP_base": stats["TruthTpCount"], + "TP_comp": stats["QueryTpCount"], + "FP": stats["QueryFpCount"], + "FN": stats["TruthFnCount"], + "Precision": stats["Precision"], + "Recall": stats["Recall"], + "F1": stats["Fscore"]} + 
) + + df = pd.DataFrame(relevant_data) + merged_df = pd.concat([merged_df, df]) + + return merged_df + def get_rtgtools_resuls(file_paths): # Initialize an empty DataFrame to store the merged data merged_df = pd.DataFrame() @@ -189,13 +220,16 @@ def main(args=None): elif args.bench == "svbenchmark": summ_table = get_svbenchmark_resuls(args.inputs) + elif args.bench == "wittyer": + summ_table = get_wittyer_resuls(args.inputs) + elif args.bench == "rtgtools": summ_table = get_rtgtools_resuls(args.inputs) elif args.bench == "happy": summ_table = get_happy_resuls(args.inputs) else: - raise ValueError('Only truvari/svbenchmark/rtgtools/happy results can be merged for germline analysis!!') + raise ValueError('Only truvari/svbenchmark/wittyer/rtgtools/happy results can be merged for germline analysis!!') summ_table.reset_index(drop=True, inplace=True) summ_table.to_csv(args.output + ".summary.txt", index=False) diff --git a/conf/modules.config b/conf/modules.config index 1c0f352..b280cc7 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -33,18 +33,10 @@ process { enabled: false ] } - withName: "BCFTOOLS_FILL_FROM_FASTA" { - ext.prefix = { vcf.baseName - ".vcf" + ".fill"} - ext.args = {"--output-type z" } - publishDir = [ - path: { "${params.outdir}/test" }, - enabled: false - ] - } withName: "BCFTOOLS_DEDUP" { ext.prefix = { vcf.baseName - ".vcf" + ".dedup"} - ext.args = {"--output-type z --rm-du exact -c s" } + ext.args = {"--output-type z --rm-du exact -c w" } publishDir = [ path: { "${params.outdir}/test" }, enabled: false @@ -205,6 +197,16 @@ process { mode: params.publish_dir_mode ] } + withName: WITTYER { + ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} + ext.args = {"-em cts --pt 0.8 --bpd 100000"} + ext.when = { params.method.split(',').contains('wittyer') } + publishDir = [ + path: {"${params.outdir}/${meta.id}/wittyer_bench"}, + pattern: "*{.vcf.gz,tbi,json}", + mode: params.publish_dir_mode + ] + } withName: BAMSURGEON_EVALUATOR { 
ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} @@ -246,8 +248,7 @@ process { mode: params.publish_dir_mode ] } - - withName: TABIX_BGZIP_BENCH{ + withName: "TABIX_BGZIP*"{ ext.prefix = {input.toString() - ".vcf.gz"} } withName: SURVIVOR_MERGE { diff --git a/conf/test.config b/conf/test.config index cbaf8c3..f0d74e8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -28,8 +28,7 @@ params { // Processes analysis = 'germline' - method = 'happy,truvari,svanalyzer' // - similarity = 0 + method = 'happy,truvari,svanalyzer,wittyer' // preprocess = "normalization,deduplication,prepy" //variant_filtering = "include" // null, include, exclude //expression = 'FILTER="."' diff --git a/conf/test_full.config b/conf/test_full.config index ed7ebf3..52723f8 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -28,7 +28,6 @@ params { // Processes analysis = 'germline' method = 'happy,truvari,svanalyzer' // - similarity = 0 preprocess = "normalization,deduplication,prepy" //variant_filtering = "include" // null, include, exclude //expression = 'FILTER="."' diff --git a/conf/test_hg19.config b/conf/test_hg19.config index d7a89f6..c7c2e18 100644 --- a/conf/test_hg19.config +++ b/conf/test_hg19.config @@ -30,7 +30,6 @@ params { analysis = 'germline' //somatic method = 'truvari,svanalyzer' // --not working for now : wittyer, vcfdist - similarity = 0 // determines the sequence similarity level in benchmarking. standardization = true preprocess = "normalization, deduplication" //bnd_to_inv = true diff --git a/conf/test_hg37.config b/conf/test_hg37.config index d47de30..1fe7511 100644 --- a/conf/test_hg37.config +++ b/conf/test_hg37.config @@ -30,7 +30,6 @@ params { analysis = 'germline' //somatic method = 'truvari,svanalyzer' // --not working for now : wittyer, vcfdist - similarity = 0 // determines the sequence similarity level in benchmarking. 
standardization = true preprocess = "normalization, deduplication" //bnd_to_inv = true diff --git a/conf/test_hg37_full.config b/conf/test_hg37_full.config new file mode 100644 index 0000000..5eff4db --- /dev/null +++ b/conf/test_hg37_full.config @@ -0,0 +1,43 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/benchmark -profile -config conf/test_hg38.config --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 16 + max_memory = '100.GB' + max_time = '8.h' + + // Input data + // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets + // TODO nf-core: Give any required params for the test so that command line flags are not needed + input = 'assets/samplesheet_HG002_hg37_full.csv' + outdir = 'results' + + // Genome references + genome = 'GRCh37' + + // Processes + analysis = 'germline' //somatic + method = 'truvari,svanalyzer,wittyer' // --not working for now : vcfdist + preprocess = "normalization, deduplication, filter_contigs" + min_sv_size = 30 + //variant_filtering = "include" // null, include, exclude + //expression = 'FILTER="PASS"' + + sample = "HG002" // available samples: SEQC2, HG002 + truth_sv = "/Users/w620-admin/Desktop/nf-core/dataset/hg37/NIST_SV/HG002_SVs_Tier1_v0.6.vcf.gz" + high_conf_sv = "/Users/w620-admin/Desktop/nf-core/dataset/hg37/NIST_SV/HG002_SVs_Tier1_v0.6.bed" + +} diff --git a/conf/test_hg38.config b/conf/test_hg38.config index 2e84ac3..e09318c 100644 --- 
a/conf/test_hg38.config +++ b/conf/test_hg38.config @@ -31,14 +31,13 @@ params { // Processes analysis = 'germline' //somatic method = 'truvari,svanalyzer,wittyer' // --not working for now : vcfdist - similarity = 0 // determines the sequence similarity level in benchmarking. preprocess = "normalization, deduplication" min_sv_size = 30 //variant_filtering = "include" // null, include, exclude //expression = 'FILTER="PASS"' sample = "HG002" // available samples: SEQC2, HG002 - truth_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.vcf.gz" + truth_sv = "HG002_GRCh38_difficult_medical_gene_SV_benchmark_v0.01.chr21.annotated.vcf.gz" high_conf_sv = "https://raw.githubusercontent.com/kubranarci/benchmark_datasets/main/SV_testdata/hg38/truth/HG002_GRCh38_difficult_medical_gene_SV_benchmark_v01.ch21.bed" } diff --git a/modules.json b/modules.json index a888d8f..0ba9842 100644 --- a/modules.json +++ b/modules.json @@ -119,6 +119,11 @@ "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] + }, + "wittyer": { + "branch": "master", + "git_sha": "be84844983b332fa7d3d38dec06af6f953d83189", + "installed_by": ["modules"] } } }, diff --git a/modules/local/bcftools_fill_from_fasta.nf b/modules/local/bcftools_fill_from_fasta.nf deleted file mode 100644 index 8cdaedc..0000000 --- a/modules/local/bcftools_fill_from_fasta.nf +++ /dev/null @@ -1,62 +0,0 @@ -process BCFTOOLS_FILL_FROM_FASTA { - tag "$meta.id" - label 'process_medium' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': - 'biocontainers/bcftools:1.18--h8b25389_0' }" - - input: - tuple val(meta), path(vcf), path(tbi) - tuple val(meta2), path(fasta) - - output: - tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '--output-type z' - def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : - "vcf.gz" - - """ - bcftools +fill-from-fasta \\ - ${vcf} \\ - $args \\ - --output ${prefix}.${extension}\\ - --threads $task.cpus \\ - -- \\ - -c REF \\ - --fasta ${fasta} \\ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ - - stub: - def args = task.ext.args ?: '--output-type z' - def prefix = task.ext.prefix ?: "${meta.id}" - def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : - args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : - args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : - args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : - "vcf.gz" - """ - touch ${prefix}.${extension} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/wittyer/Dockerfile b/modules/nf-core/wittyer/Dockerfile new file mode 100644 index 0000000..3955699 --- /dev/null +++ b/modules/nf-core/wittyer/Dockerfile @@ -0,0 +1,12 @@ +FROM mcr.microsoft.com/dotnet/sdk:6.0 as builder +WORKDIR /src +COPY . /src +RUN cd Wittyer \ + && dotnet publish -f net6.0 -r linux-x64 -c Release -o /output \ + && chmod +x /output/Wittyer + +FROM mcr.microsoft.com/dotnet/runtime:6.0 +LABEL git_repository=https://git.illumina.com/DASTE/Ilmn.Das.App.Wittyer.git +WORKDIR /opt/Wittyer +RUN apt-get -y update && apt-get -y install tabix libunwind8 openssl procps +COPY --from=builder /output /opt/Wittyer diff --git a/modules/nf-core/wittyer/README.md b/modules/nf-core/wittyer/README.md new file mode 100644 index 0000000..8aa81a5 --- /dev/null +++ b/modules/nf-core/wittyer/README.md @@ -0,0 +1,29 @@ +# Updating the docker container and making a new module release + +witty.er is a commercial tool from Illumina. The container provided for the witty.er nf-core module is not provided nor supported by Illumina. Updating the witty.er versions in the container and pushing the update to Dockerhub needs to be done manually. +NOTE: an updated version of Dockerfile than the official one in github is used to build nf-core/wittyer:0.5.2, which is inserted here ./Dockerfile. + +1. Navigate to the witty.er github repository. - [witty.er](https://github.com/Illumina/witty.er) +2. Download the latest release. + ```bash + wget https://github.com/Illumina/witty.er/archive/refs/tags/.tar.gz + ``` +3. Uncompress the released package. + ```bash + tar -xvf .tar.gz + ``` +4. Change to the uncompressed directory. +5. Build docker image using provided Dockerfile. 
+ + ```bash + docker build -t wittyer: --platform linux/amd64 . + ``` + +6. Access rights are needed to push the container to the Dockerhub nfcore organization, please ask a core team member to do so. + + ```bash + docker tag wittyer: quay.io/nf-core/wittyer: + docker push quay.io/nf-core/wittyer: + ``` + +7. Make the image public. diff --git a/modules/nf-core/wittyer/environment.yml b/modules/nf-core/wittyer/environment.yml new file mode 100644 index 0000000..f8378df --- /dev/null +++ b/modules/nf-core/wittyer/environment.yml @@ -0,0 +1,7 @@ +name: wittyer +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - tabix diff --git a/modules/nf-core/wittyer/main.nf b/modules/nf-core/wittyer/main.nf new file mode 100644 index 0000000..1dda32e --- /dev/null +++ b/modules/nf-core/wittyer/main.nf @@ -0,0 +1,70 @@ +process WITTYER { + tag "$meta.id" + label 'process_single' + + container "nf-core/wittyer:0.5.2.0" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "WITTYER module does not support Conda. Please use Docker / Singularity / Podman instead." + } + + input: + tuple val(meta), path(query_vcf), path(truth_vcf), path(bed) + + output: + tuple val(meta), path("*.json") , emit: report + tuple val(meta), path("*.vcf.gz") , emit: bench_vcf + tuple val(meta), path("*.vcf.gz.tbi") , emit: bench_vcf_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions = bed ? "-b $bed" : "" + if ("$truth_vcf" == "${prefix}.vcf") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("$query_vcf" == "${prefix}.vcf") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ + // dotnet /opt/Wittyer/Wittyer.dll might need to be replaced with new docker image + """ + mkdir bench + + dotnet /opt/Wittyer/Wittyer.dll \\ + -t ${truth_vcf} \\ + -i ${query_vcf} \\ + -o bench \\ + ${regions} \\ + ${args} + + mv bench/Wittyer.Stats.json ${prefix}.json + mv bench/*.vcf.gz ${prefix}.vcf.gz + mv bench/*.vcf.gz.tbi ${prefix}.vcf.gz.tbi + + rm -rf bench + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wittyer: \$(dotnet /opt/Wittyer/Wittyer.dll --version |& sed '1!d ; s/witty.er //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$truth_vcf" == "${prefix}.vcf") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + if ("$query_vcf" == "${prefix}.vcf") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + """ + touch ${prefix}.json + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wittyer: \$(dotnet /opt/Wittyer/Wittyer.dll --version |& sed '1!d ; s/witty.er //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/wittyer/meta.yml b/modules/nf-core/wittyer/meta.yml new file mode 100644 index 0000000..3fbba86 --- /dev/null +++ b/modules/nf-core/wittyer/meta.yml @@ -0,0 +1,60 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: wittyer +description: A large variant benchmarking tool analogous to hap.py for small variants. 
+keywords: + - structural-variants + - benchmarking + - vcf +tools: + - wittyer: + description: "Illumina tool for large variant benchmarking" + homepage: "https://github.com/Illumina/witty.er" + documentation: "https://github.com/Illumina/witty.er" + tool_dev_url: "https://github.com/Illumina/witty.er" + licence: ["BSD-2"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - query_vcf: + type: file + description: A VCF with called variants to benchmark against the standard + pattern: "*.{vcf}" + + - truth_vcf: + type: file + description: A standard VCF to compare against + pattern: "*.{vcf}" + + - bed: + type: file + description: A BED file specifying regions to be included in the analysis (optional) + pattern: "*.bed" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - report: + type: file + description: Detailed per-sample-pair, per-svtype, per-bin stats + pattern: "*.json" + - bench_vcf: + type: file + description: Updated query and truth entries merged into one file + pattern: "*.vcf.gz" + - bench_vcf_tbi: + type: file + description: Index of merged query and truth entries VCF file + pattern: "*.vcf.gz.tbi" +authors: + - "@famosab" +maintainers: + - "@famosab" diff --git a/modules/nf-core/wittyer/tests/main.nf.test b/modules/nf-core/wittyer/tests/main.nf.test new file mode 100644 index 0000000..75bf599 --- /dev/null +++ b/modules/nf-core/wittyer/tests/main.nf.test @@ -0,0 +1,145 @@ +nextflow_process { + + name "Test Process WITTYER" + script "../main.nf" + process "WITTYER" + + tag "modules" + tag "modules_nfcore" + tag "wittyer" + tag "tabix/bgzip" + + test("human - simulatedSV - vcf_gz") { + config "./test.config" + + setup { + run('TABIX_BGZIP', alias: 'TABIX_BGZIP_1') { + script 
"../../../nf-core/tabix/bgzip" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['simulated_sv'], checkIfExists: true) + ] + """ + } + } + + run('TABIX_BGZIP', alias: 'TABIX_BGZIP_2') { + script "../../../nf-core/tabix/bgzip" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['simulated_sv2'], checkIfExists: true) + ] + """ + } + } + } + when { + process { + """ + input[0] = TABIX_BGZIP_1.out.output.join(TABIX_BGZIP_2.out.output).map{meta, vcf1, vcf2 -> tuple(meta, vcf1, vcf2, [])} + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.bench_vcf).match("bench_vcf") }, + { assert snapshot(process.out.bench_vcf_tbi).match("bench_vcf_tbi") }, + { assert snapshot(process.out.report).match("report") }, + { assert snapshot(process.out.version).match("version") } + ) + } + + } + test("human - simulatedSV - vcf_gz - bed") { + config "./test.config" + + setup { + run('TABIX_BGZIP', alias: 'TABIX_BGZIP_1') { + script "../../../nf-core/tabix/bgzip" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['simulated_sv'], checkIfExists: true) + ] + """ + } + } + + run('TABIX_BGZIP', alias: 'TABIX_BGZIP_2') { + script "../../../nf-core/tabix/bgzip" + + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['illumina']['simulated_sv2'], checkIfExists: true) + ] + """ + } + } + } + when { + process { + """ + input[0] = TABIX_BGZIP_1.out.output + .join(TABIX_BGZIP_2.out.output) + .map{meta, vcf1, vcf2 -> + tuple(meta, vcf1, vcf2, + [file(params.test_data['homo_sapiens']['genome']['genome_21_multi_interval_bed'], checkIfExists: true)]) + } + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert 
snapshot(process.out.bench_vcf).match("bench2_vcf") }, + { assert snapshot(process.out.bench_vcf_tbi).match("bench2_vcf_tbi") }, + { assert snapshot(process.out.report).match("report2") }, + { assert snapshot(process.out.version).match("version2") } + ) + } + + } + + test("human - simulatedSV - vcf_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test_stub', single_end:false ], // meta map + file(params.test_data['homo_sapiens']['illumina']['simulated_sv'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['simulated_sv2'], checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot(process.out.version).match("stub_version") } + ) + } + + } + +} diff --git a/modules/nf-core/wittyer/tests/main.nf.test.snap b/modules/nf-core/wittyer/tests/main.nf.test.snap new file mode 100644 index 0000000..4fb5e48 --- /dev/null +++ b/modules/nf-core/wittyer/tests/main.nf.test.snap @@ -0,0 +1,329 @@ +{ + "human - simulatedSV - vcf_gz - bed": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.json:md5,8dfd8086fcb0ccc677fb31ea1f3dfb67" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,cc98541af04597aca412190ef2e30ad3" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,7edea8651ab3a419e132c9b0c9a5e18c" + ] + ], + "3": [ + "versions.yml:md5,0db2ca1c465a92f9352d4cfc4e5082d7" + ], + "bench_vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,cc98541af04597aca412190ef2e30ad3" + ] + ], + "bench_vcf_tbi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,7edea8651ab3a419e132c9b0c9a5e18c" + ] + ], + "report": [ + [ + { + "id": "test" + }, + "test.json:md5,8dfd8086fcb0ccc677fb31ea1f3dfb67" + ] + ], + "versions": [ + "versions.yml:md5,0db2ca1c465a92f9352d4cfc4e5082d7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-06-19T14:08:56.301601" + }, + "bench2_vcf": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,cc98541af04597aca412190ef2e30ad3" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-06-19T14:13:49.114615" + }, + "report2": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.json:md5,8dfd8086fcb0ccc677fb31ea1f3dfb67" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-06-19T14:13:49.159771" + }, + "stub_version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T09:36:57.223103062" + }, + "human - simulatedSV - vcf_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.json:md5,d1e6eccb85b0f4b5bf82336bbf1f8ce3" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,c4d57ea31aefabe0b90bf971fb32ccf5" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,0a7e5b371ec0103c945f5c15b15a952b" + ] + ], + "3": [ + "versions.yml:md5,0db2ca1c465a92f9352d4cfc4e5082d7" + ], + "bench_vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,c4d57ea31aefabe0b90bf971fb32ccf5" + ] + ], + "bench_vcf_tbi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,0a7e5b371ec0103c945f5c15b15a952b" + ] + ], + "report": [ + [ + { + "id": "test" + }, + "test.json:md5,d1e6eccb85b0f4b5bf82336bbf1f8ce3" + ] + ], + "versions": [ + "versions.yml:md5,0db2ca1c465a92f9352d4cfc4e5082d7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-06-19T14:01:43.19118" + }, + "bench2_vcf_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,7edea8651ab3a419e132c9b0c9a5e18c" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-06-19T14:13:49.14488" + }, + "version2": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": 
"2024-06-19T14:13:49.167498" + }, + "bench_vcf": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,c4d57ea31aefabe0b90bf971fb32ccf5" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-06-19T14:01:43.23893" + }, + "bench_vcf_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,0a7e5b371ec0103c945f5c15b15a952b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-06-19T14:01:43.278491" + }, + "report": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.json:md5,d1e6eccb85b0f4b5bf82336bbf1f8ce3" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-06-19T14:01:43.317753" + }, + "version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T09:36:47.542666285" + }, + "human - simulatedSV - vcf_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,0db2ca1c465a92f9352d4cfc4e5082d7" + ], + "bench_vcf": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "bench_vcf_tbi": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "report": [ + [ + { + "id": "test_stub", + "single_end": false + }, + "test_stub.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,0db2ca1c465a92f9352d4cfc4e5082d7" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"23.10.1" + }, + "timestamp": "2024-06-19T14:01:49.799446" + } +} \ No newline at end of file diff --git a/modules/nf-core/wittyer/tests/tags.yml b/modules/nf-core/wittyer/tests/tags.yml new file mode 100644 index 0000000..177db94 --- /dev/null +++ b/modules/nf-core/wittyer/tests/tags.yml @@ -0,0 +1,2 @@ +wittyer: + - "modules/nf-core/wittyer/**" diff --git a/modules/nf-core/wittyer/tests/test.config b/modules/nf-core/wittyer/tests/test.config new file mode 100644 index 0000000..afd097c --- /dev/null +++ b/modules/nf-core/wittyer/tests/test.config @@ -0,0 +1,8 @@ +process{ + withName: TABIX_BGZIP_1 { + ext.prefix = {input.toString() - ".vcf.gz"} + } + withName: TABIX_BGZIP_2 { + ext.prefix = {input.toString() - ".vcf.gz"} + } +} diff --git a/nextflow_schema.json b/nextflow_schema.json index 737195b..379c0b4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -107,11 +107,6 @@ "description": "", "fa_icon": "fas fa-folder-open" }, - "similarity": { - "type": "integer", - "description": "", - "fa_icon": "fas fa-folder-open" - }, "method": { "type": "string", "description": "", diff --git a/subworkflows/local/prepare_vcfs_test.nf b/subworkflows/local/prepare_vcfs_test.nf index fe1a875..9e28815 100644 --- a/subworkflows/local/prepare_vcfs_test.nf +++ b/subworkflows/local/prepare_vcfs_test.nf @@ -10,7 +10,6 @@ include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' include { HAPPY_PREPY } from '../../modules/nf-core/happy/prepy/main' addParams( options: params.options ) include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm' addParams( options: params.options ) include { TABIX_TABIX as TABIX_TABIX_1 } from '../../modules/nf-core/tabix/tabix' addParams( options: params.options ) -include { TABIX_TABIX as TABIX_TABIX_2 } from '../../modules/nf-core/tabix/tabix' addParams( options: params.options ) include { TABIX_TABIX as TABIX_TABIX_3 } from '../../modules/nf-core/tabix/tabix' addParams( options: params.options ) include { 
TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_1 } from '../../modules/nf-core/tabix/bgziptabix' addParams( options: params.options ) include { TABIX_BGZIPTABIX as TABIX_BGZIPTABIX_2 } from '../../modules/nf-core/tabix/bgziptabix' addParams( options: params.options ) @@ -20,7 +19,6 @@ include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_CONTIGS } from '../../modules/nf-core/b include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_SNV } from '../../modules/nf-core/bcftools/view' addParams( options: params.options ) include { BCFTOOLS_VIEW as BCFTOOLS_VIEW_INDEL } from '../../modules/nf-core/bcftools/view' addParams( options: params.options ) include { BCFTOOLS_REHEADER as BCFTOOLS_REHEADER_TEST } from '../../modules/nf-core/bcftools/reheader' addParams( options: params.options ) -include { BCFTOOLS_FILL_FROM_FASTA } from '../../modules/local/bcftools_fill_from_fasta.nf' addParams( options: params.options ) workflow PREPARE_VCFS_TEST { @@ -131,18 +129,6 @@ workflow PREPARE_VCFS_TEST { } - BCFTOOLS_FILL_FROM_FASTA( - vcf_ch, - fasta - ) - - TABIX_TABIX_2( - BCFTOOLS_FILL_FROM_FASTA.out.vcf - ) - - BCFTOOLS_FILL_FROM_FASTA.out.vcf.join(TABIX_TABIX_2.out.tbi, by:0) - .set{vcf_ch} - // somatic spesific preperations vcf_ch.branch{ sv: it[0].vartype == "sv" diff --git a/subworkflows/local/sv_germline_benchmark.nf b/subworkflows/local/sv_germline_benchmark.nf index 0377eae..39806b4 100644 --- a/subworkflows/local/sv_germline_benchmark.nf +++ b/subworkflows/local/sv_germline_benchmark.nf @@ -4,9 +4,12 @@ params.options = [:] -include { TRUVARI_PHAB } from '../../modules/local/truvari_phab' addParams( options: params.options ) +include { TRUVARI_PHAB } from '../../modules/local/truvari_phab' addParams( options: params.options ) include { TRUVARI_BENCH } from '../../modules/nf-core/truvari/bench' addParams( options: params.options ) include { SVANALYZER_SVBENCHMARK } from '../../modules/nf-core/svanalyzer/svbenchmark' addParams( options: params.options ) +include { WITTYER } from '../../modules/nf-core/wittyer' 
addParams( options: params.options ) +include { TABIX_BGZIP as TABIX_BGZIP_QUERY } from '../../modules/nf-core/tabix/bgzip' addParams( options: params.options ) +include { TABIX_BGZIP as TABIX_BGZIP_TRUTH } from '../../modules/nf-core/tabix/bgzip' addParams( options: params.options ) workflow SV_GERMLINE_BENCHMARK { take: @@ -103,6 +106,31 @@ workflow SV_GERMLINE_BENCHMARK { tagged_variants = tagged_variants.mix(vcf_fn) tagged_variants = tagged_variants.mix(vcf_fp) + } + if (params.method.contains('wittyer')){ + + TABIX_BGZIP_QUERY( + input_ch.map{it -> tuple(it[0], it[1])} + ) + TABIX_BGZIP_TRUTH( + input_ch.map{it -> tuple(it[0], it[3])} + ) + bed = input_ch.map{it -> tuple(it[0], it[5])} + + // + // MODULE: WITTYER + // + WITTYER( + TABIX_BGZIP_QUERY.out.output.join(TABIX_BGZIP_TRUTH.out.output).join(bed) + ) + versions = versions.mix(WITTYER.out.versions) + + WITTYER.out.report + .map { meta, file -> tuple([vartype: meta.vartype] + [benchmark_tool: "wittyer"], file) } + .groupTuple() + .set{ report} + summary_reports = summary_reports.mix(report) + } emit: From a95d50c343bd749e61f5533bb3374c9b929e5d44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Thu, 20 Jun 2024 16:17:27 +0200 Subject: [PATCH 5/9] changes in configs --- assets/schema_input.json | 33 ++++++++++++--- conf/modules.config | 4 +- nextflow.config | 16 +------ nextflow_schema.json | 91 ++++++++++++++++++---------------------- 4 files changed, 72 insertions(+), 72 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 7911f34..6bf8add 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -91,11 +91,34 @@ "meta": ["chunksize"], "default": 500 }, - "pick": { - "type": "string", - "default": "ac", - "enum": ["single", "ac", "multi"], - "meta": ["pick"] + "bpDistance": { + "type": "integer", + "errorMessage": "bpDistance is a wittyer parameter. Upper bound of boundary distance when comparing truth and query. 
By default it is 500bp for all types except for Insertions, which are 100bp.Please note that if you set this value in the command line, it overrides all the defaults, so Insertions and other types will have the same bpd.", + "meta": ["bpDistance"], + "default": 500 + }, + "percentThreshold": { + "type": "number", + "errorMessage": "percentThreshold is a wittyer parameter. This is used for percentage thresholding. For CopyNumberTandemRepeats, this determines how large of a RepeatUnitCount (RUC) threshold to use for large tandem repeats. For all other SVs, in order to match between query and truth, the distance between boundaries should be within a number thats proportional to total SV (default 0.25)", + "meta": ["percentThreshold"], + "default": 0.25 + }, + "absoluteThreshold": { + "type": "integer", + "errorMessage": "absoluteThreshold is a wittyer parameter. This is used for absolute thresholding. For CopyNumberTandemRepeats, this determines how large of a RepeatUnitCount (RUC) threshold to use. For all other SVs, this is the upper bound of boundary distance when comparing truth and query. (default 10000)", + "meta": ["absoluteThreshold"], + "default": 10000 + }, + "maxMatches": { + "type": "integer", + "errorMessage": "maxMatches is a wittyer parameter. This is used for matching behaviour. Negative value means to match any number (for large SVs it is not recommended).", + "meta": ["maxMatches"], + "default": 10 + }, + "similarityThreshold": { + "type": "number", + "errorMessage": "similarityThreshold is a wittyer parameter. 
This is used for sequence similarity thresholding.", + "meta": ["similarityThreshold"] } }, "required": ["test_vcf", "caller", "vartype"] diff --git a/conf/modules.config b/conf/modules.config index b280cc7..831fa53 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -179,7 +179,7 @@ process { } withName: "TRUVARI_BENCH" { ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} - ext.args = {"--pctsize ${meta.pctsize} --pctovl ${meta.pctovl} --pctseq ${meta.pctseq} --refdist ${meta.refdist} --pick ${meta.pick} --chunksize ${meta.chunksize}"} + ext.args = {"--pctsize ${meta.pctsize} --pctovl ${meta.pctovl} --pctseq ${meta.pctseq} --refdist ${meta.refdist} --chunksize ${meta.chunksize}"} ext.when = { params.method.split(',').contains('truvari') } publishDir = [ path: {"${params.outdir}/${meta.id}/truvari_bench"}, @@ -199,7 +199,7 @@ process { } withName: WITTYER { ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} - ext.args = {"-em cts --pt 0.8 --bpd 100000"} + ext.args = {"-em cts --ef "" --pt ${meta.percentThreshold} --pd ${meta.percentDistance} --at ${meta.absoluteThreshold} --bpd ${meta.bpDistance} --mm ${meta.maxMatches}"} ext.when = { params.method.split(',').contains('wittyer') } publishDir = [ path: {"${params.outdir}/${meta.id}/wittyer_bench"}, diff --git a/nextflow.config b/nextflow.config index a966a9a..ab30cee 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,7 @@ params { // Input options input = null outdir = "results" + sample = null // truth vcf and high confidence bed files // Small variant benchmarking: @@ -32,7 +33,6 @@ params { // deduplication removes one of the variants in the same position preprocess = "" //normalization,deduplication,prepy,filter_contigs sv_standardization = "" // harmonize,standardization,dup_to_ins,bnd_to_inv,gridss_annotate - similarity = null // Benchmarking method method = 'truvari,svanalyzer,happy,rtgtools,wittyer' // --not working for now : vcfdist @@ -50,20 +50,6 @@ params { 
expression = null analysis = "germline" - //truvari benchmark parameters - pctsize = 0.7 - pctseq = 0.7 // has to be 0 for unresolved variants to be benchmarked or when --dup-to-ins unsed - pctovl = 0 - refdist = 500 - chunksize = 500 - pick = "single" - - //svanalyzer benchmark parameters - normshift = 0.2 - normdist = 0.2 - normsizediff = 0.2 - maxdist = 100000 - // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' diff --git a/nextflow_schema.json b/nextflow_schema.json index 379c0b4..4aced2b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -29,6 +29,11 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, + "sample": { + "type": "string", + "description": "Sample id: HG002/SEQC2", + "fa_icon": "fas fa-folder-open" + }, "high_conf_small": { "type": "string", "format": "file-path", @@ -47,6 +52,42 @@ "help_text": "SMALL: Truth or golden set VCF file, to be used for comparisons", "fa_icon": "fas fa-file-csv" }, + "high_conf_snv": { + "type": "string", + "format": "file-path", + "exists": false, + "pattern": "^\\S+\\.(bed)?(\\.gz)?$", + "description": "SNVs: Path to the high confidence BED files.", + "help_text": "SNVs: High confidence BED files", + "fa_icon": "fas fa-file-csv" + }, + "truth_snv": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.(vcf)?(\\.gz)?$", + "description": "SNVs: Path to the golden set VCF files.", + "help_text": "SNVs: Truth or golden set VCF file, to be used for comparisons", + "fa_icon": "fas fa-file-csv" + }, + "high_conf_indel": { + "type": "string", + "format": "file-path", + "exists": false, + "pattern": "^\\S+\\.(bed)?(\\.gz)?$", + "description": "INDELs: Path to the high confidence BED files.", + "help_text": "INDELs: High confidence BED files", + "fa_icon": "fas fa-file-csv" + }, + "truth_indel": { + "type": "string", + "format": 
"file-path", + "exists": true, + "pattern": "^\\S+\\.(vcf)?(\\.gz)?$", + "description": "INDELs: Path to the golden set VCF files.", + "help_text": "INDELs: Truth or golden set VCF file, to be used for comparisons", + "fa_icon": "fas fa-file-csv" + }, "high_conf_sv": { "type": "string", "format": "file-path", @@ -132,56 +173,6 @@ "description": "Minimum number of read supporting variants to benchmark, Use, -1 to disable , Default:-1", "fa_icon": "fas fa-folder-open" }, - "pctsize": { - "type": "number", - "description": "TRUVARI PARAMETER. Ratio of min(base_size, comp_size)/max(base_size, comp_size)", - "fa_icon": "fas fa-folder-open" - }, - "refdist": { - "type": "integer", - "description": "TRUVARI PARAMETER. Maximum distance comparison calls must be within from base call's start/end", - "fa_icon": "fas fa-folder-open" - }, - "chunksize": { - "type": "integer", - "description": "TRUVARI PARAMETER.", - "fa_icon": "fas fa-folder-open" - }, - "pctseq": { - "type": "number", - "description": "TRUVARI PARAMETER. Edit distance ratio between the REF/ALT haplotype sequences of base and comparison call.", - "fa_icon": "fas fa-folder-open" - }, - "pctovl": { - "type": "number", - "description": "TRUVARI PARAMETER. Ratio of two calls' (overlapping bases)/(longest span).", - "fa_icon": "fas fa-folder-open" - }, - "pick": { - "type": "string", - "description": "TRUVARI PARAMETER.How many matches a variant is allowed to participate in is controlled: single,ac,multi", - "fa_icon": "fas fa-folder-open" - }, - "normshift": { - "type": "number", - "description": "SVANALYZER PARAMETER. Disallow matches if alignments between alternate alleles have normalized shift greater than normshift (default 0.2) ", - "fa_icon": "fas fa-folder-open" - }, - "normdist": { - "type": "number", - "description": "SVANALYZER PARAMETER. 
Disallow matches if alternate alleles have normalized edit distance greater than normdist (default 0.2)", - "fa_icon": "fas fa-folder-open" - }, - "normsizediff": { - "type": "number", - "description": "SVANALYZER PARAMETER. Disallow matches if alternate alleles have normalized size difference greater than normsizediff (default 0.2) ", - "fa_icon": "fas fa-folder-open" - }, - "maxdist": { - "type": "integer", - "description": "SVANALYZER PARAMETER. Disallow matches if positions of two variants are more than maxdist bases from each other (default 100,000).", - "fa_icon": "fas fa-folder-open" - }, "variant_filtering": { "type": "string", "description": "Use either exclude or include to enable variant filtering using bcftools expressions, Default:null", From daba7b883a3b70d4c0fa1a14c35676a423bbc64f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Thu, 20 Jun 2024 16:25:13 +0200 Subject: [PATCH 6/9] missing params --- nextflow.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nextflow.config b/nextflow.config index ab30cee..954b1b7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -19,6 +19,10 @@ params { // Small variant benchmarking: truth_small = null high_conf_small = null + truth_snv = null + high_conf_snv = null + truth_indel = null + high_conf_indel = null // Structural variant benchmarking truth_sv = null high_conf_sv = null From 0200d39355694cf699a43f616fe8065430bba421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Thu, 20 Jun 2024 15:11:07 +0000 Subject: [PATCH 7/9] filter changed --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 831fa53..26257de 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -199,7 +199,7 @@ process { } withName: WITTYER { ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} - ext.args = {"-em cts --ef "" --pt ${meta.percentThreshold} --pd ${meta.percentDistance} --at 
${meta.absoluteThreshold} --bpd ${meta.bpDistance} --mm ${meta.maxMatches}"} + ext.args = {"-em cts --ef [] --pt ${meta.percentThreshold} --pd ${meta.percentDistance} --at ${meta.absoluteThreshold} --bpd ${meta.bpDistance} --mm ${meta.maxMatches}"} ext.when = { params.method.split(',').contains('wittyer') } publishDir = [ path: {"${params.outdir}/${meta.id}/wittyer_bench"}, From 75fc70f51a316443c7f9ee5b56da56e4a2545143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Fri, 21 Jun 2024 06:30:34 +0000 Subject: [PATCH 8/9] wittyer params --- assets/schema_input.json | 3 ++- conf/modules.config | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index 6bf8add..d5d2b67 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -118,7 +118,8 @@ "similarityThreshold": { "type": "number", "errorMessage": "similarityThreshold is a wittyer parameter. This is used for sequence similarity thresholding.", - "meta": ["similarityThreshold"] + "meta": ["similarityThreshold"], + "default": 0.7 } }, "required": ["test_vcf", "caller", "vartype"] diff --git a/conf/modules.config b/conf/modules.config index 26257de..8b31d49 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -199,7 +199,7 @@ process { } withName: WITTYER { ext.prefix = {"${meta.id}.${params.sample}.${meta.vartype}"} - ext.args = {"-em cts --ef [] --pt ${meta.percentThreshold} --pd ${meta.percentDistance} --at ${meta.absoluteThreshold} --bpd ${meta.bpDistance} --mm ${meta.maxMatches}"} + ext.args = {"-em cts --ef [] --pt ${meta.percentThreshold} --at ${meta.absoluteThreshold} --bpd ${meta.bpDistance} --mm ${meta.maxMatches} --st ${meta.similarityThreshold}"} ext.when = { params.method.split(',').contains('wittyer') } publishDir = [ path: {"${params.outdir}/${meta.id}/wittyer_bench"}, From aca8e59313ead66f633a3a49bd1adc260436341e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=BCbra=20Narc=C4=B1?= Date: Mon, 
24 Jun 2024 08:14:57 +0000 Subject: [PATCH 9/9] schema input --- assets/schema_input.json | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index d5d2b67..c0a22d0 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -95,19 +95,23 @@ "type": "integer", "errorMessage": "bpDistance is a wittyer parameter. Upper bound of boundary distance when comparing truth and query. By default it is 500bp for all types except for Insertions, which are 100bp.Please note that if you set this value in the command line, it overrides all the defaults, so Insertions and other types will have the same bpd.", "meta": ["bpDistance"], - "default": 500 + "default": 500, + "minimum": 0 }, "percentThreshold": { "type": "number", "errorMessage": "percentThreshold is a wittyer parameter. This is used for percentage thresholding. For CopyNumberTandemRepeats, this determines how large of a RepeatUnitCount (RUC) threshold to use for large tandem repeats. For all other SVs, in order to match between query and truth, the distance between boundaries should be within a number thats proportional to total SV (default 0.25)", "meta": ["percentThreshold"], - "default": 0.25 + "default": 0.25, + "minimum": 0, + "maximum": 1 }, "absoluteThreshold": { "type": "integer", "errorMessage": "absoluteThreshold is a wittyer parameter. This is used for absolute thresholding. For CopyNumberTandemRepeats, this determines how large of a RepeatUnitCount (RUC) threshold to use. For all other SVs, this is the upper bound of boundary distance when comparing truth and query. (default 10000)", "meta": ["absoluteThreshold"], - "default": 10000 + "default": 10000, + "minimum": 0 }, "maxMatches": { "type": "integer", @@ -119,7 +123,9 @@ "type": "number", "errorMessage": "similarityThreshold is a wittyer parameter. 
This is used for sequence similarity thresholding.", "meta": ["similarityThreshold"], - "default": 0.7 + "default": 0.7, + "minimum": 0, + "maximum": 1 } }, "required": ["test_vcf", "caller", "vartype"]